Skip to content

API Reference

Auto-generated reference for every public CDS module. Each entry below is rendered from the module's own docstrings by mkdocstrings.

Core Data Models

The shared Domain, Hypothesis, and HypothesisStatus types used throughout CDS — the foundation the hypothesis engine builds on.

cds.core

Core data models for CDS.

Classes

Domain

Bases: str, Enum

Broad scientific domains supported by CDS.

Source code in src\cds\core\models.py
class Domain(str, Enum):
    """Broad scientific domains supported by CDS.

    The ``str`` mixin makes every member a real string, so a member compares
    equal to its lowercase value (``Domain.PHYSICS == "physics"``) and
    ``Domain("physics")`` looks a member up from that value.
    """

    PHYSICS = "physics"
    COSMOLOGY = "cosmology"
    MATHEMATICS = "mathematics"
    BIOLOGY = "biology"
    CHEMISTRY = "chemistry"
    # Default domain of the generators, also used when a domain string is not recognised.
    GENERAL_SCIENCE = "general_science"

Hypothesis

Bases: BaseModel

A scientific hypothesis with metadata and traceability.

Source code in src\cds\core\models.py
class Hypothesis(BaseModel):
    """A scientific hypothesis with metadata and traceability.

    Only ``id``, ``statement``, ``domain`` and ``research_question`` are
    required; every other field has a default. ``confidence`` is constrained
    to the closed interval [0, 1] and ``created_at`` defaults to the current
    UTC time.
    """

    id: str = Field(..., description="Unique identifier")
    statement: str = Field(..., description="The core hypothesis statement")
    domain: Domain
    research_question: str
    rationale: str | None = None
    assumptions: list[str] = Field(default_factory=list)
    predictions: list[str] = Field(default_factory=list)
    status: HypothesisStatus = HypothesisStatus.NEW
    confidence: float = Field(0.5, ge=0.0, le=1.0)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    tags: list[str] = Field(default_factory=list)
    sources: list[str] = Field(default_factory=list, description="References or retrieval sources")
    metadata: dict[str, str] = Field(default_factory=dict)

    def to_markdown(self) -> str:
        """Return this hypothesis formatted as a Markdown document.

        The header (id, statement, domain, research question, status and
        confidence) is always emitted. The rationale, assumptions, predictions
        and tags sections appear only when the corresponding field is
        non-empty. ``sources``, ``metadata`` and ``created_at`` are not
        rendered.
        """
        doc: list[str] = []

        # Fixed header block.
        doc.append(f"# Hypothesis: {self.id}")
        doc.append("")
        doc.append(f"**Statement**: {self.statement}")
        doc.append("")
        doc.append(f"**Domain**: {self.domain.value}")
        doc.append(f"**Research Question**: {self.research_question}")
        doc.append(f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}")
        doc.append("")

        # Optional sections, each followed by a blank separator line.
        if self.rationale:
            doc.extend(("## Rationale", self.rationale, ""))
        if self.assumptions:
            doc.append("## Assumptions")
            doc.extend(f"- {item}" for item in self.assumptions)
            doc.append("")
        if self.predictions:
            doc.append("## Predictions / Testable Consequences")
            doc.extend(f"- {item}" for item in self.predictions)
            doc.append("")
        if self.tags:
            doc.append("**Tags**: " + ", ".join(self.tags))

        return "\n".join(doc)
Methods:
to_markdown
to_markdown() -> str

Render this hypothesis as a structured Markdown document.

Source code in src\cds\core\models.py
def to_markdown(self) -> str:
    """Return this hypothesis formatted as a Markdown document.

    The header (id, statement, domain, research question, status and
    confidence) is always emitted. The rationale, assumptions, predictions and
    tags sections appear only when the corresponding field is non-empty.
    """
    doc: list[str] = []

    # Fixed header block.
    doc.append(f"# Hypothesis: {self.id}")
    doc.append("")
    doc.append(f"**Statement**: {self.statement}")
    doc.append("")
    doc.append(f"**Domain**: {self.domain.value}")
    doc.append(f"**Research Question**: {self.research_question}")
    doc.append(f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}")
    doc.append("")

    # Optional sections, each followed by a blank separator line.
    if self.rationale:
        doc.extend(("## Rationale", self.rationale, ""))
    if self.assumptions:
        doc.append("## Assumptions")
        doc.extend(f"- {item}" for item in self.assumptions)
        doc.append("")
    if self.predictions:
        doc.append("## Predictions / Testable Consequences")
        doc.extend(f"- {item}" for item in self.predictions)
        doc.append("")
    if self.tags:
        doc.append("**Tags**: " + ", ".join(self.tags))

    return "\n".join(doc)

HypothesisStatus

Bases: str, Enum

Lifecycle states for a Hypothesis.

Source code in src\cds\core\models.py
class HypothesisStatus(str, Enum):
    """Lifecycle states for a Hypothesis.

    Members are also plain strings (``str`` mixin), so each one compares equal
    to its lowercase value.
    """

    NEW = "new"  # default status of a freshly created Hypothesis
    REFINED = "refined"
    CRITIQUED = "critiqued"
    TESTABLE = "testable"
    VALIDATED = "validated"  # set by HypothesisEvaluator after a significant result
    REJECTED = "rejected"  # set by HypothesisEvaluator after a non-significant result
    ARCHIVED = "archived"

Hypothesis Generation

The cognitive-discovery centrepiece: structured hypothesis generation from a research question, plus a statistical evaluator.

cds.hypothesis

Hypothesis generation and evaluation module for Cognitive Discovery.

Provides tools to generate structured scientific hypotheses from research questions. Includes prompt templates and an offline generator for immediate use, plus a clear Protocol for supplying custom generator implementations for specialized research needs.

The focus is on making hypotheses falsifiable, with explicit assumptions, predictions, and confidence estimates.

Example

from cds.hypothesis import generate_hypotheses

hypos = generate_hypotheses("Why do we observe the Hubble tension?", domain="cosmology", n=3)

Classes

Domain

Bases: str, Enum

Broad scientific domains supported by CDS.

Source code in src\cds\core\models.py
class Domain(str, Enum):
    """Broad scientific domains supported by CDS.

    The ``str`` mixin makes every member a real string, so a member compares
    equal to its lowercase value (``Domain.PHYSICS == "physics"``) and
    ``Domain("physics")`` looks a member up from that value.
    """

    PHYSICS = "physics"
    COSMOLOGY = "cosmology"
    MATHEMATICS = "mathematics"
    BIOLOGY = "biology"
    CHEMISTRY = "chemistry"
    # Default domain of the generators, also used when a domain string is not recognised.
    GENERAL_SCIENCE = "general_science"

Hypothesis

Bases: BaseModel

A scientific hypothesis with metadata and traceability.

Source code in src\cds\core\models.py
class Hypothesis(BaseModel):
    """A scientific hypothesis with metadata and traceability.

    Only ``id``, ``statement``, ``domain`` and ``research_question`` are
    required; every other field has a default. ``confidence`` is constrained
    to the closed interval [0, 1] and ``created_at`` defaults to the current
    UTC time.
    """

    id: str = Field(..., description="Unique identifier")
    statement: str = Field(..., description="The core hypothesis statement")
    domain: Domain
    research_question: str
    rationale: str | None = None
    assumptions: list[str] = Field(default_factory=list)
    predictions: list[str] = Field(default_factory=list)
    status: HypothesisStatus = HypothesisStatus.NEW
    confidence: float = Field(0.5, ge=0.0, le=1.0)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    tags: list[str] = Field(default_factory=list)
    sources: list[str] = Field(default_factory=list, description="References or retrieval sources")
    metadata: dict[str, str] = Field(default_factory=dict)

    def to_markdown(self) -> str:
        """Return this hypothesis formatted as a Markdown document.

        The header (id, statement, domain, research question, status and
        confidence) is always emitted. The rationale, assumptions, predictions
        and tags sections appear only when the corresponding field is
        non-empty. ``sources``, ``metadata`` and ``created_at`` are not
        rendered.
        """
        doc: list[str] = []

        # Fixed header block.
        doc.append(f"# Hypothesis: {self.id}")
        doc.append("")
        doc.append(f"**Statement**: {self.statement}")
        doc.append("")
        doc.append(f"**Domain**: {self.domain.value}")
        doc.append(f"**Research Question**: {self.research_question}")
        doc.append(f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}")
        doc.append("")

        # Optional sections, each followed by a blank separator line.
        if self.rationale:
            doc.extend(("## Rationale", self.rationale, ""))
        if self.assumptions:
            doc.append("## Assumptions")
            doc.extend(f"- {item}" for item in self.assumptions)
            doc.append("")
        if self.predictions:
            doc.append("## Predictions / Testable Consequences")
            doc.extend(f"- {item}" for item in self.predictions)
            doc.append("")
        if self.tags:
            doc.append("**Tags**: " + ", ".join(self.tags))

        return "\n".join(doc)
Methods:
to_markdown
to_markdown() -> str

Render this hypothesis as a structured Markdown document.

Source code in src\cds\core\models.py
def to_markdown(self) -> str:
    """Return this hypothesis formatted as a Markdown document.

    The header (id, statement, domain, research question, status and
    confidence) is always emitted. The rationale, assumptions, predictions and
    tags sections appear only when the corresponding field is non-empty.
    """
    doc: list[str] = []

    # Fixed header block.
    doc.append(f"# Hypothesis: {self.id}")
    doc.append("")
    doc.append(f"**Statement**: {self.statement}")
    doc.append("")
    doc.append(f"**Domain**: {self.domain.value}")
    doc.append(f"**Research Question**: {self.research_question}")
    doc.append(f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}")
    doc.append("")

    # Optional sections, each followed by a blank separator line.
    if self.rationale:
        doc.extend(("## Rationale", self.rationale, ""))
    if self.assumptions:
        doc.append("## Assumptions")
        doc.extend(f"- {item}" for item in self.assumptions)
        doc.append("")
    if self.predictions:
        doc.append("## Predictions / Testable Consequences")
        doc.extend(f"- {item}" for item in self.predictions)
        doc.append("")
    if self.tags:
        doc.append("**Tags**: " + ", ".join(self.tags))

    return "\n".join(doc)

HypothesisStatus

Bases: str, Enum

Lifecycle states for a Hypothesis.

Source code in src\cds\core\models.py
class HypothesisStatus(str, Enum):
    """Lifecycle states for a Hypothesis.

    Members are also plain strings (``str`` mixin), so each one compares equal
    to its lowercase value.
    """

    NEW = "new"  # default status of a freshly created Hypothesis
    REFINED = "refined"
    CRITIQUED = "critiqued"
    TESTABLE = "testable"
    VALIDATED = "validated"  # set by HypothesisEvaluator after a significant result
    REJECTED = "rejected"  # set by HypothesisEvaluator after a non-significant result
    ARCHIVED = "archived"

ChiSquareGofPayload

Bases: TypedDict

Nested payload under the chi_square_gof dispatch key.

expected is optional at the call site: the evaluator falls back to a uniform distribution over the categories when it is missing. total=False makes both fields optional for the type checker so callers can supply only observed; HypothesisEvaluator.evaluate reads observed directly and looks expected up with .get(), so only expected may actually be omitted.

Source code in src\cds\hypothesis\evaluator.py
class ChiSquareGofPayload(TypedDict, total=False):
    """Nested payload under the ``chi_square_gof`` dispatch key.

    ``total=False`` makes both keys optional for the type checker, but
    :meth:`HypothesisEvaluator.evaluate` treats them differently at runtime:
    ``observed`` is read with ``payload["observed"]`` and therefore has to be
    present, whereas ``expected`` is read with ``payload.get("expected")`` and
    may be left out, in which case the evaluator falls back to a uniform
    distribution over the categories.
    """

    observed: list[float]  # observed count per category; needed at runtime
    expected: list[float]  # expected count per category; may be omitted

EvaluationData

Bases: TypedDict

Tagged-union payload selecting which statistical test evaluate runs.

Exactly one of the dispatch keys below should be set; evaluate checks them in documented order and raises ValueError if none match. total=False mirrors the established AdamState convention (optimization.minimize): every field is optional and presence is the dispatch signal, checked via if "<key>" in data: in the method body (mypy narrows those accesses).

  • groups -> t-test (2) or ANOVA (3+); optional labels
  • one_sample + popmean -> one-sample t-test vs a reference mean
  • chi_square_gof -> {"observed": [...], "expected": [...]}
  • chi_square_independence -> 2D contingency table
  • paired -> tuple of two paired samples
Source code in src\cds\hypothesis\evaluator.py
class EvaluationData(TypedDict, total=False):
    """Tagged-union payload selecting which statistical test ``evaluate`` runs.

    Exactly one of the dispatch keys below should be set; ``evaluate`` checks
    them in the order listed (the first key present wins) and raises
    ``ValueError`` if none match. ``total=False``
    mirrors the established ``AdamState`` convention (``optimization.minimize``):
    every field is optional and presence is the dispatch signal, checked via
    ``if "<key>" in data:`` in the method body (mypy narrows those accesses).

    - ``groups``                  -> t-test (2) or ANOVA (3+); optional ``labels``
    - ``one_sample`` + ``popmean``-> one-sample t-test vs a reference mean
    - ``chi_square_gof``          -> ``{"observed": [...], "expected": [...]}``
    - ``chi_square_independence`` -> 2D contingency table
    - ``paired``                  -> tuple of two paired samples

    ``popmean`` and ``labels`` are companion keys, not dispatch keys: they are
    only read together with ``one_sample`` and ``groups`` respectively.
    ``paired`` samples are handed to ``compare_groups`` as two ordinary groups.
    """

    groups: list[list[float]]  # one inner list of observations per group
    labels: list[str]  # optional companion of ``groups``
    one_sample: list[float]  # sample for the one-sample t-test
    popmean: float  # reference mean; read whenever ``one_sample`` is present
    chi_square_gof: ChiSquareGofPayload  # nested observed/expected payload
    chi_square_independence: list[list[float]]  # contingency table, row by row
    paired: tuple[list[float], list[float]]  # two samples, unpacked as ``a, b``

EvaluationResult dataclass

Detailed result of a hypothesis evaluation.

Source code in src\cds\hypothesis\evaluator.py
@dataclass
class EvaluationResult:
    """Detailed result of a hypothesis evaluation.

    Instances are assembled by ``HypothesisEvaluator._build_result``.
    """

    hypothesis_id: str  # ``id`` of the evaluated Hypothesis
    test_name: str  # human-readable name of the statistical test that ran
    statistic: float  # test statistic reported by the test
    p_value: float  # p-value reported by the test
    is_significant: bool  # True when ``p_value`` is below the evaluator's alpha
    conclusion: str  # verdict sentence naming alpha and the test

HypothesisEvaluator

Autonomous evaluator that matches hypotheses with statistical tests.

Source code in src\cds\hypothesis\evaluator.py
class HypothesisEvaluator:
    """Autonomous evaluator that matches hypotheses with statistical tests.

    Every public method runs one frequentist test, compares its p-value with
    ``alpha`` and, as a side effect, sets ``hypothesis.status`` to
    ``VALIDATED`` (significant) or ``REJECTED`` (not significant).
    """

    def __init__(self, alpha: float = 0.05):
        """Create an evaluator.

        Args:
            alpha: Significance level; a result counts as significant when its
                p-value is strictly below this value.
        """
        self.alpha = alpha

    def _build_result(
        self,
        hypothesis: Hypothesis,
        test_name: str,
        statistic: float,
        p_value: float,
    ) -> EvaluationResult:
        """Format the outcome and update the hypothesis status.

        Args:
            hypothesis: Hypothesis under test; its ``status`` is overwritten.
            test_name: Human-readable name of the test, quoted in the conclusion.
            statistic: Test statistic to report.
            p_value: p-value to compare against ``self.alpha``.

        Returns:
            The populated ``EvaluationResult``.
        """
        is_sig = p_value < self.alpha
        if is_sig:
            conclusion = (
                f"Hypothesis supported at alpha={self.alpha}. "
                f"Significant result found ({test_name})."
            )
            hypothesis.status = HypothesisStatus.VALIDATED
        else:
            conclusion = (
                f"Failed to support hypothesis at alpha={self.alpha}. "
                f"No significant result ({test_name})."
            )
            hypothesis.status = HypothesisStatus.REJECTED
        return EvaluationResult(
            hypothesis_id=hypothesis.id,
            test_name=test_name,
            statistic=statistic,
            p_value=p_value,
            is_significant=is_sig,
            conclusion=conclusion,
        )

    def compare_groups(
        self,
        hypothesis: Hypothesis,
        groups: list[list[float]],
        labels: list[str] | None = None,
    ) -> EvaluationResult:
        """Evaluate a hypothesis by comparing multiple numeric groups.

        Uses t-test for 2 groups, ANOVA for more.

        Args:
            hypothesis: Hypothesis under test; its status is updated.
            groups: One list of observations per group.
            labels: Accepted for API symmetry; currently not used.

        Raises:
            ValueError: If fewer than two groups are supplied.
        """
        if len(groups) < 2:
            raise ValueError("Evaluation requires at least 2 groups of data.")

        if len(groups) == 2:
            res = two_sample_ttest(groups[0], groups[1])
            test_name = "Two-sample t-test"
        else:
            res = one_way_anova(*groups)
            test_name = "One-way ANOVA"

        return self._build_result(hypothesis, test_name, res.statistic, res.p_value)

    def compare_to_reference(
        self,
        hypothesis: Hypothesis,
        sample: list[float],
        popmean: float,
    ) -> EvaluationResult:
        """One-sample t-test: does the sample differ from a reference mean?

        Raises:
            ValueError: If ``sample`` holds fewer than two observations.
        """
        if len(sample) < 2:
            raise ValueError("One-sample evaluation requires at least 2 observations.")
        res = one_sample_ttest(sample, popmean)
        return self._build_result(hypothesis, "One-sample t-test", res.statistic, res.p_value)

    def goodness_of_fit(
        self,
        hypothesis: Hypothesis,
        observed: list[float],
        expected: list[float] | None = None,
    ) -> EvaluationResult:
        """Chi-square goodness-of-fit: observed vs expected category counts.

        If ``expected`` is omitted, a uniform distribution over the categories
        is assumed (all categories equally likely).

        Raises:
            ValueError: If there are fewer than two categories, or if
                ``expected`` is given with a different number of categories
                than ``observed``.
        """
        if len(observed) < 2:
            raise ValueError("Goodness-of-fit requires at least 2 categories.")
        if expected is None:
            total = sum(observed)
            n = len(observed)
            expected = [total / n] * n
        elif len(expected) != len(observed):
            # Category counts are compared pairwise, so the lengths must agree.
            raise ValueError(
                "Goodness-of-fit requires 'expected' to have the same number of "
                "categories as 'observed'."
            )
        res = chi_square_gof(observed, expected)
        return self._build_result(
            hypothesis, "Chi-square goodness-of-fit", res.statistic, res.p_value
        )

    def test_independence(
        self,
        hypothesis: Hypothesis,
        table: list[list[float]],
    ) -> EvaluationResult:
        """Chi-square test of independence on a contingency table.

        Raises:
            ValueError: If the table has fewer than two rows or any row has
                fewer than two columns.
        """
        if len(table) < 2 or any(len(row) < 2 for row in table):
            raise ValueError("Independence test requires a 2x2 or larger contingency table.")
        res = chi_square_independence(table)
        return self._build_result(hypothesis, "Chi-square independence", res.statistic, res.p_value)

    def evaluate(self, hypothesis: Hypothesis, data: EvaluationData) -> EvaluationResult:
        """General evaluation entry point dispatching on the data format.

        Supported keys (checked in order):

        - ``groups``            : list of numeric groups (t-test / ANOVA)
        - ``one_sample`` + ``popmean`` : sample and reference mean
        - ``chi_square_gof``    : ``{"observed": [...], "expected": [...]}``
        - ``chi_square_independence`` : 2D contingency table
        - ``paired``            : tuple of two paired samples (treated as groups)

        Raises:
            ValueError: If no supported key is present, if ``one_sample`` is
                given without ``popmean``, or if the ``chi_square_gof``
                payload lacks ``observed``.
        """
        if "groups" in data:
            return self.compare_groups(hypothesis, data["groups"], data.get("labels"))

        if "one_sample" in data:
            # A malformed payload must surface as ValueError, not a bare KeyError.
            if "popmean" not in data:
                raise ValueError("'one_sample' data requires a 'popmean' reference value.")
            return self.compare_to_reference(hypothesis, data["one_sample"], data["popmean"])

        if "chi_square_gof" in data:
            payload = data["chi_square_gof"]
            if "observed" not in payload:
                raise ValueError("'chi_square_gof' data requires an 'observed' list.")
            return self.goodness_of_fit(
                hypothesis,
                payload["observed"],
                payload.get("expected"),
            )

        if "chi_square_independence" in data:
            return self.test_independence(hypothesis, data["chi_square_independence"])

        if "paired" in data:
            a, b = data["paired"]
            return self.compare_groups(hypothesis, [list(a), list(b)])

        raise ValueError(
            "Unsupported data format for evaluation. "
            "Provide one of: 'groups', 'one_sample' (with 'popmean'), "
            "'chi_square_gof', 'chi_square_independence', or 'paired'."
        )
Methods:
compare_groups
compare_groups(
    hypothesis: Hypothesis,
    groups: list[list[float]],
    labels: list[str] | None = None,
) -> EvaluationResult

Evaluate a hypothesis by comparing multiple numeric groups.

Uses t-test for 2 groups, ANOVA for more.

Source code in src\cds\hypothesis\evaluator.py
def compare_groups(
    self,
    hypothesis: Hypothesis,
    groups: list[list[float]],
    labels: list[str] | None = None,
) -> EvaluationResult:
    """Compare two or more numeric groups and score the hypothesis.

    Exactly two groups are compared with a two-sample t-test; three or more
    with a one-way ANOVA. ``labels`` is accepted but not used here.

    Raises:
        ValueError: If fewer than two groups are supplied.
    """
    group_count = len(groups)
    if group_count < 2:
        raise ValueError("Evaluation requires at least 2 groups of data.")

    if group_count != 2:
        # Three or more groups: analysis of variance.
        outcome = one_way_anova(*groups)
        label = "One-way ANOVA"
    else:
        outcome = two_sample_ttest(groups[0], groups[1])
        label = "Two-sample t-test"

    return self._build_result(
        hypothesis,
        label,
        outcome.statistic,
        outcome.p_value,
    )
compare_to_reference
compare_to_reference(
    hypothesis: Hypothesis,
    sample: list[float],
    popmean: float,
) -> EvaluationResult

One-sample t-test: does the sample differ from a reference mean?

Source code in src\cds\hypothesis\evaluator.py
def compare_to_reference(
    self,
    hypothesis: Hypothesis,
    sample: list[float],
    popmean: float,
) -> EvaluationResult:
    """Run a one-sample t-test of ``sample`` against the reference mean ``popmean``.

    Raises:
        ValueError: If ``sample`` holds fewer than two observations.
    """
    if len(sample) <= 1:
        raise ValueError("One-sample evaluation requires at least 2 observations.")

    outcome = one_sample_ttest(sample, popmean)
    return self._build_result(
        hypothesis,
        "One-sample t-test",
        outcome.statistic,
        outcome.p_value,
    )
goodness_of_fit
goodness_of_fit(
    hypothesis: Hypothesis,
    observed: list[float],
    expected: list[float] | None = None,
) -> EvaluationResult

Chi-square goodness-of-fit: observed vs expected category counts.

If expected is omitted, a uniform distribution over the categories is assumed (all categories equally likely).

Source code in src\cds\hypothesis\evaluator.py
def goodness_of_fit(
    self,
    hypothesis: Hypothesis,
    observed: list[float],
    expected: list[float] | None = None,
) -> EvaluationResult:
    """Chi-square goodness-of-fit: observed vs expected category counts.

    If ``expected`` is omitted, a uniform distribution over the categories
    is assumed (all categories equally likely).

    Args:
        hypothesis: Hypothesis under test; its status is updated by the result.
        observed: Observed count per category.
        expected: Expected count per category, or ``None`` for a uniform split
            of ``sum(observed)``.

    Raises:
        ValueError: If there are fewer than two categories, or if ``expected``
            is given with a different number of categories than ``observed``.
    """
    if len(observed) < 2:
        raise ValueError("Goodness-of-fit requires at least 2 categories.")
    if expected is None:
        total = sum(observed)
        n = len(observed)
        expected = [total / n] * n
    elif len(expected) != len(observed):
        # Category counts are compared pairwise, so the lengths must agree.
        raise ValueError(
            "Goodness-of-fit requires 'expected' to have the same number of "
            "categories as 'observed'."
        )
    res = chi_square_gof(observed, expected)
    return self._build_result(
        hypothesis, "Chi-square goodness-of-fit", res.statistic, res.p_value
    )
test_independence
test_independence(
    hypothesis: Hypothesis, table: list[list[float]]
) -> EvaluationResult

Chi-square test of independence on a contingency table.

Source code in src\cds\hypothesis\evaluator.py
def test_independence(
    self,
    hypothesis: Hypothesis,
    table: list[list[float]],
) -> EvaluationResult:
    """Run a chi-square test of independence on a contingency table.

    Raises:
        ValueError: If the table has fewer than two rows or any row has fewer
            than two columns.
    """
    # Row count is checked first so the per-row scan only runs on real tables.
    if len(table) < 2 or not all(len(row) >= 2 for row in table):
        raise ValueError("Independence test requires a 2x2 or larger contingency table.")

    outcome = chi_square_independence(table)
    return self._build_result(
        hypothesis,
        "Chi-square independence",
        outcome.statistic,
        outcome.p_value,
    )
evaluate
evaluate(
    hypothesis: Hypothesis, data: EvaluationData
) -> EvaluationResult

General evaluation entry point dispatching on the data format.

Supported keys (checked in order):

  • groups : list of numeric groups (t-test / ANOVA)
  • one_sample + popmean : sample and reference mean
  • chi_square_gof : {"observed": [...], "expected": [...]}
  • chi_square_independence : 2D contingency table
  • paired : tuple of two paired samples (treated as groups)
Source code in src\cds\hypothesis\evaluator.py
def evaluate(self, hypothesis: Hypothesis, data: EvaluationData) -> EvaluationResult:
    """General evaluation entry point dispatching on the data format.

    Supported keys (checked in order):

    - ``groups``            : list of numeric groups (t-test / ANOVA)
    - ``one_sample`` + ``popmean`` : sample and reference mean
    - ``chi_square_gof``    : ``{"observed": [...], "expected": [...]}``
    - ``chi_square_independence`` : 2D contingency table
    - ``paired``            : tuple of two paired samples (treated as groups)

    Args:
        hypothesis: Hypothesis under test, forwarded to the selected method.
        data: Payload whose first recognised key selects the test.

    Raises:
        ValueError: If no supported key is present, if ``one_sample`` is given
            without ``popmean``, or if the ``chi_square_gof`` payload lacks
            ``observed``.
    """
    if "groups" in data:
        return self.compare_groups(hypothesis, data["groups"], data.get("labels"))

    if "one_sample" in data:
        # A malformed payload must surface as ValueError, not a bare KeyError.
        if "popmean" not in data:
            raise ValueError("'one_sample' data requires a 'popmean' reference value.")
        return self.compare_to_reference(hypothesis, data["one_sample"], data["popmean"])

    if "chi_square_gof" in data:
        payload = data["chi_square_gof"]
        if "observed" not in payload:
            raise ValueError("'chi_square_gof' data requires an 'observed' list.")
        return self.goodness_of_fit(
            hypothesis,
            payload["observed"],
            payload.get("expected"),
        )

    if "chi_square_independence" in data:
        return self.test_independence(hypothesis, data["chi_square_independence"])

    if "paired" in data:
        a, b = data["paired"]
        return self.compare_groups(hypothesis, [list(a), list(b)])

    raise ValueError(
        "Unsupported data format for evaluation. "
        "Provide one of: 'groups', 'one_sample' (with 'popmean'), "
        "'chi_square_gof', 'chi_square_independence', or 'paired'."
    )

HypothesisGenerator

Bases: Protocol

Interface for hypothesis generators.

Source code in src\cds\hypothesis\generator.py
class HypothesisGenerator(Protocol):
    """Interface for hypothesis generators.

    Any object exposing a compatible ``generate`` method can be passed as the
    ``generator`` argument of ``generate_hypotheses``; ``SimpleOfflineGenerator``
    is the built-in implementation.
    """

    def generate(
        self,
        research_question: str,
        domain: Domain | str = Domain.GENERAL_SCIENCE,
        n: int = 3,
        **kwargs: object,
    ) -> list[Hypothesis]:
        """Generate `n` hypotheses for the given research question.

        Args:
            research_question: The question the hypotheses should address.
            domain: Scientific domain, as a ``Domain`` member or a string.
            n: Number of hypotheses requested.
            **kwargs: Implementation-specific options.

        Returns:
            The generated hypotheses.
        """
Methods:
generate
generate(
    research_question: str,
    domain: Domain | str = Domain.GENERAL_SCIENCE,
    n: int = 3,
    **kwargs: object
) -> list[Hypothesis]

Generate n hypotheses for the given research question.

Source code in src\cds\hypothesis\generator.py
def generate(
    self,
    research_question: str,
    domain: Domain | str = Domain.GENERAL_SCIENCE,
    n: int = 3,
    **kwargs: object,
) -> list[Hypothesis]:
    """Generate `n` hypotheses for the given research question.

    Args:
        research_question: The question the hypotheses should address.
        domain: Scientific domain, as a ``Domain`` member or a string.
        n: Number of hypotheses requested.
        **kwargs: Implementation-specific options.

    Returns:
        The generated hypotheses.
    """

PromptTemplate

Prompt templates for different providers / use cases.

Source code in src\cds\hypothesis\generator.py
class PromptTemplate:
    """Prompt templates for different providers / use cases.

    ``SYSTEM`` is the fixed system prompt; ``USER_BASE`` is the user prompt
    with ``{research_question}``, ``{domain}`` and ``{n}`` placeholders that
    :meth:`render` fills in.
    """

    SYSTEM = (
        "You are an expert research scientist and rigorous thinker. "
        "Your goal is to propose high-quality, falsifiable, "
        "novel-yet-grounded scientific hypotheses. "
        "Always make assumptions explicit. Prioritize testability and clarity. "
        "Respond ONLY in the requested structured format."
    )

    USER_BASE = """Research Question: {research_question}

Domain focus: {domain}

Generate {n} distinct hypotheses.

For each hypothesis provide:
- Clear one-sentence statement
- Short rationale (2-4 sentences) connecting to known science
- Key assumptions (bullet list)
- Specific, measurable predictions or consequences (bullet list)
- Estimated confidence (0-1) with brief justification

Format each as:
ID: H-<number>
Statement: ...
Rationale: ...
Assumptions:
- ...
Predictions:
- ...
Confidence: 0.xx
"""

    @classmethod
    def render(cls, research_question: str, domain: Domain | str, n: int = 3) -> str:
        """Format the user-side prompt for a hypothesis generation request.

        Args:
            research_question: Question inserted into the prompt verbatim.
            domain: ``Domain`` member (its ``value`` is used) or a plain
                domain string, matching what the generators accept.
            n: Number of hypotheses the prompt asks for.

        Returns:
            ``USER_BASE`` with all three placeholders substituted.
        """
        # Enum members expose ``.value``; a plain string is used unchanged.
        domain_label = getattr(domain, "value", domain)
        return cls.USER_BASE.format(
            research_question=research_question,
            domain=domain_label,
            n=n,
        )
Methods:
render classmethod
render(
    research_question: str, domain: Domain, n: int = 3
) -> str

Format the user-side prompt for a hypothesis generation request.

Source code in src\cds\hypothesis\generator.py
@classmethod
def render(cls, research_question: str, domain: Domain | str, n: int = 3) -> str:
    """Format the user-side prompt for a hypothesis generation request.

    Args:
        research_question: Question inserted into the prompt verbatim.
        domain: ``Domain`` member (its ``value`` is used) or a plain domain
            string, matching what the generators accept.
        n: Number of hypotheses the prompt asks for.

    Returns:
        ``USER_BASE`` with all three placeholders substituted.
    """
    # Enum members expose ``.value``; a plain string is used unchanged.
    domain_label = getattr(domain, "value", domain)
    return cls.USER_BASE.format(
        research_question=research_question,
        domain=domain_label,
        n=n,
    )

SimpleOfflineGenerator

A deterministic offline generator for demos and early development.

It creates plausible but generic hypotheses. Researchers can replace or wrap it with a custom implementation of HypothesisGenerator tailored to their domain or data sources.

Source code in src\cds\hypothesis\generator.py
class SimpleOfflineGenerator:
    """
    A deterministic offline generator for demos and early development.

    It creates plausible but generic hypotheses. Researchers can replace
    or wrap it with a custom implementation of HypothesisGenerator
    tailored to their domain or data sources.
    """

    def __init__(self) -> None:
        """Set up the built-in statement templates, keyed by ``Domain``."""
        self.templates = {
            Domain.COSMOLOGY: [
                (
                    "Late-time modifications to gravity can mimic "
                    "dark energy while altering structure growth."
                ),
                (
                    "A time-varying dark energy equation of state "
                    "w(a) with a sharp transition at z~0.5 "
                    "explains current tensions."
                ),
                (
                    "Primordial non-Gaussianity of local type at "
                    "f_NL ~ 5-10 is detectable with next-gen "
                    "surveys and resolves sigma8 tension."
                ),
            ],
            Domain.PHYSICS: [
                (
                    "A hidden sector with light mediators can "
                    "resolve the muon g-2 anomaly without "
                    "conflicting with collider bounds."
                ),
                (
                    "Modified dispersion relations at Planck scale "
                    "suppress high-energy cosmic rays in a "
                    "characteristic energy-dependent way."
                ),
            ],
            Domain.MATHEMATICS: [
                (
                    "A new family of special functions between "
                    "hypergeometric and q-hypergeometric satisfies "
                    "a novel functional equation."
                ),
            ],
        }

    def generate(
        self,
        research_question: str,
        domain: Domain | str = Domain.GENERAL_SCIENCE,
        n: int = 3,
        **kwargs: object,
    ) -> list[Hypothesis]:
        """Generate `n` hypotheses from the built-in domain templates.

        Args:
            research_question: Question stored on every hypothesis and used
                to word the generic fallback statement.
            domain: ``Domain`` member or domain string (case-insensitive);
                unrecognised strings map to ``Domain.GENERAL_SCIENCE``.
            n: Number of hypotheses to return.
            **kwargs: Accepted for protocol compatibility; ignored here.

        Returns:
            ``n`` hypotheses with status ``NEW``. Confidence starts at 0.50
            and rises by 0.05 per hypothesis, capped at 1.0.
        """
        # Ensure domain is a Domain enum instance. ``Domain`` subclasses
        # ``str``, so the isinstance guard is True for both plain strings and
        # enum members; the False branch (skip mapping) is therefore
        # unreachable from typed callers — it remains as a defensive seam for
        # hypothetical non-str subclasses and is excluded from coverage.
        if isinstance(domain, str):  # pragma: no branch
            try:
                # Case-insensitive mapping for better UX
                domain = Domain(domain.lower())
            except ValueError:
                domain = Domain.GENERAL_SCIENCE

        # Domains without templates start from an empty list so they never
        # inherit another domain's (e.g. physics-specific) claims.
        ideas = self.templates.get(domain, [])[:n]
        if len(ideas) < n:
            # The built-in templates only cover a few domains. For any other
            # domain (or when more hypotheses are requested than templates
            # exist), fall back to a generic template derived from the
            # research question rather than a domain-specific claim, so the
            # output stays a usable starting point for a custom generator.
            ideas += [
                f"A yet-untested factor influencing {research_question} "
                f"produces a measurable, reproducible effect.",
            ] * (n - len(ideas))

        hypos: list[Hypothesis] = []
        for i, idea in enumerate(ideas[:n], 1):
            h = Hypothesis(
                id=f"H-{uuid.uuid4().hex[:8]}",
                statement=idea,
                domain=domain,
                research_question=research_question,
                rationale=(
                    "Builds on known tensions in the literature "
                    "and proposes a falsifiable deviation."
                ),
                assumptions=[
                    "Background model is approximately correct at low energies.",
                    "New physics at observable scales doesn't violate existing constraints.",
                ],
                predictions=[
                    "A measurable deviation in observable O at scale S with amplitude A.",
                    "Correlation between two previously uncorrelated datasets D1 and D2.",
                ],
                status=HypothesisStatus.NEW,
                # Clamp so large ``n`` cannot exceed the model's le=1.0 bound.
                confidence=min(1.0, 0.45 + (i * 0.05)),
                tags=[domain.value, "early-draft"],
            )
            hypos.append(h)
        return hypos
Methods:
generate
generate(
    research_question: str,
    domain: Domain | str = Domain.GENERAL_SCIENCE,
    n: int = 3,
    **kwargs: object
) -> list[Hypothesis]

Generate n hypotheses from the built-in domain templates.

Source code in src\cds\hypothesis\generator.py
def generate(
    self,
    research_question: str,
    domain: Domain | str = Domain.GENERAL_SCIENCE,
    n: int = 3,
    **kwargs: object,
) -> list[Hypothesis]:
    """Generate `n` hypotheses from the built-in domain templates.

    Args:
        research_question: Question stored on every hypothesis and used to
            word the generic fallback statement.
        domain: ``Domain`` member or domain string (case-insensitive);
            unrecognised strings map to ``Domain.GENERAL_SCIENCE``.
        n: Number of hypotheses to return.
        **kwargs: Accepted for protocol compatibility; ignored here.

    Returns:
        ``n`` hypotheses with status ``NEW``. Confidence starts at 0.50 and
        rises by 0.05 per hypothesis, capped at 1.0.
    """
    # Ensure domain is a Domain enum instance. ``Domain`` subclasses
    # ``str``, so the isinstance guard is True for both plain strings and
    # enum members; the False branch (skip mapping) is therefore
    # unreachable from typed callers — it remains as a defensive seam for
    # hypothetical non-str subclasses and is excluded from coverage.
    if isinstance(domain, str):  # pragma: no branch
        try:
            # Case-insensitive mapping for better UX
            domain = Domain(domain.lower())
        except ValueError:
            domain = Domain.GENERAL_SCIENCE

    # Domains without templates start from an empty list so they never
    # inherit another domain's (e.g. physics-specific) claims.
    ideas = self.templates.get(domain, [])[:n]
    if len(ideas) < n:
        # The built-in templates only cover a few domains. For any other
        # domain (or when more hypotheses are requested than templates
        # exist), fall back to a generic template derived from the
        # research question rather than a domain-specific claim, so the
        # output stays a usable starting point for a custom generator.
        ideas += [
            f"A yet-untested factor influencing {research_question} "
            f"produces a measurable, reproducible effect.",
        ] * (n - len(ideas))

    hypos: list[Hypothesis] = []
    for i, idea in enumerate(ideas[:n], 1):
        h = Hypothesis(
            id=f"H-{uuid.uuid4().hex[:8]}",
            statement=idea,
            domain=domain,
            research_question=research_question,
            rationale=(
                "Builds on known tensions in the literature "
                "and proposes a falsifiable deviation."
            ),
            assumptions=[
                "Background model is approximately correct at low energies.",
                "New physics at observable scales doesn't violate existing constraints.",
            ],
            predictions=[
                "A measurable deviation in observable O at scale S with amplitude A.",
                "Correlation between two previously uncorrelated datasets D1 and D2.",
            ],
            status=HypothesisStatus.NEW,
            # Clamp so large ``n`` cannot exceed the model's le=1.0 bound.
            confidence=min(1.0, 0.45 + (i * 0.05)),
            tags=[domain.value, "early-draft"],
        )
        hypos.append(h)
    return hypos

Functions:

generate_hypotheses

generate_hypotheses(
    research_question: str,
    domain: Domain = Domain.GENERAL_SCIENCE,
    n: int = 3,
    generator: HypothesisGenerator | None = None,
) -> list[Hypothesis]

Convenience entrypoint.

Source code in src\cds\hypothesis\generator.py
def generate_hypotheses(
    research_question: str,
    domain: Domain = Domain.GENERAL_SCIENCE,
    n: int = 3,
    generator: HypothesisGenerator | None = None,
) -> list[Hypothesis]:
    """Generate hypotheses for a research question with a single call.

    Args:
        research_question: the question the hypotheses should address.
        domain: scientific domain forwarded to the generator.
        n: number of hypotheses requested from the generator.
        generator: generator to delegate to; when ``None`` a fresh
            ``SimpleOfflineGenerator`` is created.

    Returns:
        Whatever the generator's ``generate`` method returns for these
        arguments.
    """
    # Compare against None explicitly: a caller-supplied generator that
    # happens to be falsy (e.g. defines ``__len__`` or ``__bool__``) must
    # still be honoured instead of being silently replaced by the default.
    gen = generator if generator is not None else SimpleOfflineGenerator()
    return gen.generate(research_question=research_question, domain=domain, n=n)

Statistics

Descriptive statistics, regression, and frequentist hypothesis tests (t-test, chi-square, ANOVA, Mann-Whitney, …).

cds.stats

Statistical analysis tools.

Classes

TestResult dataclass

Result of a hypothesis test: test statistic, degrees of freedom, and p-value.

Source code in src\cds\stats\hypothesis_tests.py
@dataclass
class TestResult:
    """Result of a hypothesis test: test statistic, degrees of freedom, p-value."""

    # Value of the test statistic (t, chi-square or F, depending on the test).
    statistic: float
    # Degrees of freedom reported by the test that built this result.
    df: float
    # p-value the test computed from the statistic and its degrees of freedom.
    p_value: float

RegressionResult dataclass

Fitted linear-regression parameters and goodness-of-fit.

Source code in src\cds\stats\regression.py
@dataclass
class RegressionResult:
    """Parameters of a fitted straight line together with its R^2."""

    slope: float
    intercept: float
    r_squared: float

    def predict(self, x: float) -> float:
        """Evaluate the fitted line at ``x`` and return the predicted y."""
        scaled = self.slope * x
        return scaled + self.intercept
Methods:
predict
predict(x: float) -> float

Predict the response y for a given x using the fitted line.

Source code in src\cds\stats\regression.py
def predict(self, x: float) -> float:
    """Evaluate the fitted line at ``x`` and return the predicted y."""
    scaled = self.slope * x
    return scaled + self.intercept

Functions:

correlation

correlation(x: list[float], y: list[float]) -> float

Calculate the Pearson correlation coefficient between two lists.

Parameters:

Name Type Description Default
x list[float]

first list of values

required
y list[float]

second list of values

required

Returns:

Type Description
float

Pearson correlation coefficient.

Raises:

Type Description
ValueError

if lengths mismatch or lists are too short.

Source code in src\cds\stats\descriptive.py
def correlation(x: list[float], y: list[float]) -> float:
    """Return Pearson's correlation coefficient for two paired samples.

    Args:
        x: first sample
        y: second sample, paired element-wise with ``x``

    Returns:
        The Pearson correlation coefficient, or 0.0 when the denominator
        is not greater than ``NEAR_ZERO`` (for instance when one sample
        is constant).

    Raises:
        ValueError: if the samples differ in length or contain fewer than
            two points.
    """
    size = len(x)
    if size != len(y):
        raise ValueError("lists must be the same length")
    if size < 2:
        raise ValueError("correlation requires at least two data points")

    mean_x = mean(x)
    mean_y = mean(y)
    co_deviation = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    spread_x = sum((xi - mean_x) ** 2 for xi in x)
    spread_y = sum((yi - mean_y) ** 2 for yi in y)
    scale = math.sqrt(spread_x * spread_y)

    if scale > NEAR_ZERO:
        return co_deviation / scale
    return 0.0

mean

mean(data: list[float]) -> float

Calculate the arithmetic mean of a list of numbers.

Parameters:

Name Type Description Default
data list[float]

List of numeric values.

required

Returns:

Type Description
float

Arithmetic mean (sum / N).

Raises:

Type Description
ValueError

if data is empty.

Source code in src\cds\stats\descriptive.py
def mean(data: list[float]) -> float:
    """Return the arithmetic mean of ``data``.

    Args:
        data: numeric values to average.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if data:
        return sum(data) / len(data)
    raise ValueError("mean requires at least one data point")

median

median(data: list[float]) -> float

Calculate the median (middle value) of a list of numbers.

Parameters:

Name Type Description Default
data list[float]

List of numeric values.

required

Returns:

Type Description
float

Median value.

Source code in src\cds\stats\descriptive.py
def median(data: list[float]) -> float:
    """Return the median (middle value) of ``data``.

    Args:
        data: numeric values; they do not need to be sorted.

    Returns:
        The middle element of the sorted values, or the average of the two
        middle elements when the count is even. An empty input yields 0.0.
    """
    if not data:
        return 0.0
    ordered = sorted(data)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return float(ordered[half])
    return (ordered[half - 1] + ordered[half]) / 2

stdev

stdev(data: list[float], ddof: int = 1) -> float

Calculate the standard deviation of a list of numbers.

Parameters:

Name Type Description Default
data list[float]

List of numeric values.

required
ddof int

Delta Degrees of Freedom.

1

Returns:

Type Description
float

Standard deviation.

Source code in src\cds\stats\descriptive.py
def stdev(data: list[float], ddof: int = 1) -> float:
    """Return the standard deviation of ``data``.

    Args:
        data: numeric values.
        ddof: Delta Degrees of Freedom, forwarded unchanged to ``variance``.

    Returns:
        The square root of ``variance(data, ddof)``.
    """
    spread = variance(data, ddof)
    return math.sqrt(spread)

variance

variance(data: list[float], ddof: int = 1) -> float

Calculate the sample variance of a list of numbers.

Parameters:

Name Type Description Default
data list[float]

List of numeric values.

required
ddof int

Delta Degrees of Freedom (1 for sample, 0 for population).

1

Returns:

Type Description
float

Sample or population variance.

Raises:

Type Description
ValueError

if data size is <= ddof.

Source code in src\cds\stats\descriptive.py
def variance(data: list[float], ddof: int = 1) -> float:
    """Return the variance of ``data``.

    Args:
        data: numeric values.
        ddof: Delta Degrees of Freedom (1 for sample, 0 for population).

    Returns:
        The sum of squared deviations from the mean divided by
        ``len(data) - ddof``.

    Raises:
        ValueError: if ``len(data) <= ddof``.
    """
    size = len(data)
    if size <= ddof:
        raise ValueError(f"variance requires more than {ddof} data points")
    center = mean(data)
    squared_deviations = sum((value - center) ** 2 for value in data)
    return squared_deviations / (size - ddof)

chi2_sf

chi2_sf(x: float, df: float) -> float

Upper-tail probability for the chi-square distribution: P(X >= x).

Equals Q(df/2, x/2) with the regularized upper incomplete gamma.

Reference: Pearson (1900); Abramowitz & Stegun §26.4.

Source code in src\cds\stats\hypothesis_tests.py
def chi2_sf(x: float, df: float) -> float:
    """Upper-tail probability P(X >= x) of the chi-square distribution.

    Computed as Q(df/2, x/2), the regularized upper incomplete gamma
    function. Any ``x <= 0`` returns 1.0 without evaluating it.

    Reference: Pearson (1900); Abramowitz & Stegun §26.4.
    """
    if x <= 0.0:
        return 1.0
    shape = df / 2.0
    half_x = x / 2.0
    return _gammq(shape, half_x)

chi_square_gof

chi_square_gof(
    observed: list[float], expected: list[float]
) -> TestResult

Pearson's chi-square goodness-of-fit test.

Statistic chi2 = sum((O_i - E_i)^2 / E_i) with len-1 degrees of freedom.

Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.

Parameters:

Name Type Description Default
observed list[float]

observed counts

required
expected list[float]

expected counts (same length, all > 0)

required

Returns:

Type Description
TestResult

TestResult with chi2 statistic, df = k-1, upper-tail p-value

Source code in src\cds\stats\hypothesis_tests.py
def chi_square_gof(
    observed: list[float],
    expected: list[float],
) -> TestResult:
    """Pearson's chi-square goodness-of-fit test.

    The statistic is chi2 = sum((O_i - E_i)^2 / E_i) with k-1 degrees of
    freedom, where k is the number of categories.

    Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.

    Args:
        observed: observed counts
        expected: expected counts (same length, all > 0)

    Returns:
        TestResult with chi2 statistic, df = k-1, upper-tail p-value

    Raises:
        ValueError: if the lists differ in length, there are fewer than two
            categories, or any expected count is not positive.
    """
    categories = len(observed)
    if categories != len(expected):
        raise ValueError("observed and expected must have same length")
    if categories < 2:
        raise ValueError("need at least 2 categories")
    for count in expected:
        if count <= 0:
            raise ValueError("expected counts must be positive")
    statistic = sum((obs - exp) ** 2 / exp for obs, exp in zip(observed, expected))
    dof = categories - 1
    return TestResult(statistic=statistic, df=dof, p_value=chi2_sf(statistic, dof))

chi_square_independence

chi_square_independence(
    table: list[list[float]],
) -> TestResult

Pearson's chi-square test of independence for a contingency table.

Expected counts E_ij = (row_i total)(col_j total) / grand total; degrees of freedom (rows-1)(cols-1).

Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.

Parameters:

Name Type Description Default
table list[list[float]]

r x c contingency table of non-negative counts

required

Returns:

Type Description
TestResult

TestResult with chi2 statistic, df, upper-tail p-value

Source code in src\cds\stats\hypothesis_tests.py
def chi_square_independence(table: list[list[float]]) -> TestResult:
    """Pearson's chi-square test of independence for a contingency table.

    Each expected count is E_ij = (row_i total)(col_j total) / grand total,
    and the test has (rows-1)(cols-1) degrees of freedom. Cells whose
    expected count is not positive contribute nothing to the statistic.

    Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.

    Args:
        table: r x c contingency table of non-negative counts

    Returns:
        TestResult with chi2 statistic, df, upper-tail p-value

    Raises:
        ValueError: if the table has fewer than two rows or columns, is
            not rectangular, or its grand total is zero.
    """
    n_rows = len(table)
    if n_rows < 2:
        raise ValueError("need at least 2 rows")
    n_cols = len(table[0])
    if n_cols < 2 or any(len(row) != n_cols for row in table):
        raise ValueError("need a rectangular table with at least 2 columns")

    row_totals = [sum(row) for row in table]
    col_totals = [sum(column) for column in zip(*table)]
    grand_total = sum(row_totals)
    if grand_total == 0:
        raise ValueError("table total must be positive")

    statistic = 0.0
    for row, row_total in zip(table, row_totals):
        for cell, col_total in zip(row, col_totals):
            expected = row_total * col_total / grand_total
            if expected > 0:
                statistic += (cell - expected) ** 2 / expected

    dof = (n_rows - 1) * (n_cols - 1)
    return TestResult(statistic=statistic, df=dof, p_value=chi2_sf(statistic, dof))

f_sf

f_sf(f: float, df1: float, df2: float) -> float

Upper-tail probability for the F distribution: P(F >= f).

Equals I_{df2/(df2+df1 f)}(df2/2, df1/2).

Reference: Fisher (1925); Numerical Recipes §6.14.

Source code in src\cds\stats\hypothesis_tests.py
def f_sf(f: float, df1: float, df2: float) -> float:
    """Upper-tail probability P(F >= f) of the F distribution.

    Computed as the regularized incomplete beta function
    I_{df2/(df2+df1 f)}(df2/2, df1/2). Any ``f <= 0`` returns 1.0 without
    evaluating it.

    Reference: Fisher (1925); Numerical Recipes §6.14.
    """
    if f <= 0.0:
        return 1.0
    beta_arg = df2 / (df2 + df1 * f)
    return _betai(df2 / 2.0, df1 / 2.0, beta_arg)

one_sample_ttest

one_sample_ttest(
    data: list[float], popmean: float = 0.0
) -> TestResult

One-sample Student's t-test against a population mean.

Tests H0: mean(data) == popmean. The statistic is t = (x_bar - mu) / (s / sqrt(n)) with n-1 degrees of freedom.

Reference: Student [Gosset] (1908), Biometrika 6(1), 1-25.

Parameters:

Name Type Description Default
data list[float]

sample observations (n >= 2)

required
popmean float

hypothesized population mean

0.0

Returns:

Type Description
TestResult

TestResult with t statistic, df = n-1, two-tailed p-value

Source code in src\cds\stats\hypothesis_tests.py
def one_sample_ttest(data: list[float], popmean: float = 0.0) -> TestResult:
    """One-sample Student's t-test against a population mean.

    Tests H0: mean(data) == popmean using
    t = (x_bar - mu) / (s / sqrt(n)) with n-1 degrees of freedom.

    Reference: Student [Gosset] (1908), Biometrika 6(1), 1-25.

    Args:
        data: sample observations (n >= 2)
        popmean: hypothesized population mean

    Returns:
        TestResult with t statistic, df = n-1, two-tailed p-value

    Raises:
        ValueError: if there are fewer than two observations or the
            standard error is zero.
    """
    size = len(data)
    if size < 2:
        raise ValueError("need at least 2 observations")
    std_error = math.sqrt(variance(data, ddof=1) / size)
    if std_error == 0.0:
        raise ValueError("zero variance; t-test undefined")
    t_stat = (mean(data) - popmean) / std_error
    dof = size - 1
    return TestResult(statistic=t_stat, df=dof, p_value=t_sf(t_stat, dof))

one_way_anova

one_way_anova(*groups: list[float]) -> TestResult

Fisher's one-way analysis of variance (ANOVA F-test).

Partitions total variability into between-group and within-group sums of squares and forms F = MS_between / MS_within with (k-1, N-k) degrees of freedom.

Reference: Fisher, R. A. (1925). "Statistical Methods for Research Workers," Oliver & Boyd.

Parameters:

Name Type Description Default
*groups list[float]

two or more samples, each with at least one observation

()

Returns:

Type Description
TestResult

TestResult with F statistic, df = k-1 (stored), upper-tail p-value.

TestResult

The within-group degrees of freedom (N-k) are used internally for p.

Source code in src\cds\stats\hypothesis_tests.py
def one_way_anova(*groups: list[float]) -> TestResult:
    """Fisher's one-way analysis of variance (ANOVA F-test).

    Partitions total variability into between-group and within-group sums of
    squares and forms F = MS_between / MS_within with (k-1, N-k) degrees of
    freedom.

    Reference: Fisher, R. A. (1925). "Statistical Methods for Research
    Workers," Oliver & Boyd.

    Args:
        *groups: two or more samples, each with at least one observation

    Returns:
        TestResult with F statistic, df = k-1 (stored), upper-tail p-value.
        The within-group degrees of freedom (N-k) are used internally for p.

    Raises:
        ValueError: if fewer than two groups are given, any group is empty,
            there are not more observations than groups, or the
            within-group mean square is zero.
    """
    k = len(groups)
    if k < 2:
        raise ValueError("need at least 2 groups")
    if any(len(g) < 1 for g in groups):
        raise ValueError("each group needs at least one observation")
    n_total = sum(len(g) for g in groups)
    if n_total <= k:
        raise ValueError("need more observations than groups")
    grand_mean = sum(sum(g) for g in groups) / n_total
    # Compute each group's mean exactly once. Evaluating mean(g) inside the
    # per-observation loop would make the within-group sum quadratic in the
    # group size while producing the very same numbers.
    group_means = [mean(g) for g in groups]
    ss_between = sum(
        len(g) * (g_mean - grand_mean) ** 2 for g, g_mean in zip(groups, group_means)
    )
    ss_within = sum(
        sum((x - g_mean) ** 2 for x in g) for g, g_mean in zip(groups, group_means)
    )
    df_between = k - 1
    df_within = n_total - k
    ms_between = ss_between / df_between
    ms_within = ss_within / df_within
    if ms_within == 0.0:
        raise ValueError("zero within-group variance; F undefined")
    f = ms_between / ms_within
    return TestResult(
        statistic=f,
        df=df_between,
        p_value=f_sf(f, df_between, df_within),
    )

t_sf

t_sf(t: float, df: float) -> float

Two-tailed survival probability for Student's t distribution.

Returns P(|T| >= |t|) for T ~ t(df), via the incomplete beta function: p = I_{df/(df+t^2)}(df/2, 1/2).

Reference: Student (1908); Numerical Recipes §6.14.

Source code in src\cds\stats\hypothesis_tests.py
def t_sf(t: float, df: float) -> float:
    """Two-tailed tail probability P(|T| >= |t|) for T ~ t(df).

    Evaluated through the regularized incomplete beta function:
    p = I_{df/(df+t^2)}(df/2, 1/2).

    Reference: Student (1908); Numerical Recipes §6.14.
    """
    beta_arg = df / (df + t * t)
    half_df = df / 2.0
    return _betai(half_df, 0.5, beta_arg)

two_sample_ttest

two_sample_ttest(
    a: list[float], b: list[float], equal_var: bool = True
) -> TestResult

Two-sample t-test for equality of means.

With equal_var=True uses the pooled-variance (Student) t-test; with equal_var=False uses Welch's t-test with the Welch-Satterthwaite degrees of freedom.

References
  • Student [Gosset] (1908), Biometrika 6(1), 1-25.
  • Welch, B. L. (1947). "The generalization of 'Student's' problem when several different population variances are involved." Biometrika, 34(1-2), 28-35.

Parameters:

Name Type Description Default
a list[float]

first sample (n >= 2)

required
b list[float]

second sample (n >= 2)

required
equal_var bool

pooled-variance test if True, Welch's test otherwise

True

Returns:

Type Description
TestResult

TestResult with t statistic, degrees of freedom, two-tailed p-value

Source code in src\cds\stats\hypothesis_tests.py
def two_sample_ttest(
    a: list[float],
    b: list[float],
    equal_var: bool = True,
) -> TestResult:
    """Two-sample t-test for equality of means.

    With ``equal_var=True`` uses the pooled-variance (Student) t-test; with
    ``equal_var=False`` uses Welch's t-test with the Welch-Satterthwaite
    degrees of freedom.

    References:
        - Student [Gosset] (1908), Biometrika 6(1), 1-25.
        - Welch, B. L. (1947). "The generalization of 'Student's' problem
          when several different population variances are involved."
          Biometrika, 34(1-2), 28-35.

    Args:
        a: first sample (n >= 2)
        b: second sample (n >= 2)
        equal_var: pooled-variance test if True, Welch's test otherwise

    Returns:
        TestResult with t statistic, degrees of freedom, two-tailed p-value

    Raises:
        ValueError: if either sample has fewer than two observations, or
            the standard error of the difference is zero.
    """
    na, nb = len(a), len(b)
    if na < 2 or nb < 2:
        raise ValueError("each sample needs at least 2 observations")
    va, vb = variance(a, ddof=1), variance(b, ddof=1)
    diff = mean(a) - mean(b)
    if equal_var:
        pooled_df = na + nb - 2
        sp2 = ((na - 1) * va + (nb - 1) * vb) / pooled_df
        se = math.sqrt(sp2 * (1.0 / na + 1.0 / nb))
    else:
        se = math.sqrt(va / na + vb / nb)
    # Reject zero variance BEFORE deriving Welch's degrees of freedom: with
    # two constant samples that formula is 0/0 and would surface as a
    # ZeroDivisionError instead of the documented ValueError.
    if se == 0.0:
        raise ValueError("zero variance; t-test undefined")
    if equal_var:
        df_eff = float(pooled_df)
    else:
        # Welch-Satterthwaite approximation of the effective df.
        num = (va / na + vb / nb) ** 2
        den = (va / na) ** 2 / (na - 1) + (vb / nb) ** 2 / (nb - 1)
        df_eff = num / den
    t = diff / se
    return TestResult(statistic=t, df=df_eff, p_value=t_sf(t, df_eff))

linear_regression

linear_regression(
    x: list[float], y: list[float]
) -> RegressionResult

Fit y = slope*x + intercept by ordinary least squares.

Returns:

Type Description
RegressionResult

RegressionResult with slope, intercept, and R^2.

Raises:

Type Description
ValueError

if x and y have different lengths, fewer than 2 points, or all x values are identical (zero variance).

Source code in src\cds\stats\regression.py
def linear_regression(x: list[float], y: list[float]) -> RegressionResult:
    """Fit the line y = slope*x + intercept by ordinary least squares.

    Args:
        x: predictor values
        y: response values, paired element-wise with ``x``

    Returns:
        RegressionResult with slope, intercept, and R^2. R^2 is reported as
        0.0 when the responses have no spread around their mean.

    Raises:
        ValueError: if `x` and `y` have different lengths, fewer than 2 points,
            or all x values are identical (zero variance).
    """
    n_points = len(x)
    if n_points < 2 or n_points != len(y):
        raise ValueError("need matching lists with at least 2 points")

    x_bar = mean(x)
    y_bar = mean(y)
    s_xy = sum((xi - x_bar) * (yi - y_bar) for xi, yi in zip(x, y))
    s_xx = sum((xi - x_bar) ** 2 for xi in x)
    if s_xx == 0:
        raise ValueError("all x values are identical")

    slope = s_xy / s_xx
    intercept = y_bar - slope * x_bar

    # Coefficient of determination from residual and total sums of squares.
    residual_ss = sum((yi - (slope * xi + intercept)) ** 2 for xi, yi in zip(x, y))
    total_ss = sum((yi - y_bar) ** 2 for yi in y)
    if total_ss > 0:
        r_sq = 1 - residual_ss / total_ss
    else:
        r_sq = 0.0

    return RegressionResult(slope=slope, intercept=intercept, r_squared=r_sq)

Probability

Continuous PDFs (Gaussian, uniform, exponential) and discrete PMFs (binomial, Poisson) with reproducible sampling.

cds.probability

Probability distributions and sampling.

Functions:

binomial_pmf

binomial_pmf(k: int, n: int, p: float) -> float

Binomial distribution probability mass function.

P(X=k) = C(n,k) * p^k * (1-p)^(n-k)

Parameters:

Name Type Description Default
k int

number of successes

required
n int

number of trials

required
p float

probability of success per trial

required

Raises:

Type Description
ValueError

if p is outside [0, 1]

Source code in src\cds\probability\distributions.py
def binomial_pmf(k: int, n: int, p: float) -> float:
    """Probability mass function of the binomial distribution.

    P(X=k) = C(n,k) * p^k * (1-p)^(n-k)

    Args:
        k: number of successes
        n: number of trials
        p: probability of success per trial

    Returns:
        The probability of exactly ``k`` successes; 0.0 when ``k`` lies
        outside ``0..n``.

    Raises:
        ValueError: if ``p`` is outside [0, 1]
    """
    p_is_probability = 0 <= p <= 1
    if not p_is_probability:
        raise ValueError("p must be in [0, 1]")
    if k > n or k < 0:
        return 0.0
    ways = math.comb(n, k)
    success_part = p**k
    failure_part = (1 - p) ** (n - k)
    return ways * success_part * failure_part

exponential_pdf

exponential_pdf(x: float, lam: float = 1.0) -> float

Exponential distribution PDF.

Parameters:

Name Type Description Default
x float

point to evaluate (the density is 0.0 for x < 0)

required
lam float

rate parameter (lambda)

1.0

Raises:

Type Description
ValueError

if lam <= 0

Source code in src\cds\probability\distributions.py
def exponential_pdf(x: float, lam: float = 1.0) -> float:
    """Probability density function of the exponential distribution.

    Args:
        x: point to evaluate
        lam: rate parameter (lambda)

    Returns:
        ``lam * exp(-lam * x)`` for ``x >= 0`` and 0.0 for negative ``x``.

    Raises:
        ValueError: if lam <= 0
    """
    if lam <= 0:
        raise ValueError("lambda must be positive")
    density = 0.0 if x < 0 else lam * math.exp(-lam * x)
    return density

gaussian_pdf

gaussian_pdf(
    x: float, mu: float = 0.0, sigma: float = 1.0
) -> float

Gaussian (normal) probability density function.

Parameters:

Name Type Description Default
x float

point to evaluate

required
mu float

mean

0.0
sigma float

standard deviation

1.0

Raises:

Type Description
ValueError

if sigma <= 0

Source code in src\cds\probability\distributions.py
def gaussian_pdf(x: float, mu: float = 0.0, sigma: float = 1.0) -> float:
    """Probability density function of the Gaussian (normal) distribution.

    Args:
        x: point to evaluate
        mu: mean
        sigma: standard deviation

    Returns:
        The normal density at ``x``.

    Raises:
        ValueError: if sigma <= 0
    """
    if sigma <= 0:
        raise ValueError("sigma must be positive")
    z_score = (x - mu) / sigma
    normalizer = sigma * math.sqrt(2 * math.pi)
    return (1 / normalizer) * math.exp(-0.5 * z_score**2)

poisson_pmf

poisson_pmf(k: int, lam: float) -> float

Poisson distribution probability mass function.

P(X=k) = (lambda^k * e^-lambda) / k!

Parameters:

Name Type Description Default
k int

number of events

required
lam float

expected rate (lambda)

required

Raises:

Type Description
ValueError

if lam < 0 (a negative k is not an error; it returns 0.0)

Source code in src\cds\probability\distributions.py
def poisson_pmf(k: int, lam: float) -> float:
    """Poisson distribution probability mass function.

    P(X=k) = (lambda^k * e^-lambda) / k!

    Args:
        k: number of events
        lam: expected rate (lambda)

    Returns:
        The probability of exactly ``k`` events; 0.0 for negative ``k``.

    Raises:
        ValueError: if lam < 0
    """
    if lam < 0:
        raise ValueError("lambda must be non-negative")
    if k < 0:
        return 0.0
    if lam == 0:
        # Degenerate distribution: all mass sits on k == 0. Handled up front
        # so the log-space fallback below never has to evaluate log(0).
        return 1.0 if k == 0 else 0.0
    try:
        return (lam**k) * math.exp(-lam) / math.factorial(k)
    except OverflowError:
        # lam**k or k! no longer fits in a float even though the probability
        # itself is representable; evaluate the same formula in log space.
        log_pmf = k * math.log(lam) - lam - math.lgamma(k + 1)
        return math.exp(log_pmf)

uniform_pdf

uniform_pdf(
    x: float, a: float = 0.0, b: float = 1.0
) -> float

Uniform distribution PDF on [a, b].

Parameters:

Name Type Description Default
x float

point to evaluate

required
a float

lower bound

0.0
b float

upper bound

1.0

Raises:

Type Description
ValueError

if a >= b

Source code in src\cds\probability\distributions.py
def uniform_pdf(
    x: float,
    a: float = 0.0,
    b: float = 1.0,
) -> float:
    """Probability density function of the uniform distribution on [a, b].

    Args:
        x: point to evaluate
        a: lower bound
        b: upper bound

    Returns:
        ``1 / (b - a)`` when ``a <= x <= b`` and 0.0 otherwise.

    Raises:
        ValueError: if a >= b
    """
    if a >= b:
        raise ValueError("a must be less than b")
    inside_support = a <= x <= b
    return 1 / (b - a) if inside_support else 0.0

uniform_sample

uniform_sample(
    a: float, b: float, n: int, seed: int | None = None
) -> list[float]

Generate n uniform random samples from [a, b].

Parameters:

Name Type Description Default
a float

lower bound

required
b float

upper bound

required
n int

number of samples

required
seed int | None

optional random seed

None
Source code in src\cds\probability\distributions.py
def uniform_sample(
    a: float,
    b: float,
    n: int,
    seed: int | None = None,
) -> list[float]:
    """Generate n uniform random samples from [a, b].

    Args:
        a: lower bound
        b: upper bound
        n: number of samples
        seed: optional random seed
    """
    rng = random.Random(seed)
    return [rng.uniform(a, b) for _ in range(n)]

Mathematical Utilities

Calculus (derivative, integral, gradient) and a compact linear-algebra toolkit (PLU, QR, Cholesky, eigenvalues via power iteration).

cds.math_utils

Math helper functions.

Functions:

derivative

derivative(
    f: Callable[[float], float],
    x: float,
    h_base: float = 1e-07,
) -> float

Central difference approximation with adaptive step size.

Source code in src\cds\math_utils\calculus.py
def derivative(f: Callable[[float], float], x: float, h_base: float = 1e-7) -> float:
    """Approximate f'(x) with a central difference.

    The step is ``h_base`` scaled by ``max(1, |x|)`` so that it grows with
    the magnitude of ``x``.
    """
    step = h_base * max(1.0, abs(x))
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)

gradient

gradient(
    f: Callable[..., float],
    point: list[float],
    h_base: float = 1e-07,
) -> list[float]

Numerical gradient for multivariable functions with adaptive scaling.

Source code in src\cds\math_utils\calculus.py
def gradient(f: Callable[..., float], point: list[float], h_base: float = 1e-7) -> list[float]:
    """Approximate the gradient of ``f`` at ``point`` by central differences.

    ``f`` is called with the coordinates unpacked as positional arguments.
    Each coordinate is perturbed on a copy of ``point``, using a step of
    ``h_base`` scaled by ``max(1, |coordinate|)``.
    """
    partials = []
    for axis, coord in enumerate(point):
        # Scale the step with the coordinate's magnitude to keep precision.
        step = h_base * max(1.0, abs(coord))

        raised = point.copy()
        raised[axis] = coord + step
        f_raised = f(*raised)

        lowered = point.copy()
        lowered[axis] = coord - step
        f_lowered = f(*lowered)

        partials.append((f_raised - f_lowered) / (2 * step))
    return partials

integral

integral(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 1000,
) -> float

Simpson's rule for numerical integration.

Source code in src\cds\math_utils\calculus.py
def integral(f: Callable[[float], float], a: float, b: float, n: int = 1000) -> float:
    """Integrate ``f`` over [a, b] with the composite Simpson's rule.

    Args:
        f: integrand
        a: lower limit of integration
        b: upper limit of integration
        n: number of subintervals; an odd value is rounded up to the next
            even number, as Simpson's rule requires.

    Returns:
        The Simpson approximation of the integral.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    # Without this guard n == 0 divides by zero below and a negative n
    # silently returns a value that is not an integral approximation.
    if n < 1:
        raise ValueError("n must be at least 1")
    if n % 2 != 0:
        n += 1
    h = (b - a) / n
    s = f(a) + f(b)
    for i in range(1, n):
        # Simpson weights: 4 on odd interior nodes, 2 on even ones.
        coeff = 4 if i % 2 != 0 else 2
        s += coeff * f(a + i * h)
    return s * h / 3

cholesky

cholesky(m: Matrix) -> Matrix

Cholesky decomposition of a symmetric positive-definite matrix.

Returns the lower-triangular L such that A = L L^T. Roughly twice as efficient as LU for SPD systems and numerically stable.

Reference

Benoît, C. (1924). "Note sur une méthode de résolution des équations normales... (Procédé du Commandant Cholesky)." Bulletin Géodésique, 2, 67-77. See also Golub & Van Loan, §4.2.

Parameters:

Name Type Description Default
m Matrix

symmetric positive-definite matrix

required

Returns:

Type Description
Matrix

lower-triangular matrix L with A = L L^T

Raises:

Type Description
ValueError

if the matrix is not positive definite

Source code in src\cds\math_utils\linalg.py
def cholesky(m: Matrix) -> Matrix:
    """Cholesky factorization of a symmetric positive-definite matrix.

    Builds the lower-triangular factor L with A = L L^T one row at a time,
    reading only the lower triangle of ``m``.

    Reference:
        Benoît, C. (1924). "Note sur une méthode de résolution des équations
        normales... (Procédé du Commandant Cholesky)." Bulletin Géodésique,
        2, 67-77. See also Golub & Van Loan, §4.2.

    Args:
        m: symmetric positive-definite matrix

    Returns:
        lower-triangular matrix L with A = L L^T

    Raises:
        ValueError: if a diagonal pivot is not strictly positive, i.e. the
            matrix is not positive definite
    """
    size = len(m)
    lower = [[0.0] * size for _ in range(size)]
    for row in range(size):
        # Entries left of the diagonal depend on the rows already finished.
        for col in range(row):
            overlap = sum(p * q for p, q in zip(lower[row][:col], lower[col][:col]))
            lower[row][col] = (m[row][col] - overlap) / lower[col][col]

        # The diagonal entry closes the row and must have a positive radicand.
        row_norm_sq = sum(p * q for p, q in zip(lower[row][:row], lower[row][:row]))
        pivot = m[row][row] - row_norm_sq
        if pivot <= 0.0:
            raise ValueError(
                "matrix is not positive definite — Cholesky decomposition requires symmetric positive definite input; check that the matrix is symmetric and all eigenvalues > 0"
            )
        lower[row][row] = math.sqrt(pivot)
    return lower

determinant

determinant(m: Matrix) -> float

Compute matrix determinant using PLU decomposition (O(N^3)).

Avoids the O(N!) complexity of minor expansion.

Source code in src\cds\math_utils\linalg.py
def determinant(m: Matrix) -> float:
    """Return the determinant of ``m`` via PLU decomposition (O(N^3)).

    This avoids the O(N!) cost of cofactor expansion. An empty matrix has
    determinant 1.0, a 1x1 matrix returns its single entry, and a matrix
    that ``lu_decomposition`` rejects with ``ValueError`` yields 0.0.
    """
    size = len(m)
    if size == 0:
        return 1.0
    if size == 1:
        return m[0][0]

    try:
        perm, _, upper = lu_decomposition(m)
    except ValueError:
        # The decomposition signals a singular matrix this way.
        return 0.0

    # L has a unit diagonal, so det(L U) is the product of U's diagonal.
    diag_product = 1.0
    for idx in range(size):
        diag_product *= upper[idx][idx]

    # det(P) = (-1)^s where s, the number of transpositions, equals
    # size minus the number of cycles of the permutation.
    targets = [row.index(1.0) for row in perm]
    seen = [False] * size
    cycles = 0
    for start in range(size):
        if seen[start]:
            continue
        cycles += 1
        node = start
        while not seen[node]:
            seen[node] = True
            node = targets[node]

    sign = -1 if (size - cycles) % 2 else 1
    return float(diag_product * sign)

dot

dot(a: Vector, b: Vector) -> float

Inner product of two equal-length vectors.

Raises:

Type Description
ValueError

if a and b have different lengths.

Source code in src\cds\math_utils\linalg.py
def dot(a: Vector, b: Vector) -> float:
    """Return the inner product of two vectors of equal length.

    Raises:
        ValueError: if `a` and `b` have different lengths.
    """
    if len(a) == len(b):
        return sum(left * right for left, right in zip(a, b))
    raise ValueError(f"vectors a and b must have the same length (got {len(a)} and {len(b)})")

gram_schmidt

gram_schmidt(vectors: list[Vector]) -> list[Vector]

Gram-Schmidt orthonormalization.

Produces an orthonormal set from the input vectors. [Trefethen & Bau, Lecture 8]

Parameters:

Name Type Description Default
vectors list[Vector]

list of linearly independent vectors

required

Returns:

Type Description
list[Vector]

orthonormal basis vectors

Source code in src\cds\math_utils\linalg.py
def gram_schmidt(vectors: list[Vector]) -> list[Vector]:
    """Orthonormalize ``vectors`` with the Gram-Schmidt process.

    Each vector has its components along the already accepted basis
    vectors removed one after another and is then normalized. A vector
    whose remainder has norm below ``NEAR_ZERO`` is skipped, so the result
    may hold fewer vectors than the input.
    [Trefethen & Bau, Lecture 8]

    Args:
        vectors: list of linearly independent vectors

    Returns:
        orthonormal basis vectors
    """
    basis: list[Vector] = []
    for vec in vectors:
        residual = vec[:]
        for unit in basis:
            dim = len(residual)
            coeff = sum(residual[i] * unit[i] for i in range(dim))
            residual = [residual[i] - coeff * unit[i] for i in range(dim)]
        length = math.sqrt(sum(comp * comp for comp in residual))
        if length < NEAR_ZERO:
            continue
        basis.append([comp / length for comp in residual])
    return basis

identity

identity(n: int) -> Matrix

Create n×n identity matrix.

Source code in src\cds\math_utils\linalg.py
def identity(n: int) -> Matrix:
    """Build the n×n identity matrix as a list of float rows."""
    rows = []
    for diag in range(n):
        row = [0.0] * n
        row[diag] = 1.0
        rows.append(row)
    return rows

lu_decomposition

lu_decomposition(
    m: Matrix,
) -> tuple[Matrix, Matrix, Matrix]

LU decomposition with partial pivoting (PA = LU).

A = P_inv * L * U where P_inv is a permutation matrix, L is lower triangular (ones on diagonal) and U is upper triangular.

Returns:

Type Description
tuple[Matrix, Matrix, Matrix]

P, L, U matrices.

Raises:

Type Description
ValueError

if matrix is singular

Source code in src\cds\math_utils\linalg.py
def lu_decomposition(m: Matrix) -> tuple[Matrix, Matrix, Matrix]:
    """Factor a square matrix as ``P A = L U`` using partial pivoting.

    Equivalently ``A = P_inv * L * U``, where P is a row-permutation matrix,
    L is lower triangular with ones on its diagonal and U is upper
    triangular. The input matrix is copied, not modified.

    Returns:
        P, L, U matrices.

    Raises:
        ValueError: if matrix is singular (largest available pivot in some
            column is below ``NEAR_ZERO``)
    """
    size = len(m)
    perm = identity(size)
    lower = [[0.0] * size for _ in range(size)]
    upper = [row[:] for row in m]

    for col in range(size):
        # Partial pivoting: choose the row at or below the diagonal whose
        # entry in this column has the largest magnitude. ``max`` keeps the
        # first maximal candidate, so ties go to the lowest row index.
        pivot_row = max(range(col, size), key=lambda r: abs(upper[r][col]))

        if abs(upper[pivot_row][col]) < NEAR_ZERO:
            raise ValueError(
                f"zero pivot at column {col} — the input matrix is singular or nearly singular; try regularizing or checking your data"
            )

        if pivot_row != col:
            # Swap whole rows in all three matrices so the multipliers already
            # stored in ``lower`` stay attached to their rows.
            upper[col], upper[pivot_row] = upper[pivot_row], upper[col]
            perm[col], perm[pivot_row] = perm[pivot_row], perm[col]
            lower[col], lower[pivot_row] = lower[pivot_row], lower[col]

        lower[col][col] = 1.0
        pivot_line = upper[col]
        for r in range(col + 1, size):
            target = upper[r]
            multiplier = target[col] / pivot_line[col]
            lower[r][col] = multiplier
            for j in range(col, size):
                target[j] -= multiplier * pivot_line[j]

    return perm, lower, upper

mat_mul

mat_mul(a: Matrix, b: Matrix) -> Matrix

Matrix multiplication A * B.

Pre-transposes B so that columns are read as contiguous rows, which keeps memory access row-major and lets the inner loops run over Python's C-implemented zip/sum rather than indexed lookups.

Source code in src\cds\math_utils\linalg.py
def mat_mul(a: Matrix, b: Matrix) -> Matrix:
    """Return the matrix product ``A * B`` of two row-major matrices.

    ``b`` is transposed once so that each of its columns is available as a
    tuple that can be zipped directly against a row of ``a``; every output
    entry is then a ``sum`` over one such pairing.

    Raises:
        ValueError: if the column count of ``a`` (read from its first row)
            differs from the row count of ``b``.
    """
    n_rows_a, n_cols_a = len(a), len(a[0])
    n_rows_b, n_cols_b = len(b), len(b[0])
    if n_cols_a != n_rows_b:
        raise ValueError(f"incompatible shapes: {n_rows_a}x{n_cols_a} and {n_rows_b}x{n_cols_b}")

    # Columns of B, materialized once and reused for every row of A.
    columns = list(zip(*b))

    def _row_times_columns(row):
        return [sum(x * y for x, y in zip(row, column)) for column in columns]

    return [_row_times_columns(row) for row in a]

matrix_inverse

matrix_inverse(m: Matrix) -> Matrix

Compute matrix inverse via PLU decomposition.

Reuses a single P, L, U factorization and solves A * x_i = e_i for each column of the identity matrix to build the inverse.

Raises:

Type Description
ValueError

if matrix is singular

Source code in src\cds\math_utils\linalg.py
def matrix_inverse(m: Matrix) -> Matrix:
    """Invert a square matrix through a single PLU factorization.

    The matrix is factored once; for every standard basis vector ``e_i`` the
    system ``A * x_i = e_i`` is solved by a permutation, a forward
    substitution with L and a backward substitution with U. The solutions
    are the columns of the inverse.

    Raises:
        ValueError: if matrix is singular
    """
    size = len(m)
    perm, lower, upper = lu_decomposition(m)
    solved_columns = []

    for col in range(size):
        # Standard basis vector e_col.
        basis = [0.0] * size
        basis[col] = 1.0

        # Right-hand side after row permutation: P e_col.
        permuted = [sum(perm[i][j] * basis[j] for j in range(size)) for i in range(size)]

        # Forward substitution, L y = P e_col (L has a unit diagonal).
        fwd = []
        for i in range(size):
            fwd.append(permuted[i] - sum(lower[i][j] * fwd[j] for j in range(i)))

        # Backward substitution, U x = y, from the last row upwards.
        sol = [0.0] * size
        for i in reversed(range(size)):
            diag = upper[i][i]
            if abs(diag) < NEAR_ZERO:
                raise ValueError(
                    f"singular matrix — LU backward substitution failed at row {i} (during inverse computation); matrix has no unique inverse"
                )
            tail = sum(upper[i][j] * sol[j] for j in range(i + 1, size))
            sol[i] = (fwd[i] - tail) / diag

        solved_columns.append(sol)

    # The solutions were collected column by column; lay them out as rows.
    return [[solved_columns[c][r] for c in range(size)] for r in range(size)]

power_iteration

power_iteration(
    m: Matrix,
    max_iter: int = 1000,
    tol: float = NEWTON_TOLERANCE,
) -> tuple[float, Vector]

Find dominant eigenvalue and eigenvector using power iteration.

Von Mises iteration (1929). Optimized with scaling to prevent overflow.

Parameters:

Name Type Description Default
m Matrix

square matrix

required
max_iter int

iteration limit

1000
tol float

convergence tolerance

NEWTON_TOLERANCE

Returns:

Type Description
tuple[float, Vector]

(eigenvalue, eigenvector) tuple

Source code in src\cds\math_utils\linalg.py
def power_iteration(
    m: Matrix,
    max_iter: int = 1000,
    tol: float = NEWTON_TOLERANCE,
) -> tuple[float, Vector]:
    """Estimate the dominant eigenvalue and eigenvector by power iteration.

    Von Mises iteration (1929). Starting from the all-ones vector, the
    matrix is applied repeatedly and the result rescaled each time so the
    iterate cannot overflow. The eigenvalue estimate is the Rayleigh
    quotient of the rescaled iterate; iteration stops once two successive
    estimates differ by less than ``tol``, when the image of the iterate
    has a norm below ``NEAR_ZERO``, or after ``max_iter`` rounds.

    Args:
        m: square matrix
        max_iter: iteration limit
        tol: convergence tolerance

    Returns:
        (eigenvalue, eigenvector) tuple
    """
    size = len(m)

    def _apply(vec):
        # Matrix-vector product m * vec over the leading size x size block.
        return [sum(m[r][c] * vec[c] for c in range(size)) for r in range(size)]

    def _max_abs(vec):
        return max(abs(entry) for entry in vec)

    # Start vector, scaled by its largest magnitude.
    current = [1.0] * size
    start_scale = _max_abs(current)
    current = [entry / start_scale for entry in current]

    estimate = 0.0
    for _ in range(max_iter):
        image = _apply(current)

        # Rescale by the Euclidean norm. If the sum of squares has already
        # overflowed to inf, fall back to the largest absolute entry, which
        # is computed without squaring.
        energy = sum(entry * entry for entry in image)
        if math.isinf(energy):
            length = _max_abs(image)
        else:
            try:
                length = math.sqrt(energy)
            except OverflowError:  # pragma: no cover - defensive for non-CPython libm
                # Same fallback as the inf case above.
                length = _max_abs(image)

        if length < NEAR_ZERO:
            break

        candidate = [entry / length for entry in image]

        # Rayleigh quotient (v^T A v) / (v^T v); the explicit denominator
        # makes it independent of which scaling was used above.
        projected = _apply(candidate)
        numerator = sum(candidate[r] * projected[r] for r in range(size))
        denominator = sum(entry * entry for entry in candidate)
        if denominator > NEAR_ZERO:
            refined = numerator / denominator
        else:
            refined = 0.0

        if abs(refined - estimate) < tol:
            return refined, candidate
        estimate = refined
        current = candidate

    return estimate, current

qr_decomposition

qr_decomposition(m: Matrix) -> tuple[Matrix, Matrix]

QR decomposition via Householder reflections.

Factorizes A (n×n) into an orthogonal matrix Q and upper-triangular R such that A = Q R. Householder triangularization is backward stable and preferred over classical Gram-Schmidt for numerical work.

Reference

Householder, A. S. (1958). "Unitary triangularization of a nonsymmetric matrix." Journal of the ACM, 5(4), 339-342. See also Golub & Van Loan, §5.2; Trefethen & Bau, Lecture 10.

Parameters:

Name Type Description Default
m Matrix

square matrix

required

Returns:

Type Description
tuple[Matrix, Matrix]

(Q, R) with Q orthogonal and R upper triangular

Source code in src\cds\math_utils\linalg.py
def qr_decomposition(m: Matrix) -> tuple[Matrix, Matrix]:
    """Compute a QR factorization with Householder reflections.

    Factorizes A (n×n) into an orthogonal matrix Q and upper-triangular R
    such that A = Q R. For each column a reflector ``H = I - 2 v v^T`` is
    built that zeroes the entries below the diagonal; it is applied to R
    from the left and accumulated into Q from the right. Householder
    triangularization is backward stable and preferred over classical
    Gram-Schmidt for numerical work.

    Reference:
        Householder, A. S. (1958). "Unitary triangularization of a
        nonsymmetric matrix." Journal of the ACM, 5(4), 339-342.
        See also Golub & Van Loan, §5.2; Trefethen & Bau, Lecture 10.

    Args:
        m: square matrix

    Returns:
        (Q, R) with Q orthogonal and R upper triangular
    """
    size = len(m)
    upper = [row[:] for row in m]
    ortho = identity(size)

    for col in range(size - 1):
        # Height of the active block: rows col..size-1.
        span = size - col

        # Part of the current column on and below the diagonal.
        column = [upper[col + off][col] for off in range(span)]
        column_norm = math.sqrt(sum(c * c for c in column))
        if column_norm < NEAR_ZERO:
            continue

        # Target value for the diagonal entry, with sign opposite to the
        # leading component so the subtraction below adds magnitudes.
        if column[0] >= 0:
            alpha = -column_norm
        else:
            alpha = column_norm
        reflector = column[:]
        reflector[0] -= alpha
        reflector_norm = math.sqrt(sum(c * c for c in reflector))
        if reflector_norm < NEAR_ZERO:  # pragma: no cover - unreachable: norm_x>0 implies norm_v>0
            continue
        reflector = [c / reflector_norm for c in reflector]

        # R <- H R, touching only rows col..size-1.
        for j in range(size):
            overlap = sum(reflector[off] * upper[col + off][j] for off in range(span))
            for off in range(span):
                upper[col + off][j] -= 2.0 * reflector[off] * overlap

        # Q <- Q H, touching only columns col..size-1.
        for i in range(size):
            overlap = sum(ortho[i][col + off] * reflector[off] for off in range(span))
            for off in range(span):
                ortho[i][col + off] -= 2.0 * overlap * reflector[off]

    return ortho, upper

solve_linear

solve_linear(A: Matrix, b: Vector) -> Vector

Solve Ax = b using PLU decomposition.

Solves LUx = Pb.

Raises:

Type Description
ValueError

if matrix is singular

Source code in src\cds\math_utils\linalg.py
def solve_linear(A: Matrix, b: Vector) -> Vector:
    """Solve the linear system ``A x = b`` through a PLU factorization.

    With ``P A = L U`` the system becomes ``L U x = P b``: the right-hand
    side is permuted, ``L y = P b`` is solved by forward substitution and
    ``U x = y`` by backward substitution.

    Raises:
        ValueError: if matrix is singular
    """
    size = len(A)
    perm, lower, upper = lu_decomposition(A)

    # Permuted right-hand side P b.
    rhs = [sum(perm[i][j] * b[j] for j in range(size)) for i in range(size)]

    # Forward substitution (L has a unit diagonal, so no division is needed).
    fwd = []
    for i in range(size):
        fwd.append(rhs[i] - sum(lower[i][j] * fwd[j] for j in range(i)))

    # Backward substitution from the last row upwards.
    solution = [0.0] * size
    for i in reversed(range(size)):
        diag = upper[i][i]
        if abs(diag) < NEAR_ZERO:
            raise ValueError(
                f"singular matrix — LU backward substitution failed at row {i}; matrix has no unique inverse"
            )
        tail = sum(upper[i][j] * solution[j] for j in range(i + 1, size))
        solution[i] = (fwd[i] - tail) / diag

    return solution

transpose

transpose(m: Matrix) -> Matrix

Return the transpose of a 2-D matrix (rows <-> columns).

Source code in src\cds\math_utils\linalg.py
def transpose(m: Matrix) -> Matrix:
    """Swap rows and columns of a 2-D matrix.

    An empty matrix yields an empty list. The number of output rows is taken
    from the length of the first input row.
    """
    if m:
        width = len(m[0])
        return [[row[col] for row in m] for col in range(width)]
    return []

Numerical Integration

Deterministic quadrature rules: trapezoid, Simpson 1/3 and 3/8, Gauss–Legendre, Romberg, and adaptive Simpson.

cds.numerical_integration

Deterministic numerical quadrature — Newton-Cotes, Romberg, Gauss-Legendre.

Complements `cds.montecarlo` (stochastic integration) and `cds.diffeq` (ODE integration) with classical deterministic integration rules.

Classes

QuadratureResult dataclass

Result of an adaptive numerical integration.

Attributes:

Name Type Description
value float

computed approximation of the integral

method str

name of the quadrature rule used

n_eval int

number of integrand evaluations performed

error_estimate float

internal estimate of the truncation error (nan if unavailable for the chosen rule)

Source code in src\cds\numerical_integration\quadrature.py
@dataclass
class QuadratureResult:
    """Outcome of a quadrature routine that reports diagnostics.

    Returned by ``romberg`` and ``adaptive_simpson``.

    Attributes:
        value: computed approximation of the integral
        method: name of the quadrature rule used (e.g. ``"romberg"``,
            ``"adaptive_simpson"``)
        n_eval: number of integrand evaluations performed
        error_estimate: internal estimate of the truncation error (``nan`` if
            unavailable for the chosen rule, as with ``adaptive_simpson``)
    """

    value: float
    method: str
    n_eval: int
    error_estimate: float

Functions:

adaptive_simpson

adaptive_simpson(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-10,
    max_depth: int = 50,
) -> QuadratureResult

Adaptive recursive Simpson quadrature.

Recursively bisects subintervals where the local error estimate (the difference between Simpson over the whole interval and over its halves) exceeds tol, concentrating work where the integrand is hard. [Lyness 1969]

Parameters:

Name Type Description Default
f Callable[[float], float]

integrand

required
a float

lower limit

required
b float

upper limit

required
tol float

desired absolute tolerance

1e-10
max_depth int

maximum recursion depth to bound cost on hard integrands

50

Returns:

Type Description
QuadratureResult

A `QuadratureResult` carrying the number of integrand evaluations.

Raises:

Type Description
ValueError

if max_depth < 1.

RuntimeError

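if the computed integral is NaN (e.g. a divergent or undefined integrand). Exhausting max_depth does not raise: the current estimate for that subinterval is accepted as is.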

Source code in src\cds\numerical_integration\quadrature.py
def adaptive_simpson(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-10,
    max_depth: int = 50,
) -> QuadratureResult:
    """Integrate ``f`` over ``[a, b]`` with adaptive recursive Simpson quadrature.

    An interval is accepted when Simpson's rule over the whole interval and
    over its two halves agree to within ``15 * eps``; otherwise both halves
    are refined with half the tolerance each. Accepted intervals contribute
    the two-half estimate plus the ``diff / 15`` correction. [Lyness 1969]

    Args:
        f: integrand
        a: lower limit
        b: upper limit
        tol: desired absolute tolerance
        max_depth: maximum recursion depth to bound cost on hard integrands;
            once it is used up the current estimate for that subinterval is
            accepted as is

    Returns:
        :class:`QuadratureResult` carrying the number of integrand evaluations.
        ``error_estimate`` is always ``nan`` for this rule.

    Raises:
        ValueError: if ``max_depth < 1``.
        RuntimeError: if the computed integral is NaN.
    """
    if max_depth < 1:
        raise ValueError("max_depth must be >= 1")

    n_calls = 0

    def _sample(x: float) -> float:
        # Every integrand evaluation goes through here so it gets counted.
        nonlocal n_calls
        n_calls += 1
        return f(x)

    def _rule(lo: float, hi: float, f_lo: float, f_mid: float, f_hi: float) -> float:
        # Three-point Simpson estimate on [lo, hi].
        return (hi - lo) / 6.0 * (f_lo + 4.0 * f_mid + f_hi)

    def _refine(
        lo: float,
        hi: float,
        f_lo: float,
        f_hi: float,
        f_mid: float,
        coarse: float,
        budget: int,
        eps: float,
    ) -> float:
        mid = 0.5 * (lo + hi)
        f_left_mid = _sample(0.5 * (lo + mid))
        f_right_mid = _sample(0.5 * (mid + hi))
        left_est = _rule(lo, mid, f_lo, f_left_mid, f_mid)
        right_est = _rule(mid, hi, f_mid, f_right_mid, f_hi)
        delta = left_est + right_est - coarse

        # Stop when (a) delta is NaN: the integrand produced NaN here, and
        # recursing further would only branch until the depth budget runs out,
        # so let the NaN reach the guard at the bottom instead; (b) the depth
        # budget is spent; or (c) the Lyness error estimate meets the local
        # tolerance.
        accept = math.isnan(delta) or budget <= 0 or abs(delta) <= 15.0 * eps
        if accept:
            return left_est + right_est + delta / 15.0

        left_val = _refine(lo, mid, f_lo, f_mid, f_left_mid, left_est, budget - 1, 0.5 * eps)
        right_val = _refine(mid, hi, f_mid, f_hi, f_right_mid, right_est, budget - 1, 0.5 * eps)
        return left_val + right_val

    f_a = _sample(a)
    f_b = _sample(b)
    f_center = _sample(0.5 * (a + b))
    coarse = _rule(a, b, f_a, f_center, f_b)

    integral = _refine(a, b, f_a, f_b, f_center, coarse, max_depth, tol)
    if math.isnan(integral):
        raise RuntimeError("adaptive_simpson produced NaN (likely divergent integrand)")

    return QuadratureResult(
        value=integral,
        method="adaptive_simpson",
        n_eval=n_calls,
        error_estimate=math.nan,
    )

gaussian_quadrature

gaussian_quadrature(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 5,
) -> float

Gauss-Legendre quadrature with n nodes.

Exact for polynomials of degree up to 2n - 1. The [-1, 1] rule is affinely mapped onto [a, b]. [Gauss 1814]

Parameters:

Name Type Description Default
f Callable[[float], float]

integrand

required
a float

lower limit

required
b float

upper limit (may be less than a)

required
n int

number of Gauss-Legendre nodes (>= 1)

5

Returns:

Type Description
float

Approximation of the integral.

Raises:

Type Description
ValueError

if n < 1.

Source code in src\cds\numerical_integration\quadrature.py
def gaussian_quadrature(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 5,
) -> float:
    """Integrate ``f`` over ``[a, b]`` with an ``n``-node Gauss-Legendre rule.

    Exact for polynomials of degree up to ``2n - 1``. The reference nodes on
    ``[-1, 1]`` are mapped affinely onto ``[a, b]`` and the weighted sum is
    scaled by the half-width of the interval. [Gauss 1814]

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of Gauss-Legendre nodes (``>= 1``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n < 1``.
    """
    if n < 1:
        raise ValueError("n must be >= 1")

    center = 0.5 * (a + b)
    half_width = 0.5 * (b - a)

    weighted_sum = 0.0
    for ref_node, ref_weight in _gauss_legendre_nodes(n):
        mapped = half_width * ref_node + center
        weighted_sum += ref_weight * f(mapped)
    return half_width * weighted_sum

romberg

romberg(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-10,
    max_iter: int = 20,
) -> QuadratureResult

Romberg integration via Richardson extrapolation on the trapezoidal rule.

Builds a triangular table where column k is O(h^{2k}) accurate. Halting is driven by the relative/absolute change in the extrapolated diagonal. [Romberg 1955]

Parameters:

Name Type Description Default
f Callable[[float], float]

integrand

required
a float

lower limit

required
b float

upper limit

required
tol float

convergence tolerance on successive diagonal estimates

1e-10
max_iter int

maximum number of extrapolation levels (each adds one row)

20

Returns:

Type Description
QuadratureResult

A `QuadratureResult` with an internal error estimate.

Raises:

Type Description
ValueError

if max_iter < 1.

Source code in src\cds\numerical_integration\quadrature.py
def romberg(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-10,
    max_iter: int = 20,
) -> QuadratureResult:
    """Romberg integration via Richardson extrapolation on the trapezoidal rule.

    Builds a triangular table where column ``k`` is ``O(h^{2k})`` accurate.
    Halting is driven by the relative/absolute change in the extrapolated
    diagonal. [Romberg 1955]

    Args:
        f: integrand
        a: lower limit
        b: upper limit
        tol: convergence tolerance on successive diagonal estimates; the
            iteration stops once their difference is at most
            ``tol * max(1, |estimate|)``
        max_iter: maximum number of extrapolation levels (each adds one row)

    Returns:
        :class:`QuadratureResult` with an internal error estimate. The
        estimate is the change between the last two diagonal entries and is
        ``inf`` when ``max_iter == 1`` (only the single trapezoid is formed).
        ``n_eval`` counts every call to ``f``, including the two endpoint
        evaluations of the initial trapezoid.

    Raises:
        ValueError: if ``max_iter < 1``.
    """
    if max_iter < 1:
        raise ValueError("max_iter must be >= 1")

    width = b - a

    # Level 0 is a single trapezoid and samples BOTH endpoints, so two
    # evaluations are already spent; level k then adds 2^(k-1) new midpoints
    # (total after level m: 2 + 1 + 2 + ... + 2^(m-1) = 2^m + 1).
    n_eval = 2

    # Only the previous table row is needed to build the next one.
    prev_row: list[float] = [0.5 * width * (f(a) + f(b))]
    best = prev_row[0]
    error_est = math.inf

    for k in range(1, max_iter):
        # Trapezoid with 2^k panels, reusing the 2^(k-1)-panel value and
        # sampling only the new odd-indexed midpoints.
        panels = 1 << (k - 1)
        h = width / (1 << k)
        total = 0.0
        for i in range(1, panels + 1):
            total += f(a + (2 * i - 1) * h)
        n_eval += panels

        row = [0.5 * prev_row[0] + h * total]
        for j in range(1, k + 1):
            # Richardson extrapolation: (4^j * finer - coarser) / (4^j - 1)
            factor = 1 << (2 * j)  # 4^j
            row.append((factor * row[j - 1] - prev_row[j - 1]) / (factor - 1))

        error_est = abs(row[k] - best)
        best = row[k]
        prev_row = row
        if error_est <= tol * max(1.0, abs(best)):
            break

    return QuadratureResult(
        value=best,
        method="romberg",
        n_eval=n_eval,
        error_estimate=error_est,
    )

simpson

simpson(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 1000,
) -> float

Composite Simpson's 1/3 rule.

Closed Newton-Cotes of order 2; error O(h⁴). Requires an even number of panels so that every group of two panels spans one parabola. [Simpson 1743]

Parameters:

Name Type Description Default
f Callable[[float], float]

integrand

required
a float

lower limit

required
b float

upper limit (may be less than a)

required
n int

number of panels (must be even and >= 2)

1000

Returns:

Type Description
float

Approximation of the integral.

Raises:

Type Description
ValueError

if n is not an even number >= 2.

Source code in src\cds\numerical_integration\quadrature.py
def simpson(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 1000,
) -> float:
    """Integrate ``f`` over ``[a, b]`` with the composite Simpson 1/3 rule.

    Closed Newton-Cotes of order 2; error ``O(h⁴)``. Interior sample points
    carry weight 4 at odd indices and weight 2 at even indices, which is why
    the panel count has to be even. [Simpson 1743]

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of panels (must be even and ``>= 2``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n`` is not an even number ``>= 2``.
    """
    if n < 2 or n % 2:
        raise ValueError("n must be an even integer >= 2")

    step = (b - a) / n
    acc = f(a) + f(b)
    for k in range(1, n):
        coeff = 4.0 if k % 2 else 2.0
        acc += coeff * f(a + k * step)
    return (step / 3.0) * acc

simpson_38

simpson_38(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 999,
) -> float

Composite Simpson's 3/8 rule.

Closed Newton-Cotes of order 3 over groups of three panels; error O(h⁴). Useful as a companion to the 1/3 rule when n is a multiple of 3.

Parameters:

Name Type Description Default
f Callable[[float], float]

integrand

required
a float

lower limit

required
b float

upper limit (may be less than a)

required
n int

number of panels (must be a multiple of 3 and >= 3)

999

Returns:

Type Description
float

Approximation of the integral.

Raises:

Type Description
ValueError

if n is not a multiple of 3 >= 3.

Source code in src\cds\numerical_integration\quadrature.py
def simpson_38(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 999,
) -> float:
    """Integrate ``f`` over ``[a, b]`` with the composite Simpson 3/8 rule.

    Closed Newton-Cotes of order 3 over groups of three panels; error
    ``O(h⁴)``. Interior sample points shared by two groups (index divisible
    by 3) carry weight 2, all other interior points carry weight 3.

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of panels (must be a multiple of 3 and ``>= 3``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n`` is not a multiple of 3 ``>= 3``.
    """
    if n < 3 or n % 3:
        raise ValueError("n must be a multiple of 3 and >= 3")

    step = (b - a) / n
    acc = f(a) + f(b)
    for k in range(1, n):
        coeff = 2.0 if k % 3 == 0 else 3.0
        acc += coeff * f(a + k * step)
    return (3.0 * step / 8.0) * acc

trapezoid

trapezoid(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 1000,
) -> float

Composite trapezoidal rule.

Approximates ∫_a^b f(x) dx with n equal panels. Closed Newton-Cotes of order 1; error O(h²) for twice-differentiable integrands. [Cotes 1722]

Parameters:

Name Type Description Default
f Callable[[float], float]

integrand

required
a float

lower limit

required
b float

upper limit (may be less than a)

required
n int

number of panels (n >= 1)

1000

Returns:

Type Description
float

Approximation of the integral.

Raises:

Type Description
ValueError

if n < 1.

Source code in src\cds\numerical_integration\quadrature.py
def trapezoid(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 1000,
) -> float:
    """Integrate ``f`` over ``[a, b]`` with the composite trapezoidal rule.

    Approximates ``∫_a^b f(x) dx`` with ``n`` equal panels: the two endpoints
    carry weight 1/2 and every interior sample point weight 1. Closed
    Newton-Cotes of order 1; error ``O(h²)`` for twice-differentiable
    integrands. [Cotes 1722]

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of panels (``n >= 1``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n < 1``.
    """
    if n < 1:
        raise ValueError("n must be >= 1")

    step = (b - a) / n
    acc = 0.5 * (f(a) + f(b))
    interior_points = (a + k * step for k in range(1, n))
    for x in interior_points:
        acc += f(x)
    return step * acc

Differential Equations

Initial-value-problem solvers: Euler, midpoint, RK4, adaptive RK45, and a system-of-ODEs integrator.

cds.diffeq

Ordinary differential equation solvers — Euler, RK4, RK45, leapfrog.

Classes

ODESolution dataclass

Result of an ODE integration.

Source code in src\cds\diffeq\solvers.py
@dataclass
class ODESolution:
    """Result of an ODE integration.

    Attributes:
        t: time points of the trajectory, starting with the initial time
        y: solution values, one per entry of ``t``
        method: name of the solver that produced the result (e.g. ``"euler"``,
            ``"midpoint"``, ``"rk4"``)
        steps: number of integration steps taken
    """

    t: list[float]
    y: list[float]
    method: str
    steps: int

Functions:

euler_method

euler_method(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution

Euler's method for dy/dt = f(t, y).

First-order explicit method. Local truncation error O(dt²), global error O(dt). [Euler 1768]

Parameters:

Name Type Description Default
f Callable[[float, float], float]

right-hand side function f(t, y)

required
t0 float

initial time

required
y0 float

initial value y(t0)

required
t_end float

end time

required
dt float

time step

RK45_DEFAULT_DT
Source code in src\cds\diffeq\solvers.py
def euler_method(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution:
    """Euler's method for dy/dt = f(t, y).

    First-order explicit method. Local truncation error O(dt²),
    global error O(dt). [Euler 1768]

    The final step is shortened so the trajectory ends at ``t_end``. If
    ``t_end`` does not lie beyond ``t0`` (within ``LOOP_EPSILON``), no step is
    taken and the solution contains only the initial point.

    Args:
        f: right-hand side function f(t, y)
        t0: initial time
        y0: initial value y(t0)
        t_end: end time
        dt: time step (must be positive)

    Returns:
        :class:`ODESolution` with the visited times, the corresponding
        values, ``method="euler"`` and the number of steps taken.

    Raises:
        ValueError: if ``dt <= 0`` while there is an interval to integrate
            (the stepping loop could never reach ``t_end``).
    """
    # A non-positive step never moves t towards t_end; fail fast instead of
    # looping forever while the result lists grow without bound.
    if dt <= 0 and t0 < t_end - LOOP_EPSILON:
        raise ValueError("dt must be positive")

    t_vals = [t0]
    y_vals = [y0]
    t, y = t0, y0
    steps = 0

    while t < t_end - LOOP_EPSILON:
        # Clamp the last step so we land on t_end rather than overshoot.
        h = min(dt, t_end - t)
        y = y + h * f(t, y)
        t = t + h
        t_vals.append(t)
        y_vals.append(y)
        steps += 1

    return ODESolution(t=t_vals, y=y_vals, method="euler", steps=steps)

midpoint_method

midpoint_method(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution

Explicit midpoint method (2nd-order Runge-Kutta).

Local truncation error O(dt³), global error O(dt²).

Parameters:

Name Type Description Default
f Callable[[float, float], float]

right-hand side function f(t, y)

required
t0 float

initial time

required
y0 float

initial value y(t0)

required
t_end float

end time

required
dt float

time step

RK45_DEFAULT_DT
Source code in src\cds\diffeq\solvers.py
def midpoint_method(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution:
    """Explicit midpoint method (2nd-order Runge-Kutta).

    Local truncation error O(dt³), global error O(dt²).

    The final step is shortened so the trajectory ends at ``t_end``. If
    ``t_end`` does not lie beyond ``t0`` (within ``LOOP_EPSILON``), no step is
    taken and the solution contains only the initial point.

    Args:
        f: right-hand side function f(t, y)
        t0: initial time
        y0: initial value y(t0)
        t_end: end time
        dt: time step (must be positive)

    Returns:
        :class:`ODESolution` with the visited times, the corresponding
        values, ``method="midpoint"`` and the number of steps taken.

    Raises:
        ValueError: if ``dt <= 0`` while there is an interval to integrate
            (the stepping loop could never reach ``t_end``).
    """
    # A non-positive step never moves t towards t_end; fail fast instead of
    # looping forever while the result lists grow without bound.
    if dt <= 0 and t0 < t_end - LOOP_EPSILON:
        raise ValueError("dt must be positive")

    t_vals = [t0]
    y_vals = [y0]
    t, y = t0, y0
    steps = 0

    while t < t_end - LOOP_EPSILON:
        # Clamp the last step so we land on t_end rather than overshoot.
        h = min(dt, t_end - t)
        k1 = f(t, y)
        # Slope re-evaluated at the midpoint reached with a half Euler step.
        k2 = f(t + h / 2, y + h * k1 / 2)
        y = y + h * k2
        t = t + h
        t_vals.append(t)
        y_vals.append(y)
        steps += 1

    return ODESolution(t=t_vals, y=y_vals, method="midpoint", steps=steps)

rk4

rk4(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution

Classical 4th-order Runge-Kutta method.

Local truncation error O(dt⁵), global error O(dt⁴). [Runge 1895, Kutta 1901]

The standard Butcher tableau

$$
\begin{array}{c|cccc}
0   &     &     &     &     \\
1/2 & 1/2 &     &     &     \\
1/2 & 0   & 1/2 &     &     \\
1   & 0   & 0   & 1   &     \\
\hline
    & 1/6 & 1/3 & 1/3 & 1/6
\end{array}
$$

Parameters:

Name Type Description Default
f Callable[[float, float], float]

right-hand side function f(t, y)

required
t0 float

initial time

required
y0 float

initial value y(t0)

required
t_end float

end time

required
dt float

time step

RK45_DEFAULT_DT
Source code in src\cds\diffeq\solvers.py
def rk4(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution:
    """Classical 4th-order Runge-Kutta method.

    Local truncation error O(dt⁵), global error O(dt⁴). [Runge 1895, Kutta 1901]

    The standard Butcher tableau:
        0   |
        1/2 | 1/2
        1/2 | 0   1/2
        1   | 0   0   1
        ----|----------------
            | 1/6 1/3 1/3 1/6

    The final step is shortened so the trajectory ends at ``t_end``. If
    ``t_end`` does not lie beyond ``t0`` (within ``LOOP_EPSILON``), no step is
    taken and the solution contains only the initial point.

    Args:
        f: right-hand side function f(t, y)
        t0: initial time
        y0: initial value y(t0)
        t_end: end time
        dt: time step (must be positive)

    Returns:
        :class:`ODESolution` with the visited times, the corresponding
        values, ``method="rk4"`` and the number of steps taken.

    Raises:
        ValueError: if ``dt <= 0`` while there is an interval to integrate
            (the stepping loop could never reach ``t_end``).
    """
    # A non-positive step never moves t towards t_end; fail fast instead of
    # looping forever while the result lists grow without bound.
    if dt <= 0 and t0 < t_end - LOOP_EPSILON:
        raise ValueError("dt must be positive")

    t_vals = [t0]
    y_vals = [y0]
    t, y = t0, y0
    steps = 0

    while t < t_end - LOOP_EPSILON:
        # Clamp the last step so we land on t_end rather than overshoot.
        h = min(dt, t_end - t)
        # Four slope samples: start, two at the midpoint, one at the end.
        k1 = f(t, y)
        k2 = f(t + h / 2, y + h * k1 / 2)
        k3 = f(t + h / 2, y + h * k2 / 2)
        k4 = f(t + h, y + h * k3)
        # Weighted average with the tableau weights 1/6, 1/3, 1/3, 1/6.
        y = y + (h / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
        t = t + h
        t_vals.append(t)
        y_vals.append(y)
        steps += 1

    return ODESolution(t=t_vals, y=y_vals, method="rk4", steps=steps)

rk45

rk45(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
    atol: float = RK45_DEFAULT_ATOL,
    rtol: float = RK45_DEFAULT_RTOL,
) -> ODESolution

Dormand-Prince (RK45) adaptive step-size method.

Computes 4th and 5th order estimates to approximate local error and adjust the step size automatically. [Dormand & Prince 1980]

Parameters:

Name Type Description Default
f Callable[[float, float], float]

right-hand side f(t, y)

required
t0 float

initial time

required
y0 float

initial value

required
t_end float

end time

required
dt float

initial time step

RK45_DEFAULT_DT
atol float

absolute tolerance

RK45_DEFAULT_ATOL
rtol float

relative tolerance

RK45_DEFAULT_RTOL
Source code in src\cds\diffeq\solvers.py
def rk45(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
    atol: float = RK45_DEFAULT_ATOL,
    rtol: float = RK45_DEFAULT_RTOL,
) -> ODESolution:
    """Dormand-Prince (RK45) adaptive step-size method.

    Computes 4th and 5th order estimates to approximate local error
    and adjust the step size automatically. [Dormand & Prince 1980]

    A step is accepted when ``|y5 - y4| <= atol + rtol * |y|`` (``y`` being the
    value at the start of the step); the 5th-order value is the one that is
    propagated. Rejected steps leave ``t`` and ``y`` untouched and are retried
    with the re-computed step size.

    Args:
        f: right-hand side f(t, y)
        t0: initial time
        y0: initial value
        t_end: end time
        dt: initial time step
        atol: absolute tolerance
        rtol: relative tolerance

    Returns:
        ODESolution with the accepted times ``t`` (starting at ``t0``), the
        matching values ``y``, ``method="rk45"`` and the number of accepted
        steps.

    Raises:
        RuntimeError: if the step size drops below the span-scaled epsilon
            floor or becomes too small to change ``t``.
    """
    # Dormand-Prince Butcher tableau: ``a`` holds the stage nodes, ``b`` the
    # lower-triangular stage coefficients, ``c5``/``c4`` the 5th- and
    # 4th-order solution weights.
    a = [0, 1 / 5, 3 / 10, 4 / 5, 8 / 9, 1, 1]
    b = [
        [],
        [1 / 5],
        [3 / 40, 9 / 40],
        [44 / 45, -56 / 15, 32 / 9],
        [19372 / 6561, -25360 / 2187, 64448 / 6561, -212 / 729],
        [9017 / 3168, -355 / 33, 46732 / 5247, 49 / 176, -5103 / 18656],
        [35 / 384, 0, 500 / 1113, 125 / 192, -2187 / 6784, 11 / 84],
    ]
    c5 = [35 / 384, 0, 500 / 1113, 125 / 192, -2187 / 6784, 11 / 84, 0]
    c4 = [5179 / 57600, 0, 7571 / 16695, 393 / 640, -92097 / 339200, 187 / 2100, 1 / 40]

    t, y = t0, y0
    t_vals = [t]
    y_vals = [y]
    h = dt
    steps = 0

    # Absolute step-size floor, scaled to the integration span, below which no
    # further progress can be made (a "machine precision floor"). Prevents the
    # adaptive loop from spinning forever on stiff/diverging problems.
    # ``t`` still equals ``t0`` here, so the floor is fixed for the whole run.
    span = abs(t_end - t0) if t_end != t0 else 1.0
    eps_floor = 16 * sys.float_info.epsilon * max(abs(t), span)

    while t < t_end - LOOP_EPSILON:
        # Never step past the end of the interval.
        if t + h > t_end:
            h = t_end - t

        # Evaluate the seven stages; stage i uses the slopes of stages 0..i-1.
        k = [0.0] * 7
        k[0] = f(t, y)
        for i in range(1, 7):
            y_next = y + h * sum(b[i][j] * k[j] for j in range(i))
            k[i] = f(t + a[i] * h, y_next)

        # Estimate 5th and 4th order solutions
        y5 = y + h * sum(c5[i] * k[i] for i in range(7))
        y4 = y + h * sum(c4[i] * k[i] for i in range(7))

        # Local error estimate
        error = abs(y5 - y4)
        tolerance = atol + rtol * abs(y)

        if error <= tolerance:
            # Step accepted
            t += h
            y = y5
            t_vals.append(t)
            y_vals.append(y)
            steps += 1

        # Adjust step size (runs after both accepted and rejected steps):
        # RK45_STEP_SAFETY * h_opt, clamped to the range
        # [RK45_STEP_SHRINK * h, RK45_STEP_GROW * h].
        if error > 0:
            h_opt = h * (tolerance / error) ** 0.2
            h = min(max(RK45_STEP_SHRINK * h, RK45_STEP_SAFETY * h_opt), RK45_STEP_GROW * h)
        else:
            # Error is exactly zero, so grow the step by a literal factor 10.
            # NOTE(review): this path is not capped by RK45_STEP_GROW - confirm intended.
            h *= 10.0

        # Precision floor to prevent infinite loop: either the step size has
        # shrunk below the span-scaled epsilon floor, or it has become so small
        # that adding it to t makes no progress (t + h == t).
        if h < eps_floor or t + h == t:
            raise RuntimeError("Step size h reached machine precision floor.")

    return ODESolution(t=t_vals, y=y_vals, method="rk45", steps=steps)

solve_system

solve_system(
    f: Callable[[float, list[float]], list[float]],
    t0: float,
    y0: list[float],
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> tuple[list[float], list[list[float]]]

RK4 for systems of ODEs: dy/dt = f(t, y) where y is a vector.

Parameters:

Name Type Description Default
f Callable[[float, list[float]], list[float]]

right-hand side f(t, y) returning a list of derivatives

required
t0 float

initial time

required
y0 list[float]

initial state vector

required
t_end float

end time

required
dt float

time step

RK45_DEFAULT_DT

Returns:

Type Description
tuple[list[float], list[list[float]]]

(t_values, y_values) where y_values[i] is the state vector at t_values[i]

Source code in src\cds\diffeq\solvers.py
def solve_system(
    f: Callable[[float, list[float]], list[float]],
    t0: float,
    y0: list[float],
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> tuple[list[float], list[list[float]]]:
    """RK4 for systems of ODEs: dy/dt = f(t, y) where y is a vector.

    The caller's ``y0`` is copied, never mutated, and every stored state is an
    independent list.

    Args:
        f: right-hand side f(t, y) returning a list of derivatives
        t0: initial time
        y0: initial state vector
        t_end: end time
        dt: time step (the last step is shortened so it lands on t_end)

    Returns:
        (t_values, y_values) where y_values[i] is the state vector at t_values[i]

    Raises:
        ValueError: if integration is still required but a step cannot
            advance ``t`` (``dt`` is zero, negative, or too small to change
            ``t`` in floating point).
    """
    n = len(y0)
    t_vals = [t0]
    y_vals = [list(y0)]
    t = t0
    y = list(y0)

    while t < t_end - LOOP_EPSILON:
        # Clamp the final step so the integration ends exactly at t_end.
        h = min(dt, t_end - t)
        # Without forward progress this loop would never terminate and the
        # result lists would grow without bound, so fail loudly instead.
        if t + h <= t:
            raise ValueError(f"dt must advance the integration time; got dt={dt!r}")

        # Four slope evaluations of the classical RK4 scheme, component-wise.
        k1 = f(t, y)
        y_tmp = [y[i] + h * k1[i] / 2 for i in range(n)]
        k2 = f(t + h / 2, y_tmp)
        y_tmp = [y[i] + h * k2[i] / 2 for i in range(n)]
        k3 = f(t + h / 2, y_tmp)
        y_tmp = [y[i] + h * k3[i] for i in range(n)]
        k4 = f(t + h, y_tmp)

        y = [y[i] + (h / 6) * (k1[i] + 2 * k2[i] + 2 * k3[i] + k4[i]) for i in range(n)]
        t = t + h
        t_vals.append(t)
        y_vals.append(list(y))

    return t_vals, y_vals

Monte Carlo Methods

Stochastic integration: π estimation, generic Monte-Carlo integration, 1D/2D random walks, and Buffon's needle.

cds.montecarlo

Monte Carlo methods — estimation, integration, random walks.

Functions:

buffon_needle

buffon_needle(
    needle_length: float = 1.0,
    line_spacing: float = 2.0,
    n_throws: int = 100000,
    seed: int | None = None,
) -> MCResult

Buffon's needle experiment for estimating π.

Drop a needle of length L onto parallel lines spaced D apart. P(crossing) = 2L / (πD), so π ≈ 2L / (D * P(crossing)).

Reference: Buffon (1777).

Parameters:

Name Type Description Default
needle_length float

length of the needle (must be <= line_spacing)

1.0
line_spacing float

distance between parallel lines

2.0
n_throws int

number of needle drops

100000
seed int | None

optional random seed

None

Raises:

Type Description
ValueError

if needle_length > line_spacing

Source code in src\cds\montecarlo\methods.py
def buffon_needle(
    needle_length: float = 1.0,
    line_spacing: float = 2.0,
    n_throws: int = 100_000,
    seed: int | None = None,
) -> MCResult:
    """Estimate π by simulating Buffon's needle drops.

    A needle of length L lands on a floor ruled with parallel lines D apart.
    It crosses a line with probability 2L / (πD), hence
    π ≈ 2L / (D * P(crossing)).

    Reference: Buffon (1777).

    Args:
        needle_length: length of the needle (must be <= line_spacing)
        line_spacing: distance between parallel lines
        n_throws: number of needle drops
        seed: optional random seed

    Returns:
        MCResult with the π estimate, the number of throws and the standard
        error; the estimate and error are both 0.0 when no throw crossed a line.

    Raises:
        ValueError: if needle_length > line_spacing
    """
    if needle_length > line_spacing:
        raise ValueError("needle must be shorter than line spacing")

    rng = random.Random(seed)
    half_needle = needle_length / 2

    def _throw_crosses() -> bool:
        # Draw the centre's distance to the nearest line first, then the
        # needle angle, so the random stream is consumed in a fixed order.
        distance = rng.uniform(0, line_spacing / 2)
        theta = rng.uniform(0, math.pi)
        return half_needle * math.sin(theta) >= distance

    hits = sum(1 for _ in range(n_throws) if _throw_crosses())

    if hits == 0:
        return MCResult(estimate=0.0, samples=n_throws, std_error=0.0)

    p = hits / n_throws
    pi_hat = (2 * needle_length) / (line_spacing * p)
    # Binomial standard error of p, propagated through pi = 2L / (D * p).
    p_err = math.sqrt(p * (1 - p) / n_throws)
    if p > 0:
        std_error = (2 * needle_length * p_err) / (line_spacing * p * p)
    else:
        std_error = 0.0
    return MCResult(estimate=pi_hat, samples=n_throws, std_error=std_error)

estimate_pi

estimate_pi(
    n_samples: int = 100000, seed: int | None = None
) -> MCResult

Estimate π using the unit-circle method (Parallelized).

Throw random points into the unit square [0,1]×[0,1]. Fraction inside the quarter-circle ≈ π/4.

Parameters:

Name Type Description Default
n_samples int

number of random points

100000
seed int | None

optional random seed

None
Source code in src\cds\montecarlo\methods.py
def estimate_pi(n_samples: int = 100_000, seed: int | None = None) -> MCResult:
    """Estimate π using the unit-circle method (Parallelized).

    Random points are thrown into the unit square [0,1]×[0,1]; the fraction
    landing inside the quarter-circle approximates π/4. The samples are split
    into one batch per worker process and each batch is handed to
    ``_pi_worker`` as a ``(batch_size, seed)`` pair.

    Args:
        n_samples: number of random points
        seed: optional random seed; worker ``k`` receives ``seed + k``. When
            omitted, a base seed is drawn from ``os.urandom``.

    Returns:
        MCResult with the π estimate, the sample count and the standard error
        (an all-zero result when ``n_samples <= 0``).
    """
    if n_samples <= 0:
        return MCResult(0.0, n_samples, 0.0)

    workers = min(multiprocessing.cpu_count(), n_samples)
    # Equal batches; whatever does not divide evenly goes to the last one.
    base, leftover = divmod(n_samples, workers)
    batch_sizes = [base] * (workers - 1) + [base + leftover]

    if seed is None:
        import os
        import sys

        seed = int.from_bytes(os.urandom(4), sys.byteorder)

    jobs = [(size, seed + offset) for offset, size in enumerate(batch_sizes)]

    with ProcessPoolExecutor(max_workers=workers) as pool:
        hits = sum(pool.map(_pi_worker, jobs))

    fraction = hits / n_samples
    if n_samples > 1:
        std_error = 4.0 * math.sqrt(fraction * (1 - fraction) / n_samples)
    else:
        std_error = 0.0
    return MCResult(estimate=4.0 * fraction, samples=n_samples, std_error=std_error)

mc_integrate

mc_integrate(
    f: Callable[[float], float],
    a: float,
    b: float,
    n_samples: int = 100000,
    seed: int | None = None,
) -> MCResult

Monte Carlo integration of f over [a, b].

E[f(X)] * (b-a) where X ~ Uniform(a, b).

Parameters:

Name Type Description Default
f Callable[[float], float]

function to integrate

required
a float

lower bound

required
b float

upper bound

required
n_samples int

number of random evaluations

100000
seed int | None

optional random seed

None
Source code in src\cds\montecarlo\methods.py
def mc_integrate(
    f: Callable[[float], float],
    a: float,
    b: float,
    n_samples: int = 100_000,
    seed: int | None = None,
) -> MCResult:
    """Monte Carlo integration of f over [a, b].

    E[f(X)] * (b-a) where X ~ Uniform(a, b).

    Args:
        f: function to integrate
        a: lower bound
        b: upper bound
        n_samples: number of random evaluations
        seed: optional random seed

    Returns:
        MCResult with the integral estimate, the sample count and the standard
        error of the estimate. With ``n_samples <= 0`` nothing is evaluated
        and an all-zero result is returned (same convention as
        ``estimate_pi``).
    """
    if n_samples <= 0:
        # No samples means no mean to compute; avoid dividing by zero.
        return MCResult(estimate=0.0, samples=n_samples, std_error=0.0)

    rng = random.Random(seed)
    total = 0.0
    total_sq = 0.0
    width = b - a
    for _ in range(n_samples):
        x = a + rng.random() * width
        val = f(x)
        total += val
        total_sq += val * val

    mean_val = total / n_samples
    estimate = mean_val * width
    # Population variance E[f²] - E[f]²; rounding can push it slightly below
    # zero, hence the ``var > 0`` guard before taking the square root.
    var = (total_sq / n_samples - mean_val**2) if n_samples > 1 else 0.0
    se = width * math.sqrt(var / n_samples) if var > 0 else 0.0
    return MCResult(estimate=estimate, samples=n_samples, std_error=se)

random_walk_1d

random_walk_1d(
    steps: int,
    step_size: float = 1.0,
    seed: int | None = None,
) -> list[float]

1D symmetric random walk.

At each step, move +step_size or -step_size with equal probability.

Parameters:

Name Type Description Default
steps int

number of steps

required
step_size float

size of each step

1.0
seed int | None

optional random seed

None

Returns:

Type Description
list[float]

list of positions at each step (length = steps + 1)

Source code in src\cds\montecarlo\methods.py
def random_walk_1d(
    steps: int,
    step_size: float = 1.0,
    seed: int | None = None,
) -> list[float]:
    """1D symmetric random walk.

    At each step, move +step_size or -step_size with equal probability.

    Args:
        steps: number of steps
        step_size: size of each step
        seed: optional random seed

    Returns:
        list of positions at each step (length = steps + 1)
    """
    rng = random.Random(seed)
    positions = [0.0]
    pos = 0.0
    for _ in range(steps):
        pos += step_size if rng.random() < 0.5 else -step_size
        positions.append(pos)
    return positions

random_walk_2d

random_walk_2d(
    steps: int,
    step_size: float = 1.0,
    seed: int | None = None,
) -> list[tuple[float, float]]

2D random walk on a plane.

At each step, move in a random direction (uniform angle).

Parameters:

Name Type Description Default
steps int

number of steps

required
step_size float

size of each step

1.0
seed int | None

optional random seed

None

Returns:

Type Description
list[tuple[float, float]]

list of (x, y) positions at each step (length = steps + 1)

Source code in src\cds\montecarlo\methods.py
def random_walk_2d(
    steps: int,
    step_size: float = 1.0,
    seed: int | None = None,
) -> list[tuple[float, float]]:
    """2D random walk on a plane.

    At each step, move in a random direction (uniform angle).

    Args:
        steps: number of steps
        step_size: size of each step
        seed: optional random seed

    Returns:
        list of (x, y) positions at each step (length = steps + 1)
    """
    rng = random.Random(seed)
    positions: list[tuple[float, float]] = [(0.0, 0.0)]
    x, y = 0.0, 0.0
    for _ in range(steps):
        angle = rng.uniform(0, 2 * math.pi)
        x += step_size * math.cos(angle)
        y += step_size * math.sin(angle)
        positions.append((x, y))
    return positions

Optimization

Gradient descent, Newton's method, Adam, and golden-section line search.

cds.optimization

Numerical optimization algorithms.

Functions:

adam

adam(
    f: Callable[..., float],
    x0: float,
    lr: float = ADAM_DEFAULT_LR,
    beta1: float = ADAM_DEFAULT_BETAS[0],
    beta2: float = ADAM_DEFAULT_BETAS[1],
    eps: float = ADAM_DEFAULT_EPS,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
    state: AdamState | None = None,
    grad_f: (
        Callable[..., float | list[float]] | None
    ) = None,
) -> OptResult[float]
adam(
    f: Callable[..., float],
    x0: list[float],
    lr: float = ADAM_DEFAULT_LR,
    beta1: float = ADAM_DEFAULT_BETAS[0],
    beta2: float = ADAM_DEFAULT_BETAS[1],
    eps: float = ADAM_DEFAULT_EPS,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
    state: AdamState | None = None,
    grad_f: (
        Callable[..., float | list[float]] | None
    ) = None,
) -> OptResult[list[float]]
adam(
    f: Callable[..., float],
    x0: float | list[float],
    lr: float = ADAM_DEFAULT_LR,
    beta1: float = ADAM_DEFAULT_BETAS[0],
    beta2: float = ADAM_DEFAULT_BETAS[1],
    eps: float = ADAM_DEFAULT_EPS,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
    state: AdamState | None = None,
    grad_f: (
        Callable[..., float | list[float]] | None
    ) = None,
) -> OptResult[float] | OptResult[list[float]]

Minimize using Adam optimizer (adaptive learning rate) for scalars or vectors.

Parameters:

Name Type Description Default
f Callable[..., float]

objective function

required
x0 float | list[float]

starting point

required
lr float

learning rate

ADAM_DEFAULT_LR
beta1 float

first moment decay

ADAM_DEFAULT_BETAS[0]
beta2 float

second moment decay

ADAM_DEFAULT_BETAS[1]
eps float

numerical stability constant

ADAM_DEFAULT_EPS
tol float

convergence tolerance

DEFAULT_TOLERANCE
max_iter int

iteration limit

10000
h float

step for numerical gradient

DEFAULT_FD_STEP
state AdamState | None

optional dictionary to resume optimization (contains m, v, t)

None
grad_f Callable[..., float | list[float]] | None

optional gradient function. If None, numerical gradient is used.

None
Source code in src\cds\optimization\minimize.py
def adam(
    f: Callable[..., float],
    x0: float | list[float],
    lr: float = ADAM_DEFAULT_LR,
    beta1: float = ADAM_DEFAULT_BETAS[0],
    beta2: float = ADAM_DEFAULT_BETAS[1],
    eps: float = ADAM_DEFAULT_EPS,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
    state: AdamState | None = None,
    grad_f: Callable[..., float | list[float]] | None = None,
) -> OptResult[float] | OptResult[list[float]]:
    """Minimize using Adam optimizer (adaptive learning rate) for scalars or vectors.

    An ``int``/``float`` ``x0`` takes the scalar branch; anything else is
    copied with ``list(x0)`` and optimized component-wise. Each iteration
    stops early when the gradient magnitude falls below ``tol``; otherwise the
    first/second moment estimates are updated, bias-corrected with the global
    step counter, and used to move ``x``.

    Args:
        f: objective function
        x0: starting point
        lr: learning rate
        beta1: first moment decay
        beta2: second moment decay
        eps: numerical stability constant
        tol: convergence tolerance
        max_iter: iteration limit
        h: step for numerical gradient
        state: optional dictionary to resume optimization (contains m, v, t)
        grad_f: optional gradient function. If None, numerical gradient is used.

    Returns:
        OptResult with the final ``x``, ``value=f(x)``, the number of
        iterations run in this call (``max_iter`` when not converged),
        the ``converged`` flag, and a ``state`` dict (``m``, ``v``, ``t``)
        that can be passed back in to resume.
    """
    if isinstance(x0, (int, float)):
        # ---- scalar branch -------------------------------------------------
        x_scalar: float = float(x0)

        if state is None:
            # Fresh run: zero moments, step counter starts at 1.
            m_s = 0.0
            v_s = 0.0
            t_start = 1
        else:
            # Resume: continue the step counter right after the stored one.
            m_s = float(cast(float, state["m"]))
            v_s = float(cast(float, state["v"]))
            t_start = int(state["t"]) + 1

        # Stays at the incoming counter when the loop body never runs.
        last_t = t_start - 1
        for i in range(t_start, t_start + max_iter):
            last_t = i
            # Truthiness test: a supplied gradient function takes precedence
            # over the numerical gradient.
            if grad_f:
                grad_s: float = float(cast(float, grad_f(x_scalar)))
            else:
                grad_s = _compute_gradient(f, x_scalar, h)

            if abs(grad_s) < tol:
                # NOTE(review): "t" is stored as i although the moment update
                # for step i is skipped on this path - confirm this is the
                # intended counter for a resumed run.
                return OptResult(
                    x=x_scalar,
                    value=f(x_scalar),
                    iterations=i - t_start + 1,
                    converged=True,
                    state={"m": m_s, "v": v_s, "t": i},
                )
            # Exponential moving averages of the gradient and its square.
            m_s = beta1 * m_s + (1 - beta1) * grad_s
            v_s = beta2 * v_s + (1 - beta2) * grad_s**2
            # Bias correction uses the global step counter i.
            m_hat = m_s / (1 - beta1**i)
            v_hat = v_s / (1 - beta2**i)
            x_scalar -= lr * m_hat / (math.sqrt(v_hat) + eps)

        return OptResult(
            x=x_scalar,
            value=f(x_scalar),
            iterations=max_iter,
            converged=False,
            state={"m": m_s, "v": v_s, "t": last_t},
        )
    else:
        # ---- vector branch -------------------------------------------------
        # Work on a copy so the caller's x0 is not mutated.
        x_list: list[float] = list(x0)

        if state is None:
            m_l = [0.0] * len(x_list)
            v_l = [0.0] * len(x_list)
            t_start = 1
        else:
            # Copy the stored moments so the incoming state lists stay untouched.
            m_l = list(cast(list[float], state["m"]))
            v_l = list(cast(list[float], state["v"]))
            t_start = int(state["t"]) + 1

        last_t = t_start - 1
        for i in range(t_start, t_start + max_iter):
            last_t = i
            if grad_f:
                grad_l: list[float] = list(cast(list[float], grad_f(x_list)))
            else:
                grad_l = _compute_gradient(f, x_list, h)

            if _magnitude(grad_l) < tol:
                # NOTE(review): same counter question as in the scalar branch.
                return OptResult(
                    x=x_list,
                    value=f(x_list),
                    iterations=i - t_start + 1,
                    converged=True,
                    state={"m": m_l, "v": v_l, "t": i},
                )

            # Per-component Adam update, in place on x_list / m_l / v_l.
            for j in range(len(x_list)):
                m_l[j] = beta1 * m_l[j] + (1 - beta1) * grad_l[j]
                v_l[j] = beta2 * v_l[j] + (1 - beta2) * grad_l[j] ** 2
                m_hat = m_l[j] / (1 - beta1**i)
                v_hat = v_l[j] / (1 - beta2**i)
                x_list[j] -= lr * m_hat / (math.sqrt(v_hat) + eps)

        return OptResult(
            x=x_list,
            value=f(x_list),
            iterations=max_iter,
            converged=False,
            state={"m": m_l, "v": v_l, "t": last_t},
        )

gradient_descent

gradient_descent(
    f: Callable[..., float],
    x0: float,
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
) -> OptResult[float]
gradient_descent(
    f: Callable[..., float],
    x0: list[float],
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
) -> OptResult[list[float]]
gradient_descent(
    f: Callable[..., float],
    x0: float | list[float],
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
) -> OptResult[float] | OptResult[list[float]]

Minimize a scalar or vector function using gradient descent.

Parameters:

Name Type Description Default
f Callable[..., float]

objective function

required
x0 float | list[float]

starting point (scalar or list of floats)

required
lr float

learning rate

GD_DEFAULT_LR
tol float

convergence tolerance on gradient magnitude

DEFAULT_TOLERANCE
max_iter int

iteration limit

10000
h float

step size for numerical gradient

DEFAULT_FD_STEP
Source code in src\cds\optimization\minimize.py
def gradient_descent(
    f: Callable[..., float],
    x0: float | list[float],
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
) -> OptResult[float] | OptResult[list[float]]:
    """Minimize a scalar or vector function with fixed-rate gradient descent.

    Each iteration computes the gradient with ``_compute_gradient``, stops if
    its magnitude is below ``tol``, and otherwise moves ``x`` via ``_update_x``.

    Args:
        f: objective function
        x0: starting point (scalar or list of floats)
        lr: learning rate
        tol: convergence tolerance on gradient magnitude
        max_iter: iteration limit
        h: step size for numerical gradient

    Returns:
        OptResult with the final point, ``f`` evaluated there, the number of
        updates performed and whether the tolerance was met.
    """
    if not isinstance(x0, (int, float)):
        # Vector input: iterate on a private copy of the starting point.
        # Distinct local names per branch keep the scalar and vector types
        # apart for the type checker.
        point: list[float] = list(x0)
        for step in range(max_iter):
            slope_vec = _compute_gradient(f, point, h)
            if _magnitude(slope_vec) < tol:
                return OptResult(x=point, value=f(point), iterations=step, converged=True)
            point = _update_x(point, slope_vec, lr)
        return OptResult(x=point, value=f(point), iterations=max_iter, converged=False)

    # Scalar input.
    position: float = x0
    for step in range(max_iter):
        slope = _compute_gradient(f, position, h)
        if _magnitude(slope) < tol:
            return OptResult(x=position, value=f(position), iterations=step, converged=True)
        position = _update_x(position, slope, lr)
    return OptResult(x=position, value=f(position), iterations=max_iter, converged=False)

line_search

line_search(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 100,
) -> OptResult[float]

Golden section search for minimum in [a, b].

Parameters:

Name Type Description Default
f Callable[[float], float]

unimodal function to minimize

required
a float

left bound

required
b float

right bound

required
tol float

convergence tolerance on interval width

DEFAULT_TOLERANCE
max_iter int

iteration limit

100
Source code in src\cds\optimization\minimize.py
def line_search(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 100,
) -> OptResult[float]:
    """Locate a minimum of f on [a, b] by golden-section search.

    The bracket is shrunk by the golden ratio each iteration until its width
    drops below ``tol`` or ``max_iter`` iterations have run; the midpoint of
    the final bracket is reported.

    Args:
        f: unimodal function to minimize
        a: left bound
        b: right bound
        tol: convergence tolerance on interval width
        max_iter: iteration limit

    Returns:
        OptResult with the bracket midpoint, ``f`` at that midpoint, the
        iteration count and whether the width tolerance was reached.
    """
    ratio = (math.sqrt(5) - 1) / 2
    lo, hi = a, b

    def _report(iterations: int, converged: bool) -> OptResult[float]:
        # Package the midpoint of the current bracket as the answer.
        centre = (lo + hi) / 2
        return OptResult(
            x=centre,
            value=f(centre),
            iterations=iterations,
            converged=converged,
        )

    for n_done in range(max_iter):
        width = hi - lo
        if abs(width) < tol:
            return _report(n_done, True)
        inner_left = hi - ratio * width
        inner_right = lo + ratio * width
        left_val = f(inner_left)
        right_val = f(inner_right)
        if left_val < right_val:
            # Minimum lies in [lo, inner_right].
            hi = inner_right
        else:
            # Minimum lies in [inner_left, hi].
            lo = inner_left

    return _report(max_iter, False)

newton_method

newton_method(
    f: Callable[[float], float],
    x0: float,
    tol: float = NEWTON_TOLERANCE,
    max_iter: int = 1000,
    h_base: float = NEWTON_DERIVATIVE_STEP,
) -> OptResult[float]

Find a root of f using Newton-Raphson method with adaptive step size.

Parameters:

Name Type Description Default
f Callable[[float], float]

function whose root to find

required
x0 float

starting point

required
tol float

convergence tolerance

NEWTON_TOLERANCE
max_iter int

iteration limit

1000
h_base float

base step for numerical derivative

NEWTON_DERIVATIVE_STEP
Source code in src\cds\optimization\minimize.py
def newton_method(
    f: Callable[[float], float],
    x0: float,
    tol: float = NEWTON_TOLERANCE,
    max_iter: int = 1000,
    h_base: float = NEWTON_DERIVATIVE_STEP,
) -> OptResult[float]:
    """Find a root of f using Newton-Raphson method with adaptive step size.

    Args:
        f: function whose root to find
        x0: starting point
        tol: convergence tolerance
        max_iter: iteration limit
        h_base: base step for numerical derivative

    Returns:
        OptResult with the final ``x``, ``f(x)``, the number of Newton updates
        actually applied, and ``converged=True`` when ``|f(x)| < tol``. If the
        derivative estimate is near zero the search stops early and reports
        the updates applied so far.
    """
    x = float(x0)
    # Number of Newton updates applied; stays at max_iter unless the loop
    # stops early on a vanishing derivative.
    iterations = max_iter
    for i in range(max_iter):
        fx = f(x)
        if abs(fx) < tol:
            return OptResult(x=x, value=fx, iterations=i, converged=True)

        # Newton's derivative comes from the same central-difference kernel as
        # the gradient methods (``h_base`` defaults to NEWTON_DERIVATIVE_STEP,
        # finer than the gradient DEFAULT_FD_STEP).
        dfx = _compute_gradient(f, x, h_base)

        if abs(dfx) < NEAR_ZERO:
            # The step fx / dfx would blow up; give up and report how many
            # updates were really performed instead of max_iter.
            iterations = i
            break
        x -= fx / dfx

    # The iterate produced by the last permitted update has not been tested
    # yet, so check it here rather than unconditionally reporting failure.
    fx = f(x)
    return OptResult(
        x=x,
        value=fx,
        iterations=iterations,
        converged=abs(fx) < tol,
    )

Machine Learning

Pure-Python neural networks: an MLP with Adam-based training.

cds.ml

Machine Learning module for CDS.

Classes

MLP

Multi-Layer Perceptron (Pure Python).

Source code in src\cds\ml\neural.py
class MLP:
    """Feed-forward multi-layer perceptron written in pure Python."""

    def __init__(self, layers: list[Layer]):
        # Adam moments/step counter carried over between train() calls.
        self.optimizer_state: AdamState | None = None
        self.layers = layers

    def predict(self, x: list[float]) -> list[float]:
        """Feed ``x`` through every layer in order and return the last output."""
        activation = x
        for stage in self.layers:
            activation = stage.forward(activation)
        return activation

    def get_parameters(self) -> list[float]:
        """Return all weights and biases as one flat list (layer by layer)."""
        flat = []
        for layer in self.layers:
            # Row-major weights first, then that layer's biases.
            flat += [w for row in layer.weights for w in row]
            flat += layer.biases
        return flat

    def set_parameters(self, params: list[float]) -> None:
        """Write a flat parameter list back into the layers' weights and biases."""
        cursor = 0
        for layer in self.layers:
            for row in layer.weights:
                for col in range(len(row)):
                    row[col] = params[cursor]
                    cursor += 1
            biases = layer.biases
            for k in range(len(biases)):
                biases[k] = params[cursor]
                cursor += 1

    def get_gradients(self) -> list[float]:
        """Return all accumulated gradients as one flat list (layer by layer)."""
        flat = []
        for layer in self.layers:
            flat += [g for row in layer.grad_weights for g in row]
            flat += layer.grad_biases
        return flat

    def zero_grads(self) -> None:
        """Overwrite every stored weight and bias gradient with 0.0, in place."""
        for layer in self.layers:
            for row in layer.grad_weights:
                for col in range(len(row)):
                    row[col] = 0.0
            bias_grads = layer.grad_biases
            for k in range(len(bias_grads)):
                bias_grads[k] = 0.0

    def train(
        self,
        X: list[list[float]],
        y: list[list[float]],
        epochs: int = 100,
        lr: float = GD_DEFAULT_LR,
    ) -> dict[str, float | bool]:
        """Fit the network with Adam and backpropagation, keeping optimizer state.

        The optimizer state returned by ``adam`` is stored on the instance and
        passed back in on the next call.
        """

        def loss_fn(params: list[float]) -> float:
            """Mean squared error over the training set for these parameters."""
            self.set_parameters(params)
            squared_error = 0.0
            for sample, target_vec in zip(X, y):
                output = self.predict(sample)
                squared_error += sum((p - target) ** 2 for p, target in zip(output, target_vec))
            return squared_error / len(X)

        def grad_fn(params: list[float]) -> list[float]:
            """Parameter gradients via backpropagation over the training set."""
            self.set_parameters(params)
            self.zero_grads()
            for sample, target_vec in zip(X, y):
                output = self.predict(sample)
                # MSE gradient: dL/dp = 2/N * (p - y)
                upstream = [2.0 * (p - target) / len(X) for p, target in zip(output, target_vec)]
                # Walk the layers back to front, feeding each layer's result
                # to the one before it.
                for layer in reversed(self.layers):
                    upstream = layer.backward(upstream)
            return self.get_gradients()

        start = self.get_parameters()
        outcome = adam(
            loss_fn,
            start,
            lr=lr,
            max_iter=epochs,
            state=self.optimizer_state,
            grad_f=grad_fn,
        )

        # adam()'s overload for a list x0 returns OptResult[list[float]], so
        # outcome.x is statically a list[float] — no cast or isinstance narrowing.
        self.set_parameters(outcome.x)
        self.optimizer_state = outcome.state  # Store state for next training call

        return {
            "final_loss": outcome.value,
            "iterations": outcome.iterations,
            "converged": outcome.converged,
        }
Methods:
predict
predict(x: list[float]) -> list[float]

Compute the network output.

Source code in src\cds\ml\neural.py
def predict(self, x: list[float]) -> list[float]:
    """Feed ``x`` through every layer in order and return the last output."""
    signal = x
    for stage in self.layers:
        signal = stage.forward(signal)
    return signal
get_parameters
get_parameters() -> list[float]

Flatten all weights and biases into a single list.

Source code in src\cds\ml\neural.py
def get_parameters(self) -> list[float]:
    """Return all weights and biases as one flat list (layer by layer)."""
    flat = []
    for layer in self.layers:
        # Row-major weights first, then that layer's biases.
        flat += [w for row in layer.weights for w in row]
        flat += layer.biases
    return flat
set_parameters
set_parameters(params: list[float]) -> None

Unflatten parameters back into weights and biases.

Source code in src\cds\ml\neural.py
def set_parameters(self, params: list[float]) -> None:
    """Write a flat parameter list back into the layers' weights and biases."""
    cursor = 0
    for layer in self.layers:
        for row in layer.weights:
            for col in range(len(row)):
                row[col] = params[cursor]
                cursor += 1
        biases = layer.biases
        for k in range(len(biases)):
            biases[k] = params[cursor]
            cursor += 1
get_gradients
get_gradients() -> list[float]

Flatten all accumulated gradients into a single list.

Source code in src\cds\ml\neural.py
def get_gradients(self) -> list[float]:
    """Return all accumulated gradients as one flat list (layer by layer)."""
    flat = []
    for layer in self.layers:
        # Row-major weight gradients first, then that layer's bias gradients.
        flat += [g for row in layer.grad_weights for g in row]
        flat += layer.grad_biases
    return flat
zero_grads
zero_grads() -> None

Reset all parameter gradients to zero.

Source code in src\cds\ml\neural.py
def zero_grads(self) -> None:
    """Overwrite every stored weight and bias gradient with 0.0, in place."""
    for layer in self.layers:
        for row in layer.grad_weights:
            for col in range(len(row)):
                row[col] = 0.0
        bias_grads = layer.grad_biases
        for k in range(len(bias_grads)):
            bias_grads[k] = 0.0
train
train(
    X: list[list[float]],
    y: list[list[float]],
    epochs: int = 100,
    lr: float = GD_DEFAULT_LR,
) -> dict[str, float | bool]

Train the network using the Adam optimizer with backpropagation and state persistence.

Source code in src\cds\ml\neural.py
def train(
    self,
    X: list[list[float]],
    y: list[list[float]],
    epochs: int = 100,
    lr: float = GD_DEFAULT_LR,
) -> dict[str, float | bool]:
    """Fit the network to ``(X, y)`` with Adam, driven by backpropagated MSE gradients.

    The optimizer state returned by ``adam`` is stored on
    ``self.optimizer_state`` and handed back to ``adam`` on the next call.

    Args:
        X: training inputs, one feature vector per sample.
        y: training targets, paired with ``X`` by position.
        epochs: passed to ``adam`` as ``max_iter``.
        lr: learning rate passed to ``adam``.

    Returns:
        Dict with ``final_loss``, ``iterations`` and ``converged`` copied
        from the optimizer result.
    """

    def mean_squared_error(params: list[float]) -> float:
        """Load ``params`` into the network and return the MSE over the training set."""
        self.set_parameters(params)
        accumulated = 0.0
        for sample, expected in zip(X, y):
            output = self.predict(sample)
            accumulated += sum((out - want) ** 2 for out, want in zip(output, expected))
        return accumulated / len(X)

    def backprop_gradient(params: list[float]) -> list[float]:
        """Load ``params`` and return the flattened MSE gradient from backpropagation."""
        self.set_parameters(params)
        self.zero_grads()
        for sample, expected in zip(X, y):
            output = self.predict(sample)
            # d(MSE)/d(output) = 2 * (output - target) / N
            upstream = [2.0 * (out - want) / len(X) for out, want in zip(output, expected)]
            # Walk the layers back to front, feeding each layer's returned
            # gradient into the layer before it.
            for layer in reversed(self.layers):
                upstream = layer.backward(upstream)
        return self.get_gradients()

    start = self.get_parameters()
    result = adam(
        mean_squared_error,
        start,
        lr=lr,
        max_iter=epochs,
        state=self.optimizer_state,
        grad_f=backprop_gradient,
    )

    # adam()'s overload for a list x0 returns OptResult[list[float]], so
    # result.x can be written back without a cast or isinstance narrowing.
    self.set_parameters(result.x)
    # Keep the optimizer state so the next train() call resumes from it.
    self.optimizer_state = result.state

    return {
        "final_loss": result.value,
        "iterations": result.iterations,
        "converged": result.converged,
    }

Layer

A basic dense (fully-connected) neural network layer.

Source code in src\cds\ml\neural.py
class Layer:
    """Dense (fully-connected) layer built on plain Python lists.

    ``weights`` holds ``output_size`` rows of ``input_size`` floats and
    ``biases`` one float per output. ``forward`` caches its input,
    pre-activations and activations so that ``backward`` can accumulate
    gradients into ``grad_weights`` / ``grad_biases``.

    Any ``activation`` other than ``"relu"`` or ``"sigmoid"`` acts as the
    identity.
    """

    def __init__(self, input_size: int, output_size: int, activation: str = "relu"):
        # Glorot/Xavier uniform range: +/- sqrt(6 / (fan_in + fan_out)).
        bound = (6.0 / (input_size + output_size)) ** 0.5
        self.weights = []
        for _ in range(output_size):
            self.weights.append([random.uniform(-bound, bound) for _ in range(input_size)])
        self.biases = [0.0] * output_size
        self.activation = activation

        # Values cached by forward() and gradient accumulators used by backward().
        self.last_x: list[float] = []
        self.last_z: list[float] = []
        self.last_a: list[float] = []
        self.grad_weights = [[0.0] * input_size for _ in range(output_size)]
        self.grad_biases = [0.0] * output_size

    def forward(self, x: list[float]) -> list[float]:
        """Return the activations for one input vector, caching state for ``backward``."""
        self.last_x = x
        self.last_z = pre_activations = []
        self.last_a = activations = []
        for i, row in enumerate(self.weights):
            z = sum(w * xi for w, xi in zip(row, x)) + self.biases[i]
            pre_activations.append(z)
            activations.append(self._activate(z))
        return activations

    def backward(self, grad_out: list[float]) -> list[float]:
        """Accumulate parameter gradients and return the gradient w.r.t. the input.

        ``grad_out`` is dL/da for this layer's most recent ``forward`` call.
        """
        # Chain rule through the activation: dL/dz = dL/da * da/dz.
        grad_z = [
            upstream * self._activate_derivative(z, a)
            for upstream, z, a in zip(grad_out, self.last_z, self.last_a)
        ]

        # dL/dw_ij = dL/dz_i * x_j (added on top of what is already accumulated).
        for i, row in enumerate(self.weights):
            gz = grad_z[i]
            for j in range(len(row)):
                self.grad_weights[i][j] += gz * self.last_x[j]

        # dL/db_i = dL/dz_i.
        for i, _ in enumerate(self.biases):
            self.grad_biases[i] += grad_z[i]

        # dL/dx_j = sum_i (dL/dz_i * w_ij).
        n_out = len(self.weights)
        return [
            sum(grad_z[i] * self.weights[i][j] for i in range(n_out))
            for j in range(len(self.last_x))
        ]

    def _activate(self, z: float) -> float:
        """Apply the configured activation to one pre-activation value."""
        kind = self.activation
        if kind == "relu":
            return z if z > 0 else 0.0
        if kind != "sigmoid":
            return z  # identity
        # Numerically stable logistic sigmoid: each branch only calls exp()
        # on a non-positive argument, so it cannot overflow. The except
        # clauses are a defensive guard for libm implementations that raise
        # instead of returning 0.0 on underflow; they map to the asymptotes.
        if z >= 0:
            try:
                return 1.0 / (1.0 + math.exp(-z))
            except (OverflowError, ValueError):  # pragma: no cover - non-CPython libm
                return 1.0
        try:
            ez = math.exp(z)
        except (OverflowError, ValueError):  # pragma: no cover - non-CPython libm
            return 0.0
        return ez / (1.0 + ez)

    def _activate_derivative(self, z: float, a: float) -> float:
        """Return da/dz given the pre-activation ``z`` and the activation ``a``."""
        kind = self.activation
        if kind == "sigmoid":
            return a * (1.0 - a)
        if kind == "relu":
            return 1.0 if z > 0 else 0.0
        return 1.0  # identity
Methods:
forward
forward(x: list[float]) -> list[float]

Compute layer output for a single input vector and store state for backward pass.

Source code in src\cds\ml\neural.py
def forward(self, x: list[float]) -> list[float]:
    """Return the activations for one input vector, caching state for ``backward``.

    Stores the input in ``last_x``, the pre-activations in ``last_z`` and
    the activations in ``last_a`` (the returned list is ``last_a`` itself).
    """
    self.last_x = x
    self.last_z = pre_activations = []
    self.last_a = activations = []
    for i, row in enumerate(self.weights):
        z = sum(w * xi for w, xi in zip(row, x)) + self.biases[i]
        pre_activations.append(z)
        activations.append(self._activate(z))
    return activations
backward
backward(grad_out: list[float]) -> list[float]

Backpropagate error gradient through the layer.

Source code in src\cds\ml\neural.py
def backward(self, grad_out: list[float]) -> list[float]:
    """Accumulate parameter gradients and return the gradient w.r.t. the input.

    ``grad_out`` is dL/da for the most recent ``forward`` call; the cached
    ``last_x`` / ``last_z`` / ``last_a`` values are read from the instance.
    """
    # Chain rule through the activation: dL/dz = dL/da * da/dz.
    grad_z = [
        upstream * self._activate_derivative(z, a)
        for upstream, z, a in zip(grad_out, self.last_z, self.last_a)
    ]

    # dL/dw_ij = dL/dz_i * x_j (added on top of what is already accumulated).
    for i, row in enumerate(self.weights):
        gz = grad_z[i]
        for j in range(len(row)):
            self.grad_weights[i][j] += gz * self.last_x[j]

    # dL/db_i = dL/dz_i.
    for i, _ in enumerate(self.biases):
        self.grad_biases[i] += grad_z[i]

    # dL/dx_j = sum_i (dL/dz_i * w_ij).
    n_out = len(self.weights)
    return [
        sum(grad_z[i] * self.weights[i][j] for i in range(n_out))
        for j in range(len(self.last_x))
    ]

Signal Processing

DFT, radix-2 FFT/IFFT, convolution, and digital filters.

cds.signals

Signal processing tools.

Functions:

convolve

convolve(a: list[float], b: list[float]) -> list[float]

Linear convolution via the convolution theorem, computed with FFTs (O(N log N)).

Source code in src\cds\signals\processing.py
def convolve(a: list[float], b: list[float]) -> list[float]:
    """Linear convolution of two sequences via the convolution theorem (O(N log N)).

    Both inputs are zero-padded to a common power-of-two length, transformed,
    multiplied element-wise and transformed back. The real parts of the first
    ``len(a) + len(b) - 1`` samples are returned; an empty input yields ``[]``.
    """
    if not (a and b):
        return []

    out_len = len(a) + len(b) - 1

    # Next power of two at or above the output length, for FFT speed.
    padded_len = 1 << (out_len - 1).bit_length()

    padded_a = list(a)
    padded_a.extend([0j] * (padded_len - len(a)))
    padded_b = list(b)
    padded_b.extend([0j] * (padded_len - len(b)))

    # Element-wise product of the spectra corresponds to convolution in time.
    spectrum_a = fft(padded_a)
    spectrum_b = fft(padded_b)
    product = [left * right for left, right in zip(spectrum_a, spectrum_b)]

    # Back to the time domain; drop the padding tail and any imaginary residue.
    time_domain = ifft(product)
    return [sample.real for sample in time_domain[:out_len]]

dft

dft(signal: list[float | complex]) -> list[complex]

Discrete Fourier Transform (direct computation).

Parameters:

Name Type Description Default
signal list[float | complex]

input signal of length N

required

Returns:

Type Description
list[complex]

list of N complex frequency components

Source code in src\cds\signals\processing.py
def dft(signal: list[float | complex]) -> list[complex]:
    """Discrete Fourier Transform (direct computation).

    Args:
        signal: input signal of length N

    Returns:
        list of N complex frequency components
    """
    n = len(signal)
    result = []
    for k in range(n):
        s = 0 + 0j
        for t in range(n):
            angle = -2 * math.pi * k * t / n
            s += signal[t] * cmath.exp(1j * angle)
        result.append(s)
    return result

fft2

fft2(
    matrix: list[list[float | complex]],
) -> list[list[complex]]

2-D Discrete Fourier Transform (O(N log N)).

Source code in src\cds\signals\processing.py
def fft2(matrix: list[list[float | complex]]) -> list[list[complex]]:
    """2-D Discrete Fourier Transform (O(N log N))."""
    rows = len(matrix)
    if rows == 0:
        raise ValueError("matrix must be non-empty")
    cols = len(matrix[0])
    if any(len(row) != cols for row in matrix):
        raise ValueError("all rows must have the same length (ragged matrix detected)")

    # Row-wise FFT
    row_fft = [fft(list(row)) for row in matrix]

    # Column-wise FFT
    transposed = list(zip(*row_fft))
    col_fft = [fft(list(col)) for col in transposed]

    return [list(row) for row in zip(*col_fft)]

fft_radix2

fft_radix2(signal: list[float | complex]) -> list[complex]

Cooley-Tukey radix-2 FFT. Input length must be a power of 2.

Parameters:

Name Type Description Default
signal list[float | complex]

input signal (length must be power of 2)

required

Returns:

Type Description
list[complex]

list of complex frequency components

Raises:

Type Description
ValueError

if length is not a power of 2

Source code in src\cds\signals\processing.py
def fft_radix2(signal: list[float | complex]) -> list[complex]:
    """Cooley-Tukey radix-2 FFT. Input length must be a power of 2.

    Args:
        signal: input signal (length must be power of 2)

    Returns:
        list of complex frequency components

    Raises:
        ValueError: if length is not a power of 2
    """
    n = len(signal)
    if n == 0:
        return []
    if n & (n - 1) != 0:
        raise ValueError(
            f"signal length must be a power of 2 for FFT (got {n}); pad with zeros or use dft() for arbitrary lengths"
        )
    if n == 1:
        return list(signal)

    even = fft_radix2(signal[0::2])
    odd = fft_radix2(signal[1::2])

    result = [0 + 0j] * n
    for k in range(n // 2):
        w = cmath.exp(-2j * math.pi * k / n)
        result[k] = even[k] + w * odd[k]
        result[k + n // 2] = even[k] - w * odd[k]
    return result

idft

idft(spectrum: list[float | complex]) -> list[complex]

Inverse Discrete Fourier Transform.

Parameters:

Name Type Description Default
spectrum list[float | complex]

frequency-domain signal of length N

required

Returns:

Type Description
list[complex]

list of N complex time-domain samples

Source code in src\cds\signals\processing.py
def idft(spectrum: list[float | complex]) -> list[complex]:
    """Inverse Discrete Fourier Transform.

    Args:
        spectrum: frequency-domain signal of length N

    Returns:
        list of N complex time-domain samples
    """
    n = len(spectrum)
    result = []
    for t in range(n):
        s = 0 + 0j
        for k in range(n):
            angle = 2 * math.pi * k * t / n
            s += spectrum[k] * cmath.exp(1j * angle)
        result.append(s / n)
    return result

ifft2

ifft2(
    spectrum: list[list[float | complex]],
) -> list[list[complex]]

Inverse 2-D DFT (O(N log N)).

Source code in src\cds\signals\processing.py
def ifft2(spectrum: list[list[float | complex]]) -> list[list[complex]]:
    """Inverse 2-D DFT (O(N log N))."""
    rows = len(spectrum)
    if rows == 0:
        raise ValueError("matrix must be non-empty")
    cols = len(spectrum[0])
    if any(len(row) != cols for row in spectrum):
        raise ValueError("all rows must have the same length (ragged matrix detected)")

    # Row-wise IFFT
    row_inv = [ifft(list(row)) for row in spectrum]

    # Column-wise IFFT
    transposed = list(zip(*row_inv))
    col_inv = [ifft(list(col)) for col in transposed]

    return [list(row) for row in zip(*col_inv)]

low_pass_filter

low_pass_filter(
    signal: list[float | complex], cutoff: int
) -> list[complex]

Simple frequency-domain low-pass filter.

Source code in src\cds\signals\processing.py
def low_pass_filter(signal: list[float | complex], cutoff: int) -> list[complex]:
    """Simple frequency-domain low-pass filter."""
    n = len(signal)
    if n == 0:
        return []

    # Choose best transform
    if (n & (n - 1) == 0) and n > 0:
        spectrum = fft_radix2(signal)
        inv_func = ifft
    else:
        spectrum = dft(signal)
        inv_func = idft

    for k in range(n):
        if cutoff <= k <= n - cutoff:
            spectrum[k] = 0 + 0j

    return inv_func(spectrum)

power_spectrum

power_spectrum(
    signal: list[float | complex],
) -> list[float]

Compute the power spectrum |X[k]|^2 / N.

Source code in src\cds\signals\processing.py
def power_spectrum(signal: list[float | complex]) -> list[float]:
    """Compute the power spectrum |X[k]|^2 / N."""
    n = len(signal)
    if n == 0:
        return []

    # Use FFT if possible (O(N log N))
    if (n & (n - 1) == 0) and n > 0:
        spectrum = fft_radix2(signal)
    else:
        spectrum = dft(signal)

    return [abs(x) ** 2 / n for x in spectrum]

Quantum Computing

Single- and multi-qubit state-vector simulation with O(1) sampling.

cds.quantum

Quantum computing simulation tools.

Classes

QuantumCircuit dataclass

Simple circuit that applies gates sequentially to a single qubit.

Source code in src\cds\quantum\circuit.py
@dataclass
class QuantumCircuit:
    """Simple circuit that applies gates sequentially to a single qubit."""

    # Gates in application order; each instance gets its own list.
    gates: list[QuantumGate] = field(default_factory=list)

    def add(self, gate: QuantumGate) -> QuantumCircuit:
        """Append a gate to the circuit (returns self for fluent chaining)."""
        self.gates.append(gate)
        return self

    def run(self, initial: Qubit | None = None) -> Qubit:
        """Apply all gates sequentially; starts from `initial` or |0> if None.

        Each gate's ``apply`` result is fed to the next gate; the state
        returned by the last gate (or the starting state when the circuit is
        empty) is returned.
        """
        # Explicit None check: only a missing argument selects the default
        # state, independent of how the supplied state evaluates as a boolean.
        q = Qubit() if initial is None else initial
        for g in self.gates:
            q = g.apply(q)
        return q

    def __len__(self) -> int:
        """Number of gates currently in the circuit."""
        return len(self.gates)
Methods:
add
add(gate: QuantumGate) -> QuantumCircuit

Append a gate to the circuit (returns self for fluent chaining).

Source code in src\cds\quantum\circuit.py
def add(self, gate: QuantumGate) -> QuantumCircuit:
    """Append ``gate`` to the end of the circuit and return the circuit itself.

    Returning ``self`` allows calls to be chained fluently.
    """
    gate_list = self.gates
    gate_list.append(gate)
    return self
run
run(initial: Qubit | None = None) -> Qubit

Apply all gates sequentially; starts from initial or |0> if None.

Source code in src\cds\quantum\circuit.py
def run(self, initial: Qubit | None = None) -> Qubit:
    """Apply all gates sequentially; starts from `initial` or |0> if None.

    Each gate's ``apply`` result is fed to the next gate; the state returned
    by the last gate (or the starting state when the circuit is empty) is
    returned.
    """
    # Explicit None check: only a missing argument selects the default state,
    # independent of how the supplied state evaluates as a boolean.
    q = Qubit() if initial is None else initial
    for g in self.gates:
        q = g.apply(q)
    return q

QuantumGate dataclass

A 2x2 unitary gate stored as flat list [a, b, c, d].

Source code in src\cds\quantum\circuit.py
@dataclass
class QuantumGate:
    """A 2x2 unitary gate stored as flat list [a, b, c, d].

    ``apply`` treats the list as the matrix ``[[a, b], [c, d]]`` acting on
    the amplitude column ``(alpha, beta)``.
    """

    name: str
    matrix: list[complex]

    def apply(self, q: Qubit) -> Qubit:
        """Return a new Qubit holding this gate applied to `q` (`q` is not mutated)."""
        m00, m01, m10, m11 = self.matrix
        return Qubit(
            alpha=m00 * q.alpha + m01 * q.beta,
            beta=m10 * q.alpha + m11 * q.beta,
        )
Methods:
apply
apply(q: Qubit) -> Qubit

Apply this gate to q and return a new Qubit (state is not mutated).

Source code in src\cds\quantum\circuit.py
def apply(self, q: Qubit) -> Qubit:
    """Return a new Qubit holding this gate applied to `q` (`q` is not mutated).

    The flat ``matrix`` list is read as ``[[m00, m01], [m10, m11]]`` and
    multiplied with the amplitude column ``(alpha, beta)``.
    """
    m00, m01, m10, m11 = self.matrix
    return Qubit(
        alpha=m00 * q.alpha + m01 * q.beta,
        beta=m10 * q.alpha + m11 * q.beta,
    )

Qubit dataclass

Single qubit state as (alpha, beta) amplitudes.

Source code in src\cds\quantum\circuit.py
@dataclass
class Qubit:
    """Single qubit state as (alpha, beta) amplitudes."""

    # Amplitude of |0>; the default state is |0>.
    alpha: complex = 1 + 0j
    # Amplitude of |1>.
    beta: complex = 0 + 0j

    def probabilities(self) -> tuple[float, float]:
        """Return (P(|0>), P(|1>)) measurement probabilities for this qubit."""
        p0 = abs(self.alpha) ** 2
        p1 = abs(self.beta) ** 2
        return (p0, p1)

    def normalize(self) -> None:
        """Renormalize the state amplitudes in-place to unit length.

        A state whose norm is zero is left untouched.
        """
        # The norm is sqrt(|alpha|^2 + |beta|^2). math.hypot rejects complex
        # arguments, but the magnitudes are real, so hypot can take them
        # directly. Unlike squaring by hand, it neither underflows to zero
        # for tiny amplitudes nor overflows for huge ones.
        norm = math.hypot(abs(self.alpha), abs(self.beta))
        if norm > 0:
            self.alpha /= norm
            self.beta /= norm
Methods:
probabilities
probabilities() -> tuple[float, float]

Return (P(|0>), P(|1>)) measurement probabilities for this qubit.

Source code in src\cds\quantum\circuit.py
def probabilities(self) -> tuple[float, float]:
    """Return the measurement probabilities ``(P(|0>), P(|1>))``.

    Each probability is the squared magnitude of the matching amplitude.
    """
    return (abs(self.alpha) ** 2, abs(self.beta) ** 2)
normalize
normalize() -> None

Renormalize the state amplitudes in-place to unit length.

Source code in src\cds\quantum\circuit.py
def normalize(self) -> None:
    """Renormalize the state amplitudes in-place to unit length.

    A state whose norm is zero is left untouched.
    """
    # The norm is sqrt(|alpha|^2 + |beta|^2). math.hypot rejects complex
    # arguments, but the magnitudes are real, so hypot can take them directly.
    # Unlike squaring by hand, it neither underflows to zero for tiny
    # amplitudes nor overflows for huge ones.
    norm = math.hypot(abs(self.alpha), abs(self.beta))
    if norm > 0:
        self.alpha /= norm
        self.beta /= norm

QuantumRegister dataclass

N-qubit state vector. Amplitudes stored as list of 2^n complex numbers.

Source code in src\cds\quantum\multi_qubit.py
@dataclass
class QuantumRegister:
    """N-qubit state vector. Amplitudes stored as list of 2^n complex numbers.

    The amplitude at index ``i`` belongs to computational basis state ``|i>``.
    """

    n_qubits: int
    amplitudes: list[complex]

    @classmethod
    def zeros(cls, n: int) -> QuantumRegister:
        """All qubits in |0> state."""
        amps: list[complex] = [0 + 0j] * (2**n)
        amps[0] = 1 + 0j
        return cls(n_qubits=n, amplitudes=amps)

    @classmethod
    def from_bits(cls, n: int, value: int) -> QuantumRegister:
        """Computational basis state |value>."""
        amps: list[complex] = [0 + 0j] * (2**n)
        amps[value] = 1 + 0j
        return cls(n_qubits=n, amplitudes=amps)

    @property
    def size(self) -> int:
        """Number of amplitudes in the state vector (= 2**n_qubits)."""
        return len(self.amplitudes)

    def probabilities(self) -> list[float]:
        """List of |amplitude|^2 for each computational basis state."""
        return [abs(a) ** 2 for a in self.amplitudes]

    def normalize(self) -> None:
        """Renormalize the state vector in-place to unit length.

        A state whose norm is zero is left untouched.
        """
        norm = math.sqrt(sum(abs(a) ** 2 for a in self.amplitudes))
        if norm > 0:
            self.amplitudes = [a / norm for a in self.amplitudes]

    def measure(self, seed: int | None = None) -> int:
        """Measure the register and collapse its state vector.

        One random draw selects a basis state from the cumulative
        distribution; the register is then replaced by that basis state.

        Returns:
            Index of the measured basis state.
        """
        rng = random.Random(seed)
        probs = self.probabilities()
        r = rng.random()
        cumulative = 0.0
        # Default only survives when every probability is zero.
        outcome = len(probs) - 1
        for i, p in enumerate(probs):
            cumulative += p
            if p > 0:
                # Remember the latest reachable state so that a draw landing
                # beyond the accumulated total (rounding, unnormalized state)
                # never collapses into a zero-probability basis state.
                outcome = i
                if r <= cumulative:
                    break

        # Collapse: the measured state gets amplitude 1, all others 0.
        collapsed = [0.0 + 0j] * len(self.amplitudes)
        collapsed[outcome] = 1.0 + 0j
        self.amplitudes = collapsed
        return outcome

    def measure_shots(
        self,
        shots: int = 1000,
        seed: int | None = None,
    ) -> dict[str, int]:
        """Run multiple measurements, return counts as binary strings.

        The register itself is not collapsed. Keys are basis indices
        formatted as zero-padded binary of width ``n_qubits``.
        """
        from bisect import bisect_right
        from itertools import accumulate

        rng = random.Random(seed)
        counts: dict[str, int] = {}
        probs = self.probabilities()
        # Build the cumulative distribution once; each shot is then a binary
        # search instead of a scan over all 2^n basis states.
        cumulative = list(accumulate(probs))
        # Used when a draw lands beyond the accumulated total: the last basis
        # state with non-zero probability (final index if there is none).
        fallback = len(probs) - 1
        for i in range(len(probs) - 1, -1, -1):
            if probs[i] > 0:
                fallback = i
                break
        for _ in range(shots):
            # First index whose cumulative probability exceeds the draw.
            idx = bisect_right(cumulative, rng.random())
            result = idx if idx < len(cumulative) else fallback
            label = format(result, f"0{self.n_qubits}b")
            counts[label] = counts.get(label, 0) + 1
        return counts

    def expectation(self) -> float:
        """Expected value treating basis index as eigenvalue."""
        return sum(i * abs(a) ** 2 for i, a in enumerate(self.amplitudes))
Attributes
size property
size: int

Number of amplitudes in the state vector (= 2**n_qubits).

Methods:
zeros classmethod
zeros(n: int) -> QuantumRegister

All qubits in |0> state.

Source code in src\cds\quantum\multi_qubit.py
@classmethod
def zeros(cls, n: int) -> QuantumRegister:
    """Build an ``n``-qubit register with every qubit in |0>.

    Only the amplitude at index 0 is non-zero.
    """
    dimension = 2**n
    state: list[complex] = [0 + 0j] * dimension
    state[0] = 1 + 0j
    return cls(n_qubits=n, amplitudes=state)
from_bits classmethod
from_bits(n: int, value: int) -> QuantumRegister

Computational basis state |value>.

Source code in src\cds\quantum\multi_qubit.py
@classmethod
def from_bits(cls, n: int, value: int) -> QuantumRegister:
    """Build the ``n``-qubit computational basis state |value>.

    The amplitude at index ``value`` is set to 1 and all others to 0.
    """
    dimension = 2**n
    state: list[complex] = [0 + 0j] * dimension
    state[value] = 1 + 0j
    return cls(n_qubits=n, amplitudes=state)
probabilities
probabilities() -> list[float]

List of |amplitude|^2 for each computational basis state.

Source code in src\cds\quantum\multi_qubit.py
def probabilities(self) -> list[float]:
    """Return |amplitude|^2 for every computational basis state, in index order."""
    squared_magnitudes = []
    for amplitude in self.amplitudes:
        squared_magnitudes.append(abs(amplitude) ** 2)
    return squared_magnitudes
normalize
normalize() -> None

Renormalize the state vector in-place to unit length.

Source code in src\cds\quantum\multi_qubit.py
def normalize(self) -> None:
    """Rescale the state vector to unit length.

    ``amplitudes`` is replaced by a new list; a state whose norm is not
    positive is left untouched.
    """
    squared_magnitudes = (abs(amplitude) ** 2 for amplitude in self.amplitudes)
    norm = math.sqrt(sum(squared_magnitudes))
    if not norm > 0:
        return
    self.amplitudes = [amplitude / norm for amplitude in self.amplitudes]
measure
measure(seed: int | None = None) -> int

Measure the register and collapse its state vector.

Source code in src\cds\quantum\multi_qubit.py
def measure(self, seed: int | None = None) -> int:
    """Measure the register and collapse its state vector."""
    rng = random.Random(seed)
    probs = self.probabilities()
    r = rng.random()
    cumulative = 0.0
    for i, p in enumerate(probs):
        cumulative += p
        if p > 0 and r <= cumulative:
            # State Collapse logic
            # All other amplitudes become 0, measured state becomes 1.0
            new_amps = [0.0 + 0j] * len(self.amplitudes)
            new_amps[i] = 1.0 + 0j
            self.amplitudes = new_amps
            return i

    # Fallback for floating point edge cases
    final_idx = len(probs) - 1
    new_amps = [0.0 + 0j] * len(self.amplitudes)
    new_amps[final_idx] = 1.0 + 0j
    self.amplitudes = new_amps
    return final_idx
measure_shots
measure_shots(
    shots: int = 1000, seed: int | None = None
) -> dict[str, int]

Run multiple measurements, return counts as binary strings.

Source code in src\cds\quantum\multi_qubit.py
def measure_shots(
    self,
    shots: int = 1000,
    seed: int | None = None,
) -> dict[str, int]:
    """Run multiple measurements, return counts as binary strings."""
    rng = random.Random(seed)
    counts: dict[str, int] = {}
    probs = self.probabilities()
    for _ in range(shots):
        r = rng.random()
        cumulative = 0.0
        result = len(probs) - 1
        for i, p in enumerate(probs):
            cumulative += p
            if r < cumulative:
                result = i
                break
        label = format(result, f"0{self.n_qubits}b")
        counts[label] = counts.get(label, 0) + 1
    return counts
expectation
expectation() -> float

Expected value treating basis index as eigenvalue.

Source code in src\cds\quantum\multi_qubit.py
def expectation(self) -> float:
    """Return the mean basis index, weighting each index by |amplitude|^2."""
    weighted_indices = (
        index * abs(amplitude) ** 2 for index, amplitude in enumerate(self.amplitudes)
    )
    return sum(weighted_indices)

Functions:

hadamard

hadamard() -> QuantumGate

Hadamard gate H = (1/sqrt(2)) * [[1, 1], [1, -1]].

Source code in src\cds\quantum\circuit.py
def hadamard() -> QuantumGate:
    """Build the Hadamard gate H = (1/sqrt(2)) * [[1, 1], [1, -1]]."""
    inv_sqrt2 = 1 / math.sqrt(2)
    entries = [inv_sqrt2, inv_sqrt2, inv_sqrt2, -inv_sqrt2]
    return QuantumGate("H", entries)

pauli_x

pauli_x() -> QuantumGate

Pauli-X (NOT) gate X = [[0, 1], [1, 0]].

Source code in src\cds\quantum\circuit.py
def pauli_x() -> QuantumGate:
    """Build the Pauli-X (NOT) gate X = [[0, 1], [1, 0]]."""
    entries = [0, 1, 1, 0]
    return QuantumGate("X", entries)

pauli_z

pauli_z() -> QuantumGate

Pauli-Z gate Z = [[1, 0], [0, -1]].

Source code in src\cds\quantum\circuit.py
def pauli_z() -> QuantumGate:
    """Build the Pauli-Z gate Z = [[1, 0], [0, -1]]."""
    entries = [1, 0, 0, -1]
    return QuantumGate("Z", entries)

phase_gate

phase_gate(theta: float) -> QuantumGate

Phase rotation gate P(theta) = diag(1, e^{i*theta}).

Source code in src\cds\quantum\circuit.py
def phase_gate(theta: float) -> QuantumGate:
    """Build the phase rotation gate P(theta) = diag(1, e^{i*theta}).

    The gate name embeds ``theta`` rounded to two decimals.
    """
    phase = complex(math.cos(theta), math.sin(theta))
    label = f"P({theta:.2f})"
    return QuantumGate(label, [1, 0, 0, phase])

bell_state

bell_state(which: int = 0) -> QuantumRegister

Create one of the 4 Bell states (0-3).

0: |Φ+> = (|00> + |11>) / √2
1: |Φ-> = (|00> - |11>) / √2
2: |Ψ+> = (|01> + |10>) / √2
3: |Ψ-> = (|01> - |10>) / √2

Source code in src\cds\quantum\multi_qubit.py
def bell_state(which: int = 0) -> QuantumRegister:
    """Create one of the 4 Bell states (0-3).

    0: |Φ+> = (|00> + |11>) / √2
    1: |Φ-> = (|00> - |11>) / √2
    2: |Ψ+> = (|01> + |10>) / √2
    3: |Ψ-> = (|01> - |10>) / √2

    The states are produced up to a global phase.

    Raises:
        ValueError: if ``which`` is not 0, 1, 2 or 3.
    """
    # Reject unknown selectors instead of silently returning |Φ+>.
    if which not in (0, 1, 2, 3):
        raise ValueError(f"which must be 0, 1, 2 or 3 (got {which!r})")

    reg = QuantumRegister.zeros(2)
    if which in (2, 3):
        # Flip qubit 1 first so the entangling step yields a Ψ state.
        reg = x_gate(reg, 1)
    # Hadamard followed by CNOT entangles the two qubits.
    reg = h_gate(reg, 0)
    reg = cnot(reg, 0, 1)
    if which in (1, 3):
        # Z on qubit 0 introduces the relative minus sign.
        reg = z_gate(reg, 0)
    return reg

cnot

cnot(
    reg: QuantumRegister, control: int, target: int
) -> QuantumRegister

Controlled-NOT gate.

Source code in src\cds\quantum\multi_qubit.py
def cnot(
    reg: QuantumRegister,
    control: int,
    target: int,
) -> QuantumRegister:
    """Controlled-NOT gate.

    For every basis index whose ``control`` bit is set, the amplitude is
    exchanged with that of the index differing only in the ``target`` bit.
    Returns a new register; ``reg`` is not modified.
    """
    n = reg.n_qubits
    control_mask = 1 << control
    source = reg.amplitudes
    swapped = list(source)
    for i in range(1 << n):
        if not i & control_mask:
            continue
        partner = i ^ (1 << target)
        # Handle each pair once, from its lower index.
        if partner > i:
            swapped[i], swapped[partner] = source[partner], source[i]
    return QuantumRegister(n_qubits=n, amplitudes=swapped)

cz

cz(
    reg: QuantumRegister, control: int, target: int
) -> QuantumRegister

Controlled-Z gate.

Source code in src\cds\quantum\multi_qubit.py
def cz(
    reg: QuantumRegister,
    control: int,
    target: int,
) -> QuantumRegister:
    """Controlled-Z gate.

    Negates the amplitude of every basis index in which both the ``control``
    and the ``target`` bit are set. Returns a new register; ``reg`` is not
    modified.
    """
    n = reg.n_qubits
    source = reg.amplitudes
    flipped = list(source)
    for i in range(1 << n):
        if not i & (1 << control):
            continue
        if not i & (1 << target):
            continue
        flipped[i] = -source[i]
    return QuantumRegister(n_qubits=n, amplitudes=flipped)

ghz_state

ghz_state(n: int) -> QuantumRegister

GHZ state: (|00...0> + |11...1>) / √2

Source code in src\cds\quantum\multi_qubit.py
def ghz_state(n: int) -> QuantumRegister:
    """Build the n-qubit GHZ state (|00...0> + |11...1>) / √2.

    Hadamard on qubit 0, then a CNOT from qubit 0 onto each remaining qubit.
    """
    state = h_gate(QuantumRegister.zeros(n), 0)
    for target in range(1, n):
        state = cnot(state, 0, target)
    return state

h_gate

h_gate(
    reg: QuantumRegister, target: int
) -> QuantumRegister

Hadamard on qubit target.

Source code in src\cds\quantum\multi_qubit.py
def h_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply a Hadamard gate to qubit `target` of `reg`."""
    inv_sqrt2 = 1 / math.sqrt(2)
    hadamard_matrix = [inv_sqrt2, inv_sqrt2, inv_sqrt2, -inv_sqrt2]
    return _gate_2x2(reg, target, hadamard_matrix)

is_entangled

is_entangled(reg: QuantumRegister) -> bool

Check if a 2-qubit state is entangled (not separable).

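Uses concurrence: for |ψ> = a|00> + b|01> + c|10> + d|11>, concurrence = 2|ad - bc|. The state is reported as entangled when the concurrence exceeds CONCURRENCE_THRESHOLD (rather than being compared with exactly 0).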

Source code in src\cds\quantum\multi_qubit.py
def is_entangled(reg: QuantumRegister) -> bool:
    """Report whether a 2-qubit state is entangled (not separable).

    For |ψ> = a|00> + b|01> + c|10> + d|11> the concurrence is 2|ad - bc|;
    the state counts as entangled when it exceeds CONCURRENCE_THRESHOLD.

    Raises:
        ValueError: if ``reg`` does not hold exactly two qubits.
    """
    if reg.n_qubits != 2:
        raise ValueError("entanglement check only for 2-qubit states")
    amp00, amp01, amp10, amp11 = reg.amplitudes
    determinant = amp00 * amp11 - amp01 * amp10
    return 2 * abs(determinant) > CONCURRENCE_THRESHOLD

rz_gate

rz_gate(
    reg: QuantumRegister, target: int, theta: float
) -> QuantumRegister

Rotation around Z axis.

Source code in src\cds\quantum\multi_qubit.py
def rz_gate(
    reg: QuantumRegister,
    target: int,
    theta: float,
) -> QuantumRegister:
    """Rotate qubit `target` around the Z axis by `theta`.

    Applies diag(e^{-i*theta/2}, e^{+i*theta/2}) to the target qubit.
    """
    cos_half = math.cos(theta / 2)
    sin_half = math.sin(theta / 2)
    phase_down = complex(cos_half, -sin_half)
    phase_up = complex(cos_half, sin_half)
    return _gate_2x2(reg, target, [phase_down, 0, 0, phase_up])

swap

swap(
    reg: QuantumRegister, q1: int, q2: int
) -> QuantumRegister

SWAP gate — exchange two qubits.

Source code in src\cds\quantum\multi_qubit.py
def swap(
    reg: QuantumRegister,
    q1: int,
    q2: int,
) -> QuantumRegister:
    """SWAP gate — exchange two qubits.

    Built from three CNOTs with alternating control and target.
    """
    for control, target in ((q1, q2), (q2, q1), (q1, q2)):
        reg = cnot(reg, control, target)
    return reg

toffoli

toffoli(
    reg: QuantumRegister, c1: int, c2: int, target: int
) -> QuantumRegister

Toffoli (CCNOT) gate — 3-qubit controlled-controlled-NOT.

Source code in src\cds\quantum\multi_qubit.py
def toffoli(
    reg: QuantumRegister,
    c1: int,
    c2: int,
    target: int,
) -> QuantumRegister:
    """Toffoli (CCNOT) gate — 3-qubit controlled-controlled-NOT.

    For every basis index with both control bits set, the amplitude is
    exchanged with that of the index differing only in the ``target`` bit.
    Returns a new register; ``reg`` is not modified.
    """
    n = reg.n_qubits
    source = reg.amplitudes
    swapped = list(source)
    for i in range(1 << n):
        if not i & (1 << c1):
            continue
        if not i & (1 << c2):
            continue
        partner = i ^ (1 << target)
        # Handle each pair once, from its lower index.
        if partner > i:
            swapped[i], swapped[partner] = source[partner], source[i]
    return QuantumRegister(n_qubits=n, amplitudes=swapped)

x_gate

x_gate(
    reg: QuantumRegister, target: int
) -> QuantumRegister

Pauli-X (NOT) on qubit target.

Source code in src\cds\quantum\multi_qubit.py
def x_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply a Pauli-X (NOT) gate to qubit `target` of `reg`."""
    pauli_x_matrix = [0, 1, 1, 0]
    return _gate_2x2(reg, target, pauli_x_matrix)

y_gate

y_gate(
    reg: QuantumRegister, target: int
) -> QuantumRegister

Pauli-Y on qubit target.

Source code in src\cds\quantum\multi_qubit.py
def y_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply a Pauli-Y gate to qubit `target` of `reg`."""
    pauli_y_matrix = [0, -1j, 1j, 0]
    return _gate_2x2(reg, target, pauli_y_matrix)

z_gate

z_gate(
    reg: QuantumRegister, target: int
) -> QuantumRegister

Pauli-Z on qubit target.

Source code in src\cds\quantum\multi_qubit.py
def z_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply a Pauli-Z gate to qubit `target` of `reg`."""
    pauli_z_matrix = [1, 0, 0, -1]
    return _gate_2x2(reg, target, pauli_z_matrix)

simulate

simulate(
    circuit: QuantumCircuit,
    shots: int = 1000,
    seed: int | None = None,
) -> dict[int, int]

Run a circuit many times and collect measurement statistics.

Optimized to compute the state vector only once, then probabilistically sample.

Source code in src\cds\quantum\simulator.py
def simulate(circuit: QuantumCircuit, shots: int = 1000, seed: int | None = None) -> dict[int, int]:
    """Sample measurement outcomes of `circuit` and tally them.

    `circuit.run()` is called a single time; the first value returned by the
    resulting state's `probabilities()` is used as the chance of outcome 0.
    One draw from a `random.Random(seed)` generator is made per shot.

    Returns:
        dict mapping each observed outcome (0 or 1) to how often it occurred.
        Outcomes that were never drawn are absent from the dict.
    """
    sampler = random.Random(seed)

    # Evaluate the circuit once; every shot re-uses the same distribution.
    final_state = circuit.run()
    prob_zero, _prob_one = final_state.probabilities()

    tally: Counter = Counter()
    for _ in range(shots):
        outcome = 0 if sampler.random() < prob_zero else 1
        tally[outcome] += 1
    return dict(tally)

Scientific Computing

Curated physical constants and classical physics formulas (mechanics, waves, relativity, thermo).

cds.scientific

Scientific computing utilities.

Functions:

get_constant

get_constant(name: str) -> float

Look up a physical/mathematical constant by short name and return its value.

Raises:

Type Description
KeyError

if name is not in the table (lists valid names in the message).

Source code in src\cds\scientific\constants.py
def get_constant(name: str) -> float:
    """Return the numeric value stored under `name` in ``CONSTANTS``.

    The value is the first element of the table entry for `name`.

    Raises:
        KeyError: if `name` has no entry; the message lists every valid name.
    """
    if name in CONSTANTS:
        entry = CONSTANTS[name]
        return entry[0]
    raise KeyError(f"unknown constant: {name}. available: {list(CONSTANTS.keys())}")

de_broglie_wavelength

de_broglie_wavelength(
    mass: float, velocity: float
) -> float

lambda = h / (m * v)

Source code in src\cds\scientific\formulas.py
def de_broglie_wavelength(mass: float, velocity: float) -> float:
    """Return the de Broglie wavelength ``h / (mass * velocity)``.

    Raises:
        ValueError: if the momentum ``mass * velocity`` is zero.
    """
    planck = get_constant("h")
    momentum = mass * velocity
    if momentum != 0:
        return planck / momentum
    raise ValueError("momentum can't be zero")

escape_velocity

escape_velocity(mass: float, radius: float) -> float

v_esc = sqrt(2GM/r)

Source code in src\cds\scientific\formulas.py
def escape_velocity(mass: float, radius: float) -> float:
    """v_esc = sqrt(2GM/r)

    Args:
        mass: mass M of the attracting body; must be non-negative.
        radius: distance r from the body's centre; must be positive.

    Returns:
        The escape velocity ``sqrt(2 * G * mass / radius)``.

    Raises:
        ValueError: if `radius` is not positive or `mass` is negative.
    """
    # Validate first so bad input fails with a clear ValueError (matching the
    # other formulas in this module) instead of a ZeroDivisionError or an
    # opaque "math domain error" from math.sqrt.
    if radius <= 0:
        raise ValueError("radius must be positive")
    if mass < 0:
        raise ValueError("mass must be non-negative")
    G = get_constant("G")
    return math.sqrt(2 * G * mass / radius)

gravitational_force

gravitational_force(
    m1: float, m2: float, r: float
) -> float

F = G * m1 * m2 / r^2

Source code in src\cds\scientific\formulas.py
def gravitational_force(m1: float, m2: float, r: float) -> float:
    """Return the Newtonian attraction ``G * m1 * m2 / r**2``.

    Raises:
        ValueError: if the separation `r` is zero.
    """
    grav_constant = get_constant("G")
    if r != 0:
        return grav_constant * m1 * m2 / r**2
    raise ValueError("distance can't be zero")

ideal_gas_pressure

ideal_gas_pressure(
    n_moles: float, temperature: float, volume: float
) -> float

PV = nRT => P = nRT/V

Source code in src\cds\scientific\formulas.py
def ideal_gas_pressure(n_moles: float, temperature: float, volume: float) -> float:
    """Return the ideal-gas pressure ``P = n * R * T / V``.

    Raises:
        ValueError: if `volume` is zero or negative.
    """
    gas_constant = get_constant("R")
    if volume <= 0:
        raise ValueError("volume must be positive")
    numerator = n_moles * gas_constant * temperature
    return numerator / volume

kinetic_energy

kinetic_energy(mass: float, velocity: float) -> float

KE = 0.5 * m * v^2

Source code in src\cds\scientific\formulas.py
def kinetic_energy(mass: float, velocity: float) -> float:
    """Return the kinetic energy ``0.5 * mass * velocity**2``."""
    half_mass = 0.5 * mass
    return half_mass * velocity**2

photon_energy

photon_energy(frequency: float) -> float

E = h * f

Source code in src\cds\scientific\formulas.py
def photon_energy(frequency: float) -> float:
    """Return the photon energy ``E = h * frequency``."""
    planck = get_constant("h")
    energy = planck * frequency
    return energy

schwarzschild_radius

schwarzschild_radius(mass: float) -> float

r_s = 2GM/c^2

Source code in src\cds\scientific\formulas.py
def schwarzschild_radius(mass: float) -> float:
    """Return the Schwarzschild radius ``2 * G * mass / c**2``."""
    grav_constant = get_constant("G")
    light_speed = get_constant("c")
    numerator = 2 * grav_constant * mass
    return numerator / light_speed**2

wave_frequency

wave_frequency(wavelength: float) -> float

f = c / lambda

Source code in src\cds\scientific\formulas.py
def wave_frequency(wavelength: float) -> float:
    """Return the frequency ``c / wavelength``.

    Raises:
        ValueError: if `wavelength` is zero or negative.
    """
    light_speed = get_constant("c")
    if wavelength <= 0:
        raise ValueError("wavelength must be positive")
    frequency = light_speed / wavelength
    return frequency

Graph Theory

BFS, DFS, Dijkstra shortest paths, Kruskal MST, topological sort, cycle detection.

cds.graph

Graph theory algorithms — BFS, DFS, Dijkstra, Kruskal MST, topological sort, and cycle detection.

Classes

Graph dataclass

Adjacency-list graph representation.

Supports both directed and undirected graphs with weighted edges.

Source code in src\cds\graph\algorithms.py
@dataclass
class Graph:
    """Graph stored as adjacency lists plus a flat edge list.

    Works for directed and undirected graphs; every edge carries a weight.
    """

    # Declared vertex count; `add_edge` does not check endpoints against it.
    n_vertices: int
    # When False, `add_edge` records each edge in both directions in `adj`.
    directed: bool = False
    # vertex -> list of (neighbor, weight) pairs.
    adj: dict[int, list[tuple[int, float]]] = field(default_factory=dict)
    # One `Edge(u, v, weight)` per `add_edge` call, in insertion order.
    edges: list[Edge] = field(default_factory=list)

    def add_edge(self, u: int, v: int, weight: float = 1.0) -> None:
        """Record an edge u -> v, and also v -> u when the graph is undirected."""
        endpoints = [(u, v)] if self.directed else [(u, v), (v, u)]
        for src, dst in endpoints:
            self.adj.setdefault(src, []).append((dst, weight))
        self.edges.append(Edge(u, v, weight))
Methods:
add_edge
add_edge(u: int, v: int, weight: float = 1.0) -> None

Add an edge from u to v (and v to u if undirected).

Source code in src\cds\graph\algorithms.py
def add_edge(self, u: int, v: int, weight: float = 1.0) -> None:
    """Record an edge u -> v, and also v -> u when the graph is undirected."""
    forward = self.adj.setdefault(u, [])
    forward.append((v, weight))
    if not self.directed:
        backward = self.adj.setdefault(v, [])
        backward.append((u, weight))
    self.edges.append(Edge(u, v, weight))

Functions:

bfs

bfs(graph: Graph, start: int) -> list[int]

Breadth-first search traversal.

Returns vertices in BFS order starting from start. Time complexity: O(V + E) [CLRS §22.2]

Source code in src\cds\graph\algorithms.py
def bfs(graph: Graph, start: int) -> list[int]:
    """Traverse the graph breadth-first.

    Returns the vertices reachable from `start` in the order they are
    discovered, beginning with `start` itself.
    Time complexity: O(V + E)  [CLRS §22.2]
    """
    # `order` doubles as the FIFO queue: vertices are appended on discovery
    # and `cursor` marks the next one to expand.
    order: list[int] = [start]
    seen: set[int] = {start}
    cursor = 0

    while cursor < len(order):
        current = order[cursor]
        cursor += 1
        for neighbor, _weight in graph.adj.get(current, []):
            if neighbor in seen:
                continue
            seen.add(neighbor)
            order.append(neighbor)
    return order

dfs

dfs(graph: Graph, start: int) -> list[int]

Depth-first search traversal (iterative).

Returns vertices in DFS order starting from start. Time complexity: O(V + E) [CLRS §22.3]

Source code in src\cds\graph\algorithms.py
def dfs(graph: Graph, start: int) -> list[int]:
    """Traverse the graph depth-first using an explicit stack.

    Returns the vertices reachable from `start` in the order they are first
    visited.
    Time complexity: O(V + E)  [CLRS §22.3]
    """
    order: list[int] = []
    seen: set[int] = set()
    pending: list[int] = [start]

    while pending:
        current = pending.pop()
        if current not in seen:
            seen.add(current)
            order.append(current)
            # Push in reverse so the first-listed neighbor is popped first.
            pending.extend(
                neighbor
                for neighbor, _weight in reversed(graph.adj.get(current, []))
                if neighbor not in seen
            )
    return order

dijkstra

dijkstra(
    graph: Graph, start: int
) -> tuple[dict[int, float], dict[int, int | None]]

Dijkstra's shortest path algorithm.

Returns (distances, predecessors) from start to all reachable vertices. Time complexity: O((V + E) log V) with binary heap [Dijkstra 1959]

Parameters:

Name Type Description Default
graph Graph

weighted graph (non-negative weights)

required
start int

source vertex

required

Returns:

Name Type Description
distances dict[int, float]

dict mapping vertex -> shortest distance from start

predecessors dict[int, int | None]

dict mapping vertex -> previous vertex on shortest path

Raises:

Type Description
ValueError

if a negative weight is encountered

Source code in src\cds\graph\algorithms.py
def dijkstra(
    graph: Graph,
    start: int,
) -> tuple[dict[int, float], dict[int, int | None]]:
    """Single-source shortest paths via Dijkstra's algorithm.

    Computes, for every vertex reachable from `start`, the length of the
    shortest path and the vertex that precedes it on that path.
    Time complexity: O((V + E) log V) with binary heap  [Dijkstra 1959]

    Args:
        graph: weighted graph (non-negative weights)
        start: source vertex

    Returns:
        distances: dict mapping vertex -> shortest distance from start
        predecessors: dict mapping vertex -> previous vertex on shortest path
            (``None`` for `start`)

    Raises:
        ValueError: if a negative weight is encountered
    """
    best: dict[int, float] = {start: 0.0}
    parent: dict[int, int | None] = {start: None}
    settled: set[int] = set()
    frontier: list[tuple[float, int]] = [(0.0, start)]

    while frontier:
        cost, node = heapq.heappop(frontier)
        # Stale heap entries for already-settled vertices are skipped.
        if node in settled:
            continue
        settled.add(node)

        for neighbor, weight in graph.adj.get(node, []):
            if weight < 0:
                raise ValueError("negative edge weights not supported")
            candidate = cost + weight
            if neighbor in best and not candidate < best[neighbor]:
                continue
            best[neighbor] = candidate
            parent[neighbor] = node
            heapq.heappush(frontier, (candidate, neighbor))

    return best, parent

has_cycle

has_cycle(graph: Graph) -> bool

Detect if a directed graph has a cycle using DFS coloring.

Time complexity: O(V + E)

WHITE=0 (unvisited), GRAY=1 (in current DFS path), BLACK=2 (finished).

Source code in src\cds\graph\algorithms.py
def has_cycle(graph: Graph) -> bool:
    """Detect if a directed graph has a cycle using DFS coloring.

    Time complexity: O(V + E)

    WHITE=0 (unvisited), GRAY=1 (in current DFS path), BLACK=2 (finished).

    The search is iterative (explicit stack), so long paths cannot exhaust
    the interpreter's recursion limit. Vertices that appear in `graph.adj`
    but lie outside ``range(graph.n_vertices)`` are treated as ordinary
    unvisited vertices rather than causing a ``KeyError``.

    Args:
        graph: graph whose `adj` maps vertex -> list of (neighbor, weight).

    Returns:
        True if some edge leads back to a vertex on the current DFS path.
    """
    WHITE, GRAY, BLACK = 0, 1, 2
    # Vertices missing from `color` are WHITE.
    color: dict[int, int] = {}

    # Start from every declared vertex, then from any extra adjacency keys.
    for root in [*range(graph.n_vertices), *graph.adj]:
        if color.get(root, WHITE) != WHITE:
            continue
        color[root] = GRAY
        # Each frame holds a vertex and an iterator over its remaining edges,
        # mirroring the call stack of the recursive formulation.
        stack = [(root, iter(graph.adj.get(root, [])))]
        while stack:
            node, remaining = stack[-1]
            for neighbor, _weight in remaining:
                state = color.get(neighbor, WHITE)
                if state == GRAY:
                    return True
                if state == WHITE:
                    color[neighbor] = GRAY
                    stack.append((neighbor, iter(graph.adj.get(neighbor, []))))
                    break
            else:
                # All outgoing edges explored: the vertex is finished.
                color[node] = BLACK
                stack.pop()
    return False

kruskal_mst

kruskal_mst(graph: Graph) -> tuple[list[Edge], float]

Kruskal's minimum spanning tree algorithm.

Time complexity: O(E log E) [Kruskal 1956]

Parameters:

Name Type Description Default
graph Graph

undirected weighted graph

required

Returns:

Name Type Description
mst_edges list[Edge]

list of edges in the MST

total_weight float

sum of edge weights in the MST

Source code in src\cds\graph\algorithms.py
def kruskal_mst(graph: Graph) -> tuple[list[Edge], float]:
    """Build a minimum spanning tree with Kruskal's algorithm.

    Edges are considered in ascending weight order and kept whenever
    `_union` reports that their endpoints were merged.
    Time complexity: O(E log E)  [Kruskal 1956]

    Args:
        graph: undirected weighted graph

    Returns:
        mst_edges: list of edges in the MST
        total_weight: sum of edge weights in the MST
    """
    vertex_count = graph.n_vertices
    parent = {v: v for v in range(vertex_count)}
    rank = dict.fromkeys(range(vertex_count), 0)

    chosen: list[Edge] = []
    weight_sum = 0.0

    for candidate in sorted(graph.edges, key=lambda e: e.weight):
        if not _union(parent, rank, candidate.src, candidate.dst):
            continue
        chosen.append(candidate)
        weight_sum += candidate.weight
        # A spanning tree has exactly V - 1 edges; stop once it is complete.
        if len(chosen) == vertex_count - 1:
            break

    return chosen, weight_sum

topological_sort

topological_sort(graph: Graph) -> list[int]

Kahn's algorithm for topological sort of a DAG.

Time complexity: O(V + E) [CLRS §22.4]

Parameters:

Name Type Description Default
graph Graph

directed acyclic graph

required

Returns:

Type Description
list[int]

list of vertices in topological order

Raises:

Type Description
ValueError

if graph contains a cycle

Source code in src\cds\graph\algorithms.py
def topological_sort(graph: Graph) -> list[int]:
    """Order the vertices of a DAG topologically using Kahn's algorithm.

    Time complexity: O(V + E)  [CLRS §22.4]

    Args:
        graph: directed acyclic graph

    Returns:
        list of vertices in topological order

    Raises:
        ValueError: if the produced order does not contain exactly
            `graph.n_vertices` vertices (reported as a cycle)
    """
    vertex_count = graph.n_vertices

    # Number of not-yet-emitted predecessors per vertex.
    remaining: dict[int, int] = dict.fromkeys(range(vertex_count), 0)
    for successors in graph.adj.values():
        for target, _weight in successors:
            remaining[target] = remaining.get(target, 0) + 1

    ready: deque[int] = deque()
    for vertex in range(vertex_count):
        if remaining[vertex] == 0:
            ready.append(vertex)

    order: list[int] = []
    while ready:
        vertex = ready.popleft()
        order.append(vertex)
        for target, _weight in graph.adj.get(vertex, []):
            remaining[target] -= 1
            if remaining[target] == 0:
                ready.append(target)

    if len(order) == vertex_count:
        return order
    raise ValueError("graph contains a cycle")

Mathematical Modeling

Symbolic algebra for equation development: an expression tree (+, -, *, /, **, sin, cos, exp, log, sqrt) with symbolic differentiation, simplification, LaTeX export, named MathModel systems of equations, and numeric solvers (root finding and parameter fitting) built on cds.optimization.

cds.modeling

Mathematical modeling — symbolic expressions, equation systems, and solvers.

Classes

Add

Bases: _Binary

left + right.

Source code in src\cds\modeling\expression.py
class Add(_Binary):
    """Sum node: ``left + right``."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate both operands under `env` and add the results."""
        lhs = self.left.evaluate(env)
        rhs = self.right.evaluate(env)
        return lhs + rhs

    def diff(self, var: str) -> Expression:
        """Sum rule: differentiate each operand separately."""
        d_left = self.left.diff(var)
        d_right = self.right.diff(var)
        return Add(d_left, d_right)

    def simplify(self) -> Expression:
        """Fold constant sums and drop a zero operand."""
        lhs = self.left.simplify()
        rhs = self.right.simplify()
        lhs_is_const = isinstance(lhs, Constant)
        rhs_is_const = isinstance(rhs, Constant)
        if lhs_is_const and rhs_is_const:
            return Constant(lhs.value + rhs.value)
        if lhs_is_const and lhs.value == 0.0:
            return rhs
        if rhs_is_const and rhs.value == 0.0:
            return lhs
        return Add(lhs, rhs)

    def to_str(self) -> str:
        """Render as a parenthesized infix sum."""
        lhs, rhs = self.left.to_str(), self.right.to_str()
        return f"({lhs} + {rhs})"

    def to_latex(self) -> str:
        """Render as ``<left> + <right>`` in LaTeX."""
        lhs, rhs = self.left.to_latex(), self.right.to_latex()
        return f"{lhs} + {rhs}"

Constant

Bases: Expression

A literal numeric value in an expression tree.

Source code in src\cds\modeling\expression.py
class Constant(Expression):
    """A literal numeric value in an expression tree.

    The value is stored as a ``float``. Two constants compare (and hash)
    equal when their values are equal.
    """

    __slots__ = ("value",)

    def __init__(self, value: float) -> None:
        self.value = float(value)

    def evaluate(self, env: dict[str, float]) -> float:
        """Return the stored value; `env` is not consulted."""
        return self.value

    def diff(self, var: str) -> Expression:
        """The derivative of a constant is zero."""
        return Constant(0.0)

    def variables(self) -> set[str]:
        """A constant contains no variables."""
        return set()

    def to_str(self) -> str:
        """Render the value, dropping a trailing ".0" from whole numbers."""
        # float.is_integer() is False for inf/nan, whereas int(inf) raises
        # OverflowError and int(nan) raises ValueError.
        if self.value.is_integer():
            return str(int(self.value))
        return repr(self.value)

    def to_latex(self) -> str:
        """Render the value for LaTeX; infinities become ``\\infty``."""
        if math.isinf(self.value):
            return "\\infty" if self.value > 0 else "-\\infty"
        if math.isnan(self.value):
            return "\\mathrm{NaN}"
        if self.value.is_integer():
            return str(int(self.value))
        return repr(self.value)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Constant) and other.value == self.value

    def __hash__(self) -> int:
        return hash(("Constant", self.value))

Cos

Bases: _Unary

cos(operand).

Source code in src\cds\modeling\expression.py
class Cos(_Unary):
    """Cosine node: ``cos(operand)``."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate the operand under `env` and take its cosine."""
        angle = self.operand.evaluate(env)
        return math.cos(angle)

    def diff(self, var: str) -> Expression:
        """Chain rule: ``d/dx cos(u) = -sin(u) * u'``."""
        inner = self.operand
        negated_sine = Mul(Constant(-1.0), Sin(inner))
        return Mul(negated_sine, inner.diff(var))

    def simplify(self) -> Expression:
        """Simplify the operand; fold to a constant when it is one."""
        reduced = self.operand.simplify()
        if not isinstance(reduced, Constant):
            return Cos(reduced)
        return Constant(math.cos(reduced.value))

    def to_str(self) -> str:
        """Render as ``cos(<operand>)``."""
        argument = self.operand.to_str()
        return f"cos({argument})"

    def to_latex(self) -> str:
        """Render as ``\\cos\\left(<operand>\\right)``."""
        argument = self.operand.to_latex()
        return f"\\cos\\left({argument}\\right)"

Div

Bases: _Binary

left / right (quotient rule for differentiation).

Source code in src\cds\modeling\expression.py
class Div(_Binary):
    """``left / right`` (quotient rule for differentiation)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate both operands under `env` and divide left by right."""
        return self.left.evaluate(env) / self.right.evaluate(env)

    def diff(self, var: str) -> Expression:
        # Quotient rule: d(u/v) = (u'v - uv') / v^2
        return Div(
            Sub(
                Mul(self.left.diff(var), self.right),
                Mul(self.left, self.right.diff(var)),
            ),
            Pow(self.right, Constant(2.0)),
        )

    def simplify(self) -> Expression:
        """Fold constant quotients and apply ``0 / v -> 0`` and ``u / 1 -> u``.

        A constant zero denominator is never folded: the node is returned
        as an unsimplified ``Div`` so that ``simplify()`` does not raise and
        ``0 / 0`` is not rewritten to ``0``. The division-by-zero error still
        surfaces from ``evaluate()``.
        """
        left = self.left.simplify()
        right = self.right.simplify()
        if isinstance(right, Constant):
            if right.value == 0.0:
                return Div(left, right)
            if isinstance(left, Constant):
                return Constant(left.value / right.value)
        if isinstance(left, Constant) and left.value == 0.0:
            return Constant(0.0)
        if isinstance(right, Constant) and right.value == 1.0:
            return left
        return Div(left, right)

    def to_str(self) -> str:
        return f"({self.left.to_str()} / {self.right.to_str()})"

    def to_latex(self) -> str:
        # \frac's braces already group both operands, so no parentheses needed.
        return f"\\frac{{{self.left.to_latex()}}}{{{self.right.to_latex()}}}"

Exp

Bases: _Unary

e ** operand (the exponential function).

Source code in src\cds\modeling\expression.py
class Exp(_Unary):
    """``e ** operand`` (the exponential function)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate the operand under `env` and return ``math.exp`` of it."""
        return math.exp(self.operand.evaluate(env))

    def diff(self, var: str) -> Expression:
        # Chain rule: d/dx exp(u) = exp(u) * u'
        return Mul(Exp(self.operand), self.operand.diff(var))

    def simplify(self) -> Expression:
        """Simplify the operand and fold ``exp(c)`` for a constant ``c``.

        If ``math.exp`` overflows for the constant, the node is left as an
        unfolded ``Exp`` so that ``simplify()`` does not raise; the overflow
        still surfaces from ``evaluate()``.
        """
        inner = self.operand.simplify()
        if isinstance(inner, Constant):
            try:
                return Constant(math.exp(inner.value))
            except OverflowError:
                pass
        return Exp(inner)

    def to_str(self) -> str:
        return f"exp({self.operand.to_str()})"

    def to_latex(self) -> str:
        return f"e^{{{self.operand.to_latex()}}}"

Expression

Abstract base for every node in the symbolic expression tree.

Subclasses implement evaluate, diff, variables, simplify, to_latex, and to_str. The dunders (__add__ etc.) live here so arithmetic works uniformly for all nodes and for mixing nodes with plain Python numbers.

Source code in src\cds\modeling\expression.py
class Expression:
    """Root type of the symbolic expression tree.

    Concrete nodes supply ``evaluate``, ``diff``, ``variables``, ``to_str``
    and ``to_latex`` (the versions defined here raise
    ``NotImplementedError``) and may override ``simplify``. The arithmetic
    dunders are defined once on this base so every node, and any mix of
    nodes with plain Python numbers, builds trees the same way.
    """

    # ---- Interface that concrete nodes are expected to override ---------- #

    def evaluate(self, env: dict[str, float]) -> float:  # pragma: no cover
        """Compute the numeric value of this expression given bindings ``env``."""
        raise NotImplementedError

    def diff(self, var: str) -> Expression:  # pragma: no cover
        """Build the symbolic derivative with respect to ``var``."""
        raise NotImplementedError

    def variables(self) -> set[str]:  # pragma: no cover
        """Collect the names of the free variables used by this expression."""
        raise NotImplementedError

    def to_str(self) -> str:  # pragma: no cover
        """Produce a human-readable infix rendering."""
        raise NotImplementedError

    def to_latex(self) -> str:  # pragma: no cover
        """Produce a LaTeX math rendering."""
        raise NotImplementedError

    # ---- Shared helpers built on the interface above --------------------- #

    def simplify(self) -> Expression:
        """Return a simplified tree; the base implementation returns ``self``."""
        return self

    def subs(self, **values: float) -> Expression:
        """Return a new expression with the given ``name=value`` pairs applied.

        Delegates to ``_subs``. Names that do not occur in this expression
        are ignored, so partial substitution is safe.
        """
        replacements = values
        return _subs(self, replacements)

    def to_func(self, *var_names: str) -> Callable[..., float]:
        """Wrap this expression as a positional callable ``f(*args) -> float``.

        Arguments are bound to ``var_names`` in order; this is the shape
        ``cds.optimization`` and ``cds.modeling.solver`` expect.

        Raises:
            ValueError: if ``var_names`` contains a name the expression does
                not use, or omits one that it does. The returned callable
                raises ``ValueError`` when called with the wrong arity.
        """
        present = self.variables()
        extra = set(var_names) - present
        if extra:
            raise ValueError(f"to_func var_names not in expression: {sorted(extra)}")
        for required in present:
            if required not in var_names:
                raise ValueError(f"to_func missing variable {required!r} in var_names")
        order = list(var_names)
        arity = len(order)

        def _f(*args: float) -> float:
            if len(args) != arity:
                raise ValueError(f"expected {arity} args, got {len(args)}")
            bindings = dict(zip(order, args))
            return self.evaluate(bindings)

        return _f

    # ---- Operators: the other operand is passed through _coerce ---------- #

    def __add__(self, other: Expression | float | int) -> Expression:
        rhs = _coerce(other)
        return Add(self, rhs)

    def __radd__(self, other: float | int) -> Expression:
        lhs = _coerce(other)
        return Add(lhs, self)

    def __sub__(self, other: Expression | float | int) -> Expression:
        rhs = _coerce(other)
        return Sub(self, rhs)

    def __rsub__(self, other: float | int) -> Expression:
        lhs = _coerce(other)
        return Sub(lhs, self)

    def __mul__(self, other: Expression | float | int) -> Expression:
        rhs = _coerce(other)
        return Mul(self, rhs)

    def __rmul__(self, other: float | int) -> Expression:
        lhs = _coerce(other)
        return Mul(lhs, self)

    def __truediv__(self, other: Expression | float | int) -> Expression:
        denominator = _coerce(other)
        return Div(self, denominator)

    def __rtruediv__(self, other: float | int) -> Expression:
        numerator = _coerce(other)
        return Div(numerator, self)

    def __pow__(self, exponent: Expression | float | int) -> Expression:
        power = _coerce(exponent)
        return Pow(self, power)

    def __rpow__(self, base: float | int) -> Expression:
        radix = _coerce(base)
        return Pow(radix, self)

    def __neg__(self) -> Expression:
        minus_one = Constant(-1.0)
        return Mul(minus_one, self)

    def __pos__(self) -> Expression:
        return self

    def __repr__(self) -> str:
        label = self.__class__.__name__
        return f"{label}({self.to_str()})"
Methods:
evaluate
evaluate(env: dict[str, float]) -> float

Evaluate this expression to a float using the variable bindings in env.

Source code in src\cds\modeling\expression.py
def evaluate(self, env: dict[str, float]) -> float:  # pragma: no cover
    """Evaluate this expression to a float using the variable bindings in ``env``.

    Abstract on the base class: this implementation always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
diff
diff(var: str) -> Expression

Return the symbolic derivative of this expression w.r.t. var.

Source code in src\cds\modeling\expression.py
def diff(self, var: str) -> Expression:  # pragma: no cover
    """Return the symbolic derivative of this expression w.r.t. ``var``.

    Abstract on the base class: this implementation always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
variables
variables() -> set[str]

Return the set of free variable names appearing in this expression.

Source code in src\cds\modeling\expression.py
def variables(self) -> set[str]:  # pragma: no cover
    """Return the set of free variable names appearing in this expression.

    Abstract on the base class: this implementation always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
to_str
to_str() -> str

Render this expression as a human-readable infix string.

Source code in src\cds\modeling\expression.py
def to_str(self) -> str:  # pragma: no cover
    """Render this expression as a human-readable infix string.

    Abstract on the base class: this implementation always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
to_latex
to_latex() -> str

Render this expression as a LaTeX math string.

Source code in src\cds\modeling\expression.py
def to_latex(self) -> str:  # pragma: no cover
    """Render this expression as a LaTeX math string.

    Abstract on the base class: this implementation always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
simplify
simplify() -> Expression

Constant-fold and apply algebraic identities to simplify this tree.

Source code in src\cds\modeling\expression.py
def simplify(self) -> Expression:
    """Constant-fold and apply algebraic identities to simplify this tree.

    The base implementation performs no rewriting and returns ``self``
    unchanged.
    """
    return self
subs
subs(**values: float) -> Expression

Substitute the given name=value pairs, returning a new expression.

Names not present in this expression are ignored, so partial substitution is safe.

Source code in src\cds\modeling\expression.py
def subs(self, **values: float) -> Expression:
    """Substitute the given ``name=value`` pairs, returning a new expression.

    Names not present in this expression are ignored, so partial
    substitution is safe.

    The work is delegated to the helper ``_subs``, which receives this
    expression and the keyword arguments collected into a dict.
    """
    return _subs(self, values)
to_func
to_func(*var_names: str) -> Callable[..., float]

Compile this expression into a callable f(*args) -> float.

The positional argument order is var_names; this is the shape cds.optimization and cds.modeling.solver expect.

Source code in src\cds\modeling\expression.py
def to_func(self, *var_names: str) -> Callable[..., float]:
    """Wrap this expression as a positional callable ``f(*args) -> float``.

    Arguments are bound to ``var_names`` in order; this is the shape
    ``cds.optimization`` and ``cds.modeling.solver`` expect.

    Raises:
        ValueError: if ``var_names`` contains a name the expression does not
            use, or omits one that it does. The returned callable raises
            ``ValueError`` when called with the wrong number of arguments.
    """
    present = self.variables()
    extra = set(var_names) - present
    if extra:
        raise ValueError(f"to_func var_names not in expression: {sorted(extra)}")
    for required in present:
        if required not in var_names:
            raise ValueError(f"to_func missing variable {required!r} in var_names")
    order = list(var_names)
    arity = len(order)

    def _f(*args: float) -> float:
        if len(args) != arity:
            raise ValueError(f"expected {arity} args, got {len(args)}")
        bindings = dict(zip(order, args))
        return self.evaluate(bindings)

    return _f

Log

Bases: _Unary

ln(operand) (the natural logarithm, base e).

Source code in src\cds\modeling\expression.py
class Log(_Unary):
    """``ln(operand)`` (the natural logarithm, base *e*)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate the operand under `env` and return ``math.log`` of it."""
        return math.log(self.operand.evaluate(env))

    def diff(self, var: str) -> Expression:
        # Chain rule: d/dx ln(u) = u' / u
        return Div(self.operand.diff(var), self.operand)

    def simplify(self) -> Expression:
        """Simplify the operand and fold ``ln(c)`` for a constant ``c``.

        A constant for which ``math.log`` raises a domain error (zero or
        negative) is left as an unfolded ``Log`` node so that ``simplify()``
        does not raise; the error still surfaces from ``evaluate()``.
        """
        inner = self.operand.simplify()
        if isinstance(inner, Constant):
            try:
                return Constant(math.log(inner.value))
            except ValueError:
                pass
        return Log(inner)

    def to_str(self) -> str:
        return f"log({self.operand.to_str()})"

    def to_latex(self) -> str:
        return f"\\ln\\left({self.operand.to_latex()}\\right)"

Mul

Bases: _Binary

left * right (product rule for differentiation).

Source code in src\cds\modeling\expression.py
class Mul(_Binary):
    """``left * right`` (product rule for differentiation)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate both operands under `env` and multiply the results."""
        return self.left.evaluate(env) * self.right.evaluate(env)

    def diff(self, var: str) -> Expression:
        # Product rule: d(uv) = u'v + uv'
        return Add(
            Mul(self.left.diff(var), self.right),
            Mul(self.left, self.right.diff(var)),
        )

    def simplify(self) -> Expression:
        """Fold constant products and apply ``0 * u -> 0`` and ``1 * u -> u``."""
        left = self.left.simplify()
        right = self.right.simplify()
        if isinstance(left, Constant) and isinstance(right, Constant):
            return Constant(left.value * right.value)
        if isinstance(left, Constant):
            if left.value == 0.0:
                return Constant(0.0)
            if left.value == 1.0:
                return right
        if isinstance(right, Constant):
            if right.value == 0.0:
                return Constant(0.0)
            if right.value == 1.0:
                return left
        return Mul(left, right)

    def to_str(self) -> str:
        return f"({self.left.to_str()} * {self.right.to_str()})"

    @staticmethod
    def _factor_latex(node: Expression) -> str:
        """Render one factor, parenthesizing sums and differences."""
        rendered = node.to_latex()
        # Add/Sub bind more loosely than \cdot; without parentheses
        # (x + 1) * y would read as x + 1 \cdot y.
        if isinstance(node, (Add, Sub)):
            return f"\\left({rendered}\\right)"
        return rendered

    def to_latex(self) -> str:
        """Render as ``<left> \\cdot <right>``, preserving operator precedence."""
        return f"{self._factor_latex(self.left)} \\cdot {self._factor_latex(self.right)}"

Pow

Bases: _Binary

base ** exponent.

Differentiation handles two useful cases: a constant exponent (d/dx u^c = c * u^(c-1) * u') and a constant base (d/dx c^u = c^u * ln(c) * u'). The fully general case u^v is handled via logarithmic differentiation: u^v * (v' * ln(u) + v * u'/u).

Source code in src\cds\modeling\expression.py
class Pow(_Binary):
    """``base ** exponent``.

    Differentiation handles two useful cases: a constant exponent
    (``d/dx u^c = c * u^(c-1) * u'``) and a constant base
    (``d/dx c^u = c^u * ln(c) * u'``). The fully general case
    ``u^v`` is handled via logarithmic differentiation:
    ``u^v * (v' * ln(u) + v * u'/u)``.
    """

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate base and exponent under `env` and return the power as a float."""
        return float(self.left.evaluate(env) ** self.right.evaluate(env))

    def diff(self, var: str) -> Expression:
        """Differentiate w.r.t. `var`, choosing the rule by where `var` occurs."""
        base = self.left
        exp = self.right
        base_has = var in base.variables()
        exp_has = var in exp.variables()

        if not base_has and not exp_has:
            return Constant(0.0)
        if exp_has and not base_has:
            # d/dx c^u = c^u * ln(c) * u'
            return Mul(
                Mul(Pow(base, exp), Log(base)),
                exp.diff(var),
            )
        if base_has and not exp_has:
            # d/dx u^c = c * u^(c-1) * u'
            return Mul(
                Mul(exp, Pow(base, Sub(exp, Constant(1.0)))),
                base.diff(var),
            )
        # General case u^v: u^v * (v' * ln(u) + v * u'/u)
        return Mul(
            Pow(base, exp),
            Add(
                Mul(exp.diff(var), Log(base)),
                Div(Mul(exp, base.diff(var)), base),
            ),
        )

    def simplify(self) -> Expression:
        """Fold constant powers and apply ``u**0 -> 1`` and ``u**1 -> u``.

        A constant power that cannot be represented as a real float (zero
        raised to a negative exponent, overflow, or a complex result from a
        negative base with a fractional exponent) is left as an unfolded
        ``Pow`` node so that ``simplify()`` does not raise.
        """
        base = self.left.simplify()
        exp = self.right.simplify()
        if isinstance(base, Constant) and isinstance(exp, Constant):
            try:
                folded = base.value**exp.value
            except (ZeroDivisionError, OverflowError):
                return Pow(base, exp)
            # float ** float yields a complex number for a negative base with
            # a fractional exponent; Constant can only hold real values.
            if isinstance(folded, complex):
                return Pow(base, exp)
            return Constant(folded)
        if isinstance(exp, Constant):
            if exp.value == 0.0:
                return Constant(1.0)
            if exp.value == 1.0:
                return base
        return Pow(base, exp)

    def to_str(self) -> str:
        return f"({self.left.to_str()} ** {self.right.to_str()})"

    def to_latex(self) -> str:
        """Render as ``<base>^{<exponent>}``, parenthesizing compound bases."""
        base = self.left
        base_tex = base.to_latex()
        # Only a bare variable or a non-negative literal can take a
        # superscript directly. Anything else would either change meaning
        # (x + 1^{2}) or be invalid LaTeX (x^{2}^{3} is a double superscript).
        is_atomic = isinstance(base, Variable) or (
            isinstance(base, Constant) and base.value >= 0
        )
        if not is_atomic:
            base_tex = f"\\left({base_tex}\\right)"
        return f"{base_tex}^{{{self.right.to_latex()}}}"

Sin

Bases: _Unary

sin(operand).

Source code in src\cds\modeling\expression.py
class Sin(_Unary):
    """Sine node: ``sin(operand)``."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate the operand under `env` and take its sine."""
        angle = self.operand.evaluate(env)
        return math.sin(angle)

    def diff(self, var: str) -> Expression:
        """Chain rule: ``d/dx sin(u) = cos(u) * u'``."""
        inner = self.operand
        outer_derivative = Cos(inner)
        return Mul(outer_derivative, inner.diff(var))

    def simplify(self) -> Expression:
        """Simplify the operand; fold to a constant when it is one."""
        reduced = self.operand.simplify()
        if not isinstance(reduced, Constant):
            return Sin(reduced)
        return Constant(math.sin(reduced.value))

    def to_str(self) -> str:
        """Render as ``sin(<operand>)``."""
        argument = self.operand.to_str()
        return f"sin({argument})"

    def to_latex(self) -> str:
        """Render as ``\\sin\\left(<operand>\\right)``."""
        argument = self.operand.to_latex()
        return f"\\sin\\left({argument}\\right)"

Sqrt

Bases: _Unary

sqrt(operand) (the principal square root).

Source code in src\cds\modeling\expression.py
class Sqrt(_Unary):
    """``sqrt(operand)`` (the principal square root)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate the operand under ``env`` and return its square root.

        Raises:
            ValueError: from :func:`math.sqrt` when the operand evaluates to
                a negative number.
        """
        return math.sqrt(self.operand.evaluate(env))

    def diff(self, var: str) -> Expression:
        """Differentiate w.r.t. ``var`` via the chain rule."""
        # Chain rule: d/dx sqrt(u) = u' / (2 * sqrt(u))
        return Div(
            self.operand.diff(var),
            Mul(Constant(2.0), Sqrt(self.operand)),
        )

    def simplify(self) -> Expression:
        """Simplify the operand; fold to a ``Constant`` when that is safe.

        A negative constant operand is left unfolded: ``math.sqrt`` would
        raise a domain error, and simplification should not fail on an
        expression that is merely undefined over the reals.
        """
        inner = self.operand.simplify()
        if isinstance(inner, Constant) and inner.value >= 0:
            return Constant(math.sqrt(inner.value))
        return Sqrt(inner)

    def to_str(self) -> str:
        """Render as plain text ``sqrt(<operand>)``."""
        return f"sqrt({self.operand.to_str()})"

    def to_latex(self) -> str:
        """Render as LaTeX ``\\sqrt{<operand>}``."""
        return f"\\sqrt{{{self.operand.to_latex()}}}"

Sub

Bases: _Binary

left - right.

Source code in src\cds\modeling\expression.py
class Sub(_Binary):
    """``left - right``."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Evaluate both operands under ``env`` and return their difference."""
        return self.left.evaluate(env) - self.right.evaluate(env)

    def diff(self, var: str) -> Expression:
        """Differentiate term by term: ``(u - v)' = u' - v'``."""
        return Sub(self.left.diff(var), self.right.diff(var))

    def simplify(self) -> Expression:
        """Simplify both operands, folding constants and dropping ``- 0``."""
        left = self.left.simplify()
        right = self.right.simplify()
        if isinstance(left, Constant) and isinstance(right, Constant):
            return Constant(left.value - right.value)
        if isinstance(right, Constant) and right.value == 0.0:
            return left
        return Sub(left, right)

    def to_str(self) -> str:
        """Render as fully parenthesised plain text ``(left - right)``."""
        return f"({self.left.to_str()} - {self.right.to_str()})"

    def to_latex(self) -> str:
        """Render as LaTeX ``left - right``.

        An additive right operand is parenthesised, because subtraction is
        not associative: ``a - (b - c)`` must not render as ``a - b - c``.
        """
        left_tex = self.left.to_latex()
        right_tex = self.right.to_latex()
        if isinstance(self.right, (Add, Sub)):
            right_tex = f"\\left({right_tex}\\right)"
        return f"{left_tex} - {right_tex}"

Variable

Bases: Expression

A named symbolic variable (e.g. x, theta).

Source code in src\cds\modeling\expression.py
class Variable(Expression):
    """Symbolic leaf node identified by its name (e.g. ``x``, ``theta``)."""

    __slots__ = ("name",)

    def __init__(self, name: str) -> None:
        self.name = name

    def evaluate(self, env: dict[str, float]) -> float:
        """Look this variable up in ``env``.

        Raises:
            ValueError: if ``env`` holds no value for this variable's name.
        """
        if self.name in env:
            return env[self.name]
        raise ValueError(f"no value bound for variable {self.name!r}")

    def diff(self, var: str) -> Expression:
        """Return the constant 1 when ``var`` is this variable, else 0."""
        if var == self.name:
            return Constant(1.0)
        return Constant(0.0)

    def variables(self) -> set[str]:
        """Return the singleton set holding this variable's name."""
        return {self.name}

    def to_str(self) -> str:
        """Plain-text rendering: the bare name."""
        return self.name

    def to_latex(self) -> str:
        """LaTeX rendering: the bare name."""
        return self.name

    def __eq__(self, other: object) -> bool:
        # Two variables are equal exactly when their names match.
        if not isinstance(other, Variable):
            return False
        return other.name == self.name

    def __hash__(self) -> int:
        # Kept consistent with __eq__: derived from the name only.
        return hash(("Variable", self.name))

MathModel dataclass

A named system of symbolic equations sharing parameters and variables.

Attributes:

Name Type Description
name str

human-readable model title (used in :meth:to_markdown).

equations list[tuple[str, Expression]]

ordered (label, expression) pairs. Labels are the equation names callers refer to (e.g. "velocity"); expressions are symbolic :class:Expression trees.

parameters dict[str, float]

constant values substituted during evaluation. They shadow variables of the same name, mirroring how a physicist treats g vs t.

variables list[str]

the declared free variables. Recorded explicitly so the model is self-describing even before any equation is inspected.

description str | None

optional one-line summary.

Source code in src\cds\modeling\model.py
@dataclass
class MathModel:
    """A titled collection of labelled symbolic equations.

    The equations share one set of parameters and declared variables.

    Attributes:
        name: human-readable model title (used in :meth:`to_markdown`).
        equations: ordered ``(label, expression)`` pairs. The label is the
            name callers use to look an equation up (e.g. ``"velocity"``);
            the expression is a symbolic :class:`Expression` tree.
        parameters: constant values merged into the bindings on evaluation.
            On a name collision they take precedence over caller-supplied
            variable values.
        variables: the declared free variables, stored explicitly so the
            model describes itself without inspecting any equation.
        description: optional one-line summary.
    """

    name: str
    equations: list[tuple[str, Expression]] = field(default_factory=list)
    parameters: dict[str, float] = field(default_factory=dict)
    variables: list[str] = field(default_factory=list)
    description: str | None = None

    # ------------------------------------------------------------------ #
    # Construction helpers
    # ------------------------------------------------------------------ #
    def add_equation(self, label: str, expr: Expression) -> None:
        """Append ``(label, expr)`` to the end of the equation list."""
        entry = (label, expr)
        self.equations.append(entry)

    def set_parameter(self, name: str, value: float) -> None:
        """Store ``value`` (coerced to ``float``) under parameter ``name``."""
        coerced = float(value)
        self.parameters[name] = coerced

    # ------------------------------------------------------------------ #
    # Analysis
    # ------------------------------------------------------------------ #
    def evaluate(self, env: dict[str, float]) -> dict[str, float]:
        """Evaluate all equations under ``env`` plus the model parameters.

        Args:
            env: values for the free variables. A model parameter with the
                same name wins over the value given here.

        Returns:
            mapping of equation label to its evaluated numeric value.

        Raises:
            ValueError: if a free variable has no binding (propagated from
                :meth:`Expression.evaluate`).
        """
        bindings: dict[str, float] = dict(env)
        bindings.update(self.parameters)  # parameters override env entries
        results: dict[str, float] = {}
        for label, expr in self.equations:
            results[label] = expr.evaluate(bindings)
        return results

    def equation(self, label: str) -> Expression:
        """Look up an equation's expression by its label.

        The first equation carrying ``label`` is returned.

        Raises:
            KeyError: if ``label`` is not in this model.
        """
        for entry in self.equations:
            if entry[0] == label:
                return entry[1]
        raise KeyError(f"no equation labelled {label!r} in model {self.name!r}")

    def gradient(self, label: str, var: str) -> Expression:
        """Differentiate the equation named ``label`` with respect to ``var``."""
        target = self.equation(label)
        return target.diff(var)

    def jacobian(self, var: str) -> dict[str, Expression]:
        """Differentiate *every* equation with respect to ``var``.

        Returns:
            mapping of equation label to its derivative expression.
        """
        derivatives: dict[str, Expression] = {}
        for label, expr in self.equations:
            derivatives[label] = expr.diff(var)
        return derivatives

    def free_variables(self) -> set[str]:
        """Variable names used by the equations, minus the parameter names."""
        used: set[str] = set().union(*(expr.variables() for _, expr in self.equations))
        return used - set(self.parameters)

    # ------------------------------------------------------------------ #
    # Rendering
    # ------------------------------------------------------------------ #
    def to_markdown(self) -> str:
        """Build a Markdown document describing the model.

        The title and the equations section are always emitted; the
        description, parameters and variables sections only when non-empty.
        """
        out: list[str] = [f"# Model: {self.name}", ""]
        if self.description:
            out.extend([self.description, ""])
        if self.parameters:
            out.append("## Parameters")
            out.extend(f"- `{key}` = {val}" for key, val in self.parameters.items())
            out.append("")
        if self.variables:
            listed = ", ".join(f"`{v}`" for v in self.variables)
            out.extend(["## Variables", listed, ""])
        out.append("## Equations")
        out.extend(f"- **{label}**: `{expr.to_str()}`" for label, expr in self.equations)
        out.append("")
        return "\n".join(out)

    def to_latex(self) -> str:
        """Build a LaTeX ``align*`` block with one row per equation."""
        body = " \\\\ ".join(
            f"{label} &= {expr.to_latex()}" for label, expr in self.equations
        )
        return "\\begin{align*}\n" + body + "\n\\end{align*}"
Methods:
add_equation
add_equation(label: str, expr: Expression) -> None

Append a named equation to the system.

Source code in src\cds\modeling\model.py
def add_equation(self, label: str, expr: Expression) -> None:
    """Append ``(label, expr)`` to the end of the equation list."""
    entry = (label, expr)
    self.equations.append(entry)
set_parameter
set_parameter(name: str, value: float) -> None

Bind or update a named parameter value.

Source code in src\cds\modeling\model.py
def set_parameter(self, name: str, value: float) -> None:
    """Store ``value`` (coerced to ``float``) under parameter ``name``."""
    coerced = float(value)
    self.parameters[name] = coerced
evaluate
evaluate(env: dict[str, float]) -> dict[str, float]

Evaluate every equation, merging parameters into the bindings.

Parameters:

Name Type Description Default
env dict[str, float]

values for the free variables (parameters override these if a name collides).

required

Returns:

Type Description
dict[str, float]

mapping of equation label to its evaluated numeric value.

Raises:

Type Description
ValueError

if a free variable has no binding (propagated from :meth:Expression.evaluate).

Source code in src\cds\modeling\model.py
def evaluate(self, env: dict[str, float]) -> dict[str, float]:
    """Evaluate all equations under ``env`` plus the model parameters.

    Args:
        env: values for the free variables. A model parameter with the
            same name wins over the value given here.

    Returns:
        mapping of equation label to its evaluated numeric value.

    Raises:
        ValueError: if a free variable has no binding (propagated from
            :meth:`Expression.evaluate`).
    """
    bindings: dict[str, float] = dict(env)
    bindings.update(self.parameters)  # parameters override env entries
    results: dict[str, float] = {}
    for label, expr in self.equations:
        results[label] = expr.evaluate(bindings)
    return results
equation
equation(label: str) -> Expression

Return the expression for a named equation.

Raises:

Type Description
KeyError

if label is not in this model.

Source code in src\cds\modeling\model.py
def equation(self, label: str) -> Expression:
    """Look up an equation's expression by its label.

    The first equation carrying ``label`` is returned.

    Raises:
        KeyError: if ``label`` is not in this model.
    """
    for entry in self.equations:
        if entry[0] == label:
            return entry[1]
    raise KeyError(f"no equation labelled {label!r} in model {self.name!r}")
gradient
gradient(label: str, var: str) -> Expression

Symbolic partial derivative of one equation w.r.t. one variable.

Source code in src\cds\modeling\model.py
def gradient(self, label: str, var: str) -> Expression:
    """Differentiate the equation named ``label`` with respect to ``var``."""
    target = self.equation(label)
    return target.diff(var)
jacobian
jacobian(var: str) -> dict[str, Expression]

Symbolic partial derivative of every equation w.r.t. var.

Returns:

Type Description
dict[str, Expression]

mapping of equation label to its derivative expression.

Source code in src\cds\modeling\model.py
def jacobian(self, var: str) -> dict[str, Expression]:
    """Differentiate *every* equation with respect to ``var``.

    Returns:
        mapping of equation label to its derivative expression.
    """
    derivatives: dict[str, Expression] = {}
    for label, expr in self.equations:
        derivatives[label] = expr.diff(var)
    return derivatives
free_variables
free_variables() -> set[str]

All variable names that actually appear in some equation.

Source code in src\cds\modeling\model.py
def free_variables(self) -> set[str]:
    """Variable names used by the equations, minus the parameter names."""
    used: set[str] = set().union(*(expr.variables() for _, expr in self.equations))
    return used - set(self.parameters)
to_markdown
to_markdown() -> str

Render this model as a structured Markdown document.

Source code in src\cds\modeling\model.py
def to_markdown(self) -> str:
    """Build a Markdown document describing the model.

    The title and the equations section are always emitted; the
    description, parameters and variables sections only when non-empty.
    """
    out: list[str] = [f"# Model: {self.name}", ""]
    if self.description:
        out.extend([self.description, ""])
    if self.parameters:
        out.append("## Parameters")
        out.extend(f"- `{key}` = {val}" for key, val in self.parameters.items())
        out.append("")
    if self.variables:
        listed = ", ".join(f"`{v}`" for v in self.variables)
        out.extend(["## Variables", listed, ""])
    out.append("## Equations")
    out.extend(f"- **{label}**: `{expr.to_str()}`" for label, expr in self.equations)
    out.append("")
    return "\n".join(out)
to_latex
to_latex() -> str

Render the model's equations as a LaTeX align block.

Source code in src\cds\modeling\model.py
def to_latex(self) -> str:
    """Build a LaTeX ``align*`` block with one row per equation."""
    body = " \\\\ ".join(
        f"{label} &= {expr.to_latex()}" for label, expr in self.equations
    )
    return "\\begin{align*}\n" + body + "\n\\end{align*}"

FitResult dataclass

Outcome of :func:fit_parameters (least-squares fitting).

Attributes:

Name Type Description
parameters dict[str, float]

fitted values, keyed by parameter name.

residual float

final sum-of-squared-residuals objective value.

iterations int

number of gradient-descent steps taken.

converged bool

whether the gradient magnitude dropped below tolerance.

Source code in src\cds\modeling\solver.py
@dataclass
class FitResult:
    """Result record returned by :func:`fit_parameters`.

    Attributes:
        parameters: the fitted value of each parameter, keyed by its name.
        residual: value of the sum-of-squared-residuals objective at the
            final parameters.
        iterations: how many gradient-descent steps were taken.
        converged: ``True`` when the gradient magnitude fell below the
            tolerance.
    """

    parameters: dict[str, float]
    residual: float
    iterations: int
    converged: bool

SolveResult dataclass

Outcome of :func:solve_equation (root finding).

Attributes:

Name Type Description
x float

the root found.

residual float

|f(x)| at the root (should be near zero on convergence).

iterations int

number of Newton steps taken.

converged bool

whether the residual dropped below tolerance.

Source code in src\cds\modeling\solver.py
@dataclass
class SolveResult:
    """Result record returned by :func:`solve_equation`.

    Attributes:
        x: the root that was found.
        residual: ``|f(x)|`` at that root; close to zero when converged.
        iterations: how many Newton steps were taken.
        converged: ``True`` when the residual fell below the tolerance.
    """

    x: float
    residual: float
    iterations: int
    converged: bool

Functions:

fit_parameters

fit_parameters(
    model: MathModel,
    observed: Sequence[tuple[dict[str, float], float]],
    parameter_names: Sequence[str],
    x0: Sequence[float] | None = None,
    *,
    target_label: str | None = None,
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000
) -> FitResult

Fit a model's parameters to observed data via least squares.

The objective minimised is the residual sum of squares between the model's prediction and the observed values, summed over all observations::

L(p) = Σ_i (model.evaluate(obs_env_i)[target] - observed_value_i) ** 2

where the target equation is either target_label or, if omitted, the model's first equation.

Parameters:

Name Type Description Default
model MathModel

the :class:MathModel whose parameters are tuned.

required
observed Sequence[tuple[dict[str, float], float]]

a sequence of (env, value) pairs; each env provides the free-variable values for one observation and value the measured outcome to fit.

required
parameter_names Sequence[str]

parameter names to fit (order matches x0 and the returned :attr:FitResult.parameters).

required
x0 Sequence[float] | None

starting guesses, positionally aligned with parameter_names. Defaults to all-zeros.

None
target_label str | None

which equation's output to fit. If None, the first equation in model.equations is used.

None
lr float

gradient-descent learning rate.

GD_DEFAULT_LR
tol float

convergence tolerance on gradient magnitude.

DEFAULT_TOLERANCE
max_iter int

iteration cap.

10000

Returns:

Type Description
FitResult

a FitResult with the fitted parameters.

Raises:

Type Description
ValueError

if parameter_names is empty, if target_label is unknown, or if there are no observations.

Source code in src\cds\modeling\solver.py
def fit_parameters(
    model: MathModel,
    observed: Sequence[tuple[dict[str, float], float]],
    parameter_names: Sequence[str],
    x0: Sequence[float] | None = None,
    *,
    target_label: str | None = None,
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
) -> FitResult:
    """Fit a model's parameters to observed data via least squares.

    The objective minimised is the residual sum of squares between the
    model's prediction and the observed values, summed over all observations::

        L(p) = Σ_i (model.evaluate(obs_env_i)[target] - observed_value_i) ** 2

    where the ``target`` equation is either ``target_label`` or, if omitted,
    the model's first equation.

    Args:
        model: the :class:`MathModel` whose parameters are tuned.
        observed: a sequence of ``(env, value)`` pairs; each ``env`` provides
            the free-variable values for one observation and ``value`` the
            measured outcome to fit.
        parameter_names: parameter names to fit (order matches ``x0`` and the
            returned :attr:`FitResult.parameters`).
        x0: starting guesses, positionally aligned with ``parameter_names``.
            Defaults to all-zeros.
        target_label: which equation's output to fit. If ``None``, the first
            equation in ``model.equations`` is used.
        lr: gradient-descent learning rate.
        tol: convergence tolerance on gradient magnitude.
        max_iter: iteration cap.

    Returns:
        FitResult: the fitted parameters together with the final residual,
        the iteration count and the convergence flag.

    Raises:
        ValueError: if ``parameter_names`` is empty, if there are no
            observations, if the model has no equations and no
            ``target_label`` is given, if ``target_label`` is unknown, or if
            ``x0`` does not have one entry per parameter name.
    """
    names = list(parameter_names)
    if not names:
        raise ValueError("parameter_names must list at least one parameter to fit")
    observations = list(observed)
    if not observations:
        raise ValueError("observed must contain at least one (env, value) pair")

    start = list(x0) if x0 is not None else [0.0] * len(names)
    if len(start) != len(names):
        # zip() below would silently drop the surplus names or guesses.
        raise ValueError(
            f"x0 has {len(start)} entries but {len(names)} parameter names were given"
        )

    # Resolve the target equation once.
    if target_label is None:
        if not model.equations:
            raise ValueError("model has no equations to fit")
        target_label = model.equations[0][0]
    try:
        target_expr = model.equation(target_label)
    except KeyError as err:
        # Surface the documented exception type instead of a lookup error.
        raise ValueError(f"unknown target_label {target_label!r}") from err
    base_params = dict(model.parameters)

    def objective(params: list[float]) -> float:
        # Candidate values override the model's stored parameters, which in
        # turn override the per-observation variable bindings.
        env_overrides = dict(zip(names, params))
        params_full = {**base_params, **env_overrides}
        total = 0.0
        for env, observed_value in observations:
            merged = {**env, **params_full}
            predicted = target_expr.evaluate(merged)
            residual = predicted - observed_value
            total += residual * residual
        return total

    opt = gradient_descent(objective, x0=start, lr=lr, tol=tol, max_iter=max_iter)
    # gradient_descent's list-input overload returns OptResult[list[float]], so
    # opt.x is statically a list[float] — no runtime narrowing needed.
    return FitResult(
        parameters=dict(zip(names, opt.x)),
        residual=opt.value,
        iterations=opt.iterations,
        converged=opt.converged,
    )

solve_equation

solve_equation(
    expr: Expression,
    variable: str,
    x0: float = 1.0,
    tol: float = NEWTON_TOLERANCE,
    max_iter: int = 1000,
) -> SolveResult

Find a root of expr (i.e. solve expr = 0) for one variable.

Compiles expr to a callable and hands it to Newton-Raphson.

Parameters:

Name Type Description Default
expr Expression

the symbolic expression whose root to find.

required
variable str

the single free variable to solve for.

required
x0 float

starting guess.

1.0
tol float

convergence tolerance on |expr(x)|.

NEWTON_TOLERANCE
max_iter int

iteration cap.

1000

Returns:

Type Description
SolveResult

a SolveResult describing the root found.

Raises:

Type Description
ValueError

if variable is not free in expr (propagated from :meth:Expression.to_func).

Source code in src\cds\modeling\solver.py
def solve_equation(
    expr: Expression,
    variable: str,
    x0: float = 1.0,
    tol: float = NEWTON_TOLERANCE,
    max_iter: int = 1000,
) -> SolveResult:
    """Solve ``expr = 0`` for a single variable.

    The expression is turned into a callable of ``variable`` and passed to
    the Newton-Raphson routine.

    Args:
        expr: the symbolic expression whose root to find.
        variable: the single free variable to solve for.
        x0: starting guess.
        tol: convergence tolerance on ``|expr(x)|``.
        max_iter: iteration cap.

    Returns:
        SolveResult: the root found, the absolute residual there, the
        iteration count and the convergence flag.

    Raises:
        ValueError: if ``variable`` is not free in ``expr`` (propagated from
            :meth:`Expression.to_func`).
    """
    root_search = newton_method(
        expr.to_func(variable),
        x0=x0,
        tol=tol,
        max_iter=max_iter,
    )
    # newton_method is the scalar root-finder (OptResult[float]), so its ``x``
    # is already a float and needs no narrowing; only the residual is made
    # non-negative here.
    return SolveResult(
        x=root_search.x,
        residual=abs(root_search.value),
        iterations=root_search.iterations,
        converged=root_search.converged,
    )

Knowledge Organization

A knowledge graph of named concepts connected by typed, directed relations (is-a, depends-on, …) with undirected traversal (shortest path, transitive closure, cycle detection), a notebook of research notes linked to concept names, and ranked structured retrieval across both. Persistence is JSON via the stdlib.

cds.knowledge

Knowledge organization — concept graphs, research notes, and structured retrieval.

A self-contained subsystem for organising research knowledge:

  • :class:KnowledgeGraph of named :class:Concept nodes connected by typed, directed :class:Relation edges ("is-a", "depends-on", …).
  • :class:Notebook of :class:Note records linked to concept names.
  • :func:search for structured retrieval across both, ranked by relevance.

All of it is pure Python (stdlib :mod:json for persistence) and decoupled from :mod:cds.graph, whose dense integer-vertex, untyped-edge model is a poor fit for named concepts with typed relationships.

Classes

Concept dataclass

A named node in the knowledge graph.

Attributes:

Name Type Description
name str

the unique, human-readable concept identifier (also its dictionary key inside a :class:KnowledgeGraph).

description str | None

optional one- or two-line summary of the concept.

tags list[str]

free-form labels for grouping and retrieval (e.g. ["physics", "mechanics"]).

metadata dict[str, str]

additional string-valued properties (e.g. source URLs, units) that don't fit the structured fields above.

Source code in src\cds\knowledge\graph.py
@dataclass
class Concept:
    """A named node in the knowledge graph.

    Equality and hashing consider only :attr:`name`, so two concepts with the
    same name compare equal even if their other fields differ.

    Attributes:
        name: the unique, human-readable concept identifier (also its
            dictionary key inside a :class:`KnowledgeGraph`).
        description: optional one- or two-line summary of the concept.
        tags: free-form labels for grouping and retrieval (e.g.
            ``["physics", "mechanics"]``).
        metadata: additional string-valued properties (e.g. source URLs,
            units) that don't fit the structured fields above.
    """

    name: str
    description: str | None = None
    tags: list[str] = field(default_factory=list)
    metadata: dict[str, str] = field(default_factory=dict)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Concept) and other.name == self.name

    def __hash__(self) -> int:
        return hash(("Concept", self.name))

    def to_dict(self) -> dict[str, object]:
        """Serialize this concept to a JSON-friendly dict."""
        return {
            "name": self.name,
            "description": self.description,
            "tags": list(self.tags),
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> Concept:
        """Reconstruct a :class:`Concept` from :meth:`to_dict` output.

        Raises:
            ValueError: if ``data`` is missing keys or has the wrong types.
        """
        # Check presence up front so a missing key surfaces as the documented
        # ValueError rather than a bare KeyError from the lookups below.
        missing = [
            key for key in ("name", "description", "tags", "metadata") if key not in data
        ]
        if missing:
            raise ValueError(f"Missing concept keys: {missing!r}")
        name = data["name"]
        if not isinstance(name, str):
            raise ValueError(f"Invalid concept name: {name!r}")
        description = data["description"]
        if description is not None and not isinstance(description, str):
            raise ValueError(f"Invalid concept description: {description!r}")
        tags_raw = data["tags"]
        if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
            raise ValueError(f"Invalid concept tags: {tags_raw!r}")
        metadata_raw = data["metadata"]
        if not isinstance(metadata_raw, dict) or not all(
            isinstance(k, str) and isinstance(v, str) for k, v in metadata_raw.items()
        ):
            raise ValueError(f"Invalid concept metadata: {metadata_raw!r}")
        # Copy the containers so the new concept does not alias ``data``.
        return cls(
            name=name,
            description=description,
            tags=list(tags_raw),
            metadata=dict(metadata_raw),
        )
Methods:
to_dict
to_dict() -> dict[str, object]

Serialize this concept to a JSON-friendly dict.

Source code in src\cds\knowledge\graph.py
def to_dict(self) -> dict[str, object]:
    """Return a JSON-friendly dict snapshot of this concept.

    ``tags`` and ``metadata`` are copied, so mutating the result leaves the
    concept untouched.
    """
    payload: dict[str, object] = {}
    payload["name"] = self.name
    payload["description"] = self.description
    payload["tags"] = list(self.tags)
    payload["metadata"] = dict(self.metadata)
    return payload
from_dict classmethod
from_dict(data: dict[str, object]) -> Concept

Reconstruct a :class:Concept from :meth:to_dict output.

Raises:

Type Description
ValueError

if data is missing keys or has the wrong types.

Source code in src\cds\knowledge\graph.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> Concept:
    """Reconstruct a :class:`Concept` from :meth:`to_dict` output.

    Raises:
        ValueError: if ``data`` is missing keys or has the wrong types.
    """
    # Check presence up front so a missing key surfaces as the documented
    # ValueError rather than a bare KeyError from the lookups below.
    missing = [
        key for key in ("name", "description", "tags", "metadata") if key not in data
    ]
    if missing:
        raise ValueError(f"Missing concept keys: {missing!r}")
    name = data["name"]
    if not isinstance(name, str):
        raise ValueError(f"Invalid concept name: {name!r}")
    description = data["description"]
    if description is not None and not isinstance(description, str):
        raise ValueError(f"Invalid concept description: {description!r}")
    tags_raw = data["tags"]
    if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
        raise ValueError(f"Invalid concept tags: {tags_raw!r}")
    metadata_raw = data["metadata"]
    if not isinstance(metadata_raw, dict) or not all(
        isinstance(k, str) and isinstance(v, str) for k, v in metadata_raw.items()
    ):
        raise ValueError(f"Invalid concept metadata: {metadata_raw!r}")
    # Copy the containers so the new concept does not alias ``data``.
    return cls(
        name=name,
        description=description,
        tags=list(tags_raw),
        metadata=dict(metadata_raw),
    )

KnowledgeGraph dataclass

A knowledge graph of named concepts and typed relations.

Attributes:

Name Type Description
name str

human-readable graph title (used in :meth:to_markdown).

concepts dict[str, Concept]

mapping of concept name to :class:Concept node.

relations list[Relation]

ordered list of :class:Relation edges.

Source code in src\cds\knowledge\graph.py
@dataclass
class KnowledgeGraph:
    """A knowledge graph of named concepts and typed relations.

    Attributes:
        name: human-readable graph title (used in :meth:`to_markdown`).
        concepts: mapping of concept name to :class:`Concept` node.
        relations: ordered list of :class:`Relation` edges.
    """

    name: str
    concepts: dict[str, Concept] = field(default_factory=dict)
    relations: list[Relation] = field(default_factory=list)

    # ------------------------------------------------------------------ #
    # Construction
    # ------------------------------------------------------------------ #
    def add_concept(
        self,
        name: str,
        description: str | None = None,
        tags: list[str] | None = None,
        metadata: dict[str, str] | None = None,
    ) -> Concept:
        """Add a concept, returning the stored node.

        If ``name`` already exists, the existing concept is returned
        unchanged (idempotent) rather than overwritten.

        Args:
            name: unique concept identifier.
            description: optional summary.
            tags: optional grouping labels.
            metadata: optional string-valued properties.

        Returns:
            the stored :class:`Concept` (newly created or the pre-existing one).
        """
        if name in self.concepts:
            return self.concepts[name]
        concept = Concept(
            name=name,
            description=description,
            # Copy the caller's containers so later caller-side mutation
            # does not leak into the stored node.
            tags=list(tags) if tags else [],
            metadata=dict(metadata) if metadata else {},
        )
        self.concepts[name] = concept
        return concept

    def add_relation(
        self,
        source: str,
        target: str,
        kind: str,
        weight: float = 1.0,
    ) -> Relation:
        """Add a typed, directed relation ``source -> target``.

        Both endpoints must already exist as concepts (use
        :meth:`link_concepts` to auto-create them).

        Args:
            source: name of the origin concept.
            target: name of the destination concept.
            kind: relationship type label.
            weight: numeric strength stored on the relation.

        Returns:
            the newly appended :class:`Relation`.

        Raises:
            KeyError: if ``source`` or ``target`` is not a known concept.
        """
        for endpoint, label in ((source, "source"), (target, "target")):
            if endpoint not in self.concepts:
                raise KeyError(f"unknown {label} concept: {endpoint!r}")
        relation = Relation(source=source, target=target, kind=kind, weight=weight)
        self.relations.append(relation)
        return relation

    def link_concepts(
        self,
        source: str,
        target: str,
        kind: str,
        weight: float = 1.0,
    ) -> Relation:
        """Auto-create both concepts (if missing) and add a relation between them.

        Returns:
            the newly appended :class:`Relation`.
        """
        self.add_concept(source)
        self.add_concept(target)
        return self.add_relation(source, target, kind, weight)

    # ------------------------------------------------------------------ #
    # Queries
    # ------------------------------------------------------------------ #
    def _linked(
        self,
        name: str,
        kind: str | None,
        *,
        outgoing: bool,
        incoming: bool,
    ) -> list[str]:
        """Collect the far endpoints of relations touching ``name``.

        Shared implementation of the three ``neighbors*`` queries. Results
        are de-duplicated and kept in first-seen relation order.

        Raises:
            KeyError: if ``name`` is not a known concept.
        """
        self._require_concept(name)
        # An insertion-ordered dict gives O(1) de-duplication while keeping
        # the order in which neighbors were first encountered.
        found: dict[str, None] = {}
        for relation in self.relations:
            if kind is not None and relation.kind != kind:
                continue
            if outgoing and relation.source == name:
                found.setdefault(relation.target)
            elif incoming and relation.target == name:
                found.setdefault(relation.source)
        return list(found)

    def neighbors(self, name: str, kind: str | None = None) -> list[str]:
        """Undirected neighbors of ``name`` — every concept directly linked.

        A relation touching ``name`` at either endpoint contributes its
        *other* endpoint. ``kind`` optionally restricts to one relation type.

        Raises:
            KeyError: if ``name`` is not a known concept.
        """
        return self._linked(name, kind, outgoing=True, incoming=True)

    def neighbors_out(self, name: str, kind: str | None = None) -> list[str]:
        """Concepts that ``name`` points to via outgoing relations.

        Raises:
            KeyError: if ``name`` is not a known concept.
        """
        return self._linked(name, kind, outgoing=True, incoming=False)

    def neighbors_in(self, name: str, kind: str | None = None) -> list[str]:
        """Concepts that point at ``name`` via incoming relations.

        Raises:
            KeyError: if ``name`` is not a known concept.
        """
        return self._linked(name, kind, outgoing=False, incoming=True)

    def find_path(self, source: str, target: str) -> list[str] | None:
        """Shortest undirected path (by hop count) from ``source`` to ``target``.

        Returns the sequence of concept names ``[source, ..., target]``, or
        ``None`` if no path exists or the endpoints are unknown. A path of
        length 1 (``source == target``) returns ``[source]``.

        Uses BFS following the edges in either direction, so the returned
        path may traverse relations against their direction.
        """
        if source not in self.concepts or target not in self.concepts:
            return None
        if source == target:
            return [source]
        # ``predecessor`` doubles as the visited set; the source maps to itself.
        predecessor: dict[str, str] = {source: source}
        queue: deque[str] = deque([source])
        while queue:
            node = queue.popleft()
            for neighbor in self.neighbors(node):
                if neighbor in predecessor:
                    continue
                predecessor[neighbor] = node
                if neighbor == target:
                    return _reconstruct_path(predecessor, target)
                queue.append(neighbor)
        return None

    def reachable(self, start: str) -> set[str]:
        """All concepts reachable from ``start`` over undirected edges (incl. itself).

        Returns an empty set if ``start`` is not a known concept.
        """
        if start not in self.concepts:
            return set()
        seen: set[str] = {start}
        queue: deque[str] = deque([start])
        while queue:
            node = queue.popleft()
            for neighbor in self.neighbors(node):
                if neighbor not in seen:
                    seen.add(neighbor)
                    queue.append(neighbor)
        return seen

    def find_cycles(self) -> list[list[str]]:
        """Return every directed cycle in the graph as a list of concept names.

        Each cycle is reported once, normalized to start at its
        lexicographically smallest member so the same cycle is not reported
        from every starting rotation. Self-loops (a relation whose source and
        target are equal) are returned as ``[name]``.

        Uses DFS back-edge detection with an explicit recursion-emulating
        stack so deep graphs do not hit Python's recursion limit.

        Raises:
            KeyError: if a relation names an endpoint that is not a key of
                ``concepts`` (the adjacency and color tables are built from
                ``concepts`` only).
        """
        adj: dict[str, list[str]] = {name: [] for name in self.concepts}
        for relation in self.relations:
            adj[relation.source].append(relation.target)

        found: set[tuple[str, ...]] = set()
        WHITE, GRAY, BLACK = 0, 1, 2
        color: dict[str, int] = {name: WHITE for name in self.concepts}

        for root in sorted(self.concepts):
            if color[root] != WHITE:
                continue
            # Each stack frame: the node plus a private copy of its successors
            # that have not been tried yet (consumed from the end via pop()).
            stack: list[tuple[str, list[str]]] = [(root, list(adj[root]))]
            color[root] = GRAY
            path: list[str] = [root]
            while stack:
                node, succs = stack[-1]
                advanced = False
                while succs:
                    nxt = succs.pop()
                    if color[nxt] == GRAY:
                        # Back edge: a cycle from nxt back along the current
                        # DFS path. A node is GRAY iff it is on ``path`` (we
                        # always append to ``path`` in lockstep with marking
                        # GRAY below), so ``nxt`` is guaranteed to be present.
                        cycle = path[path.index(nxt) :]
                        found.add(_normalize_cycle(cycle))
                        # Do not descend into the gray node; keep scanning successors.
                        continue
                    # Not on the current path (WHITE or BLACK): descend into it.
                    # NOTE: finished (BLACK) nodes are re-entered as well, so
                    # this walks simple paths exhaustively rather than visiting
                    # each node once; BLACK only stops a node from being used
                    # as a new root. Cost can therefore grow with the number of
                    # simple paths, not just nodes + edges.
                    color[nxt] = GRAY
                    path.append(nxt)
                    stack.append((nxt, list(adj[nxt])))
                    advanced = True
                    break
                if not advanced:
                    # Exhausted this node's successors: mark black and pop.
                    color[node] = BLACK
                    path.pop()
                    stack.pop()
        return [list(cycle) for cycle in sorted(found)]

    def _require_concept(self, name: str) -> None:
        """Raise ``KeyError`` unless ``name`` is a known concept."""
        if name not in self.concepts:
            raise KeyError(f"unknown concept: {name!r}")

    # ------------------------------------------------------------------ #
    # Rendering & serialization
    # ------------------------------------------------------------------ #
    def to_markdown(self) -> str:
        """Render this graph as a structured Markdown document.

        Concepts are listed alphabetically; relations keep insertion order.
        """
        lines: list[str] = [f"# Knowledge Graph: {self.name}", ""]
        if not self.concepts:
            lines += ["_No concepts._", ""]
        else:
            lines += ["## Concepts", ""]
            for name in sorted(self.concepts):
                lines.append(f"- **{name}**")
            lines.append("")
        if not self.relations:
            lines += ["_No relations._", ""]
        else:
            lines += ["## Relations", ""]
            for relation in self.relations:
                lines.append(f"- `{relation.source}` --{relation.kind}--> `{relation.target}`")
            lines.append("")
        return "\n".join(lines)

    def to_dict(self) -> dict[str, object]:
        """Serialize the whole graph to a JSON-friendly dict."""
        return {
            "name": self.name,
            "concepts": [concept.to_dict() for concept in self.concepts.values()],
            "relations": [relation.to_dict() for relation in self.relations],
        }

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> KnowledgeGraph:
        """Reconstruct a :class:`KnowledgeGraph` from :meth:`to_dict` output.

        Args:
            data: mapping with the keys ``name``, ``concepts`` and
                ``relations``, as produced by :meth:`to_dict`.

        Raises:
            ValueError: if ``data`` is missing keys or has the wrong types.
        """
        try:
            name = data["name"]
            concepts_raw = data["concepts"]
            relations_raw = data["relations"]
        except KeyError as exc:
            # Missing keys are documented as ValueError, not KeyError.
            raise ValueError(f"Missing graph key: {exc.args[0]!r}") from exc
        if not isinstance(name, str):
            raise ValueError(f"Invalid graph name: {name!r}")
        if not isinstance(concepts_raw, list):
            raise ValueError(f"Invalid concepts list: {concepts_raw!r}")
        if not isinstance(relations_raw, list):
            raise ValueError(f"Invalid relations list: {relations_raw!r}")
        graph = cls(name=name)
        for item in concepts_raw:
            if not isinstance(item, dict):
                raise ValueError(f"Invalid concept entry: {item!r}")
            try:
                concept = Concept.from_dict(item)
            except KeyError as exc:
                # Keep the ValueError contract even if a nested entry is
                # missing a key.
                raise ValueError(f"Invalid concept entry: {item!r}") from exc
            graph.concepts[concept.name] = concept
        for item in relations_raw:
            if not isinstance(item, dict):
                raise ValueError(f"Invalid relation entry: {item!r}")
            try:
                relation = Relation.from_dict(item)
            except KeyError as exc:
                raise ValueError(f"Invalid relation entry: {item!r}") from exc
            graph.relations.append(relation)
        return graph

    def save(self, path: str | Path) -> None:
        """Write this graph to ``path`` as indented UTF-8 JSON."""
        Path(path).write_text(
            json.dumps(self.to_dict(), ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    @classmethod
    def load(cls, path: str | Path) -> KnowledgeGraph:
        """Read a graph previously written by :meth:`save`.

        Raises:
            ValueError: if the file does not contain valid graph JSON.
        """
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            raise ValueError(f"Invalid graph file (expected object): {data!r}")
        return cls.from_dict(data)
Methods:
add_concept
add_concept(
    name: str,
    description: str | None = None,
    tags: list[str] | None = None,
    metadata: dict[str, str] | None = None,
) -> Concept

Add a concept, returning the stored node.

If name already exists, the existing concept is returned unchanged (idempotent) rather than overwritten.

Parameters:

Name Type Description Default
name str

unique concept identifier.

required
description str | None

optional summary.

None
tags list[str] | None

optional grouping labels.

None
metadata dict[str, str] | None

optional string-valued properties.

None

Returns:

Type Description
Concept

the stored Concept (newly created or the pre-existing one).

Source code in src\cds\knowledge\graph.py
def add_concept(
    self,
    name: str,
    description: str | None = None,
    tags: list[str] | None = None,
    metadata: dict[str, str] | None = None,
) -> Concept:
    """Register a concept under ``name`` and hand back the stored node.

    The call is idempotent: when ``name`` is already registered the
    existing node is returned as-is and the other arguments are ignored.

    Args:
        name: unique concept identifier.
        description: optional summary.
        tags: optional grouping labels.
        metadata: optional string-valued properties.

    Returns:
        the stored :class:`Concept` (newly created or the pre-existing one).
    """
    try:
        return self.concepts[name]
    except KeyError:
        pass  # not registered yet: fall through and create it
    # Copy caller-supplied containers so the node owns its own data.
    tag_list = list(tags) if tags else []
    properties = dict(metadata) if metadata else {}
    node = Concept(
        name=name,
        description=description,
        tags=tag_list,
        metadata=properties,
    )
    self.concepts[name] = node
    return node
add_relation
add_relation(
    source: str, target: str, kind: str, weight: float = 1.0
) -> Relation

Add a typed, directed relation source -> target.

Both endpoints must already exist as concepts (use link_concepts to auto-create them).

Raises:

Type Description
KeyError

if source or target is not a known concept.

Source code in src\cds\knowledge\graph.py
def add_relation(
    self,
    source: str,
    target: str,
    kind: str,
    weight: float = 1.0,
) -> Relation:
    """Append a directed, typed edge ``source -> target`` to the graph.

    Neither endpoint is created implicitly; both have to be registered
    concepts already (:meth:`link_concepts` is the auto-creating variant).

    Returns:
        the newly appended :class:`Relation`.

    Raises:
        KeyError: if ``source`` or ``target`` is not a known concept.
    """
    # Checked in source-then-target order so the first offender is reported.
    endpoints = {"source": source, "target": target}
    for label, endpoint in endpoints.items():
        if endpoint not in self.concepts:
            raise KeyError(f"unknown {label} concept: {endpoint!r}")
    edge = Relation(source=source, target=target, kind=kind, weight=weight)
    self.relations.append(edge)
    return edge
link_concepts
link_concepts(
    source: str, target: str, kind: str, weight: float = 1.0
) -> Relation

Auto-create both concepts (if missing) and add a relation between them.

Source code in src\cds\knowledge\graph.py
def link_concepts(
    self,
    source: str,
    target: str,
    kind: str,
    weight: float = 1.0,
) -> Relation:
    """Connect ``source -> target``, registering either concept first if needed.

    Returns:
        the relation created by :meth:`add_relation`.
    """
    # add_concept is a no-op for names that already exist.
    for endpoint in (source, target):
        self.add_concept(endpoint)
    edge = self.add_relation(source, target, kind, weight)
    return edge
neighbors
neighbors(name: str, kind: str | None = None) -> list[str]

Undirected neighbors of name — every concept directly linked.

A relation touching name at either endpoint contributes its other endpoint. kind optionally restricts to one relation type.

Raises:

Type Description
KeyError

if name is not a known concept.

Source code in src\cds\knowledge\graph.py
def neighbors(self, name: str, kind: str | None = None) -> list[str]:
    """Undirected neighbors of ``name`` — every concept directly linked.

    A relation touching ``name`` at either endpoint contributes its
    *other* endpoint. ``kind`` optionally restricts to one relation type.

    Raises:
        KeyError: if ``name`` is not a known concept.
    """
    self._require_concept(name)
    found: list[str] = []
    for relation in self.relations:
        if kind is not None and relation.kind != kind:
            continue
        if relation.source == name and relation.target not in found:
            found.append(relation.target)
        elif relation.target == name and relation.source not in found:
            found.append(relation.source)
    return found
neighbors_out
neighbors_out(
    name: str, kind: str | None = None
) -> list[str]

Concepts that name points to via outgoing relations.

Raises:

Type Description
KeyError

if name is not a known concept.

Source code in src\cds\knowledge\graph.py
def neighbors_out(self, name: str, kind: str | None = None) -> list[str]:
    """Concepts that ``name`` points to via outgoing relations.

    Raises:
        KeyError: if ``name`` is not a known concept.
    """
    self._require_concept(name)
    found: list[str] = []
    for relation in self.relations:
        if kind is not None and relation.kind != kind:
            continue
        if relation.source == name and relation.target not in found:
            found.append(relation.target)
    return found
neighbors_in
neighbors_in(
    name: str, kind: str | None = None
) -> list[str]

Concepts that point at name via incoming relations.

Raises:

Type Description
KeyError

if name is not a known concept.

Source code in src\cds\knowledge\graph.py
def neighbors_in(self, name: str, kind: str | None = None) -> list[str]:
    """Concepts that point at ``name`` via incoming relations.

    Raises:
        KeyError: if ``name`` is not a known concept.
    """
    self._require_concept(name)
    found: list[str] = []
    for relation in self.relations:
        if kind is not None and relation.kind != kind:
            continue
        if relation.target == name and relation.source not in found:
            found.append(relation.source)
    return found
find_path
find_path(source: str, target: str) -> list[str] | None

Shortest undirected path (by hop count) from source to target.

Returns the sequence of concept names [source, ..., target], or None if no path exists or the endpoints are unknown. A path of length 1 (source == target) returns [source].

Uses BFS following the edges in either direction, so the returned path may traverse relations against their direction.

Source code in src\cds\knowledge\graph.py
def find_path(self, source: str, target: str) -> list[str] | None:
    """Shortest undirected path (by hop count) from ``source`` to ``target``.

    Returns the sequence of concept names ``[source, ..., target]``, or
    ``None`` if no path exists or the endpoints are unknown. A path of
    length 1 (``source == target``) returns ``[source]``.

    Uses BFS following the edges in either direction, so the returned
    path may traverse relations against their direction.
    """
    if source not in self.concepts or target not in self.concepts:
        return None
    if source == target:
        return [source]
    predecessor: dict[str, str] = {source: source}
    queue: deque[str] = deque([source])
    while queue:
        node = queue.popleft()
        for neighbor in self.neighbors(node):
            if neighbor in predecessor:
                continue
            predecessor[neighbor] = node
            if neighbor == target:
                return _reconstruct_path(predecessor, target)
            queue.append(neighbor)
    return None
reachable
reachable(start: str) -> set[str]

All concepts reachable from start over undirected edges (incl. itself).

Returns an empty set if start is not a known concept.

Source code in src\cds\knowledge\graph.py
def reachable(self, start: str) -> set[str]:
    """Collect the undirected connected component containing ``start``.

    Returns:
        every concept reachable from ``start`` via :meth:`neighbors`
        (``start`` itself included), or an empty set when ``start`` is not
        a known concept.
    """
    if start not in self.concepts:
        return set()
    visited: set[str] = {start}
    pending: deque[str] = deque([start])
    while pending:
        for adjacent in self.neighbors(pending.popleft()):
            if adjacent in visited:
                continue
            visited.add(adjacent)
            pending.append(adjacent)
    return visited
find_cycles
find_cycles() -> list[list[str]]

Return every directed cycle in the graph as a list of concept names.

Each cycle is reported once, normalized to start at its lexicographically smallest member so the same cycle is not reported from every starting rotation. Self-loops (a relation whose source and target are equal) are returned as [name].

Uses DFS back-edge detection with an explicit recursion-emulating stack so deep graphs do not hit Python's recursion limit.

Source code in src\cds\knowledge\graph.py
def find_cycles(self) -> list[list[str]]:
    """Return every directed cycle in the graph as a list of concept names.

    Each cycle is reported once, normalized to start at its
    lexicographically smallest member so the same cycle is not reported
    from every starting rotation. Self-loops (a relation whose source and
    target are equal) are returned as ``[name]``.

    Uses DFS back-edge detection with an explicit recursion-emulating
    stack so deep graphs do not hit Python's recursion limit.

    Returns:
        the distinct normalized cycles, sorted.

    Raises:
        KeyError: if a relation names an endpoint that is not a key of
            ``concepts`` (the adjacency and color tables are built from
            ``concepts`` only).
    """
    # Directed adjacency lists; parallel relations yield repeated entries,
    # which is harmless because cycles are collected into a set.
    adj: dict[str, list[str]] = {name: [] for name in self.concepts}
    for relation in self.relations:
        adj[relation.source].append(relation.target)

    found: set[tuple[str, ...]] = set()
    # WHITE = never visited, GRAY = on the current DFS path, BLACK = finished.
    WHITE, GRAY, BLACK = 0, 1, 2
    color: dict[str, int] = {name: WHITE for name in self.concepts}

    # Roots are taken in sorted order so the traversal is deterministic.
    for root in sorted(self.concepts):
        if color[root] != WHITE:
            continue
        # Each stack frame: the node plus a private copy of its successors
        # that have not been tried yet (consumed from the end via pop()).
        stack: list[tuple[str, list[str]]] = [(root, list(adj[root]))]
        color[root] = GRAY
        path: list[str] = [root]
        while stack:
            node, succs = stack[-1]
            advanced = False
            while succs:
                nxt = succs.pop()
                if color[nxt] == GRAY:
                    # Back edge: a cycle from nxt back along the current
                    # DFS path. A node is GRAY iff it is on ``path`` (we
                    # always append to ``path`` in lockstep with marking
                    # GRAY below), so ``nxt`` is guaranteed to be present.
                    cycle = path[path.index(nxt) :]
                    found.add(_normalize_cycle(cycle))
                    # Do not descend into the gray node; keep scanning successors.
                    continue
                # Not on the current path (WHITE or BLACK): descend into it.
                # NOTE: finished (BLACK) nodes are re-entered as well, so this
                # walks simple paths exhaustively rather than visiting each
                # node once; BLACK only stops a node from being used as a new
                # root. Cost can therefore grow with the number of simple
                # paths, not just nodes + edges.
                color[nxt] = GRAY
                path.append(nxt)
                stack.append((nxt, list(adj[nxt])))
                advanced = True
                break
            if not advanced:
                # Exhausted this node's successors: mark black and pop.
                color[node] = BLACK
                path.pop()
                stack.pop()
    return [list(cycle) for cycle in sorted(found)]
to_markdown
to_markdown() -> str

Render this graph as a structured Markdown document.

Source code in src\cds\knowledge\graph.py
def to_markdown(self) -> str:
    """Produce a Markdown summary of the graph.

    The document has a title line, then a "Concepts" section (names sorted
    alphabetically) and a "Relations" section (insertion order). An empty
    section is replaced by an italic placeholder line.
    """
    doc: list[str] = [f"# Knowledge Graph: {self.name}", ""]

    if self.concepts:
        doc.extend(["## Concepts", ""])
        doc.extend(f"- **{name}**" for name in sorted(self.concepts))
        doc.append("")
    else:
        doc.extend(["_No concepts._", ""])

    if self.relations:
        doc.extend(["## Relations", ""])
        doc.extend(
            f"- `{relation.source}` --{relation.kind}--> `{relation.target}`"
            for relation in self.relations
        )
        doc.append("")
    else:
        doc.extend(["_No relations._", ""])

    return "\n".join(doc)
to_dict
to_dict() -> dict[str, object]

Serialize the whole graph to a JSON-friendly dict.

Source code in src\cds\knowledge\graph.py
def to_dict(self) -> dict[str, object]:
    """Return a JSON-friendly dict describing the entire graph.

    Concepts and relations are each serialized through their own
    ``to_dict`` and emitted as lists, in their current iteration order.
    """
    graph_name = self.name
    concept_dicts = [node.to_dict() for node in self.concepts.values()]
    relation_dicts = [edge.to_dict() for edge in self.relations]
    return {
        "name": graph_name,
        "concepts": concept_dicts,
        "relations": relation_dicts,
    }
from_dict classmethod
from_dict(data: dict[str, object]) -> KnowledgeGraph

Reconstruct a KnowledgeGraph from to_dict output.

Raises:

Type Description
ValueError

if data is missing keys or has the wrong types.

Source code in src\cds\knowledge\graph.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> KnowledgeGraph:
    """Reconstruct a :class:`KnowledgeGraph` from :meth:`to_dict` output.

    Args:
        data: mapping with the keys ``name``, ``concepts`` and
            ``relations``, as produced by :meth:`to_dict`.

    Returns:
        a new graph holding the deserialized concepts (keyed by name) and
        relations (in input order).

    Raises:
        ValueError: if ``data`` is missing keys or has the wrong types.
    """
    try:
        name = data["name"]
        concepts_raw = data["concepts"]
        relations_raw = data["relations"]
    except KeyError as exc:
        # Missing keys are documented as ValueError, not KeyError.
        raise ValueError(f"Missing graph key: {exc.args[0]!r}") from exc
    if not isinstance(name, str):
        raise ValueError(f"Invalid graph name: {name!r}")
    if not isinstance(concepts_raw, list):
        raise ValueError(f"Invalid concepts list: {concepts_raw!r}")
    if not isinstance(relations_raw, list):
        raise ValueError(f"Invalid relations list: {relations_raw!r}")
    graph = cls(name=name)
    for item in concepts_raw:
        if not isinstance(item, dict):
            raise ValueError(f"Invalid concept entry: {item!r}")
        try:
            concept = Concept.from_dict(item)
        except KeyError as exc:
            # Keep the ValueError contract even if a nested entry is
            # missing a key.
            raise ValueError(f"Invalid concept entry: {item!r}") from exc
        graph.concepts[concept.name] = concept
    for item in relations_raw:
        if not isinstance(item, dict):
            raise ValueError(f"Invalid relation entry: {item!r}")
        try:
            relation = Relation.from_dict(item)
        except KeyError as exc:
            raise ValueError(f"Invalid relation entry: {item!r}") from exc
        graph.relations.append(relation)
    return graph
save
save(path: str | Path) -> None

Write this graph to path as indented UTF-8 JSON.

Source code in src\cds\knowledge\graph.py
def save(self, path: str | Path) -> None:
    """Write this graph to ``path`` as indented UTF-8 JSON."""
    Path(path).write_text(
        json.dumps(self.to_dict(), ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
load classmethod
load(path: str | Path) -> KnowledgeGraph

Read a graph previously written by save.

Raises:

Type Description
ValueError

if the file does not contain valid graph JSON.

Source code in src\cds\knowledge\graph.py
@classmethod
def load(cls, path: str | Path) -> KnowledgeGraph:
    """Load a graph from a JSON file produced by :meth:`save`.

    The file is read as UTF-8, parsed, and handed to :meth:`from_dict`.

    Raises:
        ValueError: if the file does not contain valid graph JSON.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    data = json.loads(raw_text)
    # Only a top-level JSON object can describe a graph.
    if isinstance(data, dict):
        return cls.from_dict(data)
    raise ValueError(f"Invalid graph file (expected object): {data!r}")

Relation dataclass

A typed, directed edge source -> target between two concepts.

Attributes:

Name Type Description
source str

name of the origin concept.

target str

name of the destination concept.

kind str

the relationship type (e.g. "is-a", "depends-on", "related-to"). Semantics are caller-defined; the graph does not interpret kinds beyond using them for filtering.

weight float

optional numeric strength (default 1.0). Higher is stronger; used by callers for ranking, not by the core traversal.

Source code in src\cds\knowledge\graph.py
@dataclass
class Relation:
    """A typed, directed edge ``source -> target`` between two concepts.

    Attributes:
        source: name of the origin concept.
        target: name of the destination concept.
        kind: the relationship type (e.g. ``"is-a"``, ``"depends-on"``,
            ``"related-to"``). Semantics are caller-defined; the graph does
            not interpret kinds beyond using them for filtering.
        weight: optional numeric strength (default 1.0). Higher is stronger;
            used by callers for ranking, not by the core traversal.
    """

    source: str
    target: str
    kind: str
    weight: float = 1.0

    def to_dict(self) -> dict[str, object]:
        """Serialize this relation to a JSON-friendly dict."""
        return {
            "source": self.source,
            "target": self.target,
            "kind": self.kind,
            "weight": self.weight,
        }

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> Relation:
        """Reconstruct a :class:`Relation` from :meth:`to_dict` output.

        Raises:
            ValueError: if ``data`` is missing keys or has the wrong types.
        """
        source = data["source"]
        target = data["target"]
        kind = data["kind"]
        if not isinstance(source, str):
            raise ValueError(f"Invalid relation source: {source!r}")
        if not isinstance(target, str):
            raise ValueError(f"Invalid relation target: {target!r}")
        if not isinstance(kind, str):
            raise ValueError(f"Invalid relation kind: {kind!r}")
        weight = data["weight"]
        if not isinstance(weight, int | float) or isinstance(weight, bool):
            raise ValueError(f"Invalid relation weight: {weight!r}")
        return cls(source=source, target=target, kind=kind, weight=float(weight))
Methods:
to_dict
to_dict() -> dict[str, object]

Serialize this relation to a JSON-friendly dict.

Source code in src\cds\knowledge\graph.py
def to_dict(self) -> dict[str, object]:
    """Return a JSON-friendly dict holding this relation's four fields."""
    field_names = ("source", "target", "kind", "weight")
    return {field_name: getattr(self, field_name) for field_name in field_names}
from_dict classmethod
from_dict(data: dict[str, object]) -> Relation

Reconstruct a Relation from to_dict output.

Raises:

Type Description
ValueError

if data is missing keys or has the wrong types.

Source code in src\cds\knowledge\graph.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> Relation:
    """Reconstruct a :class:`Relation` from :meth:`to_dict` output.

    Raises:
        ValueError: if ``data`` is missing keys or has the wrong types.
    """
    source = data["source"]
    target = data["target"]
    kind = data["kind"]
    if not isinstance(source, str):
        raise ValueError(f"Invalid relation source: {source!r}")
    if not isinstance(target, str):
        raise ValueError(f"Invalid relation target: {target!r}")
    if not isinstance(kind, str):
        raise ValueError(f"Invalid relation kind: {kind!r}")
    weight = data["weight"]
    if not isinstance(weight, int | float) or isinstance(weight, bool):
        raise ValueError(f"Invalid relation weight: {weight!r}")
    return cls(source=source, target=target, kind=kind, weight=float(weight))

Note dataclass

A single research note linked to zero or more concept names.

Attributes:

Name Type Description
id str

unique note identifier within a :class:Notebook.

title str

short human-readable heading.

body str

the note's free-form text content.

tags list[str]

grouping labels (e.g. ["experiment", "failed"]).

linked_concepts list[str]

names of concepts this note references. These are plain strings, not references — they need not exist in any particular :class:KnowledgeGraph.

created str | None

optional ISO-8601 timestamp (or any caller-defined marker).

Source code in src\cds\knowledge\notes.py
@dataclass
class Note:
    """A single research note linked to zero or more concept names.

    Attributes:
        id: unique note identifier within a :class:`Notebook`.
        title: short human-readable heading.
        body: the note's free-form text content.
        tags: grouping labels (e.g. ``["experiment", "failed"]``).
        linked_concepts: names of concepts this note references. These are
            plain strings, not references — they need not exist in any
            particular :class:`KnowledgeGraph`.
        created: optional ISO-8601 timestamp (or any caller-defined marker).
    """

    id: str
    title: str
    body: str
    tags: list[str] = field(default_factory=list)
    linked_concepts: list[str] = field(default_factory=list)
    created: str | None = None

    def to_dict(self) -> dict[str, object]:
        """Serialize this note to a JSON-friendly dict.

        The list fields are copied, so mutating the result does not touch
        this note.
        """
        return {
            "id": self.id,
            "title": self.title,
            "body": self.body,
            "tags": list(self.tags),
            "linked_concepts": list(self.linked_concepts),
            "created": self.created,
        }

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> Note:
        """Reconstruct a :class:`Note` from :meth:`to_dict` output.

        Args:
            data: mapping with the six keys produced by :meth:`to_dict`.

        Returns:
            a new note holding copies of the ``tags`` and
            ``linked_concepts`` lists.

        Raises:
            ValueError: if ``data`` is missing keys or has the wrong types.
        """
        # Check presence first: plain indexing would raise KeyError, which
        # contradicts the documented ValueError contract.
        required = ("id", "title", "body", "tags", "linked_concepts", "created")
        missing = [key for key in required if key not in data]
        if missing:
            raise ValueError(f"Missing note keys: {missing!r}")
        note_id = data["id"]
        title = data["title"]
        body = data["body"]
        if not isinstance(note_id, str):
            raise ValueError(f"Invalid note id: {note_id!r}")
        if not isinstance(title, str):
            raise ValueError(f"Invalid note title: {title!r}")
        if not isinstance(body, str):
            raise ValueError(f"Invalid note body: {body!r}")
        tags_raw = data["tags"]
        if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
            raise ValueError(f"Invalid note tags: {tags_raw!r}")
        linked_raw = data["linked_concepts"]
        if not isinstance(linked_raw, list) or not all(isinstance(c, str) for c in linked_raw):
            raise ValueError(f"Invalid note linked_concepts: {linked_raw!r}")
        created = data["created"]
        if created is not None and not isinstance(created, str):
            raise ValueError(f"Invalid note created: {created!r}")
        return cls(
            id=note_id,
            title=title,
            body=body,
            tags=list(tags_raw),
            linked_concepts=list(linked_raw),
            created=created,
        )

    def to_markdown(self) -> str:
        """Render this note as a self-contained Markdown document.

        The title heading and body are always emitted; the creation line,
        tag line and "Linked concepts" section appear only when non-empty.
        """
        lines: list[str] = [f"# {self.title}", ""]
        if self.created:
            lines += [f"_Created: {self.created}_", ""]
        if self.tags:
            lines += ["Tags: " + ", ".join(f"`{tag}`" for tag in self.tags), ""]
        lines += [self.body, ""]
        if self.linked_concepts:
            lines += ["## Linked concepts", ""]
            lines.extend(f"- `{concept}`" for concept in self.linked_concepts)
            lines.append("")
        return "\n".join(lines)
Methods:
to_dict
to_dict() -> dict[str, object]

Serialize this note to a JSON-friendly dict.

Source code in src\cds\knowledge\notes.py
def to_dict(self) -> dict[str, object]:
    """Return the note's fields as a JSON-friendly dict."""
    payload: dict[str, object] = {
        "id": self.id,
        "title": self.title,
        "body": self.body,
    }
    # Copy the lists so the returned dict does not alias the note's own state.
    payload["tags"] = [*self.tags]
    payload["linked_concepts"] = [*self.linked_concepts]
    payload["created"] = self.created
    return payload
from_dict classmethod
from_dict(data: dict[str, object]) -> Note

Reconstruct a :class:Note from :meth:to_dict output.

Raises:

Type Description
ValueError

if data is missing keys or has the wrong types.

Source code in src\cds\knowledge\notes.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> Note:
    """Reconstruct a :class:`Note` from :meth:`to_dict` output.

    Args:
        data: mapping with the keys ``id``, ``title``, ``body``, ``tags``,
            ``linked_concepts`` and ``created``.

    Returns:
        a new instance holding copies of the ``tags`` and
        ``linked_concepts`` lists.

    Raises:
        ValueError: if ``data`` is missing keys or has the wrong types.
    """
    # Check presence first: plain indexing would raise KeyError, which
    # contradicts the documented ValueError contract.
    required = ("id", "title", "body", "tags", "linked_concepts", "created")
    missing = [key for key in required if key not in data]
    if missing:
        raise ValueError(f"Missing note keys: {missing!r}")
    note_id = data["id"]
    title = data["title"]
    body = data["body"]
    if not isinstance(note_id, str):
        raise ValueError(f"Invalid note id: {note_id!r}")
    if not isinstance(title, str):
        raise ValueError(f"Invalid note title: {title!r}")
    if not isinstance(body, str):
        raise ValueError(f"Invalid note body: {body!r}")
    tags_raw = data["tags"]
    if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
        raise ValueError(f"Invalid note tags: {tags_raw!r}")
    linked_raw = data["linked_concepts"]
    if not isinstance(linked_raw, list) or not all(isinstance(c, str) for c in linked_raw):
        raise ValueError(f"Invalid note linked_concepts: {linked_raw!r}")
    created = data["created"]
    if created is not None and not isinstance(created, str):
        raise ValueError(f"Invalid note created: {created!r}")
    return cls(
        id=note_id,
        title=title,
        body=body,
        tags=list(tags_raw),
        linked_concepts=list(linked_raw),
        created=created,
    )
to_markdown
to_markdown() -> str

Render this note as a self-contained Markdown document.

Source code in src\cds\knowledge\notes.py
def to_markdown(self) -> str:
    """Build a standalone Markdown document for this note.

    The title heading and body are always present; the creation line, the
    tag line and the "Linked concepts" section are emitted only when the
    corresponding field is non-empty.
    """
    out: list[str] = [f"# {self.title}", ""]
    if self.created:
        out.extend([f"_Created: {self.created}_", ""])
    if self.tags:
        quoted_tags = [f"`{tag}`" for tag in self.tags]
        out.extend(["Tags: " + ", ".join(quoted_tags), ""])
    out.extend([self.body, ""])
    if self.linked_concepts:
        out.extend(["## Linked concepts", ""])
        out.extend(f"- `{concept}`" for concept in self.linked_concepts)
        out.append("")
    return "\n".join(out)

Notebook dataclass

An ordered collection of research notes keyed by id.

Attributes:

Name Type Description
name str

human-readable notebook title (used in :meth:to_markdown).

notes dict[str, Note]

mapping of note id to :class:Note.

Source code in src\cds\knowledge\notes.py
@dataclass
class Notebook:
    """An ordered collection of research notes keyed by id.

    Attributes:
        name: human-readable notebook title (used in :meth:`to_markdown`).
        notes: mapping of note id to :class:`Note`.
    """

    name: str
    notes: dict[str, Note] = field(default_factory=dict)

    # ------------------------------------------------------------------ #
    # Construction & lookup
    # ------------------------------------------------------------------ #
    def add_note(
        self,
        note_id: str,
        title: str,
        body: str,
        tags: list[str] | None = None,
        linked_concepts: list[str] | None = None,
        created: str | None = None,
    ) -> Note:
        """Add a note, returning the stored :class:`Note`.

        If ``note_id`` already exists it is overwritten (last-write-wins),
        matching how a researcher edits a numbered entry in place. The
        ``tags`` and ``linked_concepts`` lists are copied before storing.

        Returns:
            the stored :class:`Note`.
        """
        note = Note(
            id=note_id,
            title=title,
            body=body,
            tags=list(tags) if tags else [],
            linked_concepts=list(linked_concepts) if linked_concepts else [],
            created=created,
        )
        self.notes[note_id] = note
        return note

    def get_note(self, note_id: str) -> Note:
        """Return the note with ``note_id``.

        Raises:
            KeyError: if ``note_id`` is not in this notebook.
        """
        if note_id not in self.notes:
            raise KeyError(f"unknown note: {note_id!r}")
        return self.notes[note_id]

    def notes_for_concept(self, concept: str) -> list[Note]:
        """All notes that reference ``concept`` (by linked_concepts membership)."""
        return [note for note in self.notes.values() if concept in note.linked_concepts]

    def notes_by_tag(self, tag: str) -> list[Note]:
        """All notes carrying ``tag``."""
        return [note for note in self.notes.values() if tag in note.tags]

    # ------------------------------------------------------------------ #
    # Rendering & serialization
    # ------------------------------------------------------------------ #
    def to_markdown(self) -> str:
        """Render a compact index of this notebook's notes as Markdown.

        Notes are listed in sorted id order, one bullet per note.
        """
        lines: list[str] = [f"# Notebook: {self.name}", ""]
        if not self.notes:
            lines += ["_No notes._", ""]
        else:
            lines += ["## Notes", ""]
            for note_id in sorted(self.notes):
                note = self.notes[note_id]
                tags = f" ({', '.join(note.tags)})" if note.tags else ""
                lines.append(f"- **{note_id}**: {note.title}{tags}")
            lines.append("")
        return "\n".join(lines)

    def to_dict(self) -> dict[str, object]:
        """Serialize the notebook to a JSON-friendly dict."""
        return {
            "name": self.name,
            "notes": [note.to_dict() for note in self.notes.values()],
        }

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> Notebook:
        """Reconstruct a :class:`Notebook` from :meth:`to_dict` output.

        Raises:
            ValueError: if ``data`` is missing keys or has the wrong types.
        """
        # Check presence first: plain indexing would raise KeyError, which
        # contradicts the documented ValueError contract.
        missing = [key for key in ("name", "notes") if key not in data]
        if missing:
            raise ValueError(f"Missing notebook keys: {missing!r}")
        name = data["name"]
        if not isinstance(name, str):
            raise ValueError(f"Invalid notebook name: {name!r}")
        notes_raw = data["notes"]
        if not isinstance(notes_raw, list):
            raise ValueError(f"Invalid notes list: {notes_raw!r}")
        notebook = cls(name=name)
        for item in notes_raw:
            if not isinstance(item, dict):
                raise ValueError(f"Invalid note entry: {item!r}")
            try:
                note = Note.from_dict(item)
            except KeyError as exc:
                # Keep the ValueError contract even if the note parser
                # reports a missing field as KeyError.
                raise ValueError(f"Invalid note entry: {item!r}") from exc
            notebook.notes[note.id] = note
        return notebook

    def save(self, path: str | Path) -> None:
        """Write this notebook to ``path`` as indented UTF-8 JSON."""
        Path(path).write_text(
            json.dumps(self.to_dict(), ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    @classmethod
    def load(cls, path: str | Path) -> Notebook:
        """Read a notebook previously written by :meth:`save`.

        Raises:
            ValueError: if the file does not contain valid notebook JSON.
        """
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            raise ValueError(f"Invalid notebook file (expected object): {data!r}")
        return cls.from_dict(data)
Methods:
add_note
add_note(
    note_id: str,
    title: str,
    body: str,
    tags: list[str] | None = None,
    linked_concepts: list[str] | None = None,
    created: str | None = None,
) -> Note

Add a note, returning the stored :class:Note.

If note_id already exists it is overwritten (last-write-wins), matching how a researcher edits a numbered entry in place.

Returns:

Type Description
Note

the stored :class:Note.

Source code in src\cds\knowledge\notes.py
def add_note(
    self,
    note_id: str,
    title: str,
    body: str,
    tags: list[str] | None = None,
    linked_concepts: list[str] | None = None,
    created: str | None = None,
) -> Note:
    """Store a new note under ``note_id`` and hand it back.

    An existing entry with the same id is replaced (last-write-wins).
    ``tags`` and ``linked_concepts`` are copied, and a missing or empty
    value becomes a fresh empty list.

    Returns:
        the :class:`Note` that was stored.
    """
    tag_list = list(tags or ())
    concept_list = list(linked_concepts or ())
    entry = Note(
        id=note_id,
        title=title,
        body=body,
        tags=tag_list,
        linked_concepts=concept_list,
        created=created,
    )
    self.notes[note_id] = entry
    return entry
get_note
get_note(note_id: str) -> Note

Return the note with note_id.

Raises:

Type Description
KeyError

if note_id is not in this notebook.

Source code in src\cds\knowledge\notes.py
def get_note(self, note_id: str) -> Note:
    """Look up a note by its id.

    Raises:
        KeyError: if ``note_id`` is not in this notebook.
    """
    try:
        return self.notes[note_id]
    except KeyError:
        raise KeyError(f"unknown note: {note_id!r}") from None
notes_for_concept
notes_for_concept(concept: str) -> list[Note]

All notes that reference concept (by linked_concepts membership).

Source code in src\cds\knowledge\notes.py
def notes_for_concept(self, concept: str) -> list[Note]:
    """Collect every note whose ``linked_concepts`` contains ``concept``."""
    matches: list[Note] = []
    for candidate in self.notes.values():
        if concept in candidate.linked_concepts:
            matches.append(candidate)
    return matches
notes_by_tag
notes_by_tag(tag: str) -> list[Note]

All notes carrying tag.

Source code in src\cds\knowledge\notes.py
def notes_by_tag(self, tag: str) -> list[Note]:
    """Collect every note whose ``tags`` contains ``tag``."""
    matches: list[Note] = []
    for candidate in self.notes.values():
        if tag in candidate.tags:
            matches.append(candidate)
    return matches
to_markdown
to_markdown() -> str

Render a compact index of this notebook's notes as Markdown.

Source code in src\cds\knowledge\notes.py
def to_markdown(self) -> str:
    """Produce a short Markdown index of the notebook.

    The output starts with a heading carrying the notebook name, followed
    either by a "no notes" placeholder or by one bullet per note in sorted
    id order, each showing the title and any tags in parentheses.
    """
    out: list[str] = [f"# Notebook: {self.name}", ""]
    if self.notes:
        out.extend(["## Notes", ""])
        for key in sorted(self.notes):
            entry = self.notes[key]
            suffix = ""
            if entry.tags:
                suffix = f" ({', '.join(entry.tags)})"
            out.append(f"- **{key}**: {entry.title}{suffix}")
        out.append("")
    else:
        out.extend(["_No notes._", ""])
    return "\n".join(out)
to_dict
to_dict() -> dict[str, object]

Serialize the notebook to a JSON-friendly dict.

Source code in src\cds\knowledge\notes.py
def to_dict(self) -> dict[str, object]:
    """Return the notebook name plus each note's ``to_dict()`` output."""
    serialized_notes = []
    for entry in self.notes.values():
        serialized_notes.append(entry.to_dict())
    return {"name": self.name, "notes": serialized_notes}
from_dict classmethod
from_dict(data: dict[str, object]) -> Notebook

Reconstruct a :class:Notebook from :meth:to_dict output.

Raises:

Type Description
ValueError

if data is missing keys or has the wrong types.

Source code in src\cds\knowledge\notes.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> Notebook:
    """Reconstruct a :class:`Notebook` from :meth:`to_dict` output.

    Args:
        data: mapping with a string ``name`` and a ``notes`` list of
            per-note dicts.

    Returns:
        a new notebook whose notes are keyed by each note's ``id``; a later
        entry with a repeated id replaces the earlier one.

    Raises:
        ValueError: if ``data`` is missing keys or has the wrong types.
    """
    # Check presence first: plain indexing would raise KeyError, which
    # contradicts the documented ValueError contract.
    missing = [key for key in ("name", "notes") if key not in data]
    if missing:
        raise ValueError(f"Missing notebook keys: {missing!r}")
    name = data["name"]
    if not isinstance(name, str):
        raise ValueError(f"Invalid notebook name: {name!r}")
    notes_raw = data["notes"]
    if not isinstance(notes_raw, list):
        raise ValueError(f"Invalid notes list: {notes_raw!r}")
    notebook = cls(name=name)
    for item in notes_raw:
        if not isinstance(item, dict):
            raise ValueError(f"Invalid note entry: {item!r}")
        try:
            note = Note.from_dict(item)
        except KeyError as exc:
            # Keep the ValueError contract even if the note parser reports
            # a missing field as KeyError.
            raise ValueError(f"Invalid note entry: {item!r}") from exc
        notebook.notes[note.id] = note
    return notebook
save
save(path: str | Path) -> None

Write this notebook to path as indented UTF-8 JSON.

Source code in src\cds\knowledge\notes.py
def save(self, path: str | Path) -> None:
    """Serialize the notebook and write it to ``path`` as UTF-8 JSON.

    The JSON is indented by two spaces and non-ASCII characters are written
    as-is rather than escaped.
    """
    payload = json.dumps(self.to_dict(), ensure_ascii=False, indent=2)
    destination = Path(path)
    destination.write_text(payload, encoding="utf-8")
load classmethod
load(path: str | Path) -> Notebook

Read a notebook previously written by :meth:save.

Raises:

Type Description
ValueError

if the file does not contain valid notebook JSON.

Source code in src\cds\knowledge\notes.py
@classmethod
def load(cls, path: str | Path) -> Notebook:
    """Parse the UTF-8 JSON file at ``path`` and build a notebook from it.

    Raises:
        ValueError: if the file does not contain valid notebook JSON.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    parsed = json.loads(raw_text)
    if isinstance(parsed, dict):
        return cls.from_dict(parsed)
    # Valid JSON, but the top level is not an object.
    raise ValueError(f"Invalid notebook file (expected object): {parsed!r}")

SearchResult dataclass

A single ranked retrieval hit.

Attributes:

Name Type Description
concept_name str | None

the matched concept name, if the hit is a concept; None otherwise.

note_id str | None

the matched note id, if the hit is a note; None otherwise.

score float

relevance in [0, 1] — higher is better.

matched_on str

short label of the field that matched (e.g. "name", "description", "title").

Source code in src\cds\knowledge\retrieval.py
@dataclass
class SearchResult:
    """One ranked hit produced by a retrieval query.

    A hit refers either to a concept or to a note; the identifier field
    that does not apply is ``None``.

    Attributes:
        concept_name: name of the matched concept, or ``None`` for a note hit.
        note_id: id of the matched note, or ``None`` for a concept hit.
        score: relevance in ``[0, 1]`` — higher is better.
        matched_on: short label naming the field that matched
            (e.g. ``"name"``, ``"description"``, ``"title"``).
    """

    concept_name: str | None
    note_id: str | None
    score: float
    matched_on: str

Functions:

search

search(
    graph: KnowledgeGraph,
    notebook: Notebook,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]

Combined ranked search over both a graph's concepts and a notebook's notes.

Results from :func:search_concepts and :func:search_notes are merged and re-ranked by score (desc) then by identifier (asc).

Parameters:

Name Type Description Default
graph KnowledgeGraph

the :class:KnowledgeGraph whose concepts to search.

required
notebook Notebook

the :class:Notebook whose notes to search.

required
query str

case-insensitive search text.

required
tag str | None

optional tag filter applied to both concepts and notes.

None

Returns:

Name Type Description
ranked list[SearchResult]

Ranked :class:SearchResult list (best first, ties alphabetical).

Source code in src\cds\knowledge\retrieval.py
def search(
    graph: KnowledgeGraph,
    notebook: Notebook,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]:
    """Search a graph's concepts and a notebook's notes in one call.

    The hits returned by :func:`search_concepts` and :func:`search_notes`
    are concatenated and ordered by descending score, with ties broken by
    the hit's concept name (or note id when there is no concept name).

    Args:
        graph: the :class:`KnowledgeGraph` whose concepts to search.
        notebook: the :class:`Notebook` whose notes to search.
        query: search text, forwarded unchanged to both searches.
        tag: optional tag filter, forwarded unchanged to both searches.

    Returns:
        ranked :class:`SearchResult` list (best first, ties alphabetical).
    """
    concept_hits = search_concepts(graph, query, tag)
    note_hits = search_notes(notebook, query, tag)

    def ordering(hit: SearchResult) -> tuple[float, str]:
        identifier = hit.concept_name or hit.note_id or ""
        return (-hit.score, identifier)

    return sorted(concept_hits + note_hits, key=ordering)

search_concepts

search_concepts(
    graph: KnowledgeGraph,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]

Find concepts in graph matching query.

A concept matches if its name matches the query exactly (score 1.0) or its name or description contains the query as a substring (score 0.5). When tag is given, only concepts carrying that tag are considered.

Parameters:

Name Type Description Default
graph KnowledgeGraph

the :class:KnowledgeGraph to search.

required
query str

case-insensitive search text.

required
tag str | None

optional tag filter; None disables filtering.

None

Returns:

Name Type Description
ranked list[SearchResult]

Ranked :class:SearchResult list (best first, ties alphabetical).

Source code in src\cds\knowledge\retrieval.py
def search_concepts(
    graph: KnowledgeGraph,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]:
    """Rank the concepts of ``graph`` that match ``query``.

    Comparison is done on case-folded text. A concept whose name equals the
    query scores ``NAME_TAG_SCORE``; one whose name, or failing that its
    description, contains the query scores ``SUBSTRING_SCORE``. Each concept
    yields at most one hit, labelled with the field that matched first.
    When ``tag`` is given, concepts without that tag are skipped.

    Args:
        graph: the :class:`KnowledgeGraph` to search.
        query: case-insensitive search text.
        tag: optional tag filter; ``None`` disables filtering.

    Returns:
        ranked :class:`SearchResult` list (best first, ties alphabetical).
    """
    needle = query.casefold()
    hits: list[SearchResult] = []
    for name in sorted(graph.concepts):
        concept = graph.concepts[name]
        if not (tag is None or tag in concept.tags):
            continue
        folded_name = name.casefold()
        if folded_name == needle:
            score, field_label = NAME_TAG_SCORE, "name"
        elif needle in folded_name:
            score, field_label = SUBSTRING_SCORE, "name"
        elif concept.description is not None and needle in concept.description.casefold():
            score, field_label = SUBSTRING_SCORE, "description"
        else:
            continue
        hits.append(
            SearchResult(concept_name=name, note_id=None, score=score, matched_on=field_label)
        )
    return sorted(hits, key=lambda hit: (-hit.score, hit.concept_name or ""))

search_notes

search_notes(
    notebook: Notebook, query: str, tag: str | None = None
) -> list[SearchResult]

Find notes in notebook matching query.

A note matches if its title matches exactly (score 1.0) or its title or body contains the query as a substring (score 0.5). When tag is given, only notes carrying that tag are considered.

Parameters:

Name Type Description Default
notebook Notebook

the :class:Notebook to search.

required
query str

case-insensitive search text.

required
tag str | None

optional tag filter; None disables filtering.

None

Returns:

Name Type Description
ranked list[SearchResult]

Ranked :class:SearchResult list (best first, ties alphabetical).

Source code in src\cds\knowledge\retrieval.py
def search_notes(
    notebook: Notebook,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]:
    """Rank the notes of ``notebook`` that match ``query``.

    Comparison is done on case-folded text. A note whose title equals the
    query scores ``NAME_TAG_SCORE``; one whose title, or failing that its
    body, contains the query scores ``SUBSTRING_SCORE``. Each note yields at
    most one hit, labelled with the field that matched first. When ``tag``
    is given, notes without that tag are skipped.

    Args:
        notebook: the :class:`Notebook` to search.
        query: case-insensitive search text.
        tag: optional tag filter; ``None`` disables filtering.

    Returns:
        ranked :class:`SearchResult` list (best first, ties alphabetical).
    """
    needle = query.casefold()
    hits: list[SearchResult] = []
    for note_id in sorted(notebook.notes):
        note = notebook.notes[note_id]
        if not (tag is None or tag in note.tags):
            continue
        folded_title = note.title.casefold()
        if folded_title == needle:
            score, field_label = NAME_TAG_SCORE, "title"
        elif needle in folded_title:
            score, field_label = SUBSTRING_SCORE, "title"
        elif needle in note.body.casefold():
            score, field_label = SUBSTRING_SCORE, "body"
        else:
            continue
        hits.append(
            SearchResult(concept_name=None, note_id=note_id, score=score, matched_on=field_label)
        )
    return sorted(hits, key=lambda hit: (-hit.score, hit.note_id or ""))

Data Analysis

CSV loading, normalisation, smoothing, and ASCII visualisation.

cds.data_analysis

Data loading, analysis and visualization helpers.

Classes

DataSet

A lightweight, pure Python 'DataFrame' for structured data.

Data is stored internally as a list of dictionaries where keys are column names.

Source code in src\cds\data_analysis\dataset.py
class DataSet:
    """A lightweight, pure Python 'DataFrame' for structured data.

    Data is stored internally as a list of dictionaries where keys are column names.
    Column names are taken from the first row only; an empty dataset has no
    columns.
    """

    def __init__(self, data: list[Row]):
        """Wrap ``data`` (stored by reference) and record the first row's keys."""
        self.data = data
        self._columns = list(data[0].keys()) if data else []

    @property
    def columns(self) -> list[str]:
        """Return the list of column names."""
        return self._columns

    @property
    def shape(self) -> tuple[int, int]:
        """Return (rows, columns) tuple."""
        return len(self.data), len(self._columns)

    def __len__(self) -> int:
        """Return the number of rows."""
        return len(self.data)

    def __getitem__(self, idx: int) -> Row:
        """Return the row at position ``idx``."""
        return self.data[idx]

    def column(self, name: str) -> list[Scalar]:
        """Extract a single column as a list.

        Raises:
            ValueError: if ``name`` is not a known column.
        """
        if name not in self._columns:
            raise ValueError(f"Column '{name}' not found. Available: {self._columns}")
        return [row[name] for row in self.data]

    def filter(self, predicate: Callable[[Row], bool]) -> DataSet:
        """Return a new dataset of the rows for which ``predicate`` is truthy."""
        filtered_data = [row for row in self.data if predicate(row)]
        return DataSet(filtered_data)

    def head(self, n: int = 5) -> DataSet:
        """Return the first n rows."""
        return DataSet(self.data[:n])

    def tail(self, n: int = 5) -> DataSet:
        """Return the last n rows (an empty dataset when ``n`` is 0)."""
        # ``data[-0:]`` is ``data[0:]`` — the whole list — so zero needs its
        # own branch to yield no rows.
        return DataSet(self.data[-n:] if n else [])

    def select(self, *names: str) -> DataSet:
        """Select a subset of columns.

        Raises:
            ValueError: if any requested name is not a known column.
        """
        for name in names:
            if name not in self._columns:
                raise ValueError(f"Column '{name}' not found.")

        new_data = [{name: row[name] for name in names} for row in self.data]
        return DataSet(new_data)

    def group_by(self, column_name: str) -> DataGroup:
        """Group data by a specific column for aggregation.

        Rows are bucketed by their value in ``column_name``, keeping
        first-seen key order and original row order within each bucket.

        Raises:
            ValueError: if ``column_name`` is not a known column.
        """
        if column_name not in self._columns:
            raise ValueError(f"Column '{column_name}' not found.")

        groups: dict[Scalar, list[Row]] = {}
        for row in self.data:
            groups.setdefault(row[column_name], []).append(row)

        return DataGroup(groups, column_name)

    def to_list(self) -> list[Row]:
        """Export data as a list of (shallow-copied) dictionaries."""
        return [row.copy() for row in self.data]

    def __repr__(self) -> str:
        """Return a short summary with the row and column counts."""
        if not self.data:
            return "DataSet(empty)"
        return f"DataSet(rows={len(self.data)}, cols={len(self._columns)})"
Attributes
columns property
columns: list[str]

Return the list of column names.

shape property
shape: tuple[int, int]

Return (rows, columns) tuple.

Methods:
column
column(name: str) -> list[Scalar]

Extract a single column as a list.

Source code in src\cds\data_analysis\dataset.py
def column(self, name: str) -> list[Scalar]:
    """Return the values stored under ``name``, one per row, in row order.

    Raises:
        ValueError: if ``name`` is not among the dataset's columns.
    """
    if name in self._columns:
        return [record[name] for record in self.data]
    raise ValueError(f"Column '{name}' not found. Available: {self._columns}")
filter
filter(predicate: Callable[[Row], bool]) -> DataSet

Filter the dataset based on a predicate function.

Source code in src\cds\data_analysis\dataset.py
def filter(self, predicate: Callable[[Row], bool]) -> DataSet:
    """Build a new dataset from the rows for which ``predicate`` is truthy."""
    kept_rows = []
    for row in self.data:
        if predicate(row):
            kept_rows.append(row)
    return DataSet(kept_rows)
head
head(n: int = 5) -> DataSet

Return the first n rows.

Source code in src\cds\data_analysis\dataset.py
def head(self, n: int = 5) -> DataSet:
    """Build a new dataset from the leading ``n`` rows (slice ``[:n]``)."""
    leading_rows = self.data[:n]
    return DataSet(leading_rows)
tail
tail(n: int = 5) -> DataSet

Return the last n rows.

Source code in src\cds\data_analysis\dataset.py
def tail(self, n: int = 5) -> DataSet:
    """Return the last n rows (an empty dataset when ``n`` is 0)."""
    # ``data[-0:]`` is ``data[0:]`` — the whole list — so zero needs its own
    # branch to yield no rows.
    return DataSet(self.data[-n:] if n else [])
select
select(*names: str) -> DataSet

Select a subset of columns.

Source code in src\cds\data_analysis\dataset.py
def select(self, *names: str) -> DataSet:
    """Build a new dataset holding only the columns listed in ``names``.

    Raises:
        ValueError: for the first requested name that is not a known column.
    """
    for wanted in names:
        if wanted not in self._columns:
            raise ValueError(f"Column '{wanted}' not found.")

    projected = []
    for row in self.data:
        projected.append({wanted: row[wanted] for wanted in names})
    return DataSet(projected)
group_by
group_by(column_name: str) -> DataGroup

Group data by a specific column for aggregation.

Source code in src\cds\data_analysis\dataset.py
def group_by(self, column_name: str) -> DataGroup:
    """Bucket the rows by their value in ``column_name``.

    Buckets are created in first-seen key order and keep the original row
    order; the mapping and the column name are passed to ``DataGroup``.

    Raises:
        ValueError: if ``column_name`` is not a known column.
    """
    if column_name not in self._columns:
        raise ValueError(f"Column '{column_name}' not found.")

    buckets: dict[Scalar, list[Row]] = {}
    for row in self.data:
        buckets.setdefault(row[column_name], []).append(row)

    return DataGroup(buckets, column_name)
to_list
to_list() -> list[Row]

Export data as a list of dictionaries.

Source code in src\cds\data_analysis\dataset.py
def to_list(self) -> list[Row]:
    """Return a new list holding a shallow ``copy()`` of every row."""
    exported = []
    for row in self.data:
        exported.append(row.copy())
    return exported

DataTable dataclass

In-memory tabular data: a header row plus a list of string rows.

Source code in src\cds\data_analysis\loader.py
@dataclass
class DataTable:
    """Tabular data held in memory as a header list plus string rows."""

    headers: list[str] = field(default_factory=list)
    rows: list[list[str]] = field(default_factory=list)

    @property
    def n_rows(self) -> int:
        """Count of data rows; the header is not included."""
        return len(self.rows)

    @property
    def n_cols(self) -> int:
        """Count of columns, taken from the number of headers."""
        return len(self.headers)

    def column(self, name: str) -> list[str]:
        """Collect the cell under header `name` from every row.

        `list.index` raises ValueError when `name` is not a header.
        """
        position = self.headers.index(name)
        cells = []
        for record in self.rows:
            cells.append(record[position])
        return cells

    def column_as_float(self, name: str) -> list[float]:
        """Convert column `name` with `float()`; a non-numeric cell raises ValueError."""
        return list(map(float, self.column(name)))

    def head(self, n: int = 5) -> list[list[str]]:
        """Return the leading `n` rows (5 unless specified) as a new list."""
        leading = self.rows[:n]
        return leading

    def describe(self) -> dict[str, dict[str, float]]:
        """Summarise every column whose cells all parse as floats.

        Each summarised header maps to its count, mean, std, min, median
        and max. A column that raises ValueError or TypeError during
        conversion or aggregation is left out of the result.
        """
        from cds.stats.descriptive import mean, median, stdev

        summary: dict[str, dict[str, float]] = {}
        for header in self.headers:
            try:
                numbers = self.column_as_float(header)
                summary[header] = {
                    "count": len(numbers),
                    "mean": mean(numbers),
                    "std": stdev(numbers),
                    "min": min(numbers),
                    "median": median(numbers),
                    "max": max(numbers),
                }
            except (ValueError, TypeError):
                # Column could not be summarised — leave it out and move on.
                continue
        return summary
Attributes
n_rows property
n_rows: int

Number of data rows (excluding the header).

n_cols property
n_cols: int

Number of columns (i.e. number of header entries).

Methods:
column
column(name: str) -> list[str]

Return all values in the column identified by name.

Source code in src\cds\data_analysis\loader.py
def column(self, name: str) -> list[str]:
    """Collect the cell under header `name` from every row.

    `list.index` raises ValueError when `name` is not a header.
    """
    position = self.headers.index(name)
    cells = []
    for record in self.rows:
        cells.append(record[position])
    return cells
column_as_float
column_as_float(name: str) -> list[float]

Return a column as floats; raises ValueError if a cell is non-numeric.

Source code in src\cds\data_analysis\loader.py
def column_as_float(self, name: str) -> list[float]:
    """Convert every cell of column `name` with `float()`.

    A cell that `float()` cannot parse raises ValueError.
    """
    return list(map(float, self.column(name)))
head
head(n: int = 5) -> list[list[str]]

Return the first n rows (default 5) for quick inspection.

Source code in src\cds\data_analysis\loader.py
def head(self, n: int = 5) -> list[list[str]]:
    """Return a new list holding the leading `n` rows (5 when omitted)."""
    leading = self.rows[:n]
    return leading
describe
describe() -> dict[str, dict[str, float]]

Quick summary stats for numeric columns.

Source code in src\cds\data_analysis\loader.py
def describe(self) -> dict[str, dict[str, float]]:
    """Summarise every column whose cells all parse as floats.

    Returns a mapping from header to its ``count``, ``mean``, ``std``,
    ``min``, ``median`` and ``max``. A column is left out of the result
    when building its summary raises ValueError or TypeError.
    """
    from cds.stats.descriptive import mean, median, stdev

    summary: dict[str, dict[str, float]] = {}
    for header in self.headers:
        try:
            numbers = self.column_as_float(header)
            stats = {
                "count": len(numbers),
                "mean": mean(numbers),
                "std": stdev(numbers),
                "min": min(numbers),
                "median": median(numbers),
                "max": max(numbers),
            }
        except (ValueError, TypeError):
            # Raised by the float conversion or by one of the aggregates;
            # the column is omitted and anything already stored is kept.
            continue
        summary[header] = stats
    return summary

Functions:

load_csv

load_csv(path: str | Path) -> DataTable

Load a CSV file into a DataTable.

The first row is treated as headers; remaining rows are stored as strings. Raises FileNotFoundError if path does not exist.

Source code in src\cds\data_analysis\loader.py
def load_csv(path: str | Path) -> DataTable:
    """Load a CSV file into a DataTable.

    The first row is treated as headers; remaining rows are stored as strings.

    Raises:
        FileNotFoundError: if `path` does not exist.
        ValueError: if the file contains no rows at all, so there is no
            header row to read.
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"no such file: {p}")
    # newline="" is what the csv module asks for so that it can handle
    # line endings (including those embedded in quoted fields) itself.
    with open(p, newline="") as f:
        reader = csv.reader(f)
        # Two-argument next() so an empty file surfaces as a descriptive
        # ValueError rather than a bare StopIteration escaping to the caller.
        headers = next(reader, None)
        if headers is None:
            raise ValueError(f"CSV file is empty: {p}")
        rows = list(reader)
    return DataTable(headers=headers, rows=rows)

moving_average

moving_average(
    data: list[float], window: int = 3
) -> list[float]

Trailing moving average with the given window size (>= 1).

Source code in src\cds\data_analysis\transform.py
def moving_average(data: list[float], window: int = 3) -> list[float]:
    """Compute a trailing mean over `data`.

    Element ``i`` of the result averages the values from index
    ``max(0, i - window + 1)`` through ``i`` inclusive, so the first
    ``window - 1`` outputs are taken over a shorter, partially filled window.

    Raises:
        ValueError: if `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be >= 1")

    def _trailing_mean(end: int) -> float:
        # Slice ending at `end`, clipped at the start of the series.
        segment = data[max(0, end - window + 1) : end + 1]
        return sum(segment) / len(segment)

    return [_trailing_mean(end) for end in range(len(data))]

normalize

normalize(data: list[float]) -> list[float]

Min-max normalization to [0, 1].

Source code in src\cds\data_analysis\transform.py
def normalize(data: list[float]) -> list[float]:
    """Min-max normalization to [0, 1].

    Each value is mapped to ``(x - min) / (max - min)``.

    Returns:
        A new list the same length as `data`. An empty input yields an
        empty list, and a constant input (max == min) yields all zeros
        because the range would otherwise be a zero divisor.
    """
    # min()/max() raise ValueError on an empty sequence, so handle it first.
    if not data:
        return []
    lo, hi = min(data), max(data)
    rng = hi - lo
    if rng == 0:
        return [0.0] * len(data)
    return [(x - lo) / rng for x in data]

z_score

z_score(data: list[float]) -> list[float]

Standardize to mean=0, std=1.

Source code in src\cds\data_analysis\transform.py
def z_score(data: list[float]) -> list[float]:
    """Rescale `data` by subtracting its mean and dividing by its stdev.

    When the standard deviation is zero the division is skipped and a
    list of zeros of the same length is returned instead.
    """
    centre = mean(data)
    spread = stdev(data)
    if spread == 0:
        return [0.0] * len(data)
    return [(value - centre) / spread for value in data]

plot_bar

plot_bar(
    data: dict[str, float],
    title: str = "Bar Chart",
    width: int = 50,
) -> str

Generate an ASCII bar chart from a dictionary.

Parameters:

Name Type Description Default
data dict[str, float]

Mapping from label to numeric value.

required
title str

Chart title.

'Bar Chart'
width int

Maximum bar width in characters.

50
Source code in src\cds\data_analysis\viz.py
def plot_bar(data: dict[str, float], title: str = "Bar Chart", width: int = 50) -> str:
    """Render a dictionary of values as a horizontal ASCII bar chart.

    Bars are scaled against the largest absolute value in `data`.
    Non-negative values are drawn with solid blocks and a ``+`` prefixed
    number; negative values use a lighter shade block.

    Args:
        data: Mapping from label to numeric value.
        title: Chart title.
        width: Maximum bar width in characters.

    Returns:
        The chart as one newline-joined string, or ``"No data to plot."``
        when `data` is empty.
    """
    if not data:
        return "No data to plot."

    values = list(data.values())
    largest, smallest = max(values), min(values)
    # The tiny floor keeps the scale non-zero when every value is 0.
    scale = max(abs(largest), abs(smallest), 1e-10)

    rendered = [f"\n[bold]{title}[/]", "─" * len(title)]
    for label, val in data.items():
        cells = int((abs(val) / scale) * width)
        glyph = "█" if val >= 0 else "░"
        annotation = f" (+{val:.2f})" if val >= 0 else f" ({val:.2f})"
        rendered.append(f"{label:<15} | {glyph * cells}{annotation}")

    return "\n".join(rendered)

plot_line

plot_line(
    y_values: list[float],
    title: str = "Line Plot",
    height: int = 10,
    width: int = 60,
) -> str

Generate a simple ASCII line plot.

Parameters:

Name Type Description Default
y_values list[float]

List of numeric values.

required
title str

Plot title.

'Line Plot'
height int

Number of rows.

10
width int

Number of columns (will sample data to fit).

60
Source code in src\cds\data_analysis\viz.py
def plot_line(
    y_values: list[float], title: str = "Line Plot", height: int = 10, width: int = 60
) -> str:
    """Generate a simple ASCII line plot.

    Args:
        y_values: List of numeric values.
        title: Plot title.
        height: Number of rows. Values below 1 are treated as 1.
        width: Number of columns (will sample data to fit). Values below 2
            are treated as 2.

    Returns:
        The plot as one newline-joined string (title, underline, grid rows,
        then a ``min``/``max`` footer), or ``"No data to plot."`` when
        `y_values` is empty.
    """
    if not y_values:
        return "No data to plot."

    # Guard: width must be at least 2; otherwise stride below divides by zero.
    eff_width = max(2, width)
    # Guard: a grid with no rows cannot be indexed, so keep at least one row.
    eff_height = max(1, height)

    # Sample down to eff_width points when there is more data than columns;
    # otherwise plot every point and shrink the grid to the data length.
    if len(y_values) > eff_width:
        indices = [int(i * (len(y_values) - 1) / (eff_width - 1)) for i in range(eff_width)]
        sampled = [y_values[i] for i in indices]
    else:
        sampled = y_values
        eff_width = len(y_values)

    max_y = max(sampled)
    min_y = min(sampled)
    # A flat series would give a zero range; use 1.0 so the division is safe.
    y_range = max_y - min_y if max_y != min_y else 1.0

    # Create grid
    grid = [[" " for _ in range(eff_width)] for _ in range(eff_height)]

    for x, y in enumerate(sampled):
        # Calculate row (inverted because row 0 is top)
        norm_y = (y - min_y) / y_range
        row = eff_height - 1 - int(norm_y * (eff_height - 1))
        grid[row][x] = "•"

    lines = [f"\n[bold]{title}[/]", "─" * len(title)]
    for grid_row in grid:
        lines.append("".join(grid_row))

    min_y_str = f"{min_y:.2f}"
    max_y_str = f"{max_y:.2f}"

    lines.append(f"min: {min_y_str:<{eff_width // 2}}max: {max_y_str:>{eff_width // 2}}")
    return "\n".join(lines)

Educational NLP

From-scratch transformer primitives: BPE tokeniser, sinusoidal embeddings, attention, autograd, MiniGPT.

cds.nlp

Natural language processing primitives in pure Python.

Educational, from-scratch implementations of the building blocks used in modern language models — byte-pair encoding (BPE) tokenisation, the sinusoidal token / positional embeddings from the original Transformer paper, scaled dot-product and multi-head self-attention, the Transformer encoder block (GeLU FFN, LayerNorm, residual), a scalar-valued reverse-mode autograd engine with SGD/Adam optimisers, and a high-level training helper.

Designed for teaching, prototyping, and small-model experiments, not for production-scale training (no NumPy, no BLAS — performance is deliberately the trade-off for full transparency). The optional cds[fast-jit] extra brings in Numba for the matmul hot-path without changing the public surface.

Why this module exists inside CDS:

  • Demonstrates that the core ideas of transformer-era NLP are short, readable pure-Python code.
  • Provides a reproducible playground for tokenisation experiments without pulling in tokenizers / sentencepiece / torch.
  • Closes the loop on the educational NLP track: BPE + embeddings + attention + autograd = a learner can now train a tiny GPT and see every gradient flowing back through the graph because the engine is ~250 lines, not 50,000.

Scope (v0.10.0b1):

  • :func:~cds.nlp.bpe.train_bpe — train a BPE vocabulary from a corpus
  • :class:~cds.nlp.bpe.BPETokenizer — encode / decode / save / load
  • :class:~cds.nlp.embed.TokenEmbedding — token lookup table
  • :class:~cds.nlp.embed.PositionalEncoding — sinusoidal positions
  • :func:~cds.nlp.attention.scaled_dot_product_attention
  • :func:~cds.nlp.attention.multi_head_attention
  • :func:~cds.nlp.attention.causal_mask — decoder self-attention mask
  • :func:~cds.nlp.layers.gelu / :func:~cds.nlp.layers.layer_norm
  • :func:~cds.nlp.layers.feed_forward
  • :func:~cds.nlp.layers.transformer_block
  • :class:~cds.nlp.autograd.Tensor / :class:Parameter — scalar autograd
  • :func:~cds.nlp.autograd.matmul — nested-Tensor matmul
  • :class:~cds.nlp.optim.SGD / :class:Adam — optimisers
  • :func:~cds.nlp.training.cross_entropy / :func:train_step — loss + loop
  • :func:~cds.nlp.viz.render_attention_heatmap — ASCII attention heatmap
  • :func:~cds.nlp.viz.render_embedding_projection — ASCII 2-D PCA scatter
  • :func:~cds.nlp.viz.render_training_curve — ASCII loss curve

Out of scope for the educational track:

  • Mixed precision (FP16 / bfloat16) — meaningful only with the Numba backend, deferred for now.
  • Dropout, ALiBi, RoPE, GQA / MQA — modern attention refinements tracked for a later educational add-on.
  • Subword sampling tricks (BPE-Dropout, Unigram LM).
  • WordPiece / SentencePiece alternatives.

References
  • Sennrich, R., Haddow, B., & Birch, A. (2016). "Neural Machine Translation of Rare Words with Subword Units." ACL.
  • Vaswani, A. et al. (2017). "Attention Is All You Need." NeurIPS.
  • Gage, P. (1994). "A New Algorithm for Data Compression." C Users Journal.
  • Kingma, D. P., & Ba, J. (2014). "Adam: A Method for Stochastic Optimization." arXiv:1412.6980.
  • Karpathy, A. (2020). micrograd — the scalar autograd engine this module imitates.

Classes

Parameter

Bases: Tensor

A :class:Tensor that's a trainable weight.

Subclass of :class:Tensor with requires_grad=True by default. Use these for everything a model should learn (embeddings, attention projections, FFN weights, biases, etc.). The optimizer sees them via :func:cds.nlp.optim.parameters.

Initial values should be small and zero-centred; the simplest default is to wrap an existing :class:Tensor via Parameter(tensor.data).

Source code in src\cds\nlp\autograd\tensor.py
class Parameter(Tensor):
    """Trainable scalar weight.

    A :class:`Tensor` whose constructor always switches on
    ``requires_grad``. Use it for anything a model should learn
    (embeddings, attention projections, FFN weights, biases, etc.);
    optimizers collect these through :func:`cds.nlp.optim.parameters`.

    Start from small, zero-centred values. An existing :class:`Tensor`
    can be promoted with ``Parameter(tensor.data)``.
    """

    def __init__(self, value: Scalar) -> None:
        # The value is coerced with float() before it is stored as ``data``.
        super().__init__(requires_grad=True, data=float(value))

Tensor dataclass

A scalar value with optional gradient tracking.

Attributes:

Name Type Description
data Scalar

The numeric value (always a Python float — the educational track stays in scalars; vector ops are expressed as nested Tensor lists).

requires_grad bool

If True, backward() will populate grad.

grad Scalar

The running gradient (defaults to 0.0 at construction; backward() seeds the root with 1.0 and the _backward closures accumulate into the rest).

_backward BackwardFn

A closure set by each op that propagates grad to _prev. None for leaf nodes.

_prev set[Tensor]

The set of :class:Tensor nodes that produced this node (the parents in the dep graph).

Source code in src\cds\nlp\autograd\tensor.py
@dataclass(eq=False)
class Tensor:
    """A scalar value with optional gradient tracking.

    ``eq=False`` keeps the default identity-based equality and hash, which
    is what lets nodes be stored in the ``_prev`` and ``visited`` sets.

    Attributes:
        data: The numeric value (always a Python ``float`` — the
            educational track stays in scalars; vector ops are
            expressed as nested ``Tensor`` lists).
        requires_grad: Must be True on the node ``backward()`` is called
            on; also controls whether ``grad`` is shown in ``repr``.
        grad: The running gradient. Defaults to 0.0 at construction;
            ``backward()`` seeds the root with 1.0 and the ``_backward``
            closures accumulate into the rest.
        _backward: A closure set by each op that propagates ``grad``
            to ``_prev``. ``None`` for leaf nodes.
        _prev: The set of :class:`Tensor` nodes that produced this
            node (the parents in the dep graph).
    """

    data: Scalar
    requires_grad: bool = False
    grad: Scalar = 0.0
    _backward: BackwardFn = field(default=None, repr=False)
    _prev: set[Tensor] = field(default_factory=set, repr=False)

    def __repr__(self) -> str:
        # Only show the gradient for nodes that track one.
        grad_str = f", grad={self.grad}" if self.requires_grad else ""
        return f"Tensor(data={self.data}{grad_str})"

    # ------------------------------------------------------------------ #
    # Operator overloads — implemented inline so mypy strict sees them.
    # ------------------------------------------------------------------ #

    def __add__(self, other: Tensor | float | int) -> Tensor:
        return _binop("+", self, other)

    def __radd__(self, other: float | int) -> Tensor:
        return _binop("+", other, self)

    def __sub__(self, other: Tensor | float | int) -> Tensor:
        return _binop("-", self, other)

    def __rsub__(self, other: float | int) -> Tensor:
        return _binop("-", other, self)

    def __mul__(self, other: Tensor | float | int) -> Tensor:
        return _binop("*", self, other)

    def __rmul__(self, other: float | int) -> Tensor:
        return _binop("*", other, self)

    def __truediv__(self, other: Tensor | float | int) -> Tensor:
        return _binop("/", self, other)

    def __rtruediv__(self, other: float | int) -> Tensor:
        return _binop("/", other, self)

    def __neg__(self) -> Tensor:
        return neg(self)

    def __pos__(self) -> Tensor:
        # Unary plus is the identity: the same node is returned, not a copy.
        return self

    def __pow__(self, exponent: float) -> Tensor:
        """Raise this tensor to a constant ``int``/``float`` power.

        The exponent is treated as a constant: only ``self`` receives a
        gradient, via the power rule ``c * x**(c - 1)``.
        """
        # Return ``NotImplemented`` for unsupported operand types instead of
        # raising — this is the Pythonic contract for arithmetic dunders
        # (lets Python try the reflected ``__rpow__`` and only raise a real
        # ``TypeError`` if neither side can handle it). CodeQL's
        # ``unexpected-raise-in-special-method`` flags ``raise`` in dunders
        # precisely because it short-circuits that reflection protocol.
        if not isinstance(exponent, (int, float)):
            # Returning NotImplemented is correct here even though the declared
            # return type is Tensor: CPython's binary-operator dispatch
            # consumes the value (it never reaches user code), and mypy
            # models NotImplemented as compatible with arithmetic-dunder
            # return types for exactly this reason.
            return NotImplemented
        c = float(exponent)

        def _backward() -> None:
            # ``out`` is bound below, before this closure can ever run.
            self.grad += c * (self.data ** (c - 1.0)) * out.grad

        out = Tensor(data=self.data**c)
        return _track(out, (self,), _backward)

    # ------------------------------------------------------------------ #
    # Gradient propagation
    # ------------------------------------------------------------------ #

    def backward(self) -> None:
        """Compute gradients via reverse-mode autodiff.

        Builds a post-order traversal of the graph rooted at this
        node (children before parents) then walks it in reverse,
        calling each ``_backward`` closure to chain the gradient
        back to leaves. Sets every visited leaf's ``.grad`` to the
        accumulated value.

        Raises:
            RuntimeError: if this node has ``requires_grad=False``.
        """
        if not self.requires_grad:
            raise RuntimeError("backward() called on a Tensor with requires_grad=False")

        # Iterative post-order DFS. We push ``(node, processed)``
        # tuples; on the first visit we re-push the node with
        # ``processed=True`` after scheduling its children, so the
        # node only gets appended to ``topo`` after every descendant
        # is already in place.
        topo: list[Tensor] = []
        visited: set[Tensor] = set()
        work: list[tuple[Tensor, bool]] = [(self, False)]
        while work:
            node, processed = work.pop()
            if processed:
                topo.append(node)
                continue
            # A node shared by several consumers may have been pushed more
            # than once before its first visit; expand it only once.
            if node in visited:
                continue
            visited.add(node)
            work.append((node, True))
            for child in node._prev:
                if child not in visited:
                    work.append((child, False))
        # ``topo`` is post-order (children before parents). Reverse
        # so the backward pass walks parents first — that propagates
        # the seed gradient correctly.
        topo.reverse()

        # Seed the output gradient.
        self.grad = 1.0

        for node in topo:
            if node._backward is not None:
                node._backward()

    def zero_grad(self) -> None:
        """Reset ``grad`` to 0.0 on this node and every node reachable from it.

        The walk follows ``_prev`` links, so intermediate nodes are
        cleared as well as leaves. Call between training steps so
        gradients don't accumulate across batches (PyTorch's
        ``optim.zero_grad()`` semantics).
        """
        visited: set[Tensor] = set()
        stack: list[Tensor] = [self]
        while stack:
            node = stack.pop()
            # This guard is reachable: a node with two consumers (for
            # example ``a`` in ``a * b + a``) can be pushed by one of them
            # while still unvisited and then pushed again by the other, so
            # the second pop must be skipped.
            if node in visited:
                continue
            visited.add(node)
            node.grad = 0.0
            for child in node._prev:
                if child not in visited:
                    stack.append(child)
Methods:
backward
backward() -> None

Compute gradients via reverse-mode autodiff.

Builds a post-order traversal of the graph rooted at this node (children before parents) then walks it in reverse, calling each _backward closure to chain the gradient back to leaves. Sets every visited leaf's .grad to the accumulated value.

Source code in src\cds\nlp\autograd\tensor.py
def backward(self) -> None:
    """Run reverse-mode autodiff starting from this node.

    The graph reachable through ``_prev`` is ordered so that every node
    comes before the nodes that produced it; this node's ``grad`` is then
    seeded with 1.0 and each node's ``_backward`` closure (when it has
    one) is invoked in that order to push gradient towards the leaves.

    Raises:
        RuntimeError: if this node has ``requires_grad=False``.
    """
    if not self.requires_grad:
        raise RuntimeError("backward() called on a Tensor with requires_grad=False")

    # Iterative post-order DFS over ``(node, expanded)`` entries. A node
    # is re-queued as expanded underneath its inputs, so it is emitted
    # only after everything it depends on has been emitted.
    order: list[Tensor] = []
    seen: set[Tensor] = set()
    pending: list[tuple[Tensor, bool]] = [(self, False)]
    while pending:
        current, expanded = pending.pop()
        if expanded:
            order.append(current)
        elif current not in seen:
            seen.add(current)
            pending.append((current, True))
            pending.extend((source, False) for source in current._prev if source not in seen)

    # Seed the output gradient.
    self.grad = 1.0

    # ``order`` lists inputs before their consumers; walk it backwards so
    # each consumer has its gradient complete before it propagates.
    for current in reversed(order):
        propagate = current._backward
        if propagate is not None:
            propagate()
zero_grad
zero_grad() -> None

Reset grad to 0 on this node and on every node reachable from it (intermediate nodes as well as leaves).

Call between training steps so gradients don't accumulate across batches (PyTorch's optim.zero_grad() semantics).

Source code in src\cds\nlp\autograd\tensor.py
def zero_grad(self) -> None:
    """Reset ``grad`` to 0.0 on this node and every node reachable from it.

    The walk follows ``_prev`` links, so intermediate nodes are
    cleared as well as leaves. Call between training steps so
    gradients don't accumulate across batches (PyTorch's
    ``optim.zero_grad()`` semantics).
    """
    visited: set[Tensor] = set()
    stack: list[Tensor] = [self]
    while stack:
        node = stack.pop()
        # This guard is reachable: a node with two consumers (for
        # example ``a`` in ``a * b + a``) can be pushed by one of them
        # while still unvisited and then pushed again by the other, so
        # the second pop must be skipped.
        if node in visited:
            continue
        visited.add(node)
        node.grad = 0.0
        for child in node._prev:
            if child not in visited:
                stack.append(child)

BPEMerge dataclass

A single BPE merge rule.

Attributes:

Name Type Description
pair tuple[str, str]

The adjacent symbol pair that was merged.

rank int

Priority — lower rank = applied earlier. When two merges could both apply, the lower-rank one wins. (Greedy encoding relies on this.)

new_token str

The merged token string (= pair[0] + pair[1]).

Source code in src\cds\nlp\bpe.py
@dataclass
class BPEMerge:
    """A single BPE merge rule.

    Attributes:
        pair: The adjacent symbol pair that was merged.
        rank: Priority — lower rank = applied earlier. When two merges
            could both apply, the lower-rank one wins. (Greedy encoding
            relies on this.)
        new_token: The merged token string (= ``pair[0] + pair[1]``).
    """

    pair: tuple[str, str]
    rank: int
    new_token: str

    def to_dict(self) -> dict[str, object]:
        """Serialize this merge to a JSON-friendly dict.

        The ``pair`` tuple is emitted as a two-element list.
        """
        serialized: dict[str, object] = {
            "pair": list(self.pair),
            "rank": self.rank,
            "new_token": self.new_token,
        }
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> BPEMerge:
        """Reconstruct a :class:`BPEMerge` from :meth:`to_dict` output.

        Raises:
            ValueError: if ``data`` is missing keys, has the wrong types,
                or the ``pair`` does not contain exactly two strings.
        """
        # Look the keys up first so that a missing key is reported as the
        # documented ValueError rather than leaking a KeyError.
        try:
            pair_raw = data["pair"]
            rank_raw = data["rank"]
            token_raw = data["new_token"]
        except KeyError as exc:
            raise ValueError(f"Missing key in BPE merge: {exc.args[0]!r}") from exc

        if not isinstance(pair_raw, list) or len(pair_raw) != 2:
            raise ValueError(f"Invalid pair in BPE merge: {pair_raw!r}")
        a_raw, b_raw = pair_raw[0], pair_raw[1]
        if not isinstance(a_raw, str) or not isinstance(b_raw, str):
            raise ValueError(f"Invalid pair components: {pair_raw!r}")
        # bool is a subclass of int, so it has to be rejected explicitly.
        if not isinstance(rank_raw, int) or isinstance(rank_raw, bool):
            raise ValueError(f"Invalid rank: {rank_raw!r}")
        if not isinstance(token_raw, str):
            raise ValueError(f"Invalid new_token: {token_raw!r}")
        return cls(pair=(a_raw, b_raw), rank=rank_raw, new_token=token_raw)
Methods:
to_dict
to_dict() -> dict[str, object]

Serialize this merge to a JSON-friendly dict.

Source code in src\cds\nlp\bpe.py
def to_dict(self) -> dict[str, object]:
    """Serialize this merge to a JSON-friendly dict.

    The ``pair`` tuple is emitted as a two-element list.
    """
    serialized: dict[str, object] = {
        "pair": list(self.pair),
        "rank": self.rank,
        "new_token": self.new_token,
    }
    return serialized
from_dict classmethod
from_dict(data: dict[str, object]) -> BPEMerge

Reconstruct a :class:BPEMerge from :meth:to_dict output.

Raises:

Type Description
ValueError

if data is missing keys, has the wrong types, or the pair does not contain exactly two strings.

Source code in src\cds\nlp\bpe.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> BPEMerge:
    """Reconstruct a :class:`BPEMerge` from :meth:`to_dict` output.

    Raises:
        ValueError: if ``data`` is missing keys, has the wrong types,
            or the ``pair`` does not contain exactly two strings.
    """
    # Look the keys up first so that a missing key is reported as the
    # documented ValueError rather than leaking a KeyError.
    try:
        pair_raw = data["pair"]
        rank_raw = data["rank"]
        token_raw = data["new_token"]
    except KeyError as exc:
        raise ValueError(f"Missing key in BPE merge: {exc.args[0]!r}") from exc

    if not isinstance(pair_raw, list) or len(pair_raw) != 2:
        raise ValueError(f"Invalid pair in BPE merge: {pair_raw!r}")
    a_raw, b_raw = pair_raw[0], pair_raw[1]
    if not isinstance(a_raw, str) or not isinstance(b_raw, str):
        raise ValueError(f"Invalid pair components: {pair_raw!r}")
    # bool is a subclass of int, so it has to be rejected explicitly.
    if not isinstance(rank_raw, int) or isinstance(rank_raw, bool):
        raise ValueError(f"Invalid rank: {rank_raw!r}")
    if not isinstance(token_raw, str):
        raise ValueError(f"Invalid new_token: {token_raw!r}")
    return cls(pair=(a_raw, b_raw), rank=rank_raw, new_token=token_raw)

BPETokenizer dataclass

A trained byte-pair encoding tokenizer.

Construct one with :func:train_bpe, or load a previously saved one with :meth:load. Encoding is greedy: scan input left-to-right, find the longest contiguous substring that is in the vocab, emit its id, advance. Unknown characters fall back to <unk> (id 0).

Attributes:

Name Type Description
vocab dict[str, int]

Mapping from token string → integer id.

id_to_token dict[int, str]

Inverse of vocab, built once in __post_init__; the id_to_token property returns a copy of it.

merges list[BPEMerge]

List of BPE merge rules, in training order. The index of a rule is its priority (lower = earlier).

eow str

End-of-word marker symbol appended during training.

Source code in src\cds\nlp\bpe.py
@dataclass
class BPETokenizer:
    """A trained byte-pair encoding tokenizer.

    Construct one with :func:`train_bpe`, or load a previously saved one
    with :meth:`load`. Encoding is greedy: scan input left-to-right,
    find the longest contiguous substring that is in the vocab, emit
    its id, advance. Unknown characters fall back to ``<unk>`` (id 0).

    Attributes:
        vocab: Mapping from token string → integer id.
        id_to_token: Inverse of ``vocab``, built once in
            ``__post_init__``; the property returns a copy of it.
        merges: List of BPE merge rules, in training order. The index
            of a rule is its priority (lower = earlier).
        eow: End-of-word marker symbol appended during training.
    """

    vocab: dict[str, int] = field(default_factory=dict)
    merges: list[BPEMerge] = field(default_factory=list)
    eow: str = _END_OF_WORD

    def __post_init__(self) -> None:
        # Build the inverse vocabulary once so decode() can map each id
        # with a single dict lookup. It is a snapshot: later mutation of
        # ``vocab`` is not reflected here.
        self._id_to_token: dict[int, str] = {i: t for t, i in self.vocab.items()}

    @property
    def vocab_size(self) -> int:
        """Number of tokens in the vocabulary (including specials)."""
        return len(self.vocab)

    @property
    def id_to_token(self) -> dict[int, str]:
        """Copy of the id → token mapping (mutating it has no effect)."""
        return dict(self._id_to_token)

    # ------------------------------------------------------------------ #
    # Encoding
    # ------------------------------------------------------------------ #

    def encode(self, text: str, add_specials: bool = False) -> list[int]:
        """Encode ``text`` into a list of token ids.

        Greedy longest-match: scan the input, at each position take the
        longest substring that exists in the vocabulary. If no character
        matches, emit ``<unk>`` and advance by one.

        Args:
            text: Raw input string. Will be pre-tokenised on whitespace
                and punctuation.
            add_specials: If True, prepend ``<bos>`` (id 2) and append
                ``<eos>`` (id 3). Off by default — the educational
                pipeline wants raw token streams. A special that is not
                in the vocabulary is silently left out.

        Raises:
            ValueError: if the vocabulary is empty.
        """
        if not self.vocab:
            raise ValueError("Vocabulary is empty. Train or load a tokenizer first.")

        words = _pre_tokenize(text)
        ids: list[int] = []
        if add_specials:
            bos_id = self.vocab.get(BOS)
            if bos_id is not None:
                ids.append(bos_id)

        for word in words:
            ids.extend(self._encode_word(word))

        if add_specials:
            eos_id = self.vocab.get(EOS)
            if eos_id is not None:
                ids.append(eos_id)
        return ids

    def _encode_word(self, word: str) -> list[int]:
        """Greedy-encode a single pre-tokenised word.

        We try every contiguous substring starting at the current cursor,
        longest first, and pick the first one that's in the vocab. This
        is O(L²) per word but L is small (typical English word ≤ 20 chars).
        """
        symbols = _word_to_symbols(word)
        result: list[int] = []
        i = 0
        n = len(symbols)
        while i < n:
            # Longest contiguous substring starting at i that is in vocab.
            matched = False
            # Try end positions from n down to i + 1, so the first hit is
            # the longest candidate.
            for j in range(n, i, -1):
                candidate = "".join(symbols[i:j])
                token_id = self.vocab.get(candidate)
                if token_id is not None:
                    result.append(token_id)
                    i = j
                    matched = True
                    break
            if not matched:
                # Single character not in vocab — emit unk and skip it.
                # This is unreachable in practice because every training
                # character is in the base vocab, but keep the guard.
                unk_id = self.vocab.get(UNK, 0)
                result.append(unk_id)
                i += 1
        return result

    # ------------------------------------------------------------------ #
    # Decoding
    # ------------------------------------------------------------------ #

    def decode(self, ids: list[int], strip_eow: bool = True) -> str:
        """Decode a list of ids back to a string.

        Args:
            ids: Token id sequence (must be non-negative integers in
                ``[0, vocab_size)``).
            strip_eow: If True (default), drop the ``</w>`` marker from
                the end of each word and join with spaces. Works for
                both the literal ``</w>`` token *and* merged tokens
                that end in ``</w>`` (e.g. ``"low</w>"``). If False,
                concatenate the raw token strings.

        Raises:
            ValueError: if ``strip_eow`` is True and an id is not in the
                vocabulary. With ``strip_eow=False`` unknown ids are
                skipped instead.
        """
        if strip_eow:
            words: list[str] = []
            current: list[str] = []
            for tid in ids:
                tok = self._id_to_token.get(tid)
                if tok is None:
                    raise ValueError(f"Unknown token id: {tid}")
                if tok == UNK:
                    current.append("�")
                # NOTE(review): this matches the module-level marker, not
                # ``self.eow`` — confirm the two can never differ.
                elif tok.endswith(_END_OF_WORD):
                    # Either a literal eow token ("</w>") or a merged
                    # token that ends with it ("low</w>"). Flush the
                    # current word minus the suffix.
                    stem = tok[: -len(_END_OF_WORD)]
                    current.append(stem)
                    words.append("".join(current))
                    current = []
                else:
                    current.append(tok)
            if current:
                words.append("".join(current))
            # Drop empty words, which arise when a bare end-of-word token
            # is flushed with no symbols pending before it.
            words = [w for w in words if w]
            return " ".join(words)
        # Raw concatenation — useful for inspecting token boundaries.
        return "".join(self._id_to_token.get(tid, "") for tid in ids)

    # ------------------------------------------------------------------ #
    # Persistence
    # ------------------------------------------------------------------ #

    def save(self, path: str | Path) -> None:
        """Save the tokenizer to a JSON file.

        Format::

            {
                "vocab": {"<unk>": 0, "a": 1, ...},
                "merges": [{"pair": ["a", "b"], "rank": 0, "new_token": "ab"}, ...],
                "eow": "</w>"
            }
        """
        payload = {
            "vocab": self.vocab,
            "merges": [m.to_dict() for m in self.merges],
            "eow": self.eow,
        }
        Path(path).write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    @classmethod
    def load(cls, path: str | Path) -> BPETokenizer:
        """Load a tokenizer previously saved with :meth:`save`.

        Raises:
            ValueError: if the file's top-level value is not a JSON
                object, if ``vocab`` is missing or is not a mapping of
                tokens to integer ids, or if ``merges`` is malformed.
        """
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        # Validate the container and the vocab the same way merges are
        # validated, so a malformed file fails with ValueError instead of
        # an AttributeError / KeyError from deeper in the call.
        if not isinstance(data, dict):
            raise ValueError("Invalid tokenizer file: expected a JSON object")
        vocab_raw = data.get("vocab")
        if not isinstance(vocab_raw, dict):
            raise ValueError("Invalid vocab field in tokenizer file")
        for token, token_id in vocab_raw.items():
            # bool is a subclass of int, so it has to be rejected explicitly.
            if not isinstance(token_id, int) or isinstance(token_id, bool):
                raise ValueError(f"Invalid id for token {token!r}: {token_id!r}")
        merges_raw = data.get("merges", [])
        if not isinstance(merges_raw, list):
            raise ValueError("Invalid merges field in tokenizer file")
        merges: list[BPEMerge] = []
        for raw_merge in merges_raw:
            if not isinstance(raw_merge, dict):
                raise ValueError(f"Invalid merge entry: {raw_merge!r}")
            merges.append(BPEMerge.from_dict(raw_merge))
        return cls(
            vocab=dict(vocab_raw),
            merges=merges,
            eow=str(data.get("eow", _END_OF_WORD)),
        )
Attributes
vocab_size property
vocab_size: int

Number of tokens in the vocabulary (including specials).

id_to_token property
id_to_token: dict[int, str]

Read-only view of id → token mapping.

Methods:
encode
encode(text: str, add_specials: bool = False) -> list[int]

Encode text into a list of token ids.

Greedy longest-match: scan the input, at each position take the longest substring that exists in the vocabulary. If no character matches, emit <unk> and advance by one.

Parameters:

Name Type Description Default
text str

Raw input string. Will be pre-tokenised on whitespace and punctuation.

required
add_specials bool

If True, prepend <bos> (id 2) and append <eos> (id 3). Off by default — the educational pipeline wants raw token streams.

False
Source code in src\cds\nlp\bpe.py
def encode(self, text: str, add_specials: bool = False) -> list[int]:
    """Convert ``text`` into a flat list of token ids.

    The text is split by ``_pre_tokenize`` and every resulting piece is
    encoded by ``self._encode_word``; the per-piece ids are concatenated
    in order.

    Args:
        text: Raw input string.
        add_specials: If True, the ids of ``BOS`` and ``EOS`` are looked
            up in the vocabulary and placed at the start and end of the
            result. A special that is missing from the vocabulary is
            silently skipped.

    Returns:
        The token ids for ``text``.

    Raises:
        ValueError: If the vocabulary is empty.
    """
    if not self.vocab:
        raise ValueError("Vocabulary is empty. Train or load a tokenizer first.")

    pieces = _pre_tokenize(text)
    token_ids: list[int] = []

    def _append_special(marker) -> None:
        # Specials are optional: only emit the ones the vocabulary knows.
        special_id = self.vocab.get(marker)
        if special_id is not None:
            token_ids.append(special_id)

    if add_specials:
        _append_special(BOS)
    for piece in pieces:
        token_ids.extend(self._encode_word(piece))
    if add_specials:
        _append_special(EOS)
    return token_ids
decode
decode(ids: list[int], strip_eow: bool = True) -> str

Decode a list of ids back to a string.

Parameters:

Name Type Description Default
ids list[int]

Token id sequence (must be non-negative integers in [0, vocab_size)).

required
strip_eow bool

If True (default), drop the </w> marker from the end of each word and join with spaces. Works for both the literal </w> token and merged tokens that end in </w> (e.g. "low</w>"). If False, concatenate the raw token strings.

True
Source code in src\cds\nlp\bpe.py
def decode(self, ids: list[int], strip_eow: bool = True) -> str:
    """Turn a sequence of token ids back into text.

    Args:
        ids: Token ids to decode.
        strip_eow: If True (default), tokens are grouped into words: a
            token ending in the end-of-word marker closes the current
            word (the marker itself is removed), and the words are
            joined with single spaces. If False, the raw token strings
            are concatenated as-is.

    Returns:
        The decoded string.

    Raises:
        ValueError: In ``strip_eow`` mode, if an id is not in the
            id-to-token table. In raw mode unknown ids are skipped
            instead.
    """
    if not strip_eow:
        # Raw concatenation — useful for inspecting token boundaries.
        return "".join(self._id_to_token.get(token_id, "") for token_id in ids)

    finished: list[str] = []
    pending: list[str] = []
    marker_len = len(_END_OF_WORD)
    for token_id in ids:
        token = self._id_to_token.get(token_id)
        if token is None:
            raise ValueError(f"Unknown token id: {token_id}")
        if token == UNK:
            # Unknown tokens are rendered as a placeholder character.
            pending.append("�")
            continue
        if not token.endswith(_END_OF_WORD):
            pending.append(token)
            continue
        # Either the bare marker ("</w>") or a merged token carrying it
        # ("low</w>"): keep the stem and close the current word.
        pending.append(token[:-marker_len])
        finished.append("".join(pending))
        pending = []

    if pending:
        finished.append("".join(pending))
    # A marker with nothing before it yields an empty word; drop those so
    # the output has no doubled spaces.
    return " ".join(word for word in finished if word)
save
save(path: str | Path) -> None

Save the tokenizer to a JSON file.

Format::

{
    "vocab": {"<unk>": 0, "a": 1, ...},
    "merges": [{"pair": ["a", "b"], "rank": 0, "new_token": "ab"}, ...],
    "eow": "</w>"
}
Source code in src\cds\nlp\bpe.py
def save(self, path: str | Path) -> None:
    """Write the tokenizer to ``path`` as pretty-printed UTF-8 JSON.

    The file holds three top-level keys, in this order::

        {
            "vocab": {"<unk>": 0, "a": 1, ...},
            "merges": [{"pair": ["a", "b"], "rank": 0, "new_token": "ab"}, ...],
            "eow": "</w>"
        }

    Args:
        path: Destination file. Any existing content is replaced.
    """
    document = {"vocab": self.vocab}
    document["merges"] = [merge.to_dict() for merge in self.merges]
    document["eow"] = self.eow
    # ensure_ascii=False keeps non-ASCII tokens human-readable in the file.
    serialised = json.dumps(document, ensure_ascii=False, indent=2)
    Path(path).write_text(serialised, encoding="utf-8")
load classmethod
load(path: str | Path) -> BPETokenizer

Load a tokenizer previously saved with `save`.

Source code in src\cds\nlp\bpe.py
@classmethod
def load(cls, path: str | Path) -> BPETokenizer:
    """Rebuild a tokenizer from a JSON file written by :meth:`save`.

    Args:
        path: Location of the JSON file; it is read as UTF-8.

    Returns:
        A new instance built from the file's ``vocab``, ``merges`` and
        ``eow`` fields. A missing ``merges`` field is treated as an
        empty list; a missing ``eow`` falls back to ``_END_OF_WORD``.

    Raises:
        ValueError: If ``merges`` is not a list, or one of its entries
            is not a dict.
        KeyError: If the file has no ``vocab`` field.
    """
    payload = json.loads(Path(path).read_text(encoding="utf-8"))

    raw_entries = payload.get("merges", [])
    if not isinstance(raw_entries, list):
        raise ValueError("Invalid merges field in tokenizer file")

    # Validate each entry's container type before handing it to BPEMerge.
    parsed: list[BPEMerge] = []
    for entry in raw_entries:
        if isinstance(entry, dict):
            parsed.append(BPEMerge.from_dict(entry))
        else:
            raise ValueError(f"Invalid merge entry: {entry!r}")

    vocab_copy = dict(payload["vocab"])
    eow_marker = str(payload.get("eow", _END_OF_WORD))
    return cls(vocab=vocab_copy, merges=parsed, eow=eow_marker)

PositionalEncoding dataclass

Sinusoidal positional encoding from Vaswani et al. (2017).

$$PE_{(pos,\,2i)} = \sin\!\left(\frac{pos}{10000^{2i/d_{\text{model}}}}\right)$$

$$PE_{(pos,\,2i+1)} = \cos\!\left(\frac{pos}{10000^{2i/d_{\text{model}}}}\right)$$

where pos is the zero-based position and i indexes pairs of embedding dimensions (columns 2i and 2i+1 share one frequency). The matrix is computed once at construction and reused for every forward pass.

Attributes:

Name Type Description
max_len int

Maximum sequence length the encoding supports.

d_model int

Embedding dimensionality (must match the token embedding it's added to).

matrix list[list[float]]

The precomputed max_len × d_model encoding matrix.

Source code in src\cds\nlp\embed.py
@dataclass
class PositionalEncoding:
    """Sinusoidal positional encoding from Vaswani et al. (2017).

    PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
    PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))

    where ``pos`` is the zero-based position and ``i`` indexes pairs of
    embedding dimensions (columns ``2i`` and ``2i+1`` share one
    frequency). The matrix is computed once at construction and reused
    for every forward pass.

    Attributes:
        max_len: Maximum sequence length the encoding supports.
        d_model: Embedding dimensionality (must match the token
            embedding it's added to).
        matrix: The precomputed ``max_len × d_model`` encoding matrix.

    Raises:
        ValueError: On construction, if ``max_len`` or ``d_model`` is
            not strictly positive.
    """

    max_len: int
    d_model: int
    matrix: list[list[float]] = field(init=False)

    def __post_init__(self) -> None:
        if self.max_len <= 0:
            raise ValueError(f"max_len must be > 0, got {self.max_len}")
        if self.d_model <= 0:
            raise ValueError(f"d_model must be > 0, got {self.d_model}")

        # The divisor 10000^(2i/d_model) depends only on the column, so it
        # is computed once per column rather than once per matrix cell.
        # Columns 2i and 2i+1 share a divisor; it grows geometrically
        # across the embedding dimension, so later columns oscillate
        # more slowly with position.
        divisors = [
            10000.0 ** ((2 * (col // 2)) / self.d_model) for col in range(self.d_model)
        ]
        # Even column → sin, odd column → cos.
        self.matrix = [
            [
                math.sin(pos / div) if col % 2 == 0 else math.cos(pos / div)
                for col, div in enumerate(divisors)
            ]
            for pos in range(self.max_len)
        ]

    def forward(self, length: int) -> list[list[float]]:
        """Return the first ``length`` rows of the encoding matrix.

        Args:
            length: Desired output length (must be in ``[0, max_len]``).

        Returns:
            A ``length × d_model`` nested list. Rows are copies, so the
            caller may mutate them without affecting ``matrix``.

        Raises:
            ValueError: If ``length`` is negative or exceeds ``max_len``.
        """
        if length < 0:
            raise ValueError(f"length must be >= 0, got {length}")
        if length > self.max_len:
            raise ValueError(f"length {length} exceeds max_len {self.max_len}")
        return [list(self.matrix[i]) for i in range(length)]

    @property
    def shape(self) -> tuple[int, int]:
        """Returns ``(max_len, d_model)``."""
        return (self.max_len, self.d_model)
Attributes
shape property
shape: tuple[int, int]

Returns (max_len, d_model).

Methods:
forward
forward(length: int) -> list[list[float]]

Return the first length rows of the encoding matrix.

Parameters:

Name Type Description Default
length int

Desired output length (must be <= max_len).

required
Source code in src\cds\nlp\embed.py
def forward(self, length: int) -> list[list[float]]:
    """Slice the leading rows out of the precomputed encoding table.

    Args:
        length: How many rows to return; must lie in ``[0, max_len]``.

    Returns:
        A list with ``length`` rows. Each row is a fresh copy of the
        corresponding row of ``self.matrix``.

    Raises:
        ValueError: If ``length`` is negative or larger than ``max_len``.
    """
    if length < 0:
        raise ValueError(f"length must be >= 0, got {length}")
    limit = self.max_len
    if length > limit:
        raise ValueError(f"length {length} exceeds max_len {limit}")

    table = self.matrix
    prefix: list[list[float]] = []
    for row_index in range(length):
        # Copy so callers cannot corrupt the shared table.
        prefix.append(list(table[row_index]))
    return prefix

TokenEmbedding dataclass

A token-id → dense-vector lookup table.

Initialised with small random values from a fixed RNG seed so the educational pipeline is reproducible. Training (gradient updates) is handled by the autograd module; this layer only stores and looks up weights — call `forward` to embed token ids, and `set_value` to hand-write the weights of a single row.

Attributes:

Name Type Description
vocab_size int

Number of rows in the embedding table.

d_model int

Embedding dimensionality (output width).

matrix list[list[float]]

The vocab_size × d_model weight matrix as nested lists. matrix[id][j] is the j-th component of the embedding for token id id.

Source code in src\cds\nlp\embed.py
@dataclass
class TokenEmbedding:
    """A token-id → dense-vector lookup table.

    Initialised with small random values from a fixed RNG seed so that
    every instance with the same shape starts from identical weights.
    The class only stores and looks up weights: call :meth:`forward` to
    embed token ids and :meth:`set_value` to hand-write a single row.

    Attributes:
        vocab_size: Number of rows in the embedding table.
        d_model: Embedding dimensionality (output width).
        matrix: The ``vocab_size × d_model`` weight matrix as nested
            lists. ``matrix[id][j]`` is the j-th component of the
            embedding for token id ``id``.

    Raises:
        ValueError: On construction, if ``vocab_size`` or ``d_model`` is
            not strictly positive.
    """

    vocab_size: int
    d_model: int
    matrix: list[list[float]] = field(init=False)

    def __post_init__(self) -> None:
        if self.vocab_size <= 0:
            raise ValueError(f"vocab_size must be > 0, got {self.vocab_size}")
        if self.d_model <= 0:
            raise ValueError(f"d_model must be > 0, got {self.d_model}")
        # Deterministic init via a fixed seed so test runs are
        # reproducible. Weights are drawn uniformly from
        # [-1/sqrt(d_model), 1/sqrt(d_model)].
        import random

        rng = random.Random(0xC0FFEE)
        bound = 1.0 / math.sqrt(self.d_model)
        self.matrix = [
            [rng.uniform(-bound, bound) for _ in range(self.d_model)]
            for _ in range(self.vocab_size)
        ]

    def forward(self, ids: list[int]) -> list[list[float]]:
        """Look up embeddings for a sequence of token ids.

        Args:
            ids: List of token ids (must be in ``[0, vocab_size)``).

        Returns:
            A ``len(ids) × d_model`` matrix (nested list).

        Raises:
            IndexError: If any id is outside ``[0, vocab_size)``.
        """
        out: list[list[float]] = []
        for tid in ids:
            if tid < 0 or tid >= self.vocab_size:
                raise IndexError(f"Token id {tid} out of range [0, {self.vocab_size})")
            # Defensive copy — lets callers mutate the result without
            # poisoning the table.
            out.append(list(self.matrix[tid]))
        return out

    def set_value(self, token_id: int, values: list[float]) -> None:
        """Overwrite the embedding for ``token_id`` (used by tests).

        Args:
            token_id: Row to replace (must be in ``[0, vocab_size)``).
            values: New weights; must have exactly ``d_model`` entries.
                The list is copied, so later changes to ``values`` do
                not affect the table.

        Raises:
            ValueError: If ``len(values) != d_model``.
            IndexError: If ``token_id`` is outside ``[0, vocab_size)``.
        """
        if len(values) != self.d_model:
            raise ValueError(f"values length {len(values)} != d_model {self.d_model}")
        # Reject negative ids explicitly: plain list indexing would wrap
        # around and silently overwrite a different token's row.
        if token_id < 0 or token_id >= self.vocab_size:
            raise IndexError(f"Token id {token_id} out of range [0, {self.vocab_size})")
        self.matrix[token_id] = list(values)

    @property
    def shape(self) -> tuple[int, int]:
        """Returns ``(vocab_size, d_model)``."""
        return (self.vocab_size, self.d_model)
Attributes
shape property
shape: tuple[int, int]

Returns (vocab_size, d_model).

Methods:
forward
forward(ids: list[int]) -> list[list[float]]

Look up embeddings for a sequence of token ids.

Parameters:

Name Type Description Default
ids list[int]

List of token ids (must be in [0, vocab_size)).

required

Returns:

Type Description
list[list[float]]

A len(ids) × d_model matrix (nested list).

Source code in src\cds\nlp\embed.py
def forward(self, ids: list[int]) -> list[list[float]]:
    """Fetch one embedding row per token id.

    Args:
        ids: Token ids to embed; each must lie in ``[0, vocab_size)``.

    Returns:
        A nested list with one row per id, in input order. Rows are
        copies of the table rows, so mutating the result leaves the
        table untouched.

    Raises:
        IndexError: If any id falls outside ``[0, vocab_size)``.
    """
    size = self.vocab_size
    table = self.matrix
    rows: list[list[float]] = []
    for token_id in ids:
        out_of_range = token_id < 0 or token_id >= size
        if out_of_range:
            raise IndexError(f"Token id {token_id} out of range [0, {size})")
        rows.append(list(table[token_id]))
    return rows
set_value
set_value(token_id: int, values: list[float]) -> None

Overwrite the embedding for token_id (used by tests).

Source code in src\cds\nlp\embed.py
def set_value(self, token_id: int, values: list[float]) -> None:
    """Overwrite the embedding for ``token_id`` (used by tests).

    Args:
        token_id: Row to replace (must be in ``[0, vocab_size)``).
        values: New weights; must have exactly ``d_model`` entries. The
            list is copied, so later changes to ``values`` do not
            affect the table.

    Raises:
        ValueError: If ``len(values) != d_model``.
        IndexError: If ``token_id`` is outside ``[0, vocab_size)``.
    """
    if len(values) != self.d_model:
        raise ValueError(f"values length {len(values)} != d_model {self.d_model}")
    # Reject negative ids explicitly: plain list indexing would wrap
    # around and silently overwrite a different token's row.
    if token_id < 0 or token_id >= self.vocab_size:
        raise IndexError(f"Token id {token_id} out of range [0, {self.vocab_size})")
    self.matrix[token_id] = list(values)

SGD dataclass

Stochastic gradient descent with optional momentum.

Parameters:

Name Type Description Default
params list[Tensor]

List of `Parameter` objects (or any `cds.nlp.autograd.Tensor` with requires_grad=True) to update.

required
lr float

Learning rate. Must be positive.

SGD_DEFAULT_LR
momentum float

Momentum factor in [0, 1). 0 reduces to vanilla SGD. > 0 updates each parameter with v = momentum * v + grad; p -= lr * v.

0.0
weight_decay float

Optional L2 penalty coefficient. Adds weight_decay * p.data to the gradient at every step.

0.0
Source code in src\cds\nlp\optim.py
@dataclass
class SGD:
    """Plain or momentum-accelerated stochastic gradient descent.

    Args:
        params: Tensors to update. Each must expose ``.data`` and
            ``.grad``; the collection must support ``len()``.
        lr: Learning rate; must be strictly positive.
        momentum: Momentum factor in ``[0, 1)``. With ``0`` the update
            is ``p -= lr * grad``; otherwise each parameter keeps a
            velocity ``v = momentum * v + grad`` and moves by
            ``p -= lr * v``.
        weight_decay: Non-negative L2 coefficient. When non-zero,
            ``weight_decay * p.data`` is added to the gradient before
            the update.

    Raises:
        ValueError: On construction, if any hyper-parameter is outside
            its allowed range.
    """

    params: list[Tensor]
    lr: float = SGD_DEFAULT_LR
    momentum: float = 0.0
    weight_decay: float = 0.0
    _velocities: list[float] = field(init=False, default_factory=list)

    def __post_init__(self) -> None:
        if self.lr <= 0:
            raise ValueError(f"lr must be > 0, got {self.lr}")
        if not 0.0 <= self.momentum < 1.0:
            raise ValueError(f"momentum must be in [0, 1), got {self.momentum}")
        if self.weight_decay < 0:
            raise ValueError(f"weight_decay must be >= 0, got {self.weight_decay}")
        # One velocity slot per parameter, all starting at rest.
        slot_count = len(self.params)
        self._velocities = [0.0 for _ in range(slot_count)]

    def step(self) -> None:
        """Update every parameter in place from its current gradient.

        Call this after ``loss.backward()`` has filled ``.grad`` and
        before ``zero_grad()`` resets it.
        """
        rate = self.lr
        for index, param in enumerate(self.params):
            if self.weight_decay:
                effective = param.grad + self.weight_decay * param.data
            else:
                effective = param.grad

            if self.momentum == 0.0:
                param.data -= rate * effective
                continue

            velocity = self.momentum * self._velocities[index] + effective
            self._velocities[index] = velocity
            param.data -= rate * velocity

    def zero_grad(self) -> None:
        """Set ``.grad`` to ``0.0`` on every parameter. Call between batches."""
        for param in self.params:
            param.grad = 0.0
Methods:
step
step() -> None

Apply one update to each parameter.

Must be called after loss.backward() and before zero_grad() — zero_grad() resets every gradient to 0, so stepping after it would discard the gradients that were just computed.

Source code in src\cds\nlp\optim.py
def step(self) -> None:
    """Update every parameter in place from its current gradient.

    With ``momentum == 0`` each parameter moves by ``-lr * grad``;
    otherwise the per-parameter velocity is refreshed as
    ``v = momentum * v + grad`` and the parameter moves by ``-lr * v``.
    A non-zero ``weight_decay`` adds ``weight_decay * p.data`` to the
    gradient first.

    Call this after ``loss.backward()`` has filled ``.grad`` and before
    ``zero_grad()`` resets it.
    """
    rate = self.lr
    for index, param in enumerate(self.params):
        if self.weight_decay:
            effective = param.grad + self.weight_decay * param.data
        else:
            effective = param.grad

        if self.momentum == 0.0:
            param.data -= rate * effective
            continue

        velocity = self.momentum * self._velocities[index] + effective
        self._velocities[index] = velocity
        param.data -= rate * velocity
zero_grad
zero_grad() -> None

Reset all parameter gradients to 0. Call between batches.

Source code in src\cds\nlp\optim.py
def zero_grad(self) -> None:
    """Clear accumulated gradients by setting ``.grad`` to ``0.0`` on every
    parameter. Call between batches.
    """
    for param in self.params:
        param.grad = 0.0

Adam dataclass

Adam optimiser (Kingma & Ba 2014).

Maintains per-parameter first and second moment estimates with bias correction. Defaults match the paper (betas=(0.9, 0.999), eps=1e-8).

Parameters:

Name Type Description Default
params list[Tensor]

Trainable parameters.

required
lr float

Learning rate. Typical values for transformer training are in the 3e-4 to 1e-3 range.

ADAM_DEFAULT_LR
betas tuple[float, float]

Coefficients for the first and second moment moving averages.

ADAM_DEFAULT_BETAS
eps float

Epsilon for numerical stability in the denominator.

ADAM_DEFAULT_EPS
weight_decay float

Optional L2 penalty coefficient.

0.0
Source code in src\cds\nlp\optim.py
@dataclass
class Adam:
    """Adam optimiser (Kingma & Ba 2014).

    Tracks a running mean of the gradient and of its square for each
    parameter, applies bias correction to both, and scales the step by
    their ratio. Default hyper-parameters come from the module-level
    ``ADAM_DEFAULT_*`` constants.

    Args:
        params: Trainable parameters. Each must expose ``.data`` and
            ``.grad``; the collection must support ``len()``.
        lr: Learning rate; must be strictly positive.
        betas: Decay rates for the first and second moment averages;
            each must lie in ``[0, 1)``.
        eps: Strictly positive constant added to the denominator for
            numerical stability.
        weight_decay: Non-negative L2 coefficient. When non-zero,
            ``weight_decay * p.data`` is added to the gradient.

    Raises:
        ValueError: On construction, if any hyper-parameter is outside
            its allowed range.
    """

    params: list[Tensor]
    lr: float = ADAM_DEFAULT_LR
    betas: tuple[float, float] = ADAM_DEFAULT_BETAS
    eps: float = ADAM_DEFAULT_EPS
    weight_decay: float = 0.0
    _t: int = field(init=False, default=0)
    _m: list[float] = field(init=False, default_factory=list)
    _v: list[float] = field(init=False, default_factory=list)

    def __post_init__(self) -> None:
        if self.lr <= 0:
            raise ValueError(f"lr must be > 0, got {self.lr}")
        if not (0.0 <= self.betas[0] < 1.0 and 0.0 <= self.betas[1] < 1.0):
            raise ValueError(f"betas must each be in [0, 1), got {self.betas}")
        if self.eps <= 0:
            raise ValueError(f"eps must be > 0, got {self.eps}")
        if self.weight_decay < 0:
            raise ValueError(f"weight_decay must be >= 0, got {self.weight_decay}")
        # Both moment buffers start at zero, one slot per parameter.
        slot_count = len(self.params)
        self._m = [0.0 for _ in range(slot_count)]
        self._v = [0.0 for _ in range(slot_count)]

    def step(self) -> None:
        """Advance the step counter and update every parameter in place."""
        self._t += 1
        beta1, beta2 = self.betas
        for idx, param in enumerate(self.params):
            if self.weight_decay:
                g = param.grad + self.weight_decay * param.data
            else:
                g = param.grad

            first = beta1 * self._m[idx] + (1.0 - beta1) * g
            self._m[idx] = first
            second = beta2 * self._v[idx] + (1.0 - beta2) * (g * g)
            self._v[idx] = second

            # The averages start at zero, so early estimates are biased
            # towards zero; dividing by (1 - beta^t) undoes that.
            first_hat = first / (1.0 - beta1**self._t)
            second_hat = second / (1.0 - beta2**self._t)
            param.data -= self.lr * first_hat / (math.sqrt(second_hat) + self.eps)

    def zero_grad(self) -> None:
        """Set ``.grad`` to ``0.0`` on every managed parameter."""
        for param in self.params:
            param.grad = 0.0
Methods:
step
step() -> None

Apply one update. Increment step counter internally.

Source code in src\cds\nlp\optim.py
def step(self) -> None:
    """Advance the step counter and update every parameter in place.

    For each parameter the first and second moment averages are
    refreshed from the current gradient (plus ``weight_decay * p.data``
    when weight decay is non-zero), bias-corrected, and used to move
    ``p.data`` by ``-lr * m_hat / (sqrt(v_hat) + eps)``.
    """
    self._t += 1
    beta1, beta2 = self.betas
    for idx, param in enumerate(self.params):
        if self.weight_decay:
            g = param.grad + self.weight_decay * param.data
        else:
            g = param.grad

        first = beta1 * self._m[idx] + (1.0 - beta1) * g
        self._m[idx] = first
        second = beta2 * self._v[idx] + (1.0 - beta2) * (g * g)
        self._v[idx] = second

        # The averages start at zero, so early estimates are biased
        # towards zero; dividing by (1 - beta^t) undoes that.
        first_hat = first / (1.0 - beta1**self._t)
        second_hat = second / (1.0 - beta2**self._t)
        param.data -= self.lr * first_hat / (math.sqrt(second_hat) + self.eps)
zero_grad
zero_grad() -> None

Reset .grad to 0.0 on every managed parameter.

Source code in src\cds\nlp\optim.py
def zero_grad(self) -> None:
    """Clear accumulated gradients: set ``.grad`` to ``0.0`` on every
    managed parameter.
    """
    for param in self.params:
        param.grad = 0.0

Functions:

causal_mask

causal_mask(n: int) -> list[list[float]]

Upper-triangular -inf mask for decoder self-attention.

Position i may attend to positions 0 through i (inclusive) and nothing else. The mask is added to the pre-softmax scores, so the -inf entries become zero probability after softmax.

Source code in src\cds\nlp\attention.py
def causal_mask(n: int) -> list[list[float]]:
    """Build the ``n × n`` additive mask for decoder self-attention.

    Row ``i`` holds ``0.0`` in columns ``0`` through ``i`` and
    ``_NEG_INF`` in every later column, so position ``i`` can only
    attend to itself and earlier positions once the mask is added to
    the pre-softmax scores.

    Args:
        n: Sequence length; must be non-negative.

    Returns:
        A list of ``n`` independent rows, each of length ``n``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be >= 0, got {n}")

    mask: list[list[float]] = []
    for query_pos in range(n):
        row: list[float] = []
        for key_pos in range(n):
            if key_pos <= query_pos:
                row.append(0.0)
            else:
                row.append(_NEG_INF)
        mask.append(row)
    return mask

merge_heads

merge_heads(
    heads: list[list[list[float]]],
) -> list[list[float]]

Inverse of `split_heads`: (n_heads, n, d_head) → (n, d_model).

Source code in src\cds\nlp\attention.py
def merge_heads(
    heads: list[list[list[float]]],
) -> list[list[float]]:
    """Inverse of :func:`split_heads`: ``(n_heads, n, d_head) → (n, d_model)``.

    Row ``i`` of the result is the concatenation of row ``i`` of every
    head, in head order, so ``d_model = n_heads * d_head``.

    Args:
        heads: Per-head matrices. The sequence length and head width
            are taken from the first head; every other head is read
            with those same dimensions.

    Returns:
        The merged ``(n, d_model)`` matrix, or ``[]`` when there are no
        heads or the heads contain no rows.

    Raises:
        IndexError: If a head has fewer rows, or a row fewer columns,
            than the first head.
    """
    if not heads:
        return []
    n = len(heads[0])
    if n == 0:
        return []
    d_head = len(heads[0][0])
    # Build each output row directly instead of pre-allocating a zero
    # matrix and filling it cell by cell.
    return [
        [head[i][j] for head in heads for j in range(d_head)]
        for i in range(n)
    ]

multi_head_attention

multi_head_attention(
    x: list[list[float]],
    w_q: list[list[float]],
    w_k: list[list[float]],
    w_v: list[list[float]],
    w_o: list[list[float]],
    n_heads: int,
    mask: list[list[float]] | None = None,
) -> list[list[float]]

Multi-head self-attention (Vaswani 2017 §3.2.2).

Parameters:

Name Type Description Default
x list[list[float]]

Input sequence, shape (n, d_model).

required
w_q list[list[float]]

Projection matrix for queries, shape (d_model, d_model).

required
w_k list[list[float]]

Projection matrix for keys, shape (d_model, d_model).

required
w_v list[list[float]]

Projection matrix for values, shape (d_model, d_model).

required
w_o list[list[float]]

Output projection, shape (d_model, d_model).

required
n_heads int

Number of attention heads. Must divide d_model.

required
mask list[list[float]] | None

Optional additive mask broadcast across all heads, shape (n, n).

None

Returns:

Type Description
list[list[float]]

Output sequence, shape (n, d_model).

Source code in src\cds\nlp\attention.py
def multi_head_attention(
    x: list[list[float]],
    w_q: list[list[float]],
    w_k: list[list[float]],
    w_v: list[list[float]],
    w_o: list[list[float]],
    n_heads: int,
    mask: list[list[float]] | None = None,
) -> list[list[float]]:
    """Multi-head self-attention (Vaswani 2017 §3.2.2).

    Args:
        x: Input sequence, shape ``(n, d_model)``.
        w_q: Projection matrix for queries, shape ``(d_model, d_model)``.
        w_k: Projection matrix for keys, shape ``(d_model, d_model)``.
        w_v: Projection matrix for values, shape ``(d_model, d_model)``.
        w_o: Output projection, shape ``(d_model, d_model)``.
        n_heads: Number of attention heads. Must be positive and divide
            ``d_model``.
        mask: Optional additive mask broadcast across all heads,
            shape ``(n, n)``.

    Returns:
        Output sequence, shape ``(n, d_model)``. An empty ``x`` yields
        ``[]`` without inspecting the other arguments.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide
            ``d_model``.
    """
    if not x:
        return []
    # Validate before the modulo below: ``d_model % 0`` would otherwise
    # surface as a bare ZeroDivisionError instead of the ValueError that
    # ``split_heads`` raises for the same mistake.
    if n_heads <= 0:
        raise ValueError(f"n_heads must be > 0, got {n_heads}")
    d_model = len(x[0])
    if d_model % n_heads != 0:
        raise ValueError(f"d_model {d_model} not divisible by n_heads {n_heads}")

    # 1. Project to Q, K, V.
    q = matmul(x, w_q)
    k = matmul(x, w_k)
    v = matmul(x, w_v)

    # 2. Split into heads, run per-head attention.
    qh = split_heads(q, n_heads)
    kh = split_heads(k, n_heads)
    vh = split_heads(v, n_heads)
    head_outputs = [scaled_dot_product_attention(qh[h], kh[h], vh[h], mask) for h in range(n_heads)]

    # 3. Concatenate heads, then project to the output space.
    merged = merge_heads(head_outputs)
    return matmul(merged, w_o)

scaled_dot_product_attention

scaled_dot_product_attention(
    q: list[list[float]],
    k: list[list[float]],
    v: list[list[float]],
    mask: list[list[float]] | None = None,
) -> list[list[float]]

Compute softmax(Q K^T / sqrt(d_k) + mask) V.

Parameters:

Name Type Description Default
q list[list[float]]

Query matrix of shape (n_q, d_k).

required
k list[list[float]]

Key matrix of shape (n_k, d_k).

required
v list[list[float]]

Value matrix of shape (n_k, d_v). n_k must equal n_q for self-attention; cross-attention uses a different n_k.

required
mask list[list[float]] | None

Optional additive mask of shape (n_q, n_k). Use 0.0 to keep a position and -inf to suppress it. The mask is added to the scaled scores before softmax.

None

Returns:

Type Description
list[list[float]]

A matrix of shape (n_q, d_v).

Source code in src\cds\nlp\attention.py
def scaled_dot_product_attention(
    q: list[list[float]],
    k: list[list[float]],
    v: list[list[float]],
    mask: list[list[float]] | None = None,
) -> list[list[float]]:
    """Evaluate ``softmax(Q K^T / sqrt(d_k) + mask) V``.

    Args:
        q: Queries, shape ``(n_q, d_k)``.
        k: Keys, shape ``(n_k, d_k)``.
        v: Values, shape ``(n_k, d_v)``.
        mask: Optional additive mask, shape ``(n_q, n_k)``. ``0.0``
            keeps a position and ``-inf`` suppresses it; the mask is
            added to the scaled scores before the softmax. Only the
            row count and the first row's width are validated.

    Returns:
        A matrix of shape ``(n_q, d_v)``, or ``[]`` if any of ``q``,
        ``k`` or ``v`` is empty.

    Raises:
        ValueError: If ``q`` has zero width, ``k`` and ``v`` differ in
            row count, ``q`` and ``k`` differ in width, or the mask
            shape does not match ``(n_q, n_k)``.
    """
    if not q or not k or not v:
        return []

    d_k = len(q[0])
    if d_k == 0:
        raise ValueError("q has zero width (d_k = 0)")
    n_q, n_k = len(q), len(k)
    if len(v) != n_k:
        raise ValueError(f"k has {n_k} rows but v has {len(v)}")
    key_width = len(k[0])
    if key_width != d_k:
        raise ValueError(f"q and k widths differ: {d_k} vs {key_width}")
    if mask is not None:
        mask_rows = len(mask)
        if mask_rows != n_q or len(mask[0]) != n_k:
            raise ValueError(
                f"mask shape {mask_rows}x{len(mask[0]) if mask else 0} "
                f"does not match attention shape {n_q}x{n_k}"
            )

    # Dividing by sqrt(d_k) keeps the dot products in a range where the
    # softmax does not saturate.
    scale = 1.0 / math.sqrt(d_k)
    scores = matmul(q, transpose(k))

    attn_weights: list[list[float]] = []
    for i in range(n_q):
        score_row = scores[i]
        for j in range(n_k):
            score_row[j] = score_row[j] * scale + (mask[i][j] if mask is not None else 0.0)
        # Normalise each query's scores into attention weights.
        attn_weights.append(softmax(score_row))

    return matmul(attn_weights, v)

softmax

softmax(x: list[float]) -> list[float]

Numerically stable softmax for a 1-D list.

Subtracts the max before exp to avoid overflow on large inputs; the resulting distribution is invariant to the shift. Empty input returns []; the result always sums to 1.0 (within float precision).

Source code in src\cds\nlp\attention.py
def softmax(x: list[float]) -> list[float]:
    """Compute a numerically stable softmax over a flat list.

    The maximum is subtracted from every entry before exponentiating,
    which prevents overflow on large inputs and leaves the resulting
    distribution unchanged.

    Args:
        x: Input scores.

    Returns:
        ``[]`` for empty input; otherwise a list of the same length as
        ``x``. When the exponentials sum to zero or NaN (for example
        when every input is ``-inf``), a uniform distribution is
        returned instead.
    """
    if not x:
        return []

    peak = max(x)
    shifted = [math.exp(value - peak) for value in x]
    denom = sum(shifted)

    # Degenerate input: ``-inf - -inf`` is NaN, so an all ``-inf`` row
    # produces a NaN sum. Falling back to uniform keeps the downstream
    # matmul finite.
    degenerate = denom == 0.0 or math.isnan(denom)
    if degenerate:
        count = len(x)
        return [1.0 / count] * count
    return [term / denom for term in shifted]

split_heads

split_heads(
    x: list[list[float]], n_heads: int
) -> list[list[list[float]]]

Split the last dim of (n, d_model) into n_heads slices.

Returns a list [n_heads][n][d_head] where d_head = d_model / n_heads. Equivalent to x.view(n, n_heads, d_head).transpose(0, 1) in PyTorch's convention.

Source code in src\cds\nlp\attention.py
def split_heads(
    x: list[list[float]],
    n_heads: int,
) -> list[list[list[float]]]:
    """Partition the columns of an ``(n, d_model)`` matrix into heads.

    Head ``h`` receives columns ``h * d_head`` up to (but excluding)
    ``(h + 1) * d_head`` of every row, where
    ``d_head = d_model // n_heads``.

    Args:
        x: Input matrix; its width is read from the first row.
        n_heads: Number of heads; must be positive and divide the
            width of ``x``.

    Returns:
        A nested list indexed ``[head][row][column]``. For empty ``x``
        this is ``n_heads`` empty lists.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide
            ``d_model``.
    """
    if n_heads <= 0:
        raise ValueError(f"n_heads must be > 0, got {n_heads}")
    if not x:
        return [[] for _ in range(n_heads)]

    d_model = len(x[0])
    if d_model % n_heads != 0:
        raise ValueError(f"d_model {d_model} is not divisible by n_heads {n_heads}")
    d_head = d_model // n_heads

    heads: list[list[list[float]]] = []
    for head_index in range(n_heads):
        offset = head_index * d_head
        head_rows = []
        for row in x:
            head_rows.append([row[offset + j] for j in range(d_head)])
        heads.append(head_rows)
    return heads

transpose

transpose(m: list[list[float]]) -> list[list[float]]

Transpose a nested-list matrix.

Source code in src\cds\nlp\attention.py
def transpose(m: list[list[float]]) -> list[list[float]]:
    """Return the transpose of a nested-list matrix.

    The column count is taken from the first row. An empty matrix
    yields ``[]``.
    """
    if not m:
        return []

    width = len(m[0])
    flipped: list[list[float]] = []
    for col in range(width):
        # Column ``col`` of the input becomes row ``col`` of the output.
        flipped.append([row[col] for row in m])
    return flipped

add

add(a: Tensor, b: Tensor) -> Tensor

a + b with reverse-mode grad ∂/∂a = ∂/∂b = out.grad.

Source code in src\cds\nlp\autograd\tensor.py
def add(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a + b`` as a new tracked tensor.

    Both partial derivatives are 1, so the backward step adds the
    output's gradient to ``a.grad`` and to ``b.grad`` unchanged.
    """
    out = Tensor(data=a.data + b.data)

    def _backward() -> None:
        upstream = out.grad
        a.grad += upstream
        b.grad += upstream

    return _track(out, (a, b), _backward)

div

div(a: Tensor, b: Tensor) -> Tensor

a / b with reverse-mode grad via the quotient rule.

Source code in src\cds\nlp\autograd\tensor.py
def div(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a / b`` as a new tracked tensor.

    The backward step applies the quotient rule: ``a`` receives
    ``out.grad / b`` and ``b`` receives ``-a * out.grad / b**2``.
    """
    out = Tensor(data=a.data / b.data)

    def _backward() -> None:
        upstream = out.grad
        denom = b.data
        a.grad += upstream / denom
        b.grad -= a.data * upstream / (denom * denom)

    return _track(out, (a, b), _backward)

exp

exp(a: Tensor) -> Tensor

exp(a) with reverse-mode grad ∂/∂a = exp(a) * out.grad.

Source code in src\cds\nlp\autograd\ops.py
def exp(a: Tensor) -> Tensor:
    """Return ``exp(a)`` as a new tracked tensor.

    The derivative of ``exp`` is itself, so the backward step adds
    ``exp(a.data) * out.grad`` to ``a.grad``.
    """
    out = Tensor(data=math.exp(a.data))

    def _backward() -> None:
        # Re-evaluated from a.data at backward time rather than reusing
        # the forward result.
        local_slope = math.exp(a.data)
        a.grad += local_slope * out.grad

    return _track(out, (a,), _backward)

log

log(a: Tensor) -> Tensor

Natural log. ∂/∂a = out.grad / a.data.

Raises `ValueError` for non-positive input — the gradient is undefined there.

Source code in src\cds\nlp\autograd\ops.py
def log(a: Tensor) -> Tensor:
    """Return the natural logarithm of ``a`` as a tracked tensor.

    The backward pass adds ``grad / a.data`` to ``a.grad``.

    Raises:
        ValueError: If ``a.data`` is zero or negative, where the logarithm
            (and therefore its gradient) is undefined.
    """
    # Reject invalid input first so no node is built for it.
    if a.data <= 0:
        raise ValueError(f"log requires positive input, got {a.data}")
    result = Tensor(data=math.log(a.data))

    def _backward() -> None:
        # Defensive only: the forward guard above has already rejected
        # ``a.data <= 0`` for every node created through this function, so
        # this fires only if ``a.data`` is changed afterwards or the public
        # entrypoint is bypassed.
        if a.data == 0.0:  # pragma: no cover
            raise ValueError("log(0) gradient is undefined")
        a.grad += result.grad / a.data

    return _track(result, (a,), _backward)

matmul

matmul(
    a: list[list[Tensor]], b: list[list[Tensor]]
) -> list[list[Tensor]]

Matrix multiply for nested :class:Tensor matrices.

a has shape (m, p); b has shape (p, n). The result is an (m, n) matrix whose entries are :class:Tensor nodes connected to the inputs via the dep graph.

Implementation is the textbook triple loop. Each inner product uses scalar autograd (one multiply + accumulate) so every entry in the result gets a backward fn that propagates to the contributing a and b entries.

For a 50K-param model this is the hot path — the pure-Python implementation stays as-is; the optional cds[fast-jit] Numba backend wraps the inner loop for ~10x speed-up without changing the autograd semantics.

Source code in src\cds\nlp\autograd\ops.py
def matmul(a: list[list[Tensor]], b: list[list[Tensor]]) -> list[list[Tensor]]:
    """Matrix multiply for nested :class:`Tensor` matrices.

    ``a`` has shape ``(m, p)``; ``b`` has shape ``(p, n)``. The result
    is an ``(m, n)`` matrix whose entries are :class:`Tensor` nodes
    built from the inputs with ``_tracked_mul`` / ``_tracked_add``.

    Implementation is the textbook triple loop. Each inner product is a
    chain of scalar multiply + accumulate steps, so every entry in the
    result is linked to the contributing ``a`` and ``b`` entries.

    For a 50K-param model this is the hot path — the pure-Python
    implementation stays as-is; the optional ``cds[fast-jit]`` Numba
    backend wraps the inner loop for ~10x speed-up without
    changing the autograd semantics.

    Returns:
        The ``(m, n)`` product, or ``[]`` if either operand (or its first
        row) is empty.

    Raises:
        ValueError: If the column count of ``a`` (taken from its first
            row) differs from the row count of ``b``.
    """
    if not a or not b or not a[0] or not b[0]:
        return []
    p = len(a[0])
    if len(b) != p:
        raise ValueError(f"matmul shape mismatch: a has {p} cols, b has {len(b)} rows")
    n = len(b[0])
    # Build the result row by row. No placeholder grid is allocated: every
    # cell is assigned exactly once, so a pre-filled matrix (whose cells
    # would all alias one shared Tensor) is unnecessary.
    out: list[list[Tensor]] = []
    for a_row in a:
        out_row: list[Tensor] = []
        for j in range(n):
            # Each cell starts from its own no-grad zero accumulator.
            acc = Tensor(data=0.0, requires_grad=False)
            for k in range(p):
                # acc += a[i][k] * b[k][j]  (scalar autograd chain)
                prod = _tracked_mul(a_row[k], b[k][j])
                acc = _tracked_add(acc, prod)
            out_row.append(acc)
        out.append(out_row)
    return out

mul

mul(a: Tensor, b: Tensor) -> Tensor

a * b with reverse-mode grad via the product rule.

Source code in src\cds\nlp\autograd\tensor.py
def mul(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a * b`` as a new tensor registered with the autograd graph.

    The backward pass applies the product rule: each operand receives the
    other operand's value times the result's gradient.
    """
    product = Tensor(data=a.data * b.data)

    def _backward() -> None:
        # d(a*b)/da = b
        a.grad += b.data * product.grad
        # d(a*b)/db = a
        b.grad += a.data * product.grad

    return _track(product, (a, b), _backward)

neg

neg(a: Tensor) -> Tensor

Unary negation.

Source code in src\cds\nlp\autograd\tensor.py
def neg(a: Tensor) -> Tensor:
    """Return ``-a`` as a new tensor registered with the autograd graph.

    The backward pass subtracts the result's gradient from ``a.grad``.
    """
    negated = Tensor(data=-a.data)

    def _backward() -> None:
        # d(-a)/da = -1.
        a.grad -= negated.grad

    return _track(negated, (a,), _backward)

no_grad

no_grad() -> _NoGrad

Return a context manager that disables grad tracking.

Source code in src\cds\nlp\autograd\_grad.py
def no_grad() -> _NoGrad:
    """Build a new ``_NoGrad`` context manager, used to disable grad tracking."""
    guard = _NoGrad()
    return guard

relu

relu(a: Tensor) -> Tensor

Rectified linear unit. ∂/∂a = out.grad if a > 0 else 0.

Source code in src\cds\nlp\autograd\ops.py
def relu(a: Tensor) -> Tensor:
    """Return ``max(0, a)`` as a new tensor registered with the autograd graph.

    The backward pass forwards the result's gradient to ``a.grad`` only
    when ``a.data`` is strictly positive; otherwise ``a.grad`` is untouched.
    """
    activated = Tensor(data=max(0.0, a.data))

    def _backward() -> None:
        # The slope is 1 on the positive side and 0 elsewhere (including 0).
        if a.data > 0:
            a.grad += activated.grad

    return _track(activated, (a,), _backward)

sub

sub(a: Tensor, b: Tensor) -> Tensor

a - b with reverse-mode grad ∂/∂a = +out.grad, ∂/∂b = -out.grad.

Source code in src\cds\nlp\autograd\tensor.py
def sub(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a - b`` as a new tensor registered with the autograd graph.

    The backward pass adds the result's gradient to ``a.grad`` and
    subtracts it from ``b.grad``.
    """
    difference = Tensor(data=a.data - b.data)

    def _backward() -> None:
        # d(a - b)/da = +1, d(a - b)/db = -1.
        a.grad += difference.grad
        b.grad -= difference.grad

    return _track(difference, (a, b), _backward)

train_bpe

train_bpe(
    corpus: str,
    vocab_size: int = 1000,
    min_frequency: int = 2,
    show_progress: bool = False,
) -> BPETokenizer

Train a BPE tokenizer on a raw text corpus.

The training procedure follows Sennrich et al. (2016) — start with a base vocabulary of every distinct character in the corpus, then repeatedly merge the most frequent adjacent symbol pair until the vocabulary reaches vocab_size.

Parameters:

Name Type Description Default
corpus str

Raw training text. Used as given: this function applies no Unicode normalisation itself (Python's str does not normalise implicitly), so normalise beforehand if NFC is required.

required
vocab_size int

Target vocabulary size (including the four reserved special tokens). The actual vocabulary may be smaller — training stops early when no pairs remain or the most frequent pair occurs fewer than min_frequency times.

1000
min_frequency int

Stop merging when the most frequent pair has count < this (a pair seen exactly min_frequency times is still merged). Prevents pathological merges from rare noise.

2
show_progress bool

If True, print each merge as it happens. Off by default for clean test output.

False

Returns:

Type Description
BPETokenizer

A fully populated :class:BPETokenizer.

Raises:

Type Description
ValueError

If corpus is empty or vocab_size < 5 (must fit the four reserved tokens + at least one real token).

Example

>>> tk = train_bpe("low low low lower lower newest newest newest", vocab_size=20)
>>> "low" in tk.vocab and "est" in tk.vocab
True

Source code in src\cds\nlp\bpe.py
def train_bpe(
    corpus: str,
    vocab_size: int = 1000,
    min_frequency: int = 2,
    show_progress: bool = False,
) -> BPETokenizer:
    """Train a BPE tokenizer on a raw text corpus.

    The training procedure follows Sennrich et al. (2016) — start with
    a base vocabulary of every distinct character in the corpus, then
    repeatedly merge the most frequent adjacent symbol pair until the
    vocabulary reaches ``vocab_size``.

    Args:
        corpus: Raw training text. Used as given: this function applies
            no Unicode normalisation itself (Python ``str`` does not
            normalise implicitly), so normalise beforehand if NFC is
            required.
        vocab_size: Target vocabulary size (including the four reserved
            special tokens). The actual vocabulary may be smaller —
            training stops early when no pairs remain or the most
            frequent pair occurs fewer than ``min_frequency`` times.
        min_frequency: Stop merging when the most frequent pair has
            count < this (a pair seen exactly ``min_frequency`` times is
            still merged). Prevents pathological merges from rare noise.
        show_progress: If True, print each merge as it happens. Off by
            default for clean test output.

    Returns:
        A :class:`BPETokenizer` built from the final vocabulary and the
        ordered merge list. If pre-tokenisation yields no words, the
        tokenizer holds only the special tokens and base characters, with
        no merges.

    Raises:
        ValueError: If ``corpus`` is empty or ``vocab_size < 5`` (must
            fit the four reserved tokens + at least one real token).

    Example:
        >>> tk = train_bpe("low low low lower lower newest newest newest", vocab_size=20)
        >>> "low" in tk.vocab and "est" in tk.vocab
        True
    """
    if not corpus:
        raise ValueError("Cannot train BPE on an empty corpus")
    if vocab_size < 5:
        raise ValueError(f"vocab_size must be >= 5 (4 specials + 1 real), got {vocab_size}")

    # 1. Base vocabulary: every distinct character in the corpus, in sorted
    #    order so the id assignment is deterministic. Special tokens take the
    #    lowest ids.
    base_chars = sorted(set(corpus))
    vocab: dict[str, int] = {tok: idx for idx, tok in enumerate(SPECIAL_TOKENS)}
    next_id = len(SPECIAL_TOKENS)
    for ch in base_chars:
        # SPECIAL_TOKENS are all multi-char strings, so a single ``ch`` can
        # never collide with them — the False branch is unreachable. Kept as a
        # defensive guard against a future single-char special token.
        if ch not in vocab:  # pragma: no branch
            vocab[ch] = next_id
            next_id += 1

    # 2. Tokenise corpus into per-word symbol sequences.
    words = _pre_tokenize(corpus)
    if not words:
        # Corpus contains only whitespace / punctuation.
        return BPETokenizer(vocab=vocab, merges=[])

    corpus_symbols: list[tuple[str, ...]] = [_word_to_symbols(w) for w in words]

    # 3. Iteratively merge the most frequent pair. The loop ends when the
    #    vocabulary is full, no pairs remain, or the best pair is too rare.
    merges: list[BPEMerge] = []
    while len(vocab) < vocab_size:
        stats = _get_pair_stats(corpus_symbols)
        if not stats:
            break
        best_pair, best_count = stats.most_common(1)[0]
        # Strictly below the threshold stops training; a count equal to
        # ``min_frequency`` is still merged.
        if best_count < min_frequency:
            break

        new_token = best_pair[0] + best_pair[1]
        # A merged token can never already be in vocab: stats only counts pairs
        # of currently-unmerged symbols, so a previously-merged token is not a
        # candidate. The False branch is unreachable; kept defensively.
        if new_token not in vocab:  # pragma: no branch
            vocab[new_token] = next_id
            next_id += 1
        # The merge's rank is its position in the merge list (0-based).
        merges.append(BPEMerge(pair=best_pair, rank=len(merges), new_token=new_token))
        if show_progress:
            print(f"merge {len(merges):>4}: {best_pair!r} -> {new_token!r} (count={best_count})")

        corpus_symbols = _merge_pair(corpus_symbols, best_pair)

    return BPETokenizer(vocab=vocab, merges=merges)

add_positional

add_positional(
    token_embeddings: list[list[float]],
    positional: PositionalEncoding,
) -> list[list[float]]

Add a positional encoding to a sequence of token embeddings.

Element-wise: out[i][j] = token[i][j] + pos[i][j]. Used in the Transformer encoder block to inject position information.

Parameters:

Name Type Description Default
token_embeddings list[list[float]]

An n × d_model matrix (nested list).

required
positional PositionalEncoding

A :class:PositionalEncoding whose d_model matches the token embedding width.

required

Returns:

Type Description
list[list[float]]

A new n × d_model matrix (input is not mutated).

Source code in src\cds\nlp\embed.py
def add_positional(
    token_embeddings: list[list[float]],
    positional: PositionalEncoding,
) -> list[list[float]]:
    """Sum token embeddings with their positional encoding, element by element.

    ``out[i][j] = token[i][j] + pos[i][j]``, where ``pos`` is obtained from
    ``positional.forward(n)`` for a sequence of ``n`` rows.

    Args:
        token_embeddings: An ``n × d_model`` matrix (nested list). The
            width is read from the first row.
        positional: A :class:`PositionalEncoding` whose ``d_model``
            matches the token embedding width.

    Returns:
        A new ``n × d_model`` matrix (input is not mutated); ``[]`` for an
        empty input.

    Raises:
        ValueError: If the embedding width differs from
            ``positional.d_model``.
    """
    if not token_embeddings:
        return []
    seq_len = len(token_embeddings)
    width = len(token_embeddings[0])
    if width != positional.d_model:
        raise ValueError(
            f"d_model mismatch: token embedding {width} != positional {positional.d_model}"
        )
    encoding = positional.forward(seq_len)
    return [
        [tok_row[j] + encoding[i][j] for j in range(width)]
        for i, tok_row in enumerate(token_embeddings)
    ]

feed_forward

feed_forward(
    x: list[list[float]],
    w1: list[list[float]],
    b1: list[float],
    w2: list[list[float]],
    b2: list[float],
) -> list[list[float]]

Two-layer position-wise FFN with GeLU.

FFN(x) = (GeLU(x W1 + b1)) W2 + b2

Parameters:

Name Type Description Default
x list[list[float]]

Input, shape (n, d_model).

required
w1 list[list[float]]

First weight matrix, shape (d_model, d_ff).

required
b1 list[float]

First bias, length d_ff.

required
w2 list[list[float]]

Second weight matrix, shape (d_ff, d_model).

required
b2 list[float]

Second bias, length d_model.

required
Source code in src\cds\nlp\layers.py
def feed_forward(
    x: list[list[float]],
    w1: list[list[float]],
    b1: list[float],
    w2: list[list[float]],
    b2: list[float],
) -> list[list[float]]:
    """Two-layer position-wise FFN with GeLU.

    ``FFN(x) = (GeLU(x W1 + b1)) W2 + b2``

    All shapes are validated up front (row counts, plus the width of each
    weight matrix's first row) so a mis-shaped parameter fails with a
    clear ``ValueError`` instead of a late ``IndexError`` or a silently
    wrong-width result.

    Args:
        x: Input, shape ``(n, d_model)``. ``d_model`` is read from the
            first row.
        w1: First weight matrix, shape ``(d_model, d_ff)``.
        b1: First bias, length ``d_ff`` (this defines ``d_ff``).
        w2: Second weight matrix, shape ``(d_ff, d_model)``.
        b2: Second bias, length ``d_model``.

    Returns:
        The FFN output, one row per input row; ``[]`` for an empty ``x``.

    Raises:
        ValueError: If ``b2``, ``w1`` or ``w2`` does not match the shapes
            implied by ``x`` and ``b1``.
    """
    if not x:
        return []
    d_model = len(x[0])
    if len(b2) != d_model:
        raise ValueError(f"b2 length {len(b2)} != d_model {d_model}")
    d_ff = len(b1)
    # Only inspect the first row when one exists, so an empty matrix is
    # reported (or accepted, when the expected row count is 0) rather than
    # raising IndexError.
    if len(w1) != d_model or (w1 and len(w1[0]) != d_ff):
        raise ValueError(
            f"w1 shape {len(w1)}x{len(w1[0]) if w1 else 0} != expected ({d_model}, {d_ff})"
        )
    if len(w2) != d_ff or (w2 and len(w2[0]) != d_model):
        raise ValueError(
            f"w2 shape {len(w2)}x{len(w2[0]) if w2 else 0} != expected ({d_ff}, {d_model})"
        )

    # Hidden layer: bias + GeLU applied in place on the fresh matmul result.
    hidden = matmul(x, w1)
    for row in hidden:
        for j in range(d_ff):
            row[j] = gelu(row[j] + b1[j])
    # Output projection followed by the second bias.
    out = matmul(hidden, w2)
    for row in out:
        for j in range(d_model):
            row[j] = row[j] + b2[j]
    return out

gelu

gelu(x: float) -> float

Exact Gaussian Error Linear Unit activation.

GELU(x) = x * Phi(x) where Phi is the standard normal CDF. Computed via 0.5 * x * (1 + erf(x / sqrt(2))) for numerical accuracy. The Tanh approximation used in some papers (0.5 x (1 + tanh(...))) is faster but introduces a small bias that doesn't matter for educational use — the exact form costs nothing here.

Source code in src\cds\nlp\layers.py
def gelu(x: float) -> float:
    """Exact Gaussian Error Linear Unit activation.

    ``GELU(x) = x * Phi(x)`` with ``Phi`` the standard normal CDF,
    evaluated as ``0.5 * x * (1 + erf(x / sqrt(2)))``. This is the exact
    erf form, not the tanh approximation found in some papers.
    """
    half_x = 0.5 * x
    # 1 + erf(x / sqrt(2)) equals 2 * Phi(x).
    twice_cdf = 1.0 + math.erf(x / math.sqrt(2.0))
    return half_x * twice_cdf

layer_norm

layer_norm(
    x: list[list[float]],
    gamma: list[float],
    beta: list[float],
    eps: float = LAYERNORM_EPS,
) -> list[list[float]]

Layer normalisation over the last dimension.

For each row of x:

    mean = E[x]
    var  = E[(x - mean)^2]
    y    = gamma * (x - mean) / sqrt(var + eps) + beta

Parameters:

Name Type Description Default
x list[list[float]]

Input, shape (n, d).

required
gamma list[float]

Per-feature scale, length d.

required
beta list[float]

Per-feature shift, length d.

required
eps float

Variance floor for numerical stability.

LAYERNORM_EPS
Source code in src\cds\nlp\layers.py
def layer_norm(
    x: list[list[float]],
    gamma: list[float],
    beta: list[float],
    eps: float = LAYERNORM_EPS,
) -> list[list[float]]:
    """Layer normalisation over the last dimension.

    For each row of ``x``:
        mean = E[x]
        var  = E[(x - mean)^2]
        y    = gamma * (x - mean) / sqrt(var + eps) + beta

    Args:
        x: Input, shape ``(n, d)``. ``d`` is read from the first row.
        gamma: Per-feature scale, length ``d``.
        beta: Per-feature shift, length ``d``.
        eps: Added to the variance before the square root for numerical
            stability.

    Returns:
        A new ``(n, d)`` matrix; ``[]`` for an empty ``x``. Zero-width
        rows are returned as empty rows.

    Raises:
        ValueError: If ``gamma`` or ``beta`` does not have length ``d``.
    """
    if not x:
        return []
    d = len(x[0])
    if len(gamma) != d or len(beta) != d:
        raise ValueError(f"gamma/beta length {len(gamma)}/{len(beta)} != feature dim {d}")
    if d == 0:
        # Nothing to normalise; also avoids dividing by zero below.
        return [[] for _ in x]
    # Loop-invariant: computed once rather than once per row.
    inv_d = 1.0 / d
    out: list[list[float]] = []
    for row in x:
        mean = sum(row) * inv_d
        var = sum((xi - mean) ** 2 for xi in row) * inv_d
        std = math.sqrt(var + eps)
        out.append([gamma[j] * (row[j] - mean) / std + beta[j] for j in range(d)])
    return out

transformer_block

transformer_block(
    x: list[list[float]],
    attn_weights: AttentionWeights,
    ffn_weights: FeedForwardWeights,
    n_heads: int,
    mask: list[list[float]] | None = None,
    prenorm: bool = True,
) -> list[list[float]]

One Transformer encoder block.

Parameters:

Name Type Description Default
x list[list[float]]

Input sequence, shape (n, d_model).

required
attn_weights AttentionWeights

:class:AttentionWeights dict.

required
ffn_weights FeedForwardWeights

:class:FeedForwardWeights dict.

required
n_heads int

Number of attention heads.

required
mask list[list[float]] | None

Optional additive attention mask.

None
prenorm bool

If True (default), apply LayerNorm before attention and FFN (pre-norm, the convention in most modern code). The original Vaswani et al. 2017 paper used post-norm, but pre-norm trains more stably.

True

Returns:

Type Description
list[list[float]]

Output sequence, shape (n, d_model).

Source code in src\cds\nlp\layers.py
def transformer_block(
    x: list[list[float]],
    attn_weights: AttentionWeights,
    ffn_weights: FeedForwardWeights,
    n_heads: int,
    mask: list[list[float]] | None = None,
    prenorm: bool = True,
) -> list[list[float]]:
    """Run one Transformer encoder block: attention, then FFN, with residuals.

    Args:
        x: Input sequence, shape ``(n, d_model)``.
        attn_weights: :class:`AttentionWeights` dict. Supplies the
            attention projections (``w_q``, ``w_k``, ``w_v``, ``w_o``)
            and both LayerNorm parameter pairs (``ln1_*``, ``ln2_*``).
        ffn_weights: :class:`FeedForwardWeights` dict (``w1``, ``b1``,
            ``w2``, ``b2``).
        n_heads: Number of attention heads.
        mask: Optional additive attention mask.
        prenorm: If True (default), LayerNorm is applied to the input of
            each sub-layer (pre-norm). If False, LayerNorm is applied to
            the sum of each sub-layer's output and its residual
            (post-norm, the original paper's convention).

    Returns:
        Output sequence, shape ``(n, d_model)``; ``[]`` for an empty ``x``.
    """
    if not x:
        return []
    # Both LayerNorm parameter sets are read from ``attn_weights``.
    ln1_gamma, ln1_beta = attn_weights["ln1_gamma"], attn_weights["ln1_beta"]
    ln2_gamma, ln2_beta = attn_weights["ln2_gamma"], attn_weights["ln2_beta"]

    def _attend(seq: list[list[float]]) -> list[list[float]]:
        # Multi-head self-attention sub-layer.
        return multi_head_attention(
            seq,
            attn_weights["w_q"],
            attn_weights["w_k"],
            attn_weights["w_v"],
            attn_weights["w_o"],
            n_heads,
            mask,
        )

    def _ffn(seq: list[list[float]]) -> list[list[float]]:
        # Position-wise feed-forward sub-layer.
        return feed_forward(
            seq,
            ffn_weights["w1"],
            ffn_weights["b1"],
            ffn_weights["w2"],
            ffn_weights["b2"],
        )

    if not prenorm:
        # Post-norm: normalise after each residual addition. Less stable
        # for deep stacks but kept for completeness.
        after_attn = layer_norm(_add(x, _attend(x)), ln1_gamma, ln1_beta)
        return layer_norm(_add(after_attn, _ffn(after_attn)), ln2_gamma, ln2_beta)

    # Pre-norm: normalise each sub-layer's input; the residual path stays
    # un-normalised.
    after_attn = _add(x, _attend(layer_norm(x, ln1_gamma, ln1_beta)))
    return _add(after_attn, _ffn(layer_norm(after_attn, ln2_gamma, ln2_beta)))

parameters

parameters(items: Iterable[Tensor]) -> list[Tensor]

Collect trainable tensors from a model.

Convenience helper — many models store their weights in dictionaries or lists; this filters to requires_grad=True in one call.

Source code in src\cds\nlp\optim.py
def parameters(items: Iterable[Tensor]) -> list[Tensor]:
    """Return the tensors from ``items`` that are marked trainable.

    Keeps every element whose ``requires_grad`` attribute is truthy,
    preserving the input order. Handy when a model stores its weights in
    dictionaries or lists and only the trainable ones are wanted.
    """
    return list(filter(lambda tensor: tensor.requires_grad, items))

cross_entropy

cross_entropy(
    logits: list[float] | list[Tensor], target: int
) -> Tensor

Softmax + negative log-likelihood for one example.

Computes -log(softmax(logits)[target]) in numerically stable form (subtract the max logit before exponentiating). The result is a :class:cds.nlp.autograd.Tensor so the optimiser can backpropagate through it.

Parameters:

Name Type Description Default
logits list[float] | list[Tensor]

Output of the model's final linear layer (unnormalised log-probabilities), length V for vocab size V. May be a list of Python floats (no autograd — useful for sanity checks) or :class:cds.nlp.autograd.Tensor values (loss is connected to the autograd graph).

required
target int

Index of the correct next token in [0, V).

required

Returns:

Type Description
Tensor

Scalar :class:Tensor — the cross-entropy loss for this example. backward() on it populates gradients on every model parameter that contributed (only if the logits were Tensors).

Source code in src\cds\nlp\training.py
def cross_entropy(
    logits: list[float] | list[Tensor],
    target: int,
) -> Tensor:
    """Softmax + negative log-likelihood for one example.

    Evaluates ``-log(softmax(logits)[target])`` as
    ``logsumexp(logits) - logits[target]``, subtracting the largest logit
    before exponentiating for numerical stability.

    Args:
        logits: Unnormalised scores, length ``V`` for vocab size ``V``.
            Entries may be plain floats (wrapped as ``requires_grad=False``
            constants) or :class:`cds.nlp.autograd.Tensor` values (which
            stay connected to the autograd graph).
        target: Index of the correct next token in ``[0, V)``.

    Returns:
        A scalar ``Tensor`` holding the cross-entropy loss for this
        example.

    Raises:
        ValueError: If ``logits`` is empty or ``target`` is outside
            ``[0, len(logits))``.
    """
    # Local import keeps the autograd module from being pulled into
    # the public ``cds.nlp`` namespace through this file.
    from cds.nlp.autograd import Tensor, exp, log

    if not logits:
        raise ValueError("cross_entropy: logits is empty")
    if not 0 <= target < len(logits):
        raise ValueError(f"cross_entropy: target {target} out of range [0, {len(logits)})")

    # Plain-float view of the logits, used only to find the max and to
    # build constants for float inputs.
    values = [item.data if isinstance(item, Tensor) else float(item) for item in logits]
    peak = max(values)
    peak_const = Tensor(data=peak, requires_grad=False)

    # Accumulate sum(exp(logit - peak)).
    exp_sum = Tensor(data=0.0, requires_grad=False)
    for item, value in zip(logits, values):
        if not isinstance(item, Tensor):
            # Float logits become no-grad constants.
            shifted = Tensor(data=value - peak, requires_grad=False)
        else:
            # Tensor logits go through the operator overload, keeping the graph.
            shifted = item - peak_const
        exp_sum = exp_sum + exp(shifted)
    log_sum_exp = peak_const + log(exp_sum)

    # Loss = LSE - logit[target]; the target keeps its graph link only when
    # it was passed as a Tensor.
    chosen = logits[target]
    if not isinstance(chosen, Tensor):
        chosen = Tensor(data=values[target], requires_grad=False)
    return log_sum_exp - chosen

train_step

train_step(
    model_fn: Callable[
        [list[int]], list[float] | list[Tensor]
    ],
    x: list[int],
    y: int,
    optimiser: SGD | Adam,
) -> float

Run one training step on a single example.

Performs
  1. logits = model_fn(x) (the user-supplied forward pass)
  2. loss = cross_entropy(logits, y)
  3. optimiser.zero_grad(); loss.backward(); optimiser.step()

The model's parameters must be exposed somewhere the optimiser can see them — typically by collecting them into a list at construction time and passing that list to the optimiser.

Parameters:

Name Type Description Default
model_fn Callable[[list[int]], list[float] | list[Tensor]]

Pure function x -> logits for one example. The autograd graph is built inside this function (when the return type is list[Tensor]); the function should return the model's pre-softmax output for the next-token prediction.

required
x list[int]

Input token ids (length T for a T-token context).

required
y int

Target next-token id.

required
optimiser SGD | Adam

:class:cds.nlp.optim.SGD or :class:Adam whose params list contains every :class:Parameter reachable from model_fn(x).

required

Returns:

Type Description
float

The loss as a plain Python float (snapshot of loss.data).

Source code in src\cds\nlp\training.py
def train_step(
    model_fn: Callable[[list[int]], list[float] | list[Tensor]],
    x: list[int],
    y: int,
    optimiser: SGD | Adam,
) -> float:
    """Run one optimisation step on a single ``(x, y)`` example.

    Sequence of operations:
        1. ``optimiser.zero_grad()``
        2. ``logits = model_fn(x)`` (the user-supplied forward pass)
        3. ``loss = cross_entropy(logits, y)``
        4. ``loss.backward()`` followed by ``optimiser.step()``

    Args:
        model_fn: Function ``x -> logits`` for one example. It must
            return Tensor logits so the loss is connected to the model
            parameters.
        x: Input token ids.
        y: Target next-token id.
        optimiser: :class:`cds.nlp.optim.SGD` or :class:`Adam` holding
            the parameters to update.

    Returns:
        The loss as a plain Python float (``float(loss.data)``, read after
        the optimiser step).

    Raises:
        RuntimeError: If the computed loss does not require grad, i.e.
            ``model_fn`` produced plain floats instead of Tensors.
    """
    optimiser.zero_grad()
    loss = cross_entropy(model_fn(x), y)
    if loss.requires_grad:
        loss.backward()
        optimiser.step()
        return float(loss.data)
    # A no-grad loss cannot update anything, so fail loudly instead of
    # performing a silent no-op step.
    raise RuntimeError(
        "train_step: model_fn returned plain floats — autograd "
        "needs the forward pass to produce Tensor logits so the "
        "loss can chain back to model parameters."
    )

render_attention_heatmap

render_attention_heatmap(
    attn_weights: Sequence[Sequence[float]],
    row_tokens: Sequence[str],
    col_tokens: Sequence[str],
) -> str

Render an attention matrix as an ASCII heatmap.

Parameters:

Name Type Description Default
attn_weights Sequence[Sequence[float]]

[rows][cols] matrix of attention weights. Values are min-max normalised per render, so any real range works; rows are expected to sum to ~1 (softmaxed) but this is not enforced.

required
row_tokens Sequence[str]

one label per row (e.g. query tokens).

required
col_tokens Sequence[str]

one label per column (e.g. key tokens).

required

Returns:

Type Description
str

A multi-line str: a header row of column tokens, then one line per row token followed by its shaded cells.

Raises:

Type Description
ValueError

if the matrix / label shapes do not line up.

Source code in src\cds\nlp\viz.py
def render_attention_heatmap(
    attn_weights: Sequence[Sequence[float]],
    row_tokens: Sequence[str],
    col_tokens: Sequence[str],
) -> str:
    """Render an attention matrix as an ASCII heatmap.

    Args:
        attn_weights: ``[rows][cols]`` matrix of attention weights. The
            global min and max of the matrix are passed to the shading
            helper, so any real range works; rows are expected to sum to
            ~1 (softmaxed) but this is not enforced.
        row_tokens: one label per row (e.g. query tokens).
        col_tokens: one label per column (e.g. key tokens).

    Returns:
        A multi-line ``str``: a header row of column tokens, a separator
        line, then one line per row token followed by its shaded cells.
        The string ends with a newline.

    Raises:
        ValueError: if the matrix is empty, the label counts do not match
            its shape, or its rows do not all have the same length.
    """
    if not attn_weights or not attn_weights[0]:
        raise ValueError("attn_weights must be a non-empty [rows][cols] matrix")
    rows = len(attn_weights)
    cols = len(attn_weights[0])
    if len(row_tokens) != rows:
        raise ValueError(f"row_tokens length {len(row_tokens)} != rows {rows}")
    if len(col_tokens) != cols:
        raise ValueError(f"col_tokens length {len(col_tokens)} != cols {cols}")
    # Reject ragged matrices: a short or long row would silently render a
    # line that no longer lines up with the column header.
    for r, weights in enumerate(attn_weights):
        if len(weights) != cols:
            raise ValueError(f"attn_weights row {r} length {len(weights)} != cols {cols}")

    # Global range across the whole matrix, used for every cell.
    flat = [w for r in attn_weights for w in r]
    lo, hi = min(flat), max(flat)
    span = hi - lo

    # Row labels are right-aligned to the widest row token; every column is
    # as wide as the widest column token.
    label_w = max(len(t) for t in row_tokens)
    header_w = max(len(t) for t in col_tokens)
    header = " " * label_w + " | " + " ".join(f"{t:>{header_w}}" for t in col_tokens)
    sep = "-" * label_w + "-+-" + "-" * (cols * (header_w + 1) - 1)

    lines = [header, sep]
    for label, weights in zip(row_tokens, attn_weights):
        # ``_shade`` turns one weight into its glyph; centre it in the column.
        cells = " ".join(_shade(w, lo, span).center(header_w) for w in weights)
        lines.append(f"{label:>{label_w}} | {cells}")
    return "\n".join(lines) + "\n"

render_embedding_projection

render_embedding_projection(
    embeddings: Sequence[Sequence[float]],
    labels: Sequence[str] | None = None,
    top_n: int = 10,
    width: int = 50,
    height: int = 12,
) -> str

Render a 2-D PCA scatter of embedding vectors as ASCII.

Parameters:

Name Type Description Default
embeddings Sequence[Sequence[float]]

[n_vectors][dim] matrix.

required
labels Sequence[str] | None

optional per-vector label. If None, the row index is used.

None
top_n int

render at most this many points (highest-variance first along PC1) so large vocabularies stay readable. <= 0 renders all.

10
width int

canvas width in characters.

50
height int

canvas height in characters.

12

Returns:

Type Description
str

A multi-line str with x/y axis labels and one character per point.

Source code in src\cds\nlp\viz.py
def render_embedding_projection(
    embeddings: Sequence[Sequence[float]],
    labels: Sequence[str] | None = None,
    top_n: int = 10,
    width: int = 50,
    height: int = 12,
) -> str:
    """Render a 2-D PCA scatter of embedding vectors as ASCII.

    Args:
        embeddings: ``[n_vectors][dim]`` matrix.
        labels: optional per-vector label. If ``None``, the row index is used.
        top_n: render at most this many points, keeping those with the largest
            absolute PC1 coordinate so large vocabularies stay readable.
            ``<= 0`` renders all.
        width: canvas width in characters (>= 1).
        height: canvas height in characters (>= 1).

    Returns:
        A multi-line ``str``: the PC2 maximum labels the top row, the PC1
        range sits under the canvas, each plotted point is one mark
        character, and a final ``mark=label`` legend line maps marks back to
        labels. The string ends with a trailing newline.

    Raises:
        ValueError: if ``embeddings`` (or its first row) is empty, if
            ``labels`` is given with a length different from ``embeddings``,
            or if ``width`` / ``height`` is below 1.
    """
    if not embeddings or not embeddings[0]:
        raise ValueError("embeddings must be a non-empty [n][d] matrix")
    if top_n <= 0:
        top_n = len(embeddings)
    if labels is not None and len(labels) != len(embeddings):
        raise ValueError("labels length must match number of embeddings")
    # Reject a degenerate canvas up front; otherwise the grid indexing below
    # fails with an unrelated IndexError.
    if width < 1 or height < 1:
        raise ValueError("width and height must be >= 1")

    # NOTE(review): assumes _pca_2d returns one (pc1, pc2) pair per input row,
    # in input order -- confirm against its definition.
    pts = _pca_2d(embeddings)
    # Keep the ``top_n`` points with the largest |PC1| so the spread is visible.
    # Tag each projected point with its original row index so the legend can
    # map back to ``labels`` after the descending-|PC1| sort reorders them.
    indexed = sorted(
        ((p, k) for k, p in enumerate(pts)),
        key=lambda item: abs(item[0][0]),
        reverse=True,
    )[:top_n]
    if labels is not None:
        labels = list(labels)
    else:
        labels = [str(i) for i in range(len(embeddings))]

    xs = [p[0] for p, _ in indexed]
    ys = [p[1] for p, _ in indexed]
    xlo, xhi = min(xs), max(xs)
    ylo, yhi = min(ys), max(ys)
    # Fall back to a unit span when all kept points share a coordinate, so the
    # scaling below never divides by zero.
    xspan = xhi - xlo if xhi > xlo else 1.0
    yspan = yhi - ylo if yhi > ylo else 1.0

    grid: list[list[str]] = [[" "] * width for _ in range(height)]
    marks = "o*+x#@%&123456789abcdefghijklmnopqrstuvwxyz"
    legend: list[str] = []
    for k, ((x, y), orig_idx) in enumerate(indexed):
        col = int((x - xlo) / xspan * (width - 1))
        # Invert y so larger PC2 is at the top.
        row = int((yhi - y) / yspan * (height - 1))
        col = max(0, min(width - 1, col))
        row = max(0, min(height - 1, row))
        # Marks cycle once there are more points than mark characters; a later
        # point landing on an occupied cell overwrites the earlier mark.
        mark = marks[k % len(marks)]
        grid[row][col] = mark
        legend.append(f"{mark}={labels[orig_idx]}")

    # Width of the left gutter: "PC2 " is 4 chars, minus the "|" column, plus
    # the formatted PC2 maximum. Every row below the first is indented by it.
    gutter = 3 + len(f"{yhi:.3g}")

    lines: list[str] = []
    lines.append(f"PC2 {yhi:.3g} |" + "".join(grid[0]))
    for r in range(1, height):
        lines.append(" " * gutter + "|" + "".join(grid[r]))
    lines.append(" " * gutter + "+" + "-" * width)
    # Right-align the x-axis hi label. pad = width - (label widths already on
    # the line); clamp at 0 so a tiny width never produces a negative format
    # width ("Sign not allowed in string format specifier").
    left = f"{'PC1':>{gutter}} {xlo:.3g}"
    pad = max(0, width - len(left) - len(f"{xhi:.3g}"))
    lines.append(f"{left}{'':>{pad}}{xhi:.3g}")
    # Mark->label legend so caller-supplied ``labels`` are actually surfaced.
    # ``legend`` gets one entry per plotted point, and ``indexed`` is expected
    # to be non-empty (top_n falls back to len(embeddings) >= 1), so the False
    # branch should be unreachable -- kept defensively.
    if legend:  # pragma: no branch
        lines.append("  legend: " + "  ".join(legend))
    return "\n".join(lines) + "\n"

render_training_curve

render_training_curve(
    losses: Sequence[float],
    width: int = 50,
    height: int = 10,
) -> str

Render an ASCII loss curve.

Parameters:

Name Type Description Default
losses Sequence[float]

per-step training losses (monotonic-decreasing looks best, but any sequence is accepted; a single point renders as one cell).

required
width int

plot width in characters (>= 1).

50
height int

plot height in characters (>= 1).

10

Returns:

Type Description
str

A multi-line str with y-axis label, the curve, and an x-axis showing the step range. Always ends with a trailing newline so it composes cleanly under print().

Source code in src\cds\nlp\viz.py
def render_training_curve(
    losses: Sequence[float],
    width: int = 50,
    height: int = 10,
) -> str:
    """Render an ASCII loss curve.

    Args:
        losses: per-step training losses (monotonic-decreasing looks best,
            but any sequence is accepted; a single point renders as one cell
            per column on a single row). Non-finite entries (NaN / inf, e.g.
            from a diverged run) are ignored when scaling the y-axis and
            leave a gap in any column that samples them.
        width:  plot width in characters (>= 1).
        height: plot height in characters (>= 1).

    Returns:
        A multi-line ``str`` with y-axis label, the curve, and an x-axis
        showing the step range. Always ends with a trailing newline so it
        composes cleanly under ``print()``.

    Raises:
        ValueError: if ``width`` or ``height`` is below 1, if ``losses`` is
            empty, or if ``losses`` contains no finite value.
    """
    import math  # function-scope import keeps this block self-contained

    if width < 1 or height < 1:
        raise ValueError("width and height must be >= 1")
    if not losses:
        raise ValueError("losses must contain at least one value")

    # Scale against finite values only: a NaN or inf would otherwise poison
    # min/max and make the int() conversion below raise an opaque error.
    finite = [v for v in losses if math.isfinite(v)]
    if not finite:
        raise ValueError("losses must contain at least one finite value")
    lo = min(finite)
    hi = max(finite)
    span = hi - lo if hi > lo else 1.0  # avoid divide-by-zero for flat curves
    n = len(losses)

    # Sample ``width`` columns from the loss series. Each column maps to the
    # loss at that fractional position, then to a plot row.
    grid: list[list[str]] = [[" "] * width for _ in range(height)]
    for col in range(width):
        idx = int(col * (n - 1) / max(1, width - 1)) if n > 1 else 0
        loss = losses[idx]
        if not math.isfinite(loss):
            continue  # leave a visible gap where the sampled loss is NaN/inf
        # Invert: high loss -> row 0 (top), low loss -> bottom row.
        row = int((hi - loss) / span * (height - 1))
        row = max(0, min(height - 1, row))
        grid[row][col] = "*"

    y_label = f"{hi:.4g}"
    indent = " " * len(y_label)

    lines: list[str] = [f"{y_label} |" + "".join(grid[0])]
    lines.extend(indent + " |" + "".join(grid[r]) for r in range(1, height))
    lines.append(indent + " +" + "-" * width)
    # Right-align the last step index against ``width``. Clamp the padding at
    # 0 so the format width is never negative when ``width`` is smaller than
    # the "step 0" + last-step text; padding only fills what remains.
    last_step = n - 1 if n > 1 else 0
    pad = max(0, width - len(f"step 0{last_step}"))
    lines.append(f"step 0{'':>{pad}}{last_step}")
    return "\n".join(lines) + "\n"