API Reference
Auto-generated reference for every public CDS module. Each entry below is rendered from the module's own docstrings by mkdocstrings.
Core Data Models
The shared Domain, Hypothesis, and HypothesisStatus types used throughout CDS — the foundation the hypothesis engine builds on.
cds.core
Core data models for CDS.
Classes
Domain
Bases: str, Enum
Broad scientific domains supported by CDS.
Source code in src\cds\core\models.py
| class Domain(str, Enum):
"""Broad scientific domains supported by CDS."""
PHYSICS = "physics"
COSMOLOGY = "cosmology"
MATHEMATICS = "mathematics"
BIOLOGY = "biology"
CHEMISTRY = "chemistry"
GENERAL_SCIENCE = "general_science"
|
Hypothesis
Bases: BaseModel
A scientific hypothesis with metadata and traceability.
Source code in src\cds\core\models.py
| class Hypothesis(BaseModel):
"""A scientific hypothesis with metadata and traceability."""
id: str = Field(..., description="Unique identifier")
statement: str = Field(..., description="The core hypothesis statement")
domain: Domain
research_question: str
rationale: str | None = None
assumptions: list[str] = Field(default_factory=list)
predictions: list[str] = Field(default_factory=list)
status: HypothesisStatus = HypothesisStatus.NEW
confidence: float = Field(0.5, ge=0.0, le=1.0)
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
tags: list[str] = Field(default_factory=list)
sources: list[str] = Field(default_factory=list, description="References or retrieval sources")
metadata: dict[str, str] = Field(default_factory=dict)
def to_markdown(self) -> str:
"""Render this hypothesis as a structured Markdown document."""
lines = [
f"# Hypothesis: {self.id}",
"",
f"**Statement**: {self.statement}",
"",
f"**Domain**: {self.domain.value}",
f"**Research Question**: {self.research_question}",
f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}",
"",
]
if self.rationale:
lines += ["## Rationale", self.rationale, ""]
if self.assumptions:
lines += ["## Assumptions"] + [f"- {a}" for a in self.assumptions] + [""]
if self.predictions:
preds = [f"- {p}" for p in self.predictions]
lines += ["## Predictions / Testable Consequences"] + preds + [""]
if self.tags:
lines += [f"**Tags**: {', '.join(self.tags)}"]
return "\n".join(lines)
|
Methods:
to_markdown
Render this hypothesis as a structured Markdown document.
Source code in src\cds\core\models.py
| def to_markdown(self) -> str:
"""Render this hypothesis as a structured Markdown document."""
lines = [
f"# Hypothesis: {self.id}",
"",
f"**Statement**: {self.statement}",
"",
f"**Domain**: {self.domain.value}",
f"**Research Question**: {self.research_question}",
f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}",
"",
]
if self.rationale:
lines += ["## Rationale", self.rationale, ""]
if self.assumptions:
lines += ["## Assumptions"] + [f"- {a}" for a in self.assumptions] + [""]
if self.predictions:
preds = [f"- {p}" for p in self.predictions]
lines += ["## Predictions / Testable Consequences"] + preds + [""]
if self.tags:
lines += [f"**Tags**: {', '.join(self.tags)}"]
return "\n".join(lines)
|
HypothesisStatus
Bases: str, Enum
Lifecycle states for a Hypothesis.
Source code in src\cds\core\models.py
| class HypothesisStatus(str, Enum):
"""Lifecycle states for a Hypothesis."""
NEW = "new"
REFINED = "refined"
CRITIQUED = "critiqued"
TESTABLE = "testable"
VALIDATED = "validated"
REJECTED = "rejected"
ARCHIVED = "archived"
|
Hypothesis Generation
The cognitive-discovery centrepiece: structured hypothesis generation from a research question, plus a statistical evaluator.
cds.hypothesis
Hypothesis generation and evaluation module for Cognitive Discovery.
Provides tools to generate structured scientific hypotheses from
research questions. Includes prompt templates and an offline
generator for immediate use, plus a clear Protocol for supplying
custom generator implementations for specialized research needs.
The focus is on making hypotheses falsifiable, with explicit
assumptions, predictions, and confidence estimates.
Example
from cds.hypothesis import generate_hypotheses
hypos = generate_hypotheses(
"Why do we observe the Hubble tension?",
domain="cosmology",
n=3
)
Classes
Domain
Bases: str, Enum
Broad scientific domains supported by CDS.
Source code in src\cds\core\models.py
| class Domain(str, Enum):
"""Broad scientific domains supported by CDS."""
PHYSICS = "physics"
COSMOLOGY = "cosmology"
MATHEMATICS = "mathematics"
BIOLOGY = "biology"
CHEMISTRY = "chemistry"
GENERAL_SCIENCE = "general_science"
|
Hypothesis
Bases: BaseModel
A scientific hypothesis with metadata and traceability.
Source code in src\cds\core\models.py
| class Hypothesis(BaseModel):
"""A scientific hypothesis with metadata and traceability."""
id: str = Field(..., description="Unique identifier")
statement: str = Field(..., description="The core hypothesis statement")
domain: Domain
research_question: str
rationale: str | None = None
assumptions: list[str] = Field(default_factory=list)
predictions: list[str] = Field(default_factory=list)
status: HypothesisStatus = HypothesisStatus.NEW
confidence: float = Field(0.5, ge=0.0, le=1.0)
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
tags: list[str] = Field(default_factory=list)
sources: list[str] = Field(default_factory=list, description="References or retrieval sources")
metadata: dict[str, str] = Field(default_factory=dict)
def to_markdown(self) -> str:
"""Render this hypothesis as a structured Markdown document."""
lines = [
f"# Hypothesis: {self.id}",
"",
f"**Statement**: {self.statement}",
"",
f"**Domain**: {self.domain.value}",
f"**Research Question**: {self.research_question}",
f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}",
"",
]
if self.rationale:
lines += ["## Rationale", self.rationale, ""]
if self.assumptions:
lines += ["## Assumptions"] + [f"- {a}" for a in self.assumptions] + [""]
if self.predictions:
preds = [f"- {p}" for p in self.predictions]
lines += ["## Predictions / Testable Consequences"] + preds + [""]
if self.tags:
lines += [f"**Tags**: {', '.join(self.tags)}"]
return "\n".join(lines)
|
Methods:
to_markdown
Render this hypothesis as a structured Markdown document.
Source code in src\cds\core\models.py
| def to_markdown(self) -> str:
"""Render this hypothesis as a structured Markdown document."""
lines = [
f"# Hypothesis: {self.id}",
"",
f"**Statement**: {self.statement}",
"",
f"**Domain**: {self.domain.value}",
f"**Research Question**: {self.research_question}",
f"**Status**: {self.status.value} | **Confidence**: {self.confidence:.2f}",
"",
]
if self.rationale:
lines += ["## Rationale", self.rationale, ""]
if self.assumptions:
lines += ["## Assumptions"] + [f"- {a}" for a in self.assumptions] + [""]
if self.predictions:
preds = [f"- {p}" for p in self.predictions]
lines += ["## Predictions / Testable Consequences"] + preds + [""]
if self.tags:
lines += [f"**Tags**: {', '.join(self.tags)}"]
return "\n".join(lines)
|
HypothesisStatus
Bases: str, Enum
Lifecycle states for a Hypothesis.
Source code in src\cds\core\models.py
| class HypothesisStatus(str, Enum):
"""Lifecycle states for a Hypothesis."""
NEW = "new"
REFINED = "refined"
CRITIQUED = "critiqued"
TESTABLE = "testable"
VALIDATED = "validated"
REJECTED = "rejected"
ARCHIVED = "archived"
|
ChiSquareGofPayload
Bases: TypedDict
Nested payload under the chi_square_gof dispatch key.
expected is optional at the call site: the evaluator falls back to a
uniform distribution over the categories when it is missing. total=False
makes both fields optional for the type checker, but
HypothesisEvaluator.evaluate reads observed directly and only looks up
expected with .get(), so observed must always be supplied.
Source code in src\cds\hypothesis\evaluator.py
| class ChiSquareGofPayload(TypedDict, total=False):
"""Nested payload under the ``chi_square_gof`` dispatch key.
``expected`` is optional at the call site: the evaluator falls back to a
uniform distribution over the categories when it is missing. ``total=False``
makes both fields optional so callers can supply only ``observed``; the
``in``-guards in :meth:`HypothesisEvaluator.evaluate` handle presence.
"""
observed: list[float]
expected: list[float]
|
EvaluationData
Bases: TypedDict
Tagged-union payload selecting which statistical test evaluate runs.
Exactly one of the dispatch keys below should be set; evaluate checks
them in documented order and raises ValueError if none match. total=False
mirrors the established AdamState convention (optimization.minimize):
every field is optional and presence is the dispatch signal, checked via
if "<key>" in data: in the method body (mypy narrows those accesses).
- groups -> t-test (2 groups) or ANOVA (3+ groups); optional labels
- one_sample + popmean -> one-sample t-test vs a reference mean
- chi_square_gof -> {"observed": [...], "expected": [...]}
- chi_square_independence -> 2D contingency table
- paired -> tuple of two paired samples (evaluated as two groups)
Source code in src\cds\hypothesis\evaluator.py
| class EvaluationData(TypedDict, total=False):
"""Tagged-union payload selecting which statistical test ``evaluate`` runs.
Exactly one of the dispatch keys below should be set; ``evaluate`` checks
them in documented order and raises ``ValueError`` if none match. ``total=False``
mirrors the established ``AdamState`` convention (``optimization.minimize``):
every field is optional and presence is the dispatch signal, checked via
``if "<key>" in data:`` in the method body (mypy narrows those accesses).
- ``groups`` -> t-test (2) or ANOVA (3+); optional ``labels``
- ``one_sample`` + ``popmean``-> one-sample t-test vs a reference mean
- ``chi_square_gof`` -> ``{"observed": [...], "expected": [...]}``
- ``chi_square_independence`` -> 2D contingency table
- ``paired`` -> tuple of two paired samples
"""
groups: list[list[float]]
labels: list[str]
one_sample: list[float]
popmean: float
chi_square_gof: ChiSquareGofPayload
chi_square_independence: list[list[float]]
paired: tuple[list[float], list[float]]
|
EvaluationResult
dataclass
Detailed result of a hypothesis evaluation.
Source code in src\cds\hypothesis\evaluator.py
| @dataclass
class EvaluationResult:
"""Detailed result of a hypothesis evaluation."""
hypothesis_id: str
test_name: str
statistic: float
p_value: float
is_significant: bool
conclusion: str
|
HypothesisEvaluator
Autonomous evaluator that matches hypotheses with statistical tests.
Source code in src\cds\hypothesis\evaluator.py
| class HypothesisEvaluator:
"""Autonomous evaluator that matches hypotheses with statistical tests."""
def __init__(self, alpha: float = 0.05):
self.alpha = alpha
def _build_result(
self,
hypothesis: Hypothesis,
test_name: str,
statistic: float,
p_value: float,
) -> EvaluationResult:
"""Format the outcome and update the hypothesis status."""
is_sig = p_value < self.alpha
if is_sig:
conclusion = (
f"Hypothesis supported at alpha={self.alpha}. "
f"Significant result found ({test_name})."
)
hypothesis.status = HypothesisStatus.VALIDATED
else:
conclusion = (
f"Failed to support hypothesis at alpha={self.alpha}. "
f"No significant result ({test_name})."
)
hypothesis.status = HypothesisStatus.REJECTED
return EvaluationResult(
hypothesis_id=hypothesis.id,
test_name=test_name,
statistic=statistic,
p_value=p_value,
is_significant=is_sig,
conclusion=conclusion,
)
def compare_groups(
self,
hypothesis: Hypothesis,
groups: list[list[float]],
labels: list[str] | None = None,
) -> EvaluationResult:
"""Evaluate a hypothesis by comparing multiple numeric groups.
Uses t-test for 2 groups, ANOVA for more.
"""
if len(groups) < 2:
raise ValueError("Evaluation requires at least 2 groups of data.")
if len(groups) == 2:
res = two_sample_ttest(groups[0], groups[1])
test_name = "Two-sample t-test"
else:
res = one_way_anova(*groups)
test_name = "One-way ANOVA"
return self._build_result(hypothesis, test_name, res.statistic, res.p_value)
def compare_to_reference(
self,
hypothesis: Hypothesis,
sample: list[float],
popmean: float,
) -> EvaluationResult:
"""One-sample t-test: does the sample differ from a reference mean?"""
if len(sample) < 2:
raise ValueError("One-sample evaluation requires at least 2 observations.")
res = one_sample_ttest(sample, popmean)
return self._build_result(hypothesis, "One-sample t-test", res.statistic, res.p_value)
def goodness_of_fit(
self,
hypothesis: Hypothesis,
observed: list[float],
expected: list[float] | None = None,
) -> EvaluationResult:
"""Chi-square goodness-of-fit: observed vs expected category counts.
If ``expected`` is omitted, a uniform distribution over the categories
is assumed (all categories equally likely).
"""
if len(observed) < 2:
raise ValueError("Goodness-of-fit requires at least 2 categories.")
if expected is None:
total = sum(observed)
n = len(observed)
expected = [total / n] * n
res = chi_square_gof(observed, expected)
return self._build_result(
hypothesis, "Chi-square goodness-of-fit", res.statistic, res.p_value
)
def test_independence(
self,
hypothesis: Hypothesis,
table: list[list[float]],
) -> EvaluationResult:
"""Chi-square test of independence on a contingency table."""
if len(table) < 2 or any(len(row) < 2 for row in table):
raise ValueError("Independence test requires a 2x2 or larger contingency table.")
res = chi_square_independence(table)
return self._build_result(hypothesis, "Chi-square independence", res.statistic, res.p_value)
def evaluate(self, hypothesis: Hypothesis, data: EvaluationData) -> EvaluationResult:
"""General evaluation entry point dispatching on the data format.
Supported keys (checked in order):
- ``groups`` : list of numeric groups (t-test / ANOVA)
- ``one_sample`` + ``popmean`` : sample and reference mean
- ``chi_square_gof`` : ``{"observed": [...], "expected": [...]}``
- ``chi_square_independence`` : 2D contingency table
- ``paired`` : tuple of two paired samples (treated as groups)
"""
if "groups" in data:
return self.compare_groups(hypothesis, data["groups"], data.get("labels"))
if "one_sample" in data:
return self.compare_to_reference(hypothesis, data["one_sample"], data["popmean"])
if "chi_square_gof" in data:
payload = data["chi_square_gof"]
return self.goodness_of_fit(
hypothesis,
payload["observed"],
payload.get("expected"),
)
if "chi_square_independence" in data:
return self.test_independence(hypothesis, data["chi_square_independence"])
if "paired" in data:
a, b = data["paired"]
return self.compare_groups(hypothesis, [list(a), list(b)])
raise ValueError(
"Unsupported data format for evaluation. "
"Provide one of: 'groups', 'one_sample' (with 'popmean'), "
"'chi_square_gof', 'chi_square_independence', or 'paired'."
)
|
Methods:
compare_groups
compare_groups(
hypothesis: Hypothesis,
groups: list[list[float]],
labels: list[str] | None = None,
) -> EvaluationResult
Evaluate a hypothesis by comparing multiple numeric groups.
Uses t-test for 2 groups, ANOVA for more.
Source code in src\cds\hypothesis\evaluator.py
| def compare_groups(
self,
hypothesis: Hypothesis,
groups: list[list[float]],
labels: list[str] | None = None,
) -> EvaluationResult:
"""Evaluate a hypothesis by comparing multiple numeric groups.
Uses t-test for 2 groups, ANOVA for more.
"""
if len(groups) < 2:
raise ValueError("Evaluation requires at least 2 groups of data.")
if len(groups) == 2:
res = two_sample_ttest(groups[0], groups[1])
test_name = "Two-sample t-test"
else:
res = one_way_anova(*groups)
test_name = "One-way ANOVA"
return self._build_result(hypothesis, test_name, res.statistic, res.p_value)
|
compare_to_reference
compare_to_reference(
hypothesis: Hypothesis,
sample: list[float],
popmean: float,
) -> EvaluationResult
One-sample t-test: does the sample differ from a reference mean?
Source code in src\cds\hypothesis\evaluator.py
| def compare_to_reference(
self,
hypothesis: Hypothesis,
sample: list[float],
popmean: float,
) -> EvaluationResult:
"""One-sample t-test: does the sample differ from a reference mean?"""
if len(sample) < 2:
raise ValueError("One-sample evaluation requires at least 2 observations.")
res = one_sample_ttest(sample, popmean)
return self._build_result(hypothesis, "One-sample t-test", res.statistic, res.p_value)
|
goodness_of_fit
goodness_of_fit(
hypothesis: Hypothesis,
observed: list[float],
expected: list[float] | None = None,
) -> EvaluationResult
Chi-square goodness-of-fit: observed vs expected category counts.
If expected is omitted, a uniform distribution over the categories
is assumed (all categories equally likely).
Source code in src\cds\hypothesis\evaluator.py
| def goodness_of_fit(
self,
hypothesis: Hypothesis,
observed: list[float],
expected: list[float] | None = None,
) -> EvaluationResult:
"""Chi-square goodness-of-fit: observed vs expected category counts.
If ``expected`` is omitted, a uniform distribution over the categories
is assumed (all categories equally likely).
"""
if len(observed) < 2:
raise ValueError("Goodness-of-fit requires at least 2 categories.")
if expected is None:
total = sum(observed)
n = len(observed)
expected = [total / n] * n
res = chi_square_gof(observed, expected)
return self._build_result(
hypothesis, "Chi-square goodness-of-fit", res.statistic, res.p_value
)
|
test_independence
test_independence(
hypothesis: Hypothesis, table: list[list[float]]
) -> EvaluationResult
Chi-square test of independence on a contingency table.
Source code in src\cds\hypothesis\evaluator.py
| def test_independence(
self,
hypothesis: Hypothesis,
table: list[list[float]],
) -> EvaluationResult:
"""Chi-square test of independence on a contingency table."""
if len(table) < 2 or any(len(row) < 2 for row in table):
raise ValueError("Independence test requires a 2x2 or larger contingency table.")
res = chi_square_independence(table)
return self._build_result(hypothesis, "Chi-square independence", res.statistic, res.p_value)
|
evaluate
evaluate(
hypothesis: Hypothesis, data: EvaluationData
) -> EvaluationResult
General evaluation entry point dispatching on the data format.
Supported keys (checked in order):
- groups : list of numeric groups (t-test / ANOVA)
- one_sample + popmean : sample and reference mean
- chi_square_gof : {"observed": [...], "expected": [...]}
- chi_square_independence : 2D contingency table
- paired : tuple of two paired samples (treated as groups)
Source code in src\cds\hypothesis\evaluator.py
| def evaluate(self, hypothesis: Hypothesis, data: EvaluationData) -> EvaluationResult:
"""General evaluation entry point dispatching on the data format.
Supported keys (checked in order):
- ``groups`` : list of numeric groups (t-test / ANOVA)
- ``one_sample`` + ``popmean`` : sample and reference mean
- ``chi_square_gof`` : ``{"observed": [...], "expected": [...]}``
- ``chi_square_independence`` : 2D contingency table
- ``paired`` : tuple of two paired samples (treated as groups)
"""
if "groups" in data:
return self.compare_groups(hypothesis, data["groups"], data.get("labels"))
if "one_sample" in data:
return self.compare_to_reference(hypothesis, data["one_sample"], data["popmean"])
if "chi_square_gof" in data:
payload = data["chi_square_gof"]
return self.goodness_of_fit(
hypothesis,
payload["observed"],
payload.get("expected"),
)
if "chi_square_independence" in data:
return self.test_independence(hypothesis, data["chi_square_independence"])
if "paired" in data:
a, b = data["paired"]
return self.compare_groups(hypothesis, [list(a), list(b)])
raise ValueError(
"Unsupported data format for evaluation. "
"Provide one of: 'groups', 'one_sample' (with 'popmean'), "
"'chi_square_gof', 'chi_square_independence', or 'paired'."
)
|
HypothesisGenerator
Bases: Protocol
Interface for hypothesis generators.
Source code in src\cds\hypothesis\generator.py
| class HypothesisGenerator(Protocol):
"""Interface for hypothesis generators."""
def generate(
self,
research_question: str,
domain: Domain | str = Domain.GENERAL_SCIENCE,
n: int = 3,
**kwargs: object,
) -> list[Hypothesis]:
"""Generate `n` hypotheses for the given research question."""
|
Methods:
generate
generate(
research_question: str,
domain: Domain | str = Domain.GENERAL_SCIENCE,
n: int = 3,
**kwargs: object
) -> list[Hypothesis]
Generate n hypotheses for the given research question.
Source code in src\cds\hypothesis\generator.py
| def generate(
self,
research_question: str,
domain: Domain | str = Domain.GENERAL_SCIENCE,
n: int = 3,
**kwargs: object,
) -> list[Hypothesis]:
"""Generate `n` hypotheses for the given research question."""
|
PromptTemplate
Prompt templates for different providers / use cases.
Source code in src\cds\hypothesis\generator.py
| class PromptTemplate:
"""Prompt templates for different providers / use cases."""
SYSTEM = (
"You are an expert research scientist and rigorous thinker. "
"Your goal is to propose high-quality, falsifiable, "
"novel-yet-grounded scientific hypotheses. "
"Always make assumptions explicit. Prioritize testability and clarity. "
"Respond ONLY in the requested structured format."
)
USER_BASE = """Research Question: {research_question}
Domain focus: {domain}
Generate {n} distinct hypotheses.
For each hypothesis provide:
- Clear one-sentence statement
- Short rationale (2-4 sentences) connecting to known science
- Key assumptions (bullet list)
- Specific, measurable predictions or consequences (bullet list)
- Estimated confidence (0-1) with brief justification
Format each as:
ID: H-<number>
Statement: ...
Rationale: ...
Assumptions:
- ...
Predictions:
- ...
Confidence: 0.xx
"""
@classmethod
def render(cls, research_question: str, domain: Domain, n: int = 3) -> str:
"""Format the user-side prompt for a hypothesis generation request."""
return cls.USER_BASE.format(
research_question=research_question,
domain=domain.value,
n=n,
)
|
Methods:
render
classmethod
render(
research_question: str, domain: Domain, n: int = 3
) -> str
Format the user-side prompt for a hypothesis generation request.
Source code in src\cds\hypothesis\generator.py
| @classmethod
def render(cls, research_question: str, domain: Domain, n: int = 3) -> str:
"""Format the user-side prompt for a hypothesis generation request."""
return cls.USER_BASE.format(
research_question=research_question,
domain=domain.value,
n=n,
)
|
SimpleOfflineGenerator
A deterministic offline generator for demos and early development.
It creates plausible but generic hypotheses. Researchers can replace
or wrap it with a custom implementation of HypothesisGenerator
tailored to their domain or data sources.
Source code in src\cds\hypothesis\generator.py
| class SimpleOfflineGenerator:
"""
A deterministic offline generator for demos and early development.
It creates plausible but generic hypotheses. Researchers can replace
or wrap it with a custom implementation of HypothesisGenerator
tailored to their domain or data sources.
"""
def __init__(self) -> None:
self.templates = {
Domain.COSMOLOGY: [
(
"Late-time modifications to gravity can mimic "
"dark energy while altering structure growth."
),
(
"A time-varying dark energy equation of state "
"w(a) with a sharp transition at z~0.5 "
"explains current tensions."
),
(
"Primordial non-Gaussianity of local type at "
"f_NL ~ 5-10 is detectable with next-gen "
"surveys and resolves sigma8 tension."
),
],
Domain.PHYSICS: [
(
"A hidden sector with light mediators can "
"resolve the muon g-2 anomaly without "
"conflicting with collider bounds."
),
(
"Modified dispersion relations at Planck scale "
"suppress high-energy cosmic rays in a "
"characteristic energy-dependent way."
),
],
Domain.MATHEMATICS: [
(
"A new family of special functions between "
"hypergeometric and q-hypergeometric satisfies "
"a novel functional equation."
),
],
}
def generate(
self,
research_question: str,
domain: Domain | str = Domain.GENERAL_SCIENCE,
n: int = 3,
**kwargs: object,
) -> list[Hypothesis]:
"""Generate `n` hypotheses from the built-in domain templates."""
# Ensure domain is a Domain enum instance. ``Domain`` subclasses
# ``str``, so the isinstance guard is True for both plain strings and
# enum members; the False branch (skip mapping) is therefore
# unreachable from typed callers — it remains as a defensive seam for
# hypothetical non-str subclasses and is excluded from coverage.
if isinstance(domain, str): # pragma: no branch
try:
# Case-insensitive mapping for better UX
domain = Domain(domain.lower())
except ValueError:
domain = Domain.GENERAL_SCIENCE
ideas = self.templates.get(domain, self.templates[Domain.PHYSICS])[:n]
if len(ideas) < n:
# The built-in templates only cover a few domains. For any other
# domain (or when more hypotheses are requested than templates
# exist), fall back to a generic template derived from the
# research question rather than a domain-specific claim, so the
# output stays a usable starting point for a custom generator.
ideas += [
f"A yet-untested factor influencing {research_question} "
f"produces a measurable, reproducible effect.",
] * (n - len(ideas))
hypos: list[Hypothesis] = []
for i, idea in enumerate(ideas[:n], 1):
h = Hypothesis(
id=f"H-{uuid.uuid4().hex[:8]}",
statement=idea,
domain=domain,
research_question=research_question,
rationale=(
"Builds on known tensions in the literature "
"and proposes a falsifiable deviation."
),
assumptions=[
"Background model is approximately correct at low energies.",
"New physics at observable scales doesn't violate existing constraints.",
],
predictions=[
"A measurable deviation in observable O at scale S with amplitude A.",
"Correlation between two previously uncorrelated datasets D1 and D2.",
],
status=HypothesisStatus.NEW,
confidence=0.45 + (i * 0.05),
tags=[domain.value, "early-draft"],
)
hypos.append(h)
return hypos
|
Methods:
generate
generate(
research_question: str,
domain: Domain | str = Domain.GENERAL_SCIENCE,
n: int = 3,
**kwargs: object
) -> list[Hypothesis]
Generate n hypotheses from the built-in domain templates.
Source code in src\cds\hypothesis\generator.py
| def generate(
self,
research_question: str,
domain: Domain | str = Domain.GENERAL_SCIENCE,
n: int = 3,
**kwargs: object,
) -> list[Hypothesis]:
"""Generate `n` hypotheses from the built-in domain templates."""
# Ensure domain is a Domain enum instance. ``Domain`` subclasses
# ``str``, so the isinstance guard is True for both plain strings and
# enum members; the False branch (skip mapping) is therefore
# unreachable from typed callers — it remains as a defensive seam for
# hypothetical non-str subclasses and is excluded from coverage.
if isinstance(domain, str): # pragma: no branch
try:
# Case-insensitive mapping for better UX
domain = Domain(domain.lower())
except ValueError:
domain = Domain.GENERAL_SCIENCE
ideas = self.templates.get(domain, self.templates[Domain.PHYSICS])[:n]
if len(ideas) < n:
# The built-in templates only cover a few domains. For any other
# domain (or when more hypotheses are requested than templates
# exist), fall back to a generic template derived from the
# research question rather than a domain-specific claim, so the
# output stays a usable starting point for a custom generator.
ideas += [
f"A yet-untested factor influencing {research_question} "
f"produces a measurable, reproducible effect.",
] * (n - len(ideas))
hypos: list[Hypothesis] = []
for i, idea in enumerate(ideas[:n], 1):
h = Hypothesis(
id=f"H-{uuid.uuid4().hex[:8]}",
statement=idea,
domain=domain,
research_question=research_question,
rationale=(
"Builds on known tensions in the literature "
"and proposes a falsifiable deviation."
),
assumptions=[
"Background model is approximately correct at low energies.",
"New physics at observable scales doesn't violate existing constraints.",
],
predictions=[
"A measurable deviation in observable O at scale S with amplitude A.",
"Correlation between two previously uncorrelated datasets D1 and D2.",
],
status=HypothesisStatus.NEW,
confidence=0.45 + (i * 0.05),
tags=[domain.value, "early-draft"],
)
hypos.append(h)
return hypos
|
Functions:
generate_hypotheses
generate_hypotheses(
research_question: str,
domain: Domain = Domain.GENERAL_SCIENCE,
n: int = 3,
generator: HypothesisGenerator | None = None,
) -> list[Hypothesis]
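Convenience entry point: generate n hypotheses for a research question, using the given generator or a SimpleOfflineGenerator when none is supplied.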
Source code in src\cds\hypothesis\generator.py
| def generate_hypotheses(
research_question: str,
domain: Domain = Domain.GENERAL_SCIENCE,
n: int = 3,
generator: HypothesisGenerator | None = None,
) -> list[Hypothesis]:
"""Convenience entrypoint."""
gen = generator or SimpleOfflineGenerator()
return gen.generate(research_question=research_question, domain=domain, n=n)
|
Statistics
Descriptive statistics, regression, and frequentist hypothesis tests (t-test, chi-square, ANOVA, Mann-Whitney, …).
cds.stats
Statistical analysis tools.
Classes
TestResult
dataclass
Result of a hypothesis test: test statistic, degrees of freedom, and p-value.
Source code in src\cds\stats\hypothesis_tests.py
| @dataclass
class TestResult:
"""Result of a hypothesis test: test statistic, degrees of freedom, p."""
statistic: float
df: float
p_value: float
|
RegressionResult
dataclass
Fitted linear-regression parameters and goodness-of-fit.
Source code in src\cds\stats\regression.py
@dataclass
class RegressionResult:
    """Parameters of a fitted straight line together with its R^2."""

    # Slope of the fitted line.
    slope: float
    # Intercept of the fitted line (value at x = 0).
    intercept: float
    # Coefficient of determination of the fit.
    r_squared: float

    def predict(self, x: float) -> float:
        """Evaluate the fitted line ``slope * x + intercept`` at ``x``."""
        scaled = self.slope * x
        return scaled + self.intercept
|
Methods:
predict
predict(x: float) -> float
Predict the response y for a given x using the fitted line.
Source code in src\cds\stats\regression.py
def predict(self, x: float) -> float:
    """Return the fitted line's value ``slope * x + intercept`` at ``x``."""
    scaled = self.slope * x
    return scaled + self.intercept
|
Functions:
correlation
correlation(x: list[float], y: list[float]) -> float
Calculate the Pearson correlation coefficient between two lists.
Parameters:
| Name |
Type |
Description |
Default |
x
|
list[float]
|
|
required
|
y
|
list[float]
|
|
required
|
Returns:
| Type |
Description |
float
|
Pearson correlation coefficient.
|
Raises:
| Type |
Description |
ValueError
|
if lengths mismatch or lists are too short.
|
Source code in src\cds\stats\descriptive.py
def correlation(x: list[float], y: list[float]) -> float:
    """Compute the Pearson correlation coefficient of two equal-length lists.

    Args:
        x: first list of values
        y: second list of values

    Returns:
        Pearson correlation coefficient, or 0.0 when the normalising
        denominator does not exceed ``NEAR_ZERO``.

    Raises:
        ValueError: if the lengths differ or fewer than two points are given.
    """
    n_points = len(x)
    if n_points != len(y):
        raise ValueError("lists must be the same length")
    if n_points < 2:
        raise ValueError("correlation requires at least two data points")

    x_bar = mean(x)
    y_bar = mean(y)
    cross = sum((xi - x_bar) * (yi - y_bar) for xi, yi in zip(x, y))
    x_spread = sum((xi - x_bar) ** 2 for xi in x)
    y_spread = sum((yi - y_bar) ** 2 for yi in y)
    scale = math.sqrt(x_spread * y_spread)
    if scale > NEAR_ZERO:
        return cross / scale
    # Degenerate spread: report "no correlation" instead of dividing by ~0.
    return 0.0
|
mean
mean(data: list[float]) -> float
Calculate the arithmetic mean of a list of numbers.
Parameters:
| Name |
Type |
Description |
Default |
data
|
list[float]
|
|
required
|
Returns:
| Type |
Description |
float
|
Arithmetic mean (sum / N).
|
Raises:
| Type |
Description |
ValueError
|
if data is empty.
|
Source code in src\cds\stats\descriptive.py
def mean(data: list[float]) -> float:
    """Return the arithmetic mean (sum divided by count) of ``data``.

    Args:
        data: List of numeric values.

    Returns:
        Arithmetic mean (sum / N).

    Raises:
        ValueError: if data is empty.
    """
    if not data:
        raise ValueError("mean requires at least one data point")
    total = sum(data)
    count = len(data)
    return total / count
|
median
median(data: list[float]) -> float
Calculate the median (middle value) of a list of numbers.
Parameters:
| Name |
Type |
Description |
Default |
data
|
list[float]
|
|
required
|
Returns:
| Type |
Description |
float
|
Median value.
|
Source code in src\cds\stats\descriptive.py
def median(data: list[float]) -> float:
    """Return the median (middle value) of ``data``.

    Args:
        data: List of numeric values.

    Returns:
        Median value: the middle element of the sorted data for an odd
        count, the average of the two middle elements for an even count,
        and 0.0 when ``data`` is empty.
    """
    if not data:
        return 0.0
    ordered = sorted(data)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return float(ordered[half])
    return (ordered[half - 1] + ordered[half]) / 2
|
stdev
stdev(data: list[float], ddof: int = 1) -> float
Calculate the standard deviation of a list of numbers.
Parameters:
| Name |
Type |
Description |
Default |
data
|
list[float]
|
|
required
|
ddof
|
int
|
Delta Degrees of Freedom.
|
1
|
Returns:
| Type |
Description |
float
|
Standard deviation.
|
Source code in src\cds\stats\descriptive.py
def stdev(data: list[float], ddof: int = 1) -> float:
    """Return the standard deviation of ``data``.

    Computed as the square root of ``variance(data, ddof)``.

    Args:
        data: List of numeric values.
        ddof: Delta Degrees of Freedom, forwarded to ``variance``.

    Returns:
        Standard deviation.
    """
    spread = variance(data, ddof)
    return math.sqrt(spread)
|
variance
variance(data: list[float], ddof: int = 1) -> float
Calculate the sample variance of a list of numbers.
Parameters:
| Name |
Type |
Description |
Default |
data
|
list[float]
|
|
required
|
ddof
|
int
|
Delta Degrees of Freedom (1 for sample, 0 for population).
|
1
|
Returns:
| Type |
Description |
float
|
Sample or population variance.
|
Raises:
| Type |
Description |
ValueError
|
if data size is <= ddof.
|
Source code in src\cds\stats\descriptive.py
def variance(data: list[float], ddof: int = 1) -> float:
    """Return the variance of ``data`` with a configurable divisor.

    Args:
        data: List of numeric values.
        ddof: Delta Degrees of Freedom (1 for sample, 0 for population);
            the sum of squared deviations is divided by ``len(data) - ddof``.

    Returns:
        Sample or population variance.

    Raises:
        ValueError: if data size is <= ddof.
    """
    n_obs = len(data)
    if n_obs <= ddof:
        raise ValueError(f"variance requires more than {ddof} data points")
    center = mean(data)
    squared_deviations = sum((value - center) ** 2 for value in data)
    return squared_deviations / (n_obs - ddof)
|
chi2_sf
chi2_sf(x: float, df: float) -> float
Upper-tail probability for the chi-square distribution: P(X >= x).
Equals Q(df/2, x/2) with the regularized upper incomplete gamma.
Reference: Pearson (1900); Abramowitz & Stegun §26.4.
Source code in src\cds\stats\hypothesis_tests.py
def chi2_sf(x: float, df: float) -> float:
    """Upper-tail probability P(X >= x) of the chi-square distribution.

    Evaluated as Q(df/2, x/2), the regularized upper incomplete gamma
    function, through the ``_gammq`` helper. Any ``x <= 0`` yields 1.0
    without calling the helper.

    Reference: Pearson (1900); Abramowitz & Stegun §26.4.
    """
    at_or_below_zero = x <= 0.0
    if at_or_below_zero:
        return 1.0
    half_df = df / 2.0
    half_x = x / 2.0
    return _gammq(half_df, half_x)
|
chi_square_gof
chi_square_gof(
observed: list[float], expected: list[float]
) -> TestResult
Pearson's chi-square goodness-of-fit test.
Statistic chi2 = sum((O_i - E_i)^2 / E_i) with len-1 degrees of freedom.
Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.
Parameters:
| Name |
Type |
Description |
Default |
observed
|
list[float]
|
|
required
|
expected
|
list[float]
|
expected counts (same length, all > 0)
|
required
|
Returns:
| Type |
Description |
TestResult
|
TestResult with chi2 statistic, df = k-1, upper-tail p-value
|
Source code in src\cds\stats\hypothesis_tests.py
def chi_square_gof(
    observed: list[float],
    expected: list[float],
) -> TestResult:
    """Pearson's chi-square goodness-of-fit test.

    The statistic is chi2 = sum((O_i - E_i)^2 / E_i), reported with
    ``len(observed) - 1`` degrees of freedom.

    Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.

    Args:
        observed: observed counts
        expected: expected counts (same length, all > 0)

    Returns:
        TestResult with chi2 statistic, df = k-1, upper-tail p-value

    Raises:
        ValueError: if the lengths differ, fewer than 2 categories are
            given, or any expected count is not positive.
    """
    n_categories = len(observed)
    if n_categories != len(expected):
        raise ValueError("observed and expected must have same length")
    if n_categories < 2:
        raise ValueError("need at least 2 categories")
    for count in expected:
        if count <= 0:
            raise ValueError("expected counts must be positive")

    statistic = sum((o - e) ** 2 / e for o, e in zip(observed, expected))
    dof = n_categories - 1
    upper_tail = chi2_sf(statistic, dof)
    return TestResult(statistic=statistic, df=dof, p_value=upper_tail)
|
chi_square_independence
chi_square_independence(
table: list[list[float]],
) -> TestResult
Pearson's chi-square test of independence for a contingency table.
Expected counts E_ij = (row_i total)(col_j total) / grand total;
degrees of freedom (rows-1)(cols-1).
Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.
Parameters:
| Name |
Type |
Description |
Default |
table
|
list[list[float]]
|
r x c contingency table of non-negative counts
|
required
|
Returns:
| Type |
Description |
TestResult
|
TestResult with chi2 statistic, df, upper-tail p-value
|
Source code in src\cds\stats\hypothesis_tests.py
def chi_square_independence(table: list[list[float]]) -> TestResult:
    """Pearson's chi-square test of independence for a contingency table.

    Expected counts are E_ij = (row_i total)(col_j total) / grand total and
    the degrees of freedom are (rows-1)(cols-1). Cells whose expected count
    is not positive contribute nothing to the statistic.

    Reference: Pearson, K. (1900), Philosophical Magazine 50(302), 157-175.

    Args:
        table: r x c contingency table of non-negative counts

    Returns:
        TestResult with chi2 statistic, df, upper-tail p-value

    Raises:
        ValueError: if the table has fewer than 2 rows or columns, is not
            rectangular, or sums to zero.
    """
    n_rows = len(table)
    if n_rows < 2:
        raise ValueError("need at least 2 rows")
    n_cols = len(table[0])
    if n_cols < 2 or any(len(row) != n_cols for row in table):
        raise ValueError("need a rectangular table with at least 2 columns")

    row_totals = [sum(row) for row in table]
    # The table is rectangular at this point, so zip(*table) walks columns.
    col_totals = [sum(column) for column in zip(*table)]
    grand_total = sum(row_totals)
    if grand_total == 0:
        raise ValueError("table total must be positive")

    statistic = 0.0
    for row, row_total in zip(table, row_totals):
        for observed, col_total in zip(row, col_totals):
            expected = row_total * col_total / grand_total
            if expected > 0:
                statistic += (observed - expected) ** 2 / expected

    dof = (n_rows - 1) * (n_cols - 1)
    upper_tail = chi2_sf(statistic, dof)
    return TestResult(statistic=statistic, df=dof, p_value=upper_tail)
|
f_sf
f_sf(f: float, df1: float, df2: float) -> float
Upper-tail probability for the F distribution: P(F >= f).
Equals I_{df2/(df2+df1 f)}(df2/2, df1/2).
Reference: Fisher (1925); Numerical Recipes §6.14.
Source code in src\cds\stats\hypothesis_tests.py
def f_sf(f: float, df1: float, df2: float) -> float:
    """Upper-tail probability P(F >= f) of the F distribution.

    Evaluated as I_{df2/(df2+df1 f)}(df2/2, df1/2) through the ``_betai``
    helper. Any ``f <= 0`` yields 1.0 without calling the helper.

    Reference: Fisher (1925); Numerical Recipes §6.14.
    """
    at_or_below_zero = f <= 0.0
    if at_or_below_zero:
        return 1.0
    beta_arg = df2 / (df2 + df1 * f)
    return _betai(df2 / 2.0, df1 / 2.0, beta_arg)
|
one_sample_ttest
one_sample_ttest(
data: list[float], popmean: float = 0.0
) -> TestResult
One-sample Student's t-test against a population mean.
Tests H0: mean(data) == popmean. The statistic is
t = (x_bar - mu) / (s / sqrt(n)) with n-1 degrees of freedom.
Reference: Student [Gosset] (1908), Biometrika 6(1), 1-25.
Parameters:
| Name |
Type |
Description |
Default |
data
|
list[float]
|
sample observations (n >= 2)
|
required
|
popmean
|
float
|
hypothesized population mean
|
0.0
|
Returns:
| Type |
Description |
TestResult
|
TestResult with t statistic, df = n-1, two-tailed p-value
|
Source code in src\cds\stats\hypothesis_tests.py
def one_sample_ttest(data: list[float], popmean: float = 0.0) -> TestResult:
    """One-sample Student's t-test against a population mean.

    Tests H0: mean(data) == popmean using
    t = (x_bar - mu) / (s / sqrt(n)) with n-1 degrees of freedom.

    Reference: Student [Gosset] (1908), Biometrika 6(1), 1-25.

    Args:
        data: sample observations (n >= 2)
        popmean: hypothesized population mean

    Returns:
        TestResult with t statistic, df = n-1, two-tailed p-value

    Raises:
        ValueError: if fewer than 2 observations are given or the standard
            error is exactly zero.
    """
    n_obs = len(data)
    if n_obs < 2:
        raise ValueError("need at least 2 observations")

    dof = n_obs - 1
    std_err = math.sqrt(variance(data, ddof=1) / n_obs)
    if std_err == 0.0:
        raise ValueError("zero variance; t-test undefined")

    t_stat = (mean(data) - popmean) / std_err
    p_two_tailed = t_sf(t_stat, dof)
    return TestResult(statistic=t_stat, df=dof, p_value=p_two_tailed)
|
one_way_anova
one_way_anova(*groups: list[float]) -> TestResult
Fisher's one-way analysis of variance (ANOVA F-test).
Partitions total variability into between-group and within-group sums of
squares and forms F = MS_between / MS_within with (k-1, N-k) degrees of
freedom.
Reference: Fisher, R. A. (1925). "Statistical Methods for Research
Workers," Oliver & Boyd.
Parameters:
| Name |
Type |
Description |
Default |
*groups
|
list[float]
|
two or more samples, each with at least one observation
|
()
|
Returns:
| Type |
Description |
TestResult
|
TestResult with F statistic, df = k-1 (stored), upper-tail p-value. The within-group degrees of freedom (N-k) are used internally for p.
|
Source code in src\cds\stats\hypothesis_tests.py
def one_way_anova(*groups: list[float]) -> TestResult:
    """Fisher's one-way analysis of variance (ANOVA F-test).

    Partitions total variability into between-group and within-group sums of
    squares and forms F = MS_between / MS_within with (k-1, N-k) degrees of
    freedom.

    Reference: Fisher, R. A. (1925). "Statistical Methods for Research
    Workers," Oliver & Boyd.

    Args:
        *groups: two or more samples, each with at least one observation

    Returns:
        TestResult with F statistic, df = k-1 (stored), upper-tail p-value.
        The within-group degrees of freedom (N-k) are used internally for p.

    Raises:
        ValueError: if fewer than 2 groups are given, any group is empty,
            the total number of observations does not exceed the number of
            groups, or the within-group mean square is exactly zero.
    """
    k = len(groups)
    if k < 2:
        raise ValueError("need at least 2 groups")
    if any(len(g) < 1 for g in groups):
        raise ValueError("each group needs at least one observation")
    n_total = sum(len(g) for g in groups)
    if n_total <= k:
        raise ValueError("need more observations than groups")

    grand_mean = sum(sum(g) for g in groups) / n_total
    # Compute every group mean exactly once. Re-evaluating mean(g) inside the
    # per-observation sum would make the within-group pass quadratic in the
    # group size.
    group_means = [mean(g) for g in groups]
    ss_between = sum(
        len(g) * (g_mean - grand_mean) ** 2 for g, g_mean in zip(groups, group_means)
    )
    ss_within = sum(
        sum((x - g_mean) ** 2 for x in g) for g, g_mean in zip(groups, group_means)
    )

    df_between = k - 1
    df_within = n_total - k
    ms_between = ss_between / df_between
    ms_within = ss_within / df_within
    if ms_within == 0.0:
        raise ValueError("zero within-group variance; F undefined")

    f = ms_between / ms_within
    return TestResult(
        statistic=f,
        df=df_between,
        p_value=f_sf(f, df_between, df_within),
    )
|
t_sf
t_sf(t: float, df: float) -> float
Two-tailed survival probability for Student's t distribution.
Returns P(|T| >= |t|) for T ~ t(df), via the incomplete beta function:
p = I_{df/(df+t^2)}(df/2, 1/2).
Reference: Student (1908); Numerical Recipes §6.14.
Source code in src\cds\stats\hypothesis_tests.py
def t_sf(t: float, df: float) -> float:
    """Two-tailed survival probability for Student's t distribution.

    Returns P(|T| >= |t|) for T ~ t(df), evaluated with the incomplete beta
    function as p = I_{df/(df+t^2)}(df/2, 1/2) through the ``_betai`` helper.

    Reference: Student (1908); Numerical Recipes §6.14.
    """
    beta_arg = df / (df + t * t)
    half_df = df / 2.0
    return _betai(half_df, 0.5, beta_arg)
|
two_sample_ttest
two_sample_ttest(
a: list[float], b: list[float], equal_var: bool = True
) -> TestResult
Two-sample t-test for equality of means.
With equal_var=True uses the pooled-variance (Student) t-test; with
equal_var=False uses Welch's t-test with the Welch-Satterthwaite
degrees of freedom.
References
- Student [Gosset] (1908), Biometrika 6(1), 1-25.
- Welch, B. L. (1947). "The generalization of 'Student's' problem
when several different population variances are involved."
Biometrika, 34(1-2), 28-35.
Parameters:
| Name |
Type |
Description |
Default |
a
|
list[float]
|
|
required
|
b
|
list[float]
|
|
required
|
equal_var
|
bool
|
pooled-variance test if True, Welch's test otherwise
|
True
|
Returns:
| Type |
Description |
TestResult
|
TestResult with t statistic, degrees of freedom, two-tailed p-value
|
Source code in src\cds\stats\hypothesis_tests.py
def two_sample_ttest(
    a: list[float],
    b: list[float],
    equal_var: bool = True,
) -> TestResult:
    """Two-sample t-test for equality of means.

    With ``equal_var=True`` uses the pooled-variance (Student) t-test; with
    ``equal_var=False`` uses Welch's t-test with the Welch-Satterthwaite
    degrees of freedom.

    References:
        - Student [Gosset] (1908), Biometrika 6(1), 1-25.
        - Welch, B. L. (1947). "The generalization of 'Student's' problem
          when several different population variances are involved."
          Biometrika, 34(1-2), 28-35.

    Args:
        a: first sample (n >= 2)
        b: second sample (n >= 2)
        equal_var: pooled-variance test if True, Welch's test otherwise

    Returns:
        TestResult with t statistic, degrees of freedom, two-tailed p-value

    Raises:
        ValueError: if either sample has fewer than 2 observations, or the
            standard error of the difference is exactly zero.
    """
    na, nb = len(a), len(b)
    if na < 2 or nb < 2:
        raise ValueError("each sample needs at least 2 observations")
    va, vb = variance(a, ddof=1), variance(b, ddof=1)
    diff = mean(a) - mean(b)

    if equal_var:
        df = na + nb - 2
        sp2 = ((na - 1) * va + (nb - 1) * vb) / df
        se = math.sqrt(sp2 * (1.0 / na + 1.0 / nb))
    else:
        se = math.sqrt(va / na + vb / nb)

    # Reject zero variance before computing the Welch-Satterthwaite df: with
    # two constant samples its denominator is 0 and the division would raise
    # ZeroDivisionError instead of the ValueError below.
    if se == 0.0:
        raise ValueError("zero variance; t-test undefined")

    if equal_var:
        df_eff = float(df)
    else:
        num = (va / na + vb / nb) ** 2
        den = (va / na) ** 2 / (na - 1) + (vb / nb) ** 2 / (nb - 1)
        df_eff = num / den

    t = diff / se
    return TestResult(statistic=t, df=df_eff, p_value=t_sf(t, df_eff))
|
linear_regression
linear_regression(
x: list[float], y: list[float]
) -> RegressionResult
Fit y = slope*x + intercept by ordinary least squares.
Returns:
| Type |
Description |
RegressionResult
|
RegressionResult with slope, intercept, and R^2.
|
Raises:
| Type |
Description |
ValueError
|
if x and y have different lengths, fewer than 2 points,
or all x values are identical (zero variance).
|
Source code in src\cds\stats\regression.py
def linear_regression(x: list[float], y: list[float]) -> RegressionResult:
    """Fit ``y = slope*x + intercept`` by ordinary least squares.

    Args:
        x: predictor values.
        y: response values, one per entry of ``x``.

    Returns:
        RegressionResult with slope, intercept, and R^2. R^2 is reported as
        0.0 when the total sum of squares of ``y`` is not positive.

    Raises:
        ValueError: if `x` and `y` have different lengths, fewer than 2 points,
            or all x values are identical (zero variance).
    """
    n_points = len(x)
    if n_points != len(y) or n_points < 2:
        raise ValueError("need matching lists with at least 2 points")

    x_bar = mean(x)
    y_bar = mean(y)
    sxy = sum((xi - x_bar) * (yi - y_bar) for xi, yi in zip(x, y))
    sxx = sum((xi - x_bar) ** 2 for xi in x)
    if sxx == 0:
        raise ValueError("all x values are identical")

    slope = sxy / sxx
    intercept = y_bar - slope * x_bar

    # Goodness of fit: 1 - (residual sum of squares / total sum of squares).
    residual_ss = sum((yi - (slope * xi + intercept)) ** 2 for xi, yi in zip(x, y))
    total_ss = sum((yi - y_bar) ** 2 for yi in y)
    if total_ss > 0:
        r_squared = 1 - residual_ss / total_ss
    else:
        r_squared = 0.0
    return RegressionResult(slope=slope, intercept=intercept, r_squared=r_squared)
|
Probability
Continuous PDFs (Gaussian, uniform, exponential) and discrete PMFs (binomial, Poisson) with reproducible sampling.
cds.probability
Probability distributions and sampling.
Functions:
binomial_pmf
binomial_pmf(k: int, n: int, p: float) -> float
Binomial distribution probability mass function.
P(X=k) = C(n,k) * p^k * (1-p)^(n-k)
Parameters:
| Name |
Type |
Description |
Default |
k
|
int
|
|
required
|
n
|
int
|
|
required
|
p
|
float
|
probability of success per trial
|
required
|
Raises:
| Type |
Description |
ValueError
|
if parameters are invalid
|
Source code in src\cds\probability\distributions.py
def binomial_pmf(k: int, n: int, p: float) -> float:
    """Binomial distribution probability mass function.

    P(X=k) = C(n,k) * p^k * (1-p)^(n-k)

    Args:
        k: number of successes
        n: number of trials
        p: probability of success per trial

    Returns:
        The probability of exactly ``k`` successes; 0.0 when ``k`` lies
        outside ``0..n``.

    Raises:
        ValueError: if ``p`` is not within [0, 1]
    """
    p_is_probability = 0 <= p <= 1
    if not p_is_probability:
        raise ValueError("p must be in [0, 1]")
    if k < 0 or k > n:
        return 0.0
    ways = math.comb(n, k)
    success_part = p**k
    failure_part = (1 - p) ** (n - k)
    return ways * success_part * failure_part
|
exponential_pdf
exponential_pdf(x: float, lam: float = 1.0) -> float
Exponential distribution PDF.
Parameters:
| Name |
Type |
Description |
Default |
x
|
float
|
point to evaluate (must be >= 0)
|
required
|
lam
|
float
|
|
1.0
|
Raises:
| Type |
Description |
ValueError
|
if lam <= 0
|
Source code in src\cds\probability\distributions.py
def exponential_pdf(x: float, lam: float = 1.0) -> float:
    """Exponential distribution PDF.

    Args:
        x: point to evaluate; negative points have density 0.0
        lam: rate parameter (lambda)

    Returns:
        ``lam * exp(-lam * x)`` for ``x >= 0``, otherwise 0.0.

    Raises:
        ValueError: if lam <= 0
    """
    if lam <= 0:
        raise ValueError("lambda must be positive")
    return 0.0 if x < 0 else lam * math.exp(-lam * x)
|
gaussian_pdf
gaussian_pdf(
x: float, mu: float = 0.0, sigma: float = 1.0
) -> float
Gaussian (normal) probability density function.
Parameters:
| Name |
Type |
Description |
Default |
x
|
float
|
|
required
|
mu
|
float
|
|
0.0
|
sigma
|
float
|
|
1.0
|
Raises:
| Type |
Description |
ValueError
|
if sigma <= 0
|
Source code in src\cds\probability\distributions.py
def gaussian_pdf(x: float, mu: float = 0.0, sigma: float = 1.0) -> float:
    """Gaussian (normal) probability density function.

    Args:
        x: point to evaluate
        mu: mean
        sigma: standard deviation

    Returns:
        Density of the normal distribution N(mu, sigma^2) at ``x``.

    Raises:
        ValueError: if sigma <= 0
    """
    if sigma <= 0:
        raise ValueError("sigma must be positive")
    normaliser = 1 / (sigma * math.sqrt(2 * math.pi))
    z = (x - mu) / sigma
    return normaliser * math.exp(-0.5 * z**2)
|
poisson_pmf
poisson_pmf(k: int, lam: float) -> float
Poisson distribution probability mass function.
P(X=k) = (lambda^k * e^-lambda) / k!
Parameters:
| Name |
Type |
Description |
Default |
k
|
int
|
|
required
|
lam
|
float
|
|
required
|
Raises:
| Type |
Description |
ValueError
|
if lam < 0 (a negative k returns 0.0 rather than raising)
|
Source code in src\cds\probability\distributions.py
def poisson_pmf(k: int, lam: float) -> float:
    """Poisson distribution probability mass function.

    P(X=k) = (lambda^k * e^-lambda) / k!

    Args:
        k: number of events
        lam: expected rate (lambda)

    Returns:
        The probability of exactly ``k`` events; 0.0 when ``k`` is negative.

    Raises:
        ValueError: if lam < 0
    """
    if lam < 0:
        raise ValueError("lambda must be non-negative")
    if k < 0:
        return 0.0
    try:
        return (lam**k) * math.exp(-lam) / math.factorial(k)
    except OverflowError:
        # lam**k or k! no longer fits in a float even though the probability
        # itself may be representable (e.g. k = lam = 200). Evaluate the same
        # formula in log space: k*ln(lam) - lam - ln(k!).
        if lam == 0:
            # log(0) is undefined; with lam == 0 every k > 0 has probability 0.
            return 0.0
        return math.exp(k * math.log(lam) - lam - math.lgamma(k + 1))
|
uniform_pdf
uniform_pdf(
x: float, a: float = 0.0, b: float = 1.0
) -> float
Uniform distribution PDF on [a, b].
Parameters:
| Name |
Type |
Description |
Default |
x
|
float
|
|
required
|
a
|
float
|
|
0.0
|
b
|
float
|
|
1.0
|
Raises:
| Type |
Description |
ValueError
|
if a >= b
|
Source code in src\cds\probability\distributions.py
def uniform_pdf(
    x: float,
    a: float = 0.0,
    b: float = 1.0,
) -> float:
    """Uniform distribution PDF on [a, b].

    Args:
        x: point to evaluate
        a: lower bound
        b: upper bound

    Returns:
        ``1 / (b - a)`` when ``a <= x <= b``, otherwise 0.0.

    Raises:
        ValueError: if a >= b
    """
    if a >= b:
        raise ValueError("a must be less than b")
    inside_support = a <= x <= b
    return 1 / (b - a) if inside_support else 0.0
|
uniform_sample
uniform_sample(
a: float, b: float, n: int, seed: int | None = None
) -> list[float]
Generate n uniform random samples from [a, b].
Parameters:
| Name |
Type |
Description |
Default |
a
|
float
|
|
required
|
b
|
float
|
|
required
|
n
|
int
|
|
required
|
seed
|
int | None
|
|
None
|
Source code in src\cds\probability\distributions.py
| def uniform_sample(
a: float,
b: float,
n: int,
seed: int | None = None,
) -> list[float]:
"""Generate n uniform random samples from [a, b].
Args:
a: lower bound
b: upper bound
n: number of samples
seed: optional random seed
"""
rng = random.Random(seed)
return [rng.uniform(a, b) for _ in range(n)]
|
Mathematical Utilities
Calculus (derivative, integral, gradient) and a compact linear-algebra toolkit (PLU, QR, Cholesky, eigenvalues via power iteration).
cds.math_utils
Math helper functions.
Functions:
derivative
derivative(
f: Callable[[float], float],
x: float,
h_base: float = 1e-07,
) -> float
Central difference approximation with adaptive step size.
Source code in src\cds\math_utils\calculus.py
def derivative(f: Callable[[float], float], x: float, h_base: float = 1e-7) -> float:
    """Approximate f'(x) with a central difference.

    The step is ``h_base * max(1.0, abs(x))``, so it grows with the
    magnitude of ``x``.

    Args:
        f: function of one variable.
        x: point at which to differentiate.
        h_base: base step size before scaling.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    step = h_base * max(1.0, abs(x))
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)
|
gradient
gradient(
f: Callable[..., float],
point: list[float],
h_base: float = 1e-07,
) -> list[float]
Numerical gradient for multivariable functions with adaptive scaling.
Source code in src\cds\math_utils\calculus.py
def gradient(f: Callable[..., float], point: list[float], h_base: float = 1e-7) -> list[float]:
    """Approximate the gradient of a multivariable function numerically.

    Each partial derivative is a central difference along one coordinate,
    with the step scaled by ``max(1.0, abs(coordinate))``.

    Args:
        f: function called as ``f(*coordinates)``.
        point: coordinates at which to evaluate the gradient.
        h_base: base step size before scaling.

    Returns:
        One partial-derivative estimate per entry of ``point``.
    """
    partials: list[float] = []
    for idx, coord in enumerate(point):
        # Scale the step to the coordinate's magnitude to maintain precision.
        step = h_base * max(1.0, abs(coord))
        ahead = point.copy()
        ahead[idx] = coord + step
        behind = point.copy()
        behind[idx] = coord - step
        partials.append((f(*ahead) - f(*behind)) / (2 * step))
    return partials
|
integral
integral(
f: Callable[[float], float],
a: float,
b: float,
n: int = 1000,
) -> float
Simpson's rule for numerical integration.
Source code in src\cds\math_utils\calculus.py
def integral(f: Callable[[float], float], a: float, b: float, n: int = 1000) -> float:
    """Integrate ``f`` over [a, b] with the composite Simpson's rule.

    Args:
        f: integrand.
        a: lower limit.
        b: upper limit.
        n: number of sub-intervals; an odd value is bumped to the next even
            number because Simpson's rule pairs intervals.

    Returns:
        The Simpson approximation of the integral.
    """
    intervals = n + 1 if n % 2 != 0 else n
    width = (b - a) / intervals
    total = f(a) + f(b)
    for idx in range(1, intervals):
        # Interior nodes alternate weights 4 (odd index) and 2 (even index).
        weight = 2 if idx % 2 == 0 else 4
        total += weight * f(a + idx * width)
    return total * width / 3
|
cholesky
cholesky(m: Matrix) -> Matrix
Cholesky decomposition of a symmetric positive-definite matrix.
Returns the lower-triangular L such that A = L L^T. Roughly twice as
efficient as LU for SPD systems and numerically stable.
Reference
Benoît, C. (1924). "Note sur une méthode de résolution des équations
normales... (Procédé du Commandant Cholesky)." Bulletin Géodésique,
2, 67-77. See also Golub & Van Loan, §4.2.
Parameters:
| Name |
Type |
Description |
Default |
m
|
Matrix
|
symmetric positive-definite matrix
|
required
|
Returns:
| Type |
Description |
Matrix
|
lower-triangular matrix L with A = L L^T
|
Raises:
| Type |
Description |
ValueError
|
if the matrix is not positive definite
|
Source code in src\cds\math_utils\linalg.py
def cholesky(m: Matrix) -> Matrix:
    """Cholesky decomposition of a symmetric positive-definite matrix.

    Builds the lower-triangular factor L with A = L L^T row by row. Only the
    lower triangle of ``m`` (entries ``m[i][j]`` with ``j <= i``) is read.

    Reference:
        Benoît, C. (1924). "Note sur une méthode de résolution des équations
        normales... (Procédé du Commandant Cholesky)." Bulletin Géodésique,
        2, 67-77. See also Golub & Van Loan, §4.2.

    Args:
        m: symmetric positive-definite matrix

    Returns:
        lower-triangular matrix L with A = L L^T

    Raises:
        ValueError: if the matrix is not positive definite
    """
    size = len(m)
    lower = [[0.0] * size for _ in range(size)]
    for row in range(size):
        for col in range(row + 1):
            partial = sum(lower[row][k] * lower[col][k] for k in range(col))
            if row != col:
                lower[row][col] = (m[row][col] - partial) / lower[col][col]
                continue
            # Diagonal entry: its square must be strictly positive.
            pivot = m[row][row] - partial
            if pivot <= 0.0:
                raise ValueError(
                    "matrix is not positive definite — Cholesky decomposition requires symmetric positive definite input; check that the matrix is symmetric and all eigenvalues > 0"
                )
            lower[row][col] = math.sqrt(pivot)
    return lower
|
determinant
determinant(m: Matrix) -> float
Compute matrix determinant using PLU decomposition (O(N^3)).
Avoids the O(N!) complexity of minor expansion.
Source code in src\cds\math_utils\linalg.py
def determinant(m: Matrix) -> float:
    """Compute the determinant via PLU decomposition (O(N^3)).

    The determinant is the product of U's diagonal (L has a unit diagonal)
    multiplied by the sign of the permutation P. A ``ValueError`` from the
    decomposition is treated as "singular" and yields 0.0.

    Args:
        m: square matrix.

    Returns:
        The determinant; 1.0 for an empty matrix and ``m[0][0]`` for 1x1.
    """
    n = len(m)
    if n == 0:
        return 1.0
    if n == 1:
        return m[0][0]

    try:
        P, _lower, U = lu_decomposition(m)
    except ValueError:
        # The decomposition rejects singular input; the determinant is 0.
        return 0.0

    diag_product = 1.0
    for i in range(n):
        diag_product *= U[i][i]

    # Sign of P is (-1)^s with s = n - (number of permutation cycles).
    targets = [row.index(1.0) for row in P]
    seen = [False] * n
    cycles = 0
    for start in range(n):
        if seen[start]:
            continue
        cycles += 1
        node = start
        while not seen[node]:
            seen[node] = True
            node = targets[node]

    sign = -1 if (n - cycles) % 2 else 1
    return float(diag_product * sign)
|
dot
dot(a: Vector, b: Vector) -> float
Inner product of two equal-length vectors.
Raises:
| Type |
Description |
ValueError
|
if a and b have different lengths.
|
Source code in src\cds\math_utils\linalg.py
def dot(a: Vector, b: Vector) -> float:
    """Return the inner product of two equal-length vectors.

    Args:
        a: first vector.
        b: second vector.

    Returns:
        Sum of the element-wise products.

    Raises:
        ValueError: if `a` and `b` have different lengths.
    """
    if len(a) != len(b):
        raise ValueError(f"vectors a and b must have the same length (got {len(a)} and {len(b)})")
    total = 0
    for left, right in zip(a, b):
        total += left * right
    return total
|
gram_schmidt
gram_schmidt(vectors: list[Vector]) -> list[Vector]
Gram-Schmidt orthonormalization.
Produces an orthonormal set from the input vectors.
[Trefethen & Bau, Lecture 8]
Parameters:
| Name |
Type |
Description |
Default |
vectors
|
list[Vector]
|
list of linearly independent vectors
|
required
|
Returns:
| Type |
Description |
list[Vector]
|
orthonormal basis vectors
|
Source code in src\cds\math_utils\linalg.py
def gram_schmidt(vectors: list[Vector]) -> list[Vector]:
    """Gram-Schmidt orthonormalization.

    Each input vector has its components along the already accepted basis
    vectors removed one at a time and is then normalised. Vectors whose
    remainder has norm below ``NEAR_ZERO`` are dropped.
    [Trefethen & Bau, Lecture 8]

    Args:
        vectors: list of linearly independent vectors

    Returns:
        orthonormal basis vectors
    """
    basis: list[Vector] = []
    for vec in vectors:
        residual = vec[:]
        for q in basis:
            coeff = sum(r * q[i] for i, r in enumerate(residual))
            residual = [r - coeff * q[i] for i, r in enumerate(residual)]
        length = math.sqrt(sum(c * c for c in residual))
        if length < NEAR_ZERO:
            # Nothing left after projection; skip the vector.
            continue
        basis.append([c / length for c in residual])
    return basis
|
identity
identity(n: int) -> Matrix
Create n×n identity matrix.
Source code in src\cds\math_utils\linalg.py
def identity(n: int) -> Matrix:
    """Build the n×n identity matrix as a list of float rows."""
    rows = []
    for diag in range(n):
        row = [0.0] * n
        row[diag] = 1.0
        rows.append(row)
    return rows
|
lu_decomposition
lu_decomposition(
m: Matrix,
) -> tuple[Matrix, Matrix, Matrix]
LU decomposition with partial pivoting (PA = LU).
A = P_inv * L * U where P_inv is a permutation matrix,
L is lower triangular (ones on diagonal) and U is upper triangular.
Returns:
| Type |
Description |
tuple[Matrix, Matrix, Matrix]
|
P, L, U matrices.
|
Raises:
| Type |
Description |
ValueError
|
if matrix is singular
|
Source code in src\cds\math_utils\linalg.py
def lu_decomposition(m: Matrix) -> tuple[Matrix, Matrix, Matrix]:
    """LU decomposition with partial pivoting (PA = LU).

    L is lower triangular with ones on the diagonal, U is upper triangular
    and P is the permutation matrix recording the row swaps. The input
    matrix is copied, not modified.

    Returns:
        P, L, U matrices.

    Raises:
        ValueError: if a pivot column's largest magnitude is below
            ``NEAR_ZERO`` (singular or nearly singular matrix)
    """
    n = len(m)
    perm = identity(n)
    lower = [[0.0] * n for _ in range(n)]
    upper = [row[:] for row in m]

    for k in range(n):
        # Partial pivoting: first row at or below k with the largest
        # magnitude in column k.
        pivot_idx = max(range(k, n), key=lambda r, col=k: abs(upper[r][col]))
        max_val = abs(upper[pivot_idx][k])
        if max_val < NEAR_ZERO:
            raise ValueError(
                f"zero pivot at column {k} — the input matrix is singular or nearly singular; try regularizing or checking your data"
            )

        if pivot_idx != k:
            # Swap the same pair of rows in all three matrices so the
            # multipliers already stored in L stay with their rows.
            for mat in (upper, perm, lower):
                mat[k], mat[pivot_idx] = mat[pivot_idx], mat[k]

        lower[k][k] = 1.0
        pivot_row = upper[k]
        for i in range(k + 1, n):
            factor = upper[i][k] / pivot_row[k]
            lower[i][k] = factor
            for j in range(k, n):
                upper[i][j] -= factor * pivot_row[j]

    return perm, lower, upper
|
mat_mul
mat_mul(a: Matrix, b: Matrix) -> Matrix
Matrix multiplication A * B.
Pre-transposes B so that columns are read as contiguous rows, which
keeps memory access row-major and lets the inner loops run over
Python's C-implemented zip/sum rather than indexed lookups.
Source code in src\cds\math_utils\linalg.py
def mat_mul(a: Matrix, b: Matrix) -> Matrix:
    """Matrix multiplication A * B.

    B is transposed once so each of its columns can be paired with a row of
    A through ``zip``.

    Args:
        a: left matrix (rows_a x cols_a).
        b: right matrix (rows_b x cols_b).

    Returns:
        The rows_a x cols_b product as a list of rows.

    Raises:
        ValueError: if ``cols_a`` differs from ``rows_b``.
    """
    rows_a, cols_a = len(a), len(a[0])
    rows_b, cols_b = len(b), len(b[0])
    if cols_a != rows_b:
        raise ValueError(f"incompatible shapes: {rows_a}x{cols_a} and {rows_b}x{cols_b}")

    columns_of_b = list(zip(*b))
    product = []
    for row_a in a:
        out_row = []
        for col_b in columns_of_b:
            out_row.append(sum(ai * bi for ai, bi in zip(row_a, col_b)))
        product.append(out_row)
    return product
|
matrix_inverse
matrix_inverse(m: Matrix) -> Matrix
Compute matrix inverse via PLU decomposition.
Reuses a single P, L, U factorization and solves A * x_i = e_i
for each column of the identity matrix to build the inverse.
Raises:
| Type |
Description |
ValueError
|
if matrix is singular
|
Source code in src\cds\math_utils\linalg.py
def matrix_inverse(m: Matrix) -> Matrix:
    """Compute the matrix inverse via PLU decomposition.

    A single P, L, U factorization is reused to solve A * x_i = e_i for
    every column e_i of the identity; the solutions are the columns of the
    inverse.

    Raises:
        ValueError: if matrix is singular
    """
    n = len(m)
    P, L, U = lu_decomposition(m)

    solved_columns = []
    for col in range(n):
        # Standard basis vector e_col.
        unit = [0.0] * n
        unit[col] = 1.0

        # Apply the permutation: P e_col.
        permuted = [sum(P[i][j] * unit[j] for j in range(n)) for i in range(n)]

        # Forward substitution: L y = P e_col.
        y = [0.0] * n
        for i in range(n):
            y[i] = permuted[i] - sum(L[i][j] * y[j] for j in range(i))

        # Backward substitution: U x = y.
        x = [0.0] * n
        for i in reversed(range(n)):
            if abs(U[i][i]) < NEAR_ZERO:
                raise ValueError(
                    f"singular matrix — LU backward substitution failed at row {i} (during inverse computation); matrix has no unique inverse"
                )
            x[i] = (y[i] - sum(U[i][j] * x[j] for j in range(i + 1, n))) / U[i][i]

        solved_columns.append(x)

    # solved_columns holds the inverse column by column; lay it out by rows.
    return [[solved_columns[c][r] for c in range(n)] for r in range(n)]
|
power_iteration
power_iteration(
m: Matrix,
max_iter: int = 1000,
tol: float = NEWTON_TOLERANCE,
) -> tuple[float, Vector]
Find dominant eigenvalue and eigenvector using power iteration.
Von Mises iteration (1929). Optimized with scaling to prevent overflow.
Parameters:
| Name |
Type |
Description |
Default |
m
|
Matrix
|
|
required
|
max_iter
|
int
|
|
1000
|
tol
|
float
|
|
NEWTON_TOLERANCE
|
Returns:
| Type |
Description |
tuple[float, Vector]
|
(eigenvalue, eigenvector) tuple
|
Source code in src\cds\math_utils\linalg.py
def power_iteration(
    m: Matrix,
    max_iter: int = 1000,
    tol: float = NEWTON_TOLERANCE,
) -> tuple[float, Vector]:
    """Find the dominant eigenvalue and eigenvector using power iteration.

    Von Mises iteration (1929). The iterate is rescaled on every step to
    prevent overflow, and the eigenvalue estimate is the Rayleigh quotient.

    Args:
        m: square matrix
        max_iter: iteration limit
        tol: convergence tolerance on the change of the eigenvalue estimate

    Returns:
        (eigenvalue, eigenvector) tuple
    """
    n = len(m)

    def apply(vec: Vector) -> Vector:
        """Return the matrix-vector product ``m * vec``."""
        return [sum(m[i][j] * vec[j] for j in range(n)) for i in range(n)]

    def largest_magnitude(vec: Vector) -> float:
        """Return the largest absolute entry of ``vec``."""
        return max(abs(entry) for entry in vec)

    # Start from the all-ones vector, scaled by its largest magnitude.
    current = [1.0] * n
    start_scale = largest_magnitude(current)
    current = [entry / start_scale for entry in current]

    estimate = 0.0
    for _ in range(max_iter):
        image = apply(current)

        # Prefer the Euclidean norm. CPython floats overflow to inf rather
        # than raising, so an infinite sum of squares (or an OverflowError on
        # other platforms) falls back to absolute-max scaling.
        squared_sum = sum(entry * entry for entry in image)
        if math.isinf(squared_sum):
            norm = largest_magnitude(image)
        else:
            try:
                norm = math.sqrt(squared_sum)
            except OverflowError:  # pragma: no cover - defensive for non-CPython libm
                norm = largest_magnitude(image)

        if norm < NEAR_ZERO:
            break

        candidate = [entry / norm for entry in image]

        # Rayleigh quotient (v^T A v) / (v^T v), valid for any normalization.
        numerator = sum(
            candidate[i] * sum(m[i][j] * candidate[j] for j in range(n)) for i in range(n)
        )
        denominator = sum(entry * entry for entry in candidate)
        if denominator > NEAR_ZERO:
            new_estimate = numerator / denominator
        else:
            new_estimate = 0.0

        if abs(new_estimate - estimate) < tol:
            return new_estimate, candidate

        estimate = new_estimate
        current = candidate

    return estimate, current
|
qr_decomposition
qr_decomposition(m: Matrix) -> tuple[Matrix, Matrix]
QR decomposition via Householder reflections.
Factorizes A (n×n) into an orthogonal matrix Q and upper-triangular R
such that A = Q R. Householder triangularization is backward stable and
preferred over classical Gram-Schmidt for numerical work.
Reference
Householder, A. S. (1958). "Unitary triangularization of a
nonsymmetric matrix." Journal of the ACM, 5(4), 339-342.
See also Golub & Van Loan, §5.2; Trefethen & Bau, Lecture 10.
Parameters:
| Name |
Type |
Description |
Default |
m
|
Matrix
|
|
required
|
Returns:
| Type |
Description |
tuple[Matrix, Matrix]
|
(Q, R) with Q orthogonal and R upper triangular
|
Source code in src\cds\math_utils\linalg.py
def qr_decomposition(m: Matrix) -> tuple[Matrix, Matrix]:
    """Factor a square matrix as ``A = Q R`` using Householder reflections.

    ``Q`` is built up as a product of reflectors ``H = I - 2 v v^T`` and ``R``
    is the input reduced to upper-triangular form by the same reflectors.
    Householder triangularization is backward stable and preferred over
    classical Gram-Schmidt for numerical work.

    Reference:
        Householder, A. S. (1958). "Unitary triangularization of a
        nonsymmetric matrix." Journal of the ACM, 5(4), 339-342.
        See also Golub & Van Loan, §5.2; Trefethen & Bau, Lecture 10.

    Args:
        m: square matrix (not modified; a row-wise copy is reduced)

    Returns:
        (Q, R) with Q orthogonal and R upper triangular
    """
    size = len(m)
    upper = [row[:] for row in m]
    ortho = identity(size)

    for k in range(size - 1):
        # Part of column k from the diagonal downwards.
        column = [upper[i][k] for i in range(k, size)]
        column_norm = math.sqrt(sum(c * c for c in column))
        if column_norm < NEAR_ZERO:
            # Column is already (numerically) zero below and on the diagonal.
            continue

        # alpha takes the sign opposite to the leading entry, so the
        # subtraction below adds magnitudes instead of cancelling them.
        if column[0] >= 0:
            alpha = -column_norm
        else:
            alpha = column_norm

        reflector = column[:]
        reflector[0] -= alpha
        reflector_norm = math.sqrt(sum(c * c for c in reflector))
        if reflector_norm < NEAR_ZERO:  # pragma: no cover - unreachable: norm_x>0 implies norm_v>0
            continue
        reflector = [c / reflector_norm for c in reflector]

        span = size - k

        # R <- H R, touching only rows k..size-1.
        for j in range(size):
            dot_vr = sum(reflector[i] * upper[k + i][j] for i in range(span))
            for i in range(span):
                upper[k + i][j] -= 2.0 * reflector[i] * dot_vr

        # Q <- Q H, touching only columns k..size-1.
        for i in range(size):
            dot_qv = sum(ortho[i][k + j] * reflector[j] for j in range(span))
            for j in range(span):
                ortho[i][k + j] -= 2.0 * dot_qv * reflector[j]

    return ortho, upper
|
solve_linear
solve_linear(A: Matrix, b: Vector) -> Vector
Solve Ax = b using PLU decomposition.
Solves LUx = Pb.
Raises:
| Type |
Description |
ValueError
|
|
Source code in src\cds\math_utils\linalg.py
def solve_linear(A: Matrix, b: Vector) -> Vector:
    """Solve the linear system ``A x = b`` through a PLU factorization.

    The factors returned by ``lu_decomposition`` are used to solve
    ``L U x = P b`` in two triangular sweeps.

    Args:
        A: square coefficient matrix
        b: right-hand-side vector

    Returns:
        The solution vector ``x``.

    Raises:
        ValueError: if matrix is singular (a diagonal entry of ``U`` is
            smaller in magnitude than ``NEAR_ZERO``)
    """
    size = len(A)
    perm, lower, upper = lu_decomposition(A)

    # Permuted right-hand side P b.
    rhs = [sum(perm[i][j] * b[j] for j in range(size)) for i in range(size)]

    # Forward sweep L y = P b.  No division is performed here, so L is
    # presumably unit lower-triangular - confirm against lu_decomposition.
    y = [0.0] * size
    for i in range(size):
        known = sum(lower[i][j] * y[j] for j in range(i))
        y[i] = rhs[i] - known

    # Backward sweep U x = y, from the last row upwards.
    x = [0.0] * size
    for i in reversed(range(size)):
        pivot = upper[i][i]
        if abs(pivot) < NEAR_ZERO:
            raise ValueError(
                f"singular matrix — LU backward substitution failed at row {i}; matrix has no unique inverse"
            )
        known = sum(upper[i][j] * x[j] for j in range(i + 1, size))
        x[i] = (y[i] - known) / pivot
    return x
|
transpose
transpose(m: Matrix) -> Matrix
Return the transpose of a 2-D matrix (rows <-> columns).
Source code in src\cds\math_utils\linalg.py
def transpose(m: Matrix) -> Matrix:
    """Swap rows and columns of a 2-D matrix.

    The number of output rows is taken from the length of the first input
    row. An empty matrix yields an empty list.
    """
    if not m:
        return []
    width = len(m[0])
    return [[row[col] for row in m] for col in range(width)]
|
Numerical Integration
Deterministic quadrature rules: trapezoid, Simpson 1/3 and 3/8, Gauss–Legendre, Romberg, and adaptive Simpson.
cds.numerical_integration
Deterministic numerical quadrature — Newton-Cotes, Romberg, Gauss-Legendre.
Complements cds.montecarlo (stochastic integration) and cds.diffeq
(ODE integration) with classical deterministic integration rules.
Classes
QuadratureResult
dataclass
Result of an adaptive numerical integration.
Attributes:
| Name |
Type |
Description |
value |
float
|
computed approximation of the integral
|
method |
str
|
name of the quadrature rule used
|
n_eval |
int
|
number of integrand evaluations performed
|
error_estimate |
float
|
internal estimate of the truncation error (nan if
unavailable for the chosen rule)
|
Source code in src\cds\numerical_integration\quadrature.py
@dataclass
class QuadratureResult:
    """Outcome of a quadrature routine that reports diagnostics.

    Attributes:
        value: computed approximation of the integral
        method: name of the quadrature rule used
        n_eval: number of integrand evaluations performed
        error_estimate: internal estimate of the truncation error (``nan`` if
            unavailable for the chosen rule)
    """

    # Approximate value of the integral.
    value: float
    # Identifier of the rule that produced ``value``.
    method: str
    # How many times the integrand was called.
    n_eval: int
    # Truncation-error estimate, or nan when the rule provides none.
    error_estimate: float
|
Functions:
adaptive_simpson
adaptive_simpson(
f: Callable[[float], float],
a: float,
b: float,
tol: float = 1e-10,
max_depth: int = 50,
) -> QuadratureResult
Adaptive recursive Simpson quadrature.
Recursively bisects subintervals where the local error estimate (the
difference between Simpson over the whole interval and over its halves)
exceeds tol, concentrating work where the integrand is hard. [Lyness 1969]
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
|
required
|
a
|
float
|
|
required
|
b
|
float
|
|
required
|
tol
|
float
|
desired absolute tolerance
|
1e-10
|
max_depth
|
int
|
maximum recursion depth to bound cost on hard integrands
|
50
|
Returns:
| Type |
Description |
QuadratureResult
|
A QuadratureResult carrying the number of integrand evaluations.
|
Raises:
| Type |
Description |
ValueError
|
|
RuntimeError
|
if the computed integral is NaN (likely a divergent integrand); exhausting max_depth does not raise, the current estimate is accepted instead.
|
Source code in src\cds\numerical_integration\quadrature.py
def adaptive_simpson(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-10,
    max_depth: int = 50,
) -> QuadratureResult:
    """Integrate ``f`` over ``[a, b]`` with adaptive recursive Simpson quadrature.

    An interval is bisected whenever the local error indicator (Simpson on
    the two halves minus Simpson on the whole) exceeds the tolerance allotted
    to it, so work concentrates where the integrand is hard. [Lyness 1969]
    The tolerance is halved at each bisection.

    Args:
        f: integrand
        a: lower limit
        b: upper limit
        tol: desired absolute tolerance
        max_depth: maximum recursion depth to bound cost on hard integrands;
            once a branch has used up its depth budget its current estimate
            is accepted as-is

    Returns:
        :class:`QuadratureResult` carrying the number of integrand
        evaluations; ``error_estimate`` is always ``nan``.

    Raises:
        ValueError: if ``max_depth < 1``.
        RuntimeError: if the computed integral is NaN (likely a divergent
            integrand).
    """
    if max_depth < 1:
        raise ValueError("max_depth must be >= 1")

    evaluations = 0

    def _sample(x: float) -> float:
        # Every integrand call goes through here so it can be counted.
        nonlocal evaluations
        evaluations += 1
        return f(x)

    def _panel(lo: float, hi: float, f_lo: float, f_mid: float, f_hi: float) -> float:
        # Simpson's rule on a single interval [lo, hi].
        return (hi - lo) / 6.0 * (f_lo + 4.0 * f_mid + f_hi)

    def _refine(
        lo: float,
        hi: float,
        f_lo: float,
        f_hi: float,
        f_mid: float,
        coarse: float,
        budget: int,
        eps: float,
    ) -> float:
        mid = 0.5 * (lo + hi)
        f_left_mid = _sample(0.5 * (lo + mid))
        f_right_mid = _sample(0.5 * (mid + hi))
        left = _panel(lo, mid, f_lo, f_left_mid, f_mid)
        right = _panel(mid, hi, f_mid, f_right_mid, f_hi)
        delta = left + right - coarse
        # Stop when (a) delta is NaN - recursing further could branch all the
        # way to the depth limit, so let the NaN reach the top-level guard;
        # (b) the depth budget is spent; or (c) the Lyness criterion
        # |delta| <= 15 * eps holds.  All three return the corrected estimate.
        if math.isnan(delta) or budget <= 0 or abs(delta) <= 15.0 * eps:
            return left + right + delta / 15.0
        left_total = _refine(lo, mid, f_lo, f_mid, f_left_mid, left, budget - 1, 0.5 * eps)
        right_total = _refine(mid, hi, f_mid, f_hi, f_right_mid, right, budget - 1, 0.5 * eps)
        return left_total + right_total

    f_a = _sample(a)
    f_b = _sample(b)
    f_centre = _sample(0.5 * (a + b))
    first_guess = _panel(a, b, f_a, f_centre, f_b)
    integral = _refine(a, b, f_a, f_b, f_centre, first_guess, max_depth, tol)
    if math.isnan(integral):
        raise RuntimeError("adaptive_simpson produced NaN (likely divergent integrand)")
    return QuadratureResult(
        value=integral,
        method="adaptive_simpson",
        n_eval=evaluations,
        error_estimate=math.nan,
    )
|
gaussian_quadrature
gaussian_quadrature(
f: Callable[[float], float],
a: float,
b: float,
n: int = 5,
) -> float
Gauss-Legendre quadrature with n nodes.
Exact for polynomials of degree up to 2n - 1. The [-1, 1] rule is
affinely mapped onto [a, b]. [Gauss 1814]
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
|
required
|
a
|
float
|
|
required
|
b
|
float
|
upper limit (may be less than a)
|
required
|
n
|
int
|
number of Gauss-Legendre nodes (>= 1)
|
5
|
Returns:
| Type |
Description |
float
|
Approximation of the integral.
|
Raises:
| Type |
Description |
ValueError
|
|
Source code in src\cds\numerical_integration\quadrature.py
def gaussian_quadrature(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 5,
) -> float:
    """Approximate the integral of ``f`` over ``[a, b]`` with an ``n``-node
    Gauss-Legendre rule.

    Exact for polynomials of degree up to ``2n - 1``. The reference rule on
    ``[-1, 1]`` is carried onto ``[a, b]`` by the affine map
    ``x = half * node + mid``. [Gauss 1814]

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of Gauss-Legendre nodes (``>= 1``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n < 1``.
    """
    if n < 1:
        raise ValueError("n must be >= 1")

    mid = 0.5 * (a + b)
    half = 0.5 * (b - a)

    weighted_sum = 0.0
    for node, weight in _gauss_legendre_nodes(n):
        sample_point = half * node + mid
        weighted_sum += weight * f(sample_point)
    # The Jacobian of the affine map is the half-width.
    return half * weighted_sum
|
romberg
romberg(
f: Callable[[float], float],
a: float,
b: float,
tol: float = 1e-10,
max_iter: int = 20,
) -> QuadratureResult
Romberg integration via Richardson extrapolation on the trapezoidal rule.
Builds a triangular table where column k is O(h^{2k}) accurate.
Halting is driven by the relative/absolute change in the extrapolated
diagonal. [Romberg 1955]
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
|
required
|
a
|
float
|
|
required
|
b
|
float
|
|
required
|
tol
|
float
|
convergence tolerance on successive diagonal estimates
|
1e-10
|
max_iter
|
int
|
maximum number of extrapolation levels (each adds one row)
|
20
|
Returns:
| Type |
Description |
QuadratureResult
|
A QuadratureResult with an internal error estimate.
|
Raises:
| Type |
Description |
ValueError
|
|
Source code in src\cds\numerical_integration\quadrature.py
def romberg(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-10,
    max_iter: int = 20,
) -> QuadratureResult:
    """Romberg integration via Richardson extrapolation on the trapezoidal rule.

    Builds a triangular table where column ``k`` is ``O(h^{2k})`` accurate.
    Halting is driven by the relative/absolute change in the extrapolated
    diagonal. [Romberg 1955]

    Args:
        f: integrand
        a: lower limit
        b: upper limit
        tol: convergence tolerance on successive diagonal estimates
        max_iter: maximum number of table rows, counting the initial
            single-panel trapezoid; at most ``max_iter - 1`` refinements are
            performed

    Returns:
        :class:`QuadratureResult` with an internal error estimate. The
        estimate is the change between the last two diagonal entries; it is
        ``inf`` when no refinement was performed (``max_iter == 1``). The
        best available value is returned even if ``tol`` was not reached.

    Raises:
        ValueError: if ``max_iter < 1``.
    """
    if max_iter < 1:
        raise ValueError("max_iter must be >= 1")
    width = b - a
    # Total integrand evaluations across all levels: the initial trapezoid
    # samples both endpoints (2), then level k adds 2^(k-1) new midpoints.
    n_eval = 2
    # R[0][0]: single trapezoid
    r: list[list[float]] = [[0.5 * width * (f(a) + f(b))]]
    best = r[0][0]
    error_est = math.inf
    for k in range(1, max_iter):
        # Trapezoid with 2^k panels reusing the 2^(k-1) level: only the new
        # odd-indexed nodes need to be evaluated.
        panels = 1 << (k - 1)
        h = width / (1 << k)
        total = 0.0
        for i in range(1, panels + 1):
            total += f(a + (2 * i - 1) * h)
        n_eval += panels
        t_k = 0.5 * r[k - 1][0] + h * total
        row = [t_k]
        for j in range(1, k + 1):
            # Richardson extrapolation factor 4^j / (4^j - 1)
            factor = 1 << (2 * j)  # 4^j
            row.append((factor * row[j - 1] - r[k - 1][j - 1]) / (factor - 1))
        r.append(row)
        error_est = abs(row[k] - best)
        best = row[k]
        # Mixed criterion: absolute for |best| < 1, relative otherwise.
        if error_est <= tol * max(1.0, abs(best)):
            break
    return QuadratureResult(
        value=best,
        method="romberg",
        n_eval=n_eval,
        error_estimate=error_est,
    )
|
simpson
simpson(
f: Callable[[float], float],
a: float,
b: float,
n: int = 1000,
) -> float
Composite Simpson's 1/3 rule.
Closed Newton-Cotes of order 2; error O(h⁴). Requires an even number of
panels so that every group of two panels spans one parabola. [Simpson 1743]
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
|
required
|
a
|
float
|
|
required
|
b
|
float
|
upper limit (may be less than a)
|
required
|
n
|
int
|
number of panels (must be even and >= 2)
|
1000
|
Returns:
| Type |
Description |
float
|
Approximation of the integral.
|
Raises:
| Type |
Description |
ValueError
|
if n is not an even number >= 2.
|
Source code in src\cds\numerical_integration\quadrature.py
def simpson(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 1000,
) -> float:
    """Integrate ``f`` over ``[a, b]`` with the composite Simpson 1/3 rule.

    Closed Newton-Cotes of order 2; error ``O(h⁴)``. The panel count must be
    even because each parabola spans a pair of panels. [Simpson 1743]

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of panels (must be even and ``>= 2``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n`` is not an even number ``>= 2``.
    """
    if not (n >= 2 and n % 2 == 0):
        raise ValueError("n must be an even integer >= 2")

    step = (b - a) / n
    acc = f(a) + f(b)
    for idx in range(1, n):
        # Interior nodes alternate weights 4, 2, 4, ... starting with 4.
        if idx % 2 == 1:
            weight = 4.0
        else:
            weight = 2.0
        acc += weight * f(a + idx * step)
    return (step / 3.0) * acc
|
simpson_38
simpson_38(
f: Callable[[float], float],
a: float,
b: float,
n: int = 999,
) -> float
Composite Simpson's 3/8 rule.
Closed Newton-Cotes of order 3 over groups of three panels; error O(h⁴).
Useful as a companion to the 1/3 rule when n is a multiple of 3.
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
|
required
|
a
|
float
|
|
required
|
b
|
float
|
upper limit (may be less than a)
|
required
|
n
|
int
|
number of panels (must be a multiple of 3 and >= 3)
|
999
|
Returns:
| Type |
Description |
float
|
Approximation of the integral.
|
Raises:
| Type |
Description |
ValueError
|
if n is not a multiple of 3 >= 3.
|
Source code in src\cds\numerical_integration\quadrature.py
def simpson_38(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 999,
) -> float:
    """Integrate ``f`` over ``[a, b]`` with the composite Simpson 3/8 rule.

    Closed Newton-Cotes of order 3 applied to groups of three panels; error
    ``O(h⁴)``. A companion to the 1/3 rule for panel counts divisible by 3.

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of panels (must be a multiple of 3 and ``>= 3``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n`` is not a multiple of 3 ``>= 3``.
    """
    if not (n >= 3 and n % 3 == 0):
        raise ValueError("n must be a multiple of 3 and >= 3")

    step = (b - a) / n
    acc = f(a) + f(b)
    for idx in range(1, n):
        # Nodes shared by two neighbouring groups get weight 2; all other
        # interior nodes get weight 3.
        if idx % 3 == 0:
            weight = 2.0
        else:
            weight = 3.0
        acc += weight * f(a + idx * step)
    return (3.0 * step / 8.0) * acc
|
trapezoid
trapezoid(
f: Callable[[float], float],
a: float,
b: float,
n: int = 1000,
) -> float
Composite trapezoidal rule.
Approximates ∫_a^b f(x) dx with n equal panels. Closed Newton-Cotes
of order 1; error O(h²) for twice-differentiable integrands. [Cotes 1722]
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
|
required
|
a
|
float
|
|
required
|
b
|
float
|
upper limit (may be less than a)
|
required
|
n
|
int
|
number of panels (n >= 1)
|
1000
|
Returns:
| Type |
Description |
float
|
Approximation of the integral.
|
Raises:
| Type |
Description |
ValueError
|
|
Source code in src\cds\numerical_integration\quadrature.py
def trapezoid(
    f: Callable[[float], float],
    a: float,
    b: float,
    n: int = 1000,
) -> float:
    """Integrate ``f`` over ``[a, b]`` with the composite trapezoidal rule.

    Uses ``n`` equal panels to approximate ``∫_a^b f(x) dx``. Closed
    Newton-Cotes of order 1; error ``O(h²)`` for twice-differentiable
    integrands. [Cotes 1722]

    Args:
        f: integrand
        a: lower limit
        b: upper limit (may be less than ``a``)
        n: number of panels (``n >= 1``)

    Returns:
        Approximation of the integral.

    Raises:
        ValueError: if ``n < 1``.
    """
    if n < 1:
        raise ValueError("n must be >= 1")

    step = (b - a) / n
    # The two endpoints carry half weight, every interior node full weight.
    acc = 0.5 * (f(a) + f(b))
    for idx in range(1, n):
        acc += f(a + idx * step)
    return step * acc
|
Differential Equations
Initial-value-problem solvers: Euler, midpoint, RK4, adaptive RK45, and a system-of-ODEs integrator.
cds.diffeq
Ordinary differential equation solvers — Euler, RK4, RK45, leapfrog.
Classes
ODESolution
dataclass
Result of an ODE integration.
Source code in src\cds\diffeq\solvers.py
@dataclass
class ODESolution:
    """Trajectory and bookkeeping produced by a scalar ODE solver."""

    # Time points, starting with the initial time.
    t: list[float]
    # Solution values; y[i] belongs to t[i].
    y: list[float]
    # Name of the integration scheme that produced the trajectory.
    method: str
    # Number of steps taken by the solver.
    steps: int
|
Functions:
euler_method
euler_method(
f: Callable[[float, float], float],
t0: float,
y0: float,
t_end: float,
dt: float = RK45_DEFAULT_DT,
) -> ODESolution
Euler's method for dy/dt = f(t, y).
First-order explicit method. Local truncation error O(dt²),
global error O(dt). [Euler 1768]
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float, float], float]
|
right-hand side function f(t, y)
|
required
|
t0
|
float
|
|
required
|
y0
|
float
|
|
required
|
t_end
|
float
|
|
required
|
dt
|
float
|
|
RK45_DEFAULT_DT
|
Source code in src\cds\diffeq\solvers.py
def euler_method(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution:
    """Euler's method for dy/dt = f(t, y).

    First-order explicit method. Local truncation error O(dt²),
    global error O(dt). [Euler 1768]

    Args:
        f: right-hand side function f(t, y)
        t0: initial time
        y0: initial value y(t0)
        t_end: end time
        dt: time step (must be positive when ``t_end`` lies ahead of ``t0``)

    Returns:
        ODESolution holding the visited times and values (including the
        initial point), the method name ``"euler"`` and the step count.

    Raises:
        ValueError: if ``dt <= 0`` while there is an interval to integrate.
    """
    # A non-positive step can never reach t_end: the loop below would spin
    # forever while the output lists grow without bound.  Only reject it when
    # the loop would actually run, so the trivial t_end <= t0 case still works.
    if dt <= 0 and t0 < t_end - LOOP_EPSILON:
        raise ValueError("dt must be > 0")
    t_vals = [t0]
    y_vals = [y0]
    t, y = t0, y0
    steps = 0
    while t < t_end - LOOP_EPSILON:
        # Shorten the last step so the integration lands exactly on t_end.
        h = min(dt, t_end - t)
        y = y + h * f(t, y)
        t = t + h
        t_vals.append(t)
        y_vals.append(y)
        steps += 1
    return ODESolution(t=t_vals, y=y_vals, method="euler", steps=steps)
|
midpoint_method
midpoint_method(
f: Callable[[float, float], float],
t0: float,
y0: float,
t_end: float,
dt: float = RK45_DEFAULT_DT,
) -> ODESolution
Explicit midpoint method (2nd-order Runge-Kutta).
Local truncation error O(dt³), global error O(dt²).
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float, float], float]
|
right-hand side function f(t, y)
|
required
|
t0
|
float
|
|
required
|
y0
|
float
|
|
required
|
t_end
|
float
|
|
required
|
dt
|
float
|
|
RK45_DEFAULT_DT
|
Source code in src\cds\diffeq\solvers.py
def midpoint_method(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution:
    """Explicit midpoint method (2nd-order Runge-Kutta).

    Local truncation error O(dt³), global error O(dt²).

    Args:
        f: right-hand side function f(t, y)
        t0: initial time
        y0: initial value y(t0)
        t_end: end time
        dt: time step (must be positive when ``t_end`` lies ahead of ``t0``)

    Returns:
        ODESolution holding the visited times and values (including the
        initial point), the method name ``"midpoint"`` and the step count.

    Raises:
        ValueError: if ``dt <= 0`` while there is an interval to integrate.
    """
    # A non-positive step can never reach t_end: the loop below would spin
    # forever while the output lists grow without bound.  Only reject it when
    # the loop would actually run, so the trivial t_end <= t0 case still works.
    if dt <= 0 and t0 < t_end - LOOP_EPSILON:
        raise ValueError("dt must be > 0")
    t_vals = [t0]
    y_vals = [y0]
    t, y = t0, y0
    steps = 0
    while t < t_end - LOOP_EPSILON:
        # Shorten the last step so the integration lands exactly on t_end.
        h = min(dt, t_end - t)
        k1 = f(t, y)
        # Slope re-evaluated at the midpoint reached with a half Euler step.
        k2 = f(t + h / 2, y + h * k1 / 2)
        y = y + h * k2
        t = t + h
        t_vals.append(t)
        y_vals.append(y)
        steps += 1
    return ODESolution(t=t_vals, y=y_vals, method="midpoint", steps=steps)
|
rk4
rk4(
f: Callable[[float, float], float],
t0: float,
y0: float,
t_end: float,
dt: float = RK45_DEFAULT_DT,
) -> ODESolution
Classical 4th-order Runge-Kutta method.
Local truncation error O(dt⁵), global error O(dt⁴). [Runge 1895, Kutta 1901]
The standard Butcher tableau
0 |
1/2 | 1/2
1/2 | 0 1/2
1 | 0 0 1
----|----------------
| 1/6 1/3 1/3 1/6
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float, float], float]
|
right-hand side function f(t, y)
|
required
|
t0
|
float
|
|
required
|
y0
|
float
|
|
required
|
t_end
|
float
|
|
required
|
dt
|
float
|
|
RK45_DEFAULT_DT
|
Source code in src\cds\diffeq\solvers.py
def rk4(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> ODESolution:
    """Classical 4th-order Runge-Kutta method.

    Local truncation error O(dt⁵), global error O(dt⁴). [Runge 1895, Kutta 1901]

    The standard Butcher tableau::

        0   |
        1/2 | 1/2
        1/2 | 0   1/2
        1   | 0   0   1
        ----|----------------
            | 1/6 1/3 1/3 1/6

    Args:
        f: right-hand side function f(t, y)
        t0: initial time
        y0: initial value y(t0)
        t_end: end time
        dt: time step (must be positive when ``t_end`` lies ahead of ``t0``)

    Returns:
        ODESolution holding the visited times and values (including the
        initial point), the method name ``"rk4"`` and the step count.

    Raises:
        ValueError: if ``dt <= 0`` while there is an interval to integrate.
    """
    # A non-positive step can never reach t_end: the loop below would spin
    # forever while the output lists grow without bound.  Only reject it when
    # the loop would actually run, so the trivial t_end <= t0 case still works.
    if dt <= 0 and t0 < t_end - LOOP_EPSILON:
        raise ValueError("dt must be > 0")
    t_vals = [t0]
    y_vals = [y0]
    t, y = t0, y0
    steps = 0
    while t < t_end - LOOP_EPSILON:
        # Shorten the last step so the integration lands exactly on t_end.
        h = min(dt, t_end - t)
        k1 = f(t, y)
        k2 = f(t + h / 2, y + h * k1 / 2)
        k3 = f(t + h / 2, y + h * k2 / 2)
        k4 = f(t + h, y + h * k3)
        # Weighted average of the four slopes (weights 1/6, 1/3, 1/3, 1/6).
        y = y + (h / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
        t = t + h
        t_vals.append(t)
        y_vals.append(y)
        steps += 1
    return ODESolution(t=t_vals, y=y_vals, method="rk4", steps=steps)
|
rk45
rk45(
f: Callable[[float, float], float],
t0: float,
y0: float,
t_end: float,
dt: float = RK45_DEFAULT_DT,
atol: float = RK45_DEFAULT_ATOL,
rtol: float = RK45_DEFAULT_RTOL,
) -> ODESolution
Dormand-Prince (RK45) adaptive step-size method.
Computes 4th and 5th order estimates to approximate local error
and adjust the step size automatically. [Dormand & Prince 1980]
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float, float], float]
|
|
required
|
t0
|
float
|
|
required
|
y0
|
float
|
|
required
|
t_end
|
float
|
|
required
|
dt
|
float
|
|
RK45_DEFAULT_DT
|
atol
|
float
|
|
RK45_DEFAULT_ATOL
|
rtol
|
float
|
|
RK45_DEFAULT_RTOL
|
Source code in src\cds\diffeq\solvers.py
def rk45(
    f: Callable[[float, float], float],
    t0: float,
    y0: float,
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
    atol: float = RK45_DEFAULT_ATOL,
    rtol: float = RK45_DEFAULT_RTOL,
) -> ODESolution:
    """Dormand-Prince (RK45) adaptive step-size method.

    Computes 4th and 5th order estimates to approximate local error
    and adjust the step size automatically. [Dormand & Prince 1980]

    Args:
        f: right-hand side f(t, y)
        t0: initial time
        y0: initial value
        t_end: end time
        dt: initial time step
        atol: absolute tolerance
        rtol: relative tolerance

    Returns:
        ODESolution holding the accepted times and 5th-order values
        (including the initial point), the method name ``"rk45"`` and the
        number of accepted steps.

    Raises:
        RuntimeError: if the step size falls below the machine-precision
            floor, or if the local error estimate becomes NaN (non-finite
            right-hand side or diverging solution).
    """
    import math  # local import: only needed for the NaN guard below

    # Dormand-Prince Butcher Tableau coefficients
    a = [0, 1 / 5, 3 / 10, 4 / 5, 8 / 9, 1, 1]
    b = [
        [],
        [1 / 5],
        [3 / 40, 9 / 40],
        [44 / 45, -56 / 15, 32 / 9],
        [19372 / 6561, -25360 / 2187, 64448 / 6561, -212 / 729],
        [9017 / 3168, -355 / 33, 46732 / 5247, 49 / 176, -5103 / 18656],
        [35 / 384, 0, 500 / 1113, 125 / 192, -2187 / 6784, 11 / 84],
    ]
    # Weights of the 5th-order solution (c5) and the embedded 4th-order one (c4).
    c5 = [35 / 384, 0, 500 / 1113, 125 / 192, -2187 / 6784, 11 / 84, 0]
    c4 = [5179 / 57600, 0, 7571 / 16695, 393 / 640, -92097 / 339200, 187 / 2100, 1 / 40]

    t, y = t0, y0
    t_vals = [t]
    y_vals = [y]
    h = dt
    steps = 0

    # Absolute step-size floor, scaled to the integration span, below which no
    # further progress can be made (a "machine precision floor"). Prevents the
    # adaptive loop from spinning forever on stiff/diverging problems.
    span = abs(t_end - t0) if t_end != t0 else 1.0
    eps_floor = 16 * sys.float_info.epsilon * max(abs(t), span)

    while t < t_end - LOOP_EPSILON:
        # Never step past the end of the interval.
        if t + h > t_end:
            h = t_end - t

        k = [0.0] * 7
        k[0] = f(t, y)
        for i in range(1, 7):
            y_next = y + h * sum(b[i][j] * k[j] for j in range(i))
            k[i] = f(t + a[i] * h, y_next)

        # Estimate 5th and 4th order solutions
        y5 = y + h * sum(c5[i] * k[i] for i in range(7))
        y4 = y + h * sum(c4[i] * k[i] for i in range(7))

        # Local error estimate
        error = abs(y5 - y4)
        # A NaN estimate compares False against everything: the step would be
        # rejected, h would be grown instead of shrunk, then clipped back to
        # t_end - t, and the loop would never terminate.  Fail loudly instead.
        if math.isnan(error):
            raise RuntimeError(
                "rk45 local error estimate is NaN (non-finite right-hand side or diverging solution)"
            )
        tolerance = atol + rtol * abs(y)

        if error <= tolerance:
            # Step accepted
            t += h
            y = y5
            t_vals.append(t)
            y_vals.append(y)
            steps += 1

        # Adjust step size (after accepted and rejected steps alike)
        if error > 0:
            h_opt = h * (tolerance / error) ** 0.2
            h = min(max(RK45_STEP_SHRINK * h, RK45_STEP_SAFETY * h_opt), RK45_STEP_GROW * h)
        else:
            h *= 10.0  # Error is exactly zero: grow the step aggressively

        # Precision floor to prevent infinite loop: either the step size has
        # shrunk below the span-scaled epsilon floor, or it has become so small
        # that adding it to t makes no progress (t + h == t).
        if h < eps_floor or t + h == t:
            raise RuntimeError("Step size h reached machine precision floor.")

    return ODESolution(t=t_vals, y=y_vals, method="rk45", steps=steps)
|
solve_system
solve_system(
f: Callable[[float, list[float]], list[float]],
t0: float,
y0: list[float],
t_end: float,
dt: float = RK45_DEFAULT_DT,
) -> tuple[list[float], list[list[float]]]
RK4 for systems of ODEs: dy/dt = f(t, y) where y is a vector.
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float, list[float]], list[float]]
|
right-hand side f(t, y) returning a list of derivatives
|
required
|
t0
|
float
|
|
required
|
y0
|
list[float]
|
|
required
|
t_end
|
float
|
|
required
|
dt
|
float
|
|
RK45_DEFAULT_DT
|
Returns:
| Type |
Description |
tuple[list[float], list[list[float]]]
|
(t_values, y_values) where y_values[i] is the state vector at t_values[i]
|
Source code in src\cds\diffeq\solvers.py
def solve_system(
    f: Callable[[float, list[float]], list[float]],
    t0: float,
    y0: list[float],
    t_end: float,
    dt: float = RK45_DEFAULT_DT,
) -> tuple[list[float], list[list[float]]]:
    """RK4 for systems of ODEs: dy/dt = f(t, y) where y is a vector.

    Args:
        f: right-hand side f(t, y) returning a list of derivatives
        t0: initial time
        y0: initial state vector (copied, never mutated)
        t_end: end time
        dt: time step (must be positive when ``t_end`` lies ahead of ``t0``)

    Returns:
        (t_values, y_values) where y_values[i] is the state vector at t_values[i]

    Raises:
        ValueError: if ``dt <= 0`` while there is an interval to integrate.
    """
    # A non-positive step can never reach t_end: the loop below would spin
    # forever while the output lists grow without bound.  Only reject it when
    # the loop would actually run, so the trivial t_end <= t0 case still works.
    if dt <= 0 and t0 < t_end - LOOP_EPSILON:
        raise ValueError("dt must be > 0")
    n = len(y0)
    t_vals = [t0]
    y_vals = [list(y0)]
    t = t0
    y = list(y0)
    while t < t_end - LOOP_EPSILON:
        # Shorten the last step so the integration lands exactly on t_end.
        h = min(dt, t_end - t)
        k1 = f(t, y)
        y_tmp = [y[i] + h * k1[i] / 2 for i in range(n)]
        k2 = f(t + h / 2, y_tmp)
        y_tmp = [y[i] + h * k2[i] / 2 for i in range(n)]
        k3 = f(t + h / 2, y_tmp)
        y_tmp = [y[i] + h * k3[i] for i in range(n)]
        k4 = f(t + h, y_tmp)
        # Component-wise weighted average of the four slopes.
        y = [y[i] + (h / 6) * (k1[i] + 2 * k2[i] + 2 * k3[i] + k4[i]) for i in range(n)]
        t = t + h
        t_vals.append(t)
        # Store a copy so later steps cannot alias earlier states.
        y_vals.append(list(y))
    return t_vals, y_vals
|
Monte Carlo Methods
Stochastic integration: π estimation, generic Monte-Carlo integration, 1D/2D random walks, and Buffon's needle.
cds.montecarlo
Monte Carlo methods — estimation, integration, random walks.
Functions:
buffon_needle
buffon_needle(
needle_length: float = 1.0,
line_spacing: float = 2.0,
n_throws: int = 100000,
seed: int | None = None,
) -> MCResult
Buffon's needle experiment for estimating π.
Drop a needle of length L onto parallel lines spaced D apart.
P(crossing) = 2L / (πD), so π ≈ 2L / (D * P(crossing)).
Reference: Buffon (1777).
Parameters:
| Name |
Type |
Description |
Default |
needle_length
|
float
|
length of the needle (must be <= line_spacing)
|
1.0
|
line_spacing
|
float
|
distance between parallel lines
|
2.0
|
n_throws
|
int
|
|
100000
|
seed
|
int | None
|
|
None
|
Raises:
| Type |
Description |
ValueError
|
if needle_length > line_spacing
|
Source code in src\cds\montecarlo\methods.py
def buffon_needle(
    needle_length: float = 1.0,
    line_spacing: float = 2.0,
    n_throws: int = 100_000,
    seed: int | None = None,
) -> MCResult:
    """Estimate π with Buffon's needle experiment.

    A needle of length L is dropped onto parallel lines spaced D apart.
    P(crossing) = 2L / (πD), so π ≈ 2L / (D * P(crossing)).

    Reference: Buffon (1777).

    Args:
        needle_length: length of the needle (must be <= line_spacing)
        line_spacing: distance between parallel lines
        n_throws: number of needle drops
        seed: optional random seed

    Returns:
        MCResult with the π estimate, the number of throws and a standard
        error propagated from the binomial crossing frequency. When no
        throw crosses a line, the estimate and standard error are both 0.0.

    Raises:
        ValueError: if needle_length > line_spacing
    """
    if needle_length > line_spacing:
        raise ValueError("needle must be shorter than line spacing")

    rng = random.Random(seed)
    half_needle = needle_length / 2
    hits = 0
    for _ in range(n_throws):
        # Distance from the needle centre to the nearest line, then the
        # needle's angle relative to the lines.
        offset = rng.uniform(0, line_spacing / 2)
        theta = rng.uniform(0, math.pi)
        reach = half_needle * math.sin(theta)
        if reach >= offset:
            hits += 1

    if hits == 0:
        # No crossing observed: the ratio below would divide by zero.
        return MCResult(estimate=0.0, samples=n_throws, std_error=0.0)

    # From here on hits >= 1, hence the crossing frequency is positive.
    frequency = hits / n_throws
    pi_estimate = (2 * needle_length) / (line_spacing * frequency)
    frequency_se = math.sqrt(frequency * (1 - frequency) / n_throws)
    # First-order error propagation through estimate = c / frequency.
    pi_se = (2 * needle_length * frequency_se) / (line_spacing * frequency * frequency)
    return MCResult(estimate=pi_estimate, samples=n_throws, std_error=pi_se)
|
estimate_pi
estimate_pi(
n_samples: int = 100000, seed: int | None = None
) -> MCResult
Estimate π using the unit-circle method (Parallelized).
Throw random points into the unit square [0,1]×[0,1].
Fraction inside the quarter-circle ≈ π/4.
Parameters:
| Name |
Type |
Description |
Default |
n_samples
|
int
|
|
100000
|
seed
|
int | None
|
|
None
|
Source code in src\cds\montecarlo\methods.py
def estimate_pi(n_samples: int = 100_000, seed: int | None = None) -> MCResult:
    """Estimate π using the unit-circle method (Parallelized).

    Random points are thrown into the unit square [0,1]×[0,1]; the fraction
    landing inside the quarter-circle approximates π/4. The samples are
    split across one worker process per CPU core (never more workers than
    samples), and worker ``i`` is seeded with ``seed + i``. For a fixed
    ``seed`` the result therefore depends on the number of workers.

    Args:
        n_samples: number of random points
        seed: optional random seed; drawn from ``os.urandom`` when omitted

    Returns:
        MCResult with the π estimate, the sample count and the binomial
        standard error. A non-positive ``n_samples`` yields an all-zero
        result without starting any worker.
    """
    if n_samples <= 0:
        return MCResult(0.0, n_samples, 0.0)

    workers = min(multiprocessing.cpu_count(), n_samples)
    # Even split; the last worker also takes the remainder.
    base, leftover = divmod(n_samples, workers)
    chunks = [base] * workers
    chunks[-1] += leftover

    if seed is None:
        import os
        import sys

        seed = int.from_bytes(os.urandom(4), sys.byteorder)
    worker_seeds = [seed + offset for offset in range(workers)]
    tasks = list(zip(chunks, worker_seeds))

    hits = 0
    with ProcessPoolExecutor(max_workers=workers) as pool:
        for partial in pool.map(_pi_worker, tasks):
            hits += partial

    fraction = hits / n_samples
    if n_samples > 1:
        std_error = 4.0 * math.sqrt(fraction * (1 - fraction) / n_samples)
    else:
        std_error = 0.0
    return MCResult(estimate=4.0 * fraction, samples=n_samples, std_error=std_error)
|
mc_integrate
mc_integrate(
f: Callable[[float], float],
a: float,
b: float,
n_samples: int = 100000,
seed: int | None = None,
) -> MCResult
Monte Carlo integration of f over [a, b].
E[f(X)] * (b-a) where X ~ Uniform(a, b).
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
|
required
|
a
|
float
|
|
required
|
b
|
float
|
|
required
|
n_samples
|
int
|
number of random evaluations
|
100000
|
seed
|
int | None
|
|
None
|
Source code in src\cds\montecarlo\methods.py
def mc_integrate(
    f: Callable[[float], float],
    a: float,
    b: float,
    n_samples: int = 100_000,
    seed: int | None = None,
) -> MCResult:
    """Monte Carlo integration of f over [a, b].

    E[f(X)] * (b-a) where X ~ Uniform(a, b).

    Args:
        f: function to integrate
        a: lower bound
        b: upper bound
        n_samples: number of random evaluations
        seed: optional random seed

    Returns:
        MCResult with the integral estimate, the sample count and the
        standard error of the estimate (0.0 when the sample variance is not
        positive). A non-positive ``n_samples`` yields an all-zero result
        without evaluating ``f``.
    """
    # Nothing to sample: avoid the division by zero below and mirror how
    # estimate_pi treats a non-positive sample count.
    if n_samples <= 0:
        return MCResult(estimate=0.0, samples=n_samples, std_error=0.0)
    rng = random.Random(seed)
    total = 0.0
    total_sq = 0.0
    width = b - a
    for _ in range(n_samples):
        x = a + rng.random() * width
        val = f(x)
        total += val
        total_sq += val * val
    mean_val = total / n_samples
    estimate = mean_val * width
    # Population variance E[f²] - E[f]²; rounding can push it slightly below
    # zero, which the guard on the next line absorbs.
    var = (total_sq / n_samples - mean_val**2) if n_samples > 1 else 0.0
    se = width * math.sqrt(var / n_samples) if var > 0 else 0.0
    return MCResult(estimate=estimate, samples=n_samples, std_error=se)
|
random_walk_1d
random_walk_1d(
steps: int,
step_size: float = 1.0,
seed: int | None = None,
) -> list[float]
1D symmetric random walk.
At each step, move +step_size or -step_size with equal probability.
Parameters:
| Name |
Type |
Description |
Default |
steps
|
int
|
|
required
|
step_size
|
float
|
|
1.0
|
seed
|
int | None
|
|
None
|
Returns:
| Type |
Description |
list[float]
|
list of positions at each step (length = steps + 1)
|
Source code in src\cds\montecarlo\methods.py
| def random_walk_1d(
steps: int,
step_size: float = 1.0,
seed: int | None = None,
) -> list[float]:
"""1D symmetric random walk.
At each step, move +step_size or -step_size with equal probability.
Args:
steps: number of steps
step_size: size of each step
seed: optional random seed
Returns:
list of positions at each step (length = steps + 1)
"""
rng = random.Random(seed)
positions = [0.0]
pos = 0.0
for _ in range(steps):
pos += step_size if rng.random() < 0.5 else -step_size
positions.append(pos)
return positions
|
random_walk_2d
random_walk_2d(
steps: int,
step_size: float = 1.0,
seed: int | None = None,
) -> list[tuple[float, float]]
2D random walk on a plane.
At each step, move in a random direction (uniform angle).
Parameters:
| Name |
Type |
Description |
Default |
steps
|
int
|
|
required
|
step_size
|
float
|
|
1.0
|
seed
|
int | None
|
|
None
|
Returns:
| Type |
Description |
list[tuple[float, float]]
|
list of (x, y) positions at each step (length = steps + 1)
|
Source code in src\cds\montecarlo\methods.py
| def random_walk_2d(
steps: int,
step_size: float = 1.0,
seed: int | None = None,
) -> list[tuple[float, float]]:
"""2D random walk on a plane.
At each step, move in a random direction (uniform angle).
Args:
steps: number of steps
step_size: size of each step
seed: optional random seed
Returns:
list of (x, y) positions at each step (length = steps + 1)
"""
rng = random.Random(seed)
positions: list[tuple[float, float]] = [(0.0, 0.0)]
x, y = 0.0, 0.0
for _ in range(steps):
angle = rng.uniform(0, 2 * math.pi)
x += step_size * math.cos(angle)
y += step_size * math.sin(angle)
positions.append((x, y))
return positions
|
Optimization
Gradient descent, Newton's method, Adam, and golden-section line search.
cds.optimization
Numerical optimization algorithms.
Functions:
adam
adam(
f: Callable[..., float],
x0: float,
lr: float = ADAM_DEFAULT_LR,
beta1: float = ADAM_DEFAULT_BETAS[0],
beta2: float = ADAM_DEFAULT_BETAS[1],
eps: float = ADAM_DEFAULT_EPS,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 10000,
h: float = DEFAULT_FD_STEP,
state: AdamState | None = None,
grad_f: (
Callable[..., float | list[float]] | None
) = None,
) -> OptResult[float]
adam(
f: Callable[..., float],
x0: list[float],
lr: float = ADAM_DEFAULT_LR,
beta1: float = ADAM_DEFAULT_BETAS[0],
beta2: float = ADAM_DEFAULT_BETAS[1],
eps: float = ADAM_DEFAULT_EPS,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 10000,
h: float = DEFAULT_FD_STEP,
state: AdamState | None = None,
grad_f: (
Callable[..., float | list[float]] | None
) = None,
) -> OptResult[list[float]]
adam(
f: Callable[..., float],
x0: float | list[float],
lr: float = ADAM_DEFAULT_LR,
beta1: float = ADAM_DEFAULT_BETAS[0],
beta2: float = ADAM_DEFAULT_BETAS[1],
eps: float = ADAM_DEFAULT_EPS,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 10000,
h: float = DEFAULT_FD_STEP,
state: AdamState | None = None,
grad_f: (
Callable[..., float | list[float]] | None
) = None,
) -> OptResult[float] | OptResult[list[float]]
Minimize using the Adam optimizer (adaptive learning rate) for scalars or vectors.
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[..., float]
|
|
required
|
x0
|
float | list[float]
|
|
required
|
lr
|
float
|
|
ADAM_DEFAULT_LR
|
beta1
|
float
|
|
ADAM_DEFAULT_BETAS[0]
|
beta2
|
float
|
|
ADAM_DEFAULT_BETAS[1]
|
eps
|
float
|
numerical stability constant
|
ADAM_DEFAULT_EPS
|
tol
|
float
|
|
DEFAULT_TOLERANCE
|
max_iter
|
int
|
|
10000
|
h
|
float
|
step for numerical gradient
|
DEFAULT_FD_STEP
|
state
|
AdamState | None
|
optional dictionary to resume optimization (contains m, v, t)
|
None
|
grad_f
|
Callable[..., float | list[float]] | None
|
optional gradient function. If None, numerical gradient is used.
|
None
|
Source code in src\cds\optimization\minimize.py
def adam(
    f: Callable[..., float],
    x0: float | list[float],
    lr: float = ADAM_DEFAULT_LR,
    beta1: float = ADAM_DEFAULT_BETAS[0],
    beta2: float = ADAM_DEFAULT_BETAS[1],
    eps: float = ADAM_DEFAULT_EPS,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
    state: AdamState | None = None,
    grad_f: Callable[..., float | list[float]] | None = None,
) -> OptResult[float] | OptResult[list[float]]:
    """Minimize using the Adam optimizer (adaptive learning rate) for scalars or vectors.

    Each iteration evaluates the gradient, stops if it is smaller than
    ``tol`` (absolute value for a scalar, ``_magnitude`` for a vector), and
    otherwise applies one bias-corrected Adam update.

    Args:
        f: objective function
        x0: starting point (a number selects the scalar branch, anything
            else is copied into a list and handled as a vector)
        lr: learning rate
        beta1: first moment decay
        beta2: second moment decay
        eps: numerical stability constant
        tol: convergence tolerance
        max_iter: iteration limit
        h: step for numerical gradient
        state: optional dictionary to resume optimization (contains m, v, t);
            ``t`` is the number of moment updates already applied
        grad_f: optional gradient function. If None, numerical gradient is used.

    Returns:
        An OptResult with the final point, the objective value there, the
        number of iterations used by this call (``max_iter`` when the limit
        was hit), the convergence flag, and the optimizer state to pass back
        in as ``state`` when resuming.
    """
    if isinstance(x0, (int, float)):
        x_scalar: float = float(x0)
        if state is None:
            m_s = 0.0
            v_s = 0.0
            t_start = 1
        else:
            m_s = float(cast(float, state["m"]))
            v_s = float(cast(float, state["v"]))
            t_start = int(state["t"]) + 1
        # ``last_t`` counts the moment updates actually applied so far. It is
        # advanced only after an update, so a run that converges before
        # updating at step ``i`` reports ``i - 1`` and a resumed run does not
        # skip a bias-correction exponent.
        last_t = t_start - 1
        for i in range(t_start, t_start + max_iter):
            if grad_f is not None:
                grad_s: float = float(cast(float, grad_f(x_scalar)))
            else:
                grad_s = _compute_gradient(f, x_scalar, h)
            if abs(grad_s) < tol:
                return OptResult(
                    x=x_scalar,
                    value=f(x_scalar),
                    iterations=i - t_start + 1,
                    converged=True,
                    state={"m": m_s, "v": v_s, "t": last_t},
                )
            m_s = beta1 * m_s + (1 - beta1) * grad_s
            v_s = beta2 * v_s + (1 - beta2) * grad_s**2
            # Bias correction compensates for the zero-initialised moments.
            m_hat = m_s / (1 - beta1**i)
            v_hat = v_s / (1 - beta2**i)
            x_scalar -= lr * m_hat / (math.sqrt(v_hat) + eps)
            last_t = i
        return OptResult(
            x=x_scalar,
            value=f(x_scalar),
            iterations=max_iter,
            converged=False,
            state={"m": m_s, "v": v_s, "t": last_t},
        )
    else:
        # Work on copies so neither the caller's x0 nor a passed-in state
        # dictionary is mutated.
        x_list: list[float] = list(x0)
        if state is None:
            m_l = [0.0] * len(x_list)
            v_l = [0.0] * len(x_list)
            t_start = 1
        else:
            m_l = list(cast(list[float], state["m"]))
            v_l = list(cast(list[float], state["v"]))
            t_start = int(state["t"]) + 1
        last_t = t_start - 1
        for i in range(t_start, t_start + max_iter):
            if grad_f is not None:
                grad_l: list[float] = list(cast(list[float], grad_f(x_list)))
            else:
                grad_l = _compute_gradient(f, x_list, h)
            if _magnitude(grad_l) < tol:
                return OptResult(
                    x=x_list,
                    value=f(x_list),
                    iterations=i - t_start + 1,
                    converged=True,
                    state={"m": m_l, "v": v_l, "t": last_t},
                )
            # The bias-correction denominators depend only on the step index,
            # so compute them once per iteration rather than per coordinate.
            bias1 = 1 - beta1**i
            bias2 = 1 - beta2**i
            for j in range(len(x_list)):
                m_l[j] = beta1 * m_l[j] + (1 - beta1) * grad_l[j]
                v_l[j] = beta2 * v_l[j] + (1 - beta2) * grad_l[j] ** 2
                m_hat = m_l[j] / bias1
                v_hat = v_l[j] / bias2
                x_list[j] -= lr * m_hat / (math.sqrt(v_hat) + eps)
            last_t = i
        return OptResult(
            x=x_list,
            value=f(x_list),
            iterations=max_iter,
            converged=False,
            state={"m": m_l, "v": v_l, "t": last_t},
        )
|
gradient_descent
gradient_descent(
f: Callable[..., float],
x0: float,
lr: float = GD_DEFAULT_LR,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 10000,
h: float = DEFAULT_FD_STEP,
) -> OptResult[float]
gradient_descent(
f: Callable[..., float],
x0: list[float],
lr: float = GD_DEFAULT_LR,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 10000,
h: float = DEFAULT_FD_STEP,
) -> OptResult[list[float]]
gradient_descent(
f: Callable[..., float],
x0: float | list[float],
lr: float = GD_DEFAULT_LR,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 10000,
h: float = DEFAULT_FD_STEP,
) -> OptResult[float] | OptResult[list[float]]
Minimize a scalar or vector function using gradient descent.
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[..., float]
|
|
required
|
x0
|
float | list[float]
|
starting point (scalar or list of floats)
|
required
|
lr
|
float
|
|
GD_DEFAULT_LR
|
tol
|
float
|
convergence tolerance on gradient magnitude
|
DEFAULT_TOLERANCE
|
max_iter
|
int
|
|
10000
|
h
|
float
|
step size for numerical gradient
|
DEFAULT_FD_STEP
|
Source code in src\cds\optimization\minimize.py
def gradient_descent(
    f: Callable[..., float],
    x0: float | list[float],
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
    h: float = DEFAULT_FD_STEP,
) -> OptResult[float] | OptResult[list[float]]:
    """Run fixed-rate gradient descent on a scalar or vector objective.

    The gradient is obtained from ``_compute_gradient`` with step ``h``; the
    search stops as soon as ``_magnitude`` of the gradient is below ``tol``.

    Args:
        f: objective function
        x0: starting point (scalar or list of floats)
        lr: learning rate
        tol: convergence tolerance on gradient magnitude
        max_iter: iteration limit
        h: step size for numerical gradient

    Returns:
        An OptResult with the final point, the objective value there, the
        number of updates applied, and whether the tolerance was met.
    """
    if isinstance(x0, (int, float)):
        # Scalar path: separate local names keep the scalar and vector types
        # apart for the type checker.
        point: float = x0
        done = 0
        while done < max_iter:
            slope = _compute_gradient(f, point, h)
            if _magnitude(slope) < tol:
                return OptResult(x=point, value=f(point), iterations=done, converged=True)
            point = _update_x(point, slope, lr)
            done += 1
        return OptResult(x=point, value=f(point), iterations=max_iter, converged=False)

    # Vector path: start from a copy so the caller's list is not rebound.
    point_vec: list[float] = list(x0)
    done_vec = 0
    while done_vec < max_iter:
        slope_vec = _compute_gradient(f, point_vec, h)
        if _magnitude(slope_vec) < tol:
            return OptResult(x=point_vec, value=f(point_vec), iterations=done_vec, converged=True)
        point_vec = _update_x(point_vec, slope_vec, lr)
        done_vec += 1
    return OptResult(x=point_vec, value=f(point_vec), iterations=max_iter, converged=False)
|
line_search
line_search(
f: Callable[[float], float],
a: float,
b: float,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 100,
) -> OptResult[float]
Golden section search for minimum in [a, b].
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
unimodal function to minimize
|
required
|
a
|
float
|
|
required
|
b
|
float
|
|
required
|
tol
|
float
|
convergence tolerance on interval width
|
DEFAULT_TOLERANCE
|
max_iter
|
int
|
|
100
|
Source code in src\cds\optimization\minimize.py
def line_search(
    f: Callable[[float], float],
    a: float,
    b: float,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 100,
) -> OptResult[float]:
    """Locate a minimum of ``f`` on [a, b] by golden-section search.

    Args:
        f: unimodal function to minimize
        a: left bound
        b: right bound
        tol: convergence tolerance on interval width
        max_iter: iteration limit

    Returns:
        An OptResult whose ``x`` is the midpoint of the final bracket;
        ``converged`` is True when the bracket became narrower than ``tol``
        within ``max_iter`` iterations.
    """
    ratio = (math.sqrt(5) - 1) / 2  # inverse golden ratio, about 0.618
    lo, hi = a, b
    for iteration in range(max_iter):
        if abs(hi - lo) < tol:
            centre = (lo + hi) / 2
            return OptResult(x=centre, value=f(centre), iterations=iteration, converged=True)
        # Two interior probes placed symmetrically inside the bracket.
        probe_near_lo = hi - ratio * (hi - lo)
        probe_near_hi = lo + ratio * (hi - lo)
        if f(probe_near_lo) < f(probe_near_hi):
            hi = probe_near_hi
        else:
            lo = probe_near_lo
    centre = (lo + hi) / 2
    return OptResult(x=centre, value=f(centre), iterations=max_iter, converged=False)
|
newton_method
newton_method(
f: Callable[[float], float],
x0: float,
tol: float = NEWTON_TOLERANCE,
max_iter: int = 1000,
h_base: float = NEWTON_DERIVATIVE_STEP,
) -> OptResult[float]
Find a root of f using the Newton-Raphson method with adaptive step size.
Parameters:
| Name |
Type |
Description |
Default |
f
|
Callable[[float], float]
|
function whose root to find
|
required
|
x0
|
float
|
|
required
|
tol
|
float
|
|
NEWTON_TOLERANCE
|
max_iter
|
int
|
|
1000
|
h_base
|
float
|
base step for numerical derivative
|
NEWTON_DERIVATIVE_STEP
|
Source code in src\cds\optimization\minimize.py
def newton_method(
    f: Callable[[float], float],
    x0: float,
    tol: float = NEWTON_TOLERANCE,
    max_iter: int = 1000,
    h_base: float = NEWTON_DERIVATIVE_STEP,
) -> OptResult[float]:
    """Find a root of f using the Newton-Raphson method.

    The derivative is estimated numerically by ``_compute_gradient`` using
    ``h_base`` as the step.

    Args:
        f: function whose root to find
        x0: starting point
        tol: convergence tolerance on ``|f(x)|``
        max_iter: iteration limit
        h_base: base step for numerical derivative

    Returns:
        An OptResult with the final iterate, ``f`` evaluated there, the number
        of Newton updates actually applied, and whether ``|f(x)| < tol``.
        The search stops early, unconverged, when the derivative estimate is
        smaller than ``NEAR_ZERO`` because no Newton step can be taken.
    """
    x = float(x0)
    steps_taken = 0
    for i in range(max_iter):
        fx = f(x)
        if abs(fx) < tol:
            return OptResult(x=x, value=fx, iterations=i, converged=True)
        # Newton's derivative comes from the same central-difference kernel as
        # the gradient methods (``h_base`` defaults to NEWTON_DERIVATIVE_STEP).
        dfx = _compute_gradient(f, x, h_base)
        if abs(dfx) < NEAR_ZERO:
            # Flat spot: dividing by the derivative would blow up.
            break
        x -= fx / dfx
        steps_taken = i + 1
    # Evaluate the final iterate once, so that the update made on the last
    # allowed iteration is still checked against the tolerance, and report the
    # updates really performed instead of always claiming ``max_iter``.
    fx = f(x)
    return OptResult(
        x=x,
        value=fx,
        iterations=steps_taken,
        converged=abs(fx) < tol,
    )
|
Machine Learning
Pure-Python neural networks: an MLP with Adam-based training.
cds.ml
Machine Learning module for CDS.
Classes
MLP
Multi-Layer Perceptron (Pure Python).
Source code in src\cds\ml\neural.py
class MLP:
    """Multi-Layer Perceptron (Pure Python).

    Holds an ordered list of layers, exposes their weights and biases as one
    flat parameter vector, and trains them with the ``adam`` optimizer using
    gradients accumulated by each layer's ``backward`` pass.
    """

    def __init__(self, layers: list[Layer]):
        self.layers = layers
        # Optimizer state returned by the last train() call, fed back into
        # adam() on the next call so training can resume.
        self.optimizer_state: AdamState | None = None

    def predict(self, x: list[float]) -> list[float]:
        """Compute the network output."""
        curr = x
        for layer in self.layers:
            curr = layer.forward(curr)
        return curr

    def get_parameters(self) -> list[float]:
        """Flatten all weights and biases into a single list.

        Order: for each layer, its weight rows in order, then its biases.
        """
        params: list[float] = []
        for layer in self.layers:
            for row in layer.weights:
                params.extend(row)
            params.extend(layer.biases)
        return params

    def set_parameters(self, params: list[float]) -> None:
        """Unflatten parameters back into weights and biases.

        Values are written in place, in the order produced by
        ``get_parameters``.
        """
        idx = 0
        for layer in self.layers:
            for row in layer.weights:
                for j in range(len(row)):
                    row[j] = params[idx]
                    idx += 1
            for i in range(len(layer.biases)):
                layer.biases[i] = params[idx]
                idx += 1

    def get_gradients(self) -> list[float]:
        """Flatten all accumulated gradients into a single list.

        Uses the same ordering as ``get_parameters``.
        """
        grads: list[float] = []
        for layer in self.layers:
            for row in layer.grad_weights:
                grads.extend(row)
            grads.extend(layer.grad_biases)
        return grads

    def zero_grads(self) -> None:
        """Reset all parameter gradients to zero."""
        for layer in self.layers:
            for row in layer.grad_weights:
                for j in range(len(row)):
                    row[j] = 0.0
            for i in range(len(layer.grad_biases)):
                layer.grad_biases[i] = 0.0

    def train(
        self,
        X: list[list[float]],
        y: list[list[float]],
        epochs: int = 100,
        lr: float = GD_DEFAULT_LR,
    ) -> dict[str, float | bool]:
        """Train the network using the Adam optimizer with backpropagation and state persistence.

        Args:
            X: training inputs, one feature vector per sample
            y: training targets, one target vector per sample
            epochs: iteration limit handed to ``adam`` as ``max_iter``
            lr: learning rate handed to ``adam``

        Returns:
            A dict with keys ``final_loss``, ``iterations`` and ``converged``
            copied from the optimizer result.

        Raises:
            ValueError: if ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        n_samples = len(X)
        # Both checks guard the 1/N normalisation below: an empty set would
        # divide by zero, and a length mismatch would be silently truncated by
        # zip() while still dividing by len(X).
        if n_samples == 0:
            raise ValueError("training set must be non-empty")
        if len(y) != n_samples:
            raise ValueError(
                f"X and y must have the same number of samples (got {n_samples} and {len(y)})"
            )

        def loss_fn(params: list[float]) -> float:
            """Mean squared error over the training set for these parameters."""
            self.set_parameters(params)
            total_loss = 0.0
            for xi, yi in zip(X, y):
                pred = self.predict(xi)
                total_loss += sum((p - target) ** 2 for p, target in zip(pred, yi))
            return total_loss / n_samples

        def grad_fn(params: list[float]) -> list[float]:
            """Parameter gradients via backpropagation over the training set."""
            self.set_parameters(params)
            self.zero_grads()
            for xi, yi in zip(X, y):
                # The forward pass stores the activations backward() needs.
                pred = self.predict(xi)
                # MSE gradient: dL/dp = 2/N * (p - y)
                curr_grad = [2.0 * (p - target) / n_samples for p, target in zip(pred, yi)]
                for layer in reversed(self.layers):
                    curr_grad = layer.backward(curr_grad)
            return self.get_gradients()

        p0 = self.get_parameters()
        res = adam(loss_fn, p0, lr=lr, max_iter=epochs, state=self.optimizer_state, grad_f=grad_fn)
        # adam()'s overload for a list x0 returns OptResult[list[float]], so
        # res.x is statically a list[float] — no cast or isinstance narrowing.
        self.set_parameters(res.x)
        self.optimizer_state = res.state  # Store state for next training call
        return {"final_loss": res.value, "iterations": res.iterations, "converged": res.converged}
|
Methods:
predict
predict(x: list[float]) -> list[float]
Compute the network output.
Source code in src\cds\ml\neural.py
def predict(self, x: list[float]) -> list[float]:
    """Feed ``x`` through every layer in order and return the last output."""
    activation = x
    for stage in self.layers:
        activation = stage.forward(activation)
    return activation
|
get_parameters
get_parameters() -> list[float]
Flatten all weights and biases into a single list.
Source code in src\cds\ml\neural.py
def get_parameters(self) -> list[float]:
    """Return every weight and bias as one flat list.

    Layers are visited in order; each contributes its weight rows first and
    its biases afterwards.
    """
    flat: list[float] = []
    for layer in self.layers:
        flat += [weight for row in layer.weights for weight in row]
        flat += list(layer.biases)
    return flat
|
set_parameters
set_parameters(params: list[float]) -> None
Unflatten parameters back into weights and biases.
Source code in src\cds\ml\neural.py
def set_parameters(self, params: list[float]) -> None:
    """Write a flat parameter list back into the layers, in place.

    Consumes ``params`` in the order used when flattening: per layer, the
    weight rows first and then the biases.
    """
    cursor = 0
    for layer in self.layers:
        for row in layer.weights:
            for col in range(len(row)):
                row[col] = params[cursor]
                cursor += 1
        bias_vector = layer.biases
        for k in range(len(bias_vector)):
            bias_vector[k] = params[cursor]
            cursor += 1
|
get_gradients
get_gradients() -> list[float]
Flatten all accumulated gradients into a single list.
Source code in src\cds\ml\neural.py
def get_gradients(self) -> list[float]:
    """Return every accumulated gradient as one flat list.

    Per layer, the weight-gradient rows come first, followed by the bias
    gradients.
    """
    flat: list[float] = []
    for layer in self.layers:
        flat += [g for row in layer.grad_weights for g in row]
        flat += list(layer.grad_biases)
    return flat
|
zero_grads
Reset all parameter gradients to zero.
Source code in src\cds\ml\neural.py
def zero_grads(self) -> None:
    """Clear every accumulated weight and bias gradient, in place."""
    for layer in self.layers:
        for row in layer.grad_weights:
            for col in range(len(row)):
                row[col] = 0.0
        bias_grads = layer.grad_biases
        for k in range(len(bias_grads)):
            bias_grads[k] = 0.0
|
train
train(
X: list[list[float]],
y: list[list[float]],
epochs: int = 100,
lr: float = GD_DEFAULT_LR,
) -> dict[str, float | bool]
Train the network using the Adam optimizer with backpropagation and state persistence.
Source code in src\cds\ml\neural.py
def train(
    self,
    X: list[list[float]],
    y: list[list[float]],
    epochs: int = 100,
    lr: float = GD_DEFAULT_LR,
) -> dict[str, float | bool]:
    """Train the network using the Adam optimizer with backpropagation and state persistence.

    Args:
        X: training inputs, one feature vector per sample
        y: training targets, one target vector per sample
        epochs: iteration limit handed to ``adam`` as ``max_iter``
        lr: learning rate handed to ``adam``

    Returns:
        A dict with keys ``final_loss``, ``iterations`` and ``converged``
        copied from the optimizer result.

    Raises:
        ValueError: if ``X`` is empty or ``X`` and ``y`` differ in length.
    """
    n_samples = len(X)
    # Both checks guard the 1/N normalisation below: an empty set would divide
    # by zero, and a length mismatch would be silently truncated by zip()
    # while still dividing by len(X).
    if n_samples == 0:
        raise ValueError("training set must be non-empty")
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples (got {n_samples} and {len(y)})"
        )

    def loss_fn(params: list[float]) -> float:
        """Mean squared error over the training set for these parameters."""
        self.set_parameters(params)
        total_loss = 0.0
        for xi, yi in zip(X, y):
            pred = self.predict(xi)
            total_loss += sum((p - target) ** 2 for p, target in zip(pred, yi))
        return total_loss / n_samples

    def grad_fn(params: list[float]) -> list[float]:
        """Parameter gradients via backpropagation over the training set."""
        self.set_parameters(params)
        self.zero_grads()
        for xi, yi in zip(X, y):
            # The forward pass stores the activations backward() needs.
            pred = self.predict(xi)
            # MSE gradient: dL/dp = 2/N * (p - y)
            curr_grad = [2.0 * (p - target) / n_samples for p, target in zip(pred, yi)]
            for layer in reversed(self.layers):
                curr_grad = layer.backward(curr_grad)
        return self.get_gradients()

    p0 = self.get_parameters()
    res = adam(loss_fn, p0, lr=lr, max_iter=epochs, state=self.optimizer_state, grad_f=grad_fn)
    # adam()'s overload for a list x0 returns OptResult[list[float]], so
    # res.x is statically a list[float] — no cast or isinstance narrowing.
    self.set_parameters(res.x)
    self.optimizer_state = res.state  # Store state for next training call
    return {"final_loss": res.value, "iterations": res.iterations, "converged": res.converged}
|
Layer
A basic dense (fully-connected) neural network layer.
Source code in src\cds\ml\neural.py
class Layer:
    """A basic dense (fully-connected) neural network layer.

    Keeps one weight row and one bias per output unit, remembers the values
    of the latest forward pass, and accumulates parameter gradients across
    ``backward`` calls.
    """

    def __init__(self, input_size: int, output_size: int, activation: str = "relu"):
        """Create a layer with Xavier/Glorot-uniform weights and zero biases.

        ``activation`` is "relu" or "sigmoid"; any other value is treated as
        the identity.
        """
        # Xavier/Glorot bound for the uniform weight distribution.
        bound = (6.0 / (input_size + output_size)) ** 0.5
        self.weights = [
            [random.uniform(-bound, bound) for _ in range(input_size)]
            for _ in range(output_size)
        ]
        self.biases = [0.0] * output_size
        self.activation = activation
        # Cached by forward() for use in backward().
        self.last_x: list[float] = []
        self.last_z: list[float] = []
        self.last_a: list[float] = []
        # Gradient accumulators, shaped like weights / biases.
        self.grad_weights = [[0.0 for _ in range(input_size)] for _ in range(output_size)]
        self.grad_biases = [0.0] * output_size

    def forward(self, x: list[float]) -> list[float]:
        """Compute layer output for a single input vector and store state for backward pass."""
        self.last_x = x
        self.last_z = []
        self.last_a = []
        for unit, row in enumerate(self.weights):
            pre_activation = sum(w * xi for w, xi in zip(row, x)) + self.biases[unit]
            self.last_z.append(pre_activation)
            self.last_a.append(self._activate(pre_activation))
        return self.last_a

    def backward(self, grad_out: list[float]) -> list[float]:
        """Backpropagate error gradient through the layer.

        Adds this sample's contribution to ``grad_weights`` / ``grad_biases``
        and returns the gradient with respect to the layer input.
        """
        # dL/dz = dL/da * da/dz
        deltas = [
            upstream * self._activate_derivative(z, a)
            for upstream, z, a in zip(grad_out, self.last_z, self.last_a)
        ]
        # dL/dw_ij = dL/dz_i * x_j
        for unit, row in enumerate(self.weights):
            delta = deltas[unit]
            grad_row = self.grad_weights[unit]
            for col in range(len(row)):
                grad_row[col] += delta * self.last_x[col]
        # dL/db_i = dL/dz_i
        for unit in range(len(self.biases)):
            self.grad_biases[unit] += deltas[unit]
        # dL/dx_j = sum_i (dL/dz_i * w_ij)
        n_units = len(self.weights)
        return [
            sum(deltas[i] * self.weights[i][col] for i in range(n_units))
            for col in range(len(self.last_x))
        ]

    def _activate(self, z: float) -> float:
        kind = self.activation
        if kind == "relu":
            return max(0.0, z)
        if kind != "sigmoid":
            return z  # identity
        # Numerically stable logistic sigmoid: each branch only ever passes a
        # non-positive argument to exp(), so it cannot overflow. The except
        # clauses are a defensive fallback to the asymptotic limits for
        # platforms whose libm raises instead of underflowing to 0.0.
        if z >= 0:
            try:
                return 1.0 / (1.0 + math.exp(-z))
            except (OverflowError, ValueError):  # pragma: no cover - non-CPython libm
                return 1.0
        try:
            ez = math.exp(z)
        except (OverflowError, ValueError):  # pragma: no cover - non-CPython libm
            return 0.0
        return ez / (1.0 + ez)

    def _activate_derivative(self, z: float, a: float) -> float:
        kind = self.activation
        if kind == "relu":
            return 1.0 if z > 0 else 0.0
        if kind == "sigmoid":
            # Expressed through the activation value a = sigmoid(z).
            return a * (1.0 - a)
        return 1.0  # identity
|
Methods:
forward
forward(x: list[float]) -> list[float]
Compute layer output for a single input vector and store state for backward pass.
Source code in src\cds\ml\neural.py
def forward(self, x: list[float]) -> list[float]:
    """Evaluate the layer on one input vector.

    Stores the input, the pre-activations and the activations on the
    instance so that a following backward pass can use them, and returns the
    stored activation list.
    """
    self.last_x = x
    self.last_z = []
    self.last_a = []
    for unit, row in enumerate(self.weights):
        pre_activation = sum(w * xi for w, xi in zip(row, x)) + self.biases[unit]
        self.last_z.append(pre_activation)
        self.last_a.append(self._activate(pre_activation))
    return self.last_a
|
backward
backward(grad_out: list[float]) -> list[float]
Backpropagate error gradient through the layer.
Source code in src\cds\ml\neural.py
def backward(self, grad_out: list[float]) -> list[float]:
    """Propagate an output gradient back through the layer.

    Adds this call's contribution to ``grad_weights`` and ``grad_biases``
    (they accumulate across calls) and returns the gradient with respect to
    the input stored by the last forward pass.
    """
    # dL/dz = dL/da * da/dz
    deltas = [
        upstream * self._activate_derivative(z, a)
        for upstream, z, a in zip(grad_out, self.last_z, self.last_a)
    ]
    # dL/dw_ij = dL/dz_i * x_j
    for unit, row in enumerate(self.weights):
        delta = deltas[unit]
        grad_row = self.grad_weights[unit]
        for col in range(len(row)):
            grad_row[col] += delta * self.last_x[col]
    # dL/db_i = dL/dz_i
    for unit in range(len(self.biases)):
        self.grad_biases[unit] += deltas[unit]
    # dL/dx_j = sum_i (dL/dz_i * w_ij)
    n_units = len(self.weights)
    return [
        sum(deltas[i] * self.weights[i][col] for i in range(n_units))
        for col in range(len(self.last_x))
    ]
|
Signal Processing
DFT, radix-2 FFT/IFFT, convolution, and digital filters.
cds.signals
Signal processing tools.
Functions:
convolve
convolve(a: list[float], b: list[float]) -> list[float]
Linear convolution using the convolution theorem via FFT (O(N log N)).
Source code in src\cds\signals\processing.py
def convolve(a: list[float], b: list[float]) -> list[float]:
    """Linear convolution via the convolution theorem and FFT (O(N log N)).

    Both inputs are zero-padded to a common power-of-two length, transformed
    with ``fft``, multiplied point-wise and transformed back with ``ifft``.
    Returns the real parts of the first ``len(a) + len(b) - 1`` samples, or
    an empty list when either input is empty.
    """
    if not a or not b:
        return []
    out_len = len(a) + len(b) - 1
    # Smallest power of two that can hold the full linear convolution.
    padded_len = 1 << (out_len - 1).bit_length()
    spectrum_a = fft(list(a) + [0j] * (padded_len - len(a)))
    spectrum_b = fft(list(b) + [0j] * (padded_len - len(b)))
    product = [p * q for p, q in zip(spectrum_a, spectrum_b)]
    time_domain = ifft(product)
    # Drop the padding tail and the (numerically tiny) imaginary parts.
    return [sample.real for sample in time_domain[:out_len]]
|
dft
dft(signal: list[float | complex]) -> list[complex]
Discrete Fourier Transform (direct computation).
Parameters:
| Name |
Type |
Description |
Default |
signal
|
list[float | complex]
|
|
required
|
Returns:
| Type |
Description |
list[complex]
|
list of N complex frequency components
|
Source code in src\cds\signals\processing.py
| def dft(signal: list[float | complex]) -> list[complex]:
"""Discrete Fourier Transform (direct computation).
Args:
signal: input signal of length N
Returns:
list of N complex frequency components
"""
n = len(signal)
result = []
for k in range(n):
s = 0 + 0j
for t in range(n):
angle = -2 * math.pi * k * t / n
s += signal[t] * cmath.exp(1j * angle)
result.append(s)
return result
|
fft2
fft2(
matrix: list[list[float | complex]],
) -> list[list[complex]]
2-D Discrete Fourier Transform (O(N log N)).
Source code in src\cds\signals\processing.py
| def fft2(matrix: list[list[float | complex]]) -> list[list[complex]]:
"""2-D Discrete Fourier Transform (O(N log N))."""
rows = len(matrix)
if rows == 0:
raise ValueError("matrix must be non-empty")
cols = len(matrix[0])
if any(len(row) != cols for row in matrix):
raise ValueError("all rows must have the same length (ragged matrix detected)")
# Row-wise FFT
row_fft = [fft(list(row)) for row in matrix]
# Column-wise FFT
transposed = list(zip(*row_fft))
col_fft = [fft(list(col)) for col in transposed]
return [list(row) for row in zip(*col_fft)]
|
fft_radix2
fft_radix2(signal: list[float | complex]) -> list[complex]
Cooley-Tukey radix-2 FFT. Input length must be a power of 2.
Parameters:
| Name |
Type |
Description |
Default |
signal
|
list[float | complex]
|
input signal (length must be power of 2)
|
required
|
Returns:
| Type |
Description |
list[complex]
|
list of complex frequency components
|
Raises:
| Type |
Description |
ValueError
|
if length is not a power of 2
|
Source code in src\cds\signals\processing.py
| def fft_radix2(signal: list[float | complex]) -> list[complex]:
"""Cooley-Tukey radix-2 FFT. Input length must be a power of 2.
Args:
signal: input signal (length must be power of 2)
Returns:
list of complex frequency components
Raises:
ValueError: if length is not a power of 2
"""
n = len(signal)
if n == 0:
return []
if n & (n - 1) != 0:
raise ValueError(
f"signal length must be a power of 2 for FFT (got {n}); pad with zeros or use dft() for arbitrary lengths"
)
if n == 1:
return list(signal)
even = fft_radix2(signal[0::2])
odd = fft_radix2(signal[1::2])
result = [0 + 0j] * n
for k in range(n // 2):
w = cmath.exp(-2j * math.pi * k / n)
result[k] = even[k] + w * odd[k]
result[k + n // 2] = even[k] - w * odd[k]
return result
|
idft
idft(spectrum: list[float | complex]) -> list[complex]
Inverse Discrete Fourier Transform.
Parameters:
| Name |
Type |
Description |
Default |
spectrum
|
list[float | complex]
|
frequency-domain signal of length N
|
required
|
Returns:
| Type |
Description |
list[complex]
|
list of N complex time-domain samples
|
Source code in src\cds\signals\processing.py
| def idft(spectrum: list[float | complex]) -> list[complex]:
"""Inverse Discrete Fourier Transform.
Args:
spectrum: frequency-domain signal of length N
Returns:
list of N complex time-domain samples
"""
n = len(spectrum)
result = []
for t in range(n):
s = 0 + 0j
for k in range(n):
angle = 2 * math.pi * k * t / n
s += spectrum[k] * cmath.exp(1j * angle)
result.append(s / n)
return result
|
ifft2
ifft2(
spectrum: list[list[float | complex]],
) -> list[list[complex]]
Inverse 2-D DFT (O(N log N)).
Source code in src\cds\signals\processing.py
| def ifft2(spectrum: list[list[float | complex]]) -> list[list[complex]]:
"""Inverse 2-D DFT (O(N log N))."""
rows = len(spectrum)
if rows == 0:
raise ValueError("matrix must be non-empty")
cols = len(spectrum[0])
if any(len(row) != cols for row in spectrum):
raise ValueError("all rows must have the same length (ragged matrix detected)")
# Row-wise IFFT
row_inv = [ifft(list(row)) for row in spectrum]
# Column-wise IFFT
transposed = list(zip(*row_inv))
col_inv = [ifft(list(col)) for col in transposed]
return [list(row) for row in zip(*col_inv)]
|
low_pass_filter
low_pass_filter(
signal: list[float | complex], cutoff: int
) -> list[complex]
Simple frequency-domain low-pass filter.
Source code in src\cds\signals\processing.py
| def low_pass_filter(signal: list[float | complex], cutoff: int) -> list[complex]:
"""Simple frequency-domain low-pass filter."""
n = len(signal)
if n == 0:
return []
# Choose best transform
if (n & (n - 1) == 0) and n > 0:
spectrum = fft_radix2(signal)
inv_func = ifft
else:
spectrum = dft(signal)
inv_func = idft
for k in range(n):
if cutoff <= k <= n - cutoff:
spectrum[k] = 0 + 0j
return inv_func(spectrum)
|
power_spectrum
power_spectrum(
signal: list[float | complex],
) -> list[float]
Compute the power spectrum |X[k]|^2 / N.
Source code in src\cds\signals\processing.py
| def power_spectrum(signal: list[float | complex]) -> list[float]:
"""Compute the power spectrum |X[k]|^2 / N."""
n = len(signal)
if n == 0:
return []
# Use FFT if possible (O(N log N))
if (n & (n - 1) == 0) and n > 0:
spectrum = fft_radix2(signal)
else:
spectrum = dft(signal)
return [abs(x) ** 2 / n for x in spectrum]
|
Quantum Computing
Single- and multi-qubit state-vector simulation with O(1) sampling.
cds.quantum
Quantum computing simulation tools.
Classes
QuantumCircuit
dataclass
Simple circuit that applies gates sequentially to a single qubit.
Source code in src\cds\quantum\circuit.py
@dataclass
class QuantumCircuit:
    """Ordered list of single-qubit gates that are applied one after another."""

    # Gates in application order; each instance gets its own list.
    gates: list[QuantumGate] = field(default_factory=list)

    def add(self, gate: QuantumGate) -> QuantumCircuit:
        """Append ``gate`` to the end of the circuit and return the circuit itself."""
        self.gates.append(gate)
        return self

    def run(self, initial: Qubit | None = None) -> Qubit:
        """Apply every gate in order and return the resulting state.

        Starts from ``initial`` when one is supplied, otherwise from a
        default-constructed ``Qubit``.
        """
        state = initial if initial else Qubit()
        for gate in self.gates:
            state = gate.apply(state)
        return state

    def __len__(self) -> int:
        """Number of gates in the circuit."""
        return len(self.gates)
|
Methods:
add
add(gate: QuantumGate) -> QuantumCircuit
Append a gate to the circuit (returns self for fluent chaining).
Source code in src\cds\quantum\circuit.py
def add(self, gate: QuantumGate) -> QuantumCircuit:
    """Put ``gate`` at the end of the gate list and hand back the circuit.

    Returning ``self`` lets calls be chained.
    """
    queue = self.gates
    queue.append(gate)
    return self
|
run
run(initial: Qubit | None = None) -> Qubit
Apply all gates sequentially; starts from initial or |0> if None.
Source code in src\cds\quantum\circuit.py
def run(self, initial: Qubit | None = None) -> Qubit:
    """Apply every gate in order and return the resulting state.

    Starts from ``initial`` when one is supplied, otherwise from a
    default-constructed ``Qubit``.
    """
    state = initial if initial else Qubit()
    for gate in self.gates:
        state = gate.apply(state)
    return state
|
QuantumGate
dataclass
A 2x2 unitary gate stored as flat list [a, b, c, d].
Source code in src\cds\quantum\circuit.py
@dataclass
class QuantumGate:
    """A 2x2 gate whose entries are kept row-major in a flat list [a, b, c, d]."""

    # Label for the gate.
    name: str
    # Matrix entries in the order [row0col0, row0col1, row1col0, row1col1].
    matrix: list[complex]

    def apply(self, q: Qubit) -> Qubit:
        """Multiply the gate matrix into ``q`` and return the result as a new Qubit.

        The input qubit is left untouched.
        """
        m00, m01, m10, m11 = self.matrix
        return Qubit(
            alpha=m00 * q.alpha + m01 * q.beta,
            beta=m10 * q.alpha + m11 * q.beta,
        )
|
Methods:
apply
Apply this gate to q and return a new Qubit (state is not mutated).
Source code in src\cds\quantum\circuit.py
def apply(self, q: Qubit) -> Qubit:
    """Multiply the gate matrix into ``q`` and return the result as a new Qubit.

    The matrix is read row-major from ``self.matrix``; the input qubit is
    left untouched.
    """
    m00, m01, m10, m11 = self.matrix
    return Qubit(
        alpha=m00 * q.alpha + m01 * q.beta,
        beta=m10 * q.alpha + m11 * q.beta,
    )
|
Qubit
dataclass
Single qubit state as (alpha, beta) amplitudes.
Source code in src\cds\quantum\circuit.py
@dataclass
class Qubit:
    """State of one qubit, held as the amplitude pair (alpha, beta).

    The defaults (alpha = 1, beta = 0) describe the |0> state.
    """

    # Amplitude of |0>.
    alpha: complex = 1 + 0j
    # Amplitude of |1>.
    beta: complex = 0 + 0j

    def probabilities(self) -> tuple[float, float]:
        """Return (P(|0>), P(|1>)), the squared magnitudes of the amplitudes."""
        return (abs(self.alpha) ** 2, abs(self.beta) ** 2)

    def normalize(self) -> None:
        """Scale both amplitudes in place so the state has unit length.

        Leaves the state unchanged when its norm is not positive.
        """
        # Norm of a complex 2-vector: sqrt(|alpha|^2 + |beta|^2).
        squared_norm = (abs(self.alpha) ** 2) + (abs(self.beta) ** 2)
        norm = math.sqrt(squared_norm)
        if not norm > 0:
            return
        self.alpha /= norm
        self.beta /= norm
|
Methods:
probabilities
probabilities() -> tuple[float, float]
Return (P(|0>), P(|1>)) measurement probabilities for this qubit.
Source code in src\cds\quantum\circuit.py
def probabilities(self) -> tuple[float, float]:
    """Return (P(|0>), P(|1>)), the squared magnitudes of alpha and beta."""
    return (abs(self.alpha) ** 2, abs(self.beta) ** 2)
|
normalize
Renormalize the state amplitudes in-place to unit length.
Source code in src\cds\quantum\circuit.py
def normalize(self) -> None:
    """Scale both amplitudes in place so the state has unit length.

    Leaves the state unchanged when its norm is not positive.
    """
    # Norm of a complex 2-vector: sqrt(|alpha|^2 + |beta|^2). abs() maps each
    # complex amplitude to a real magnitude before squaring.
    squared_norm = (abs(self.alpha) ** 2) + (abs(self.beta) ** 2)
    norm = math.sqrt(squared_norm)
    if not norm > 0:
        return
    self.alpha /= norm
    self.beta /= norm
|
QuantumRegister
dataclass
N-qubit state vector. Amplitudes stored as list of 2^n complex numbers.
Source code in src\cds\quantum\multi_qubit.py
@dataclass
class QuantumRegister:
    """N-qubit state vector. Amplitudes stored as list of 2^n complex numbers."""

    n_qubits: int
    amplitudes: list[complex]

    @classmethod
    def zeros(cls, n: int) -> QuantumRegister:
        """All qubits in |0> state."""
        amps: list[complex] = [0 + 0j] * (2**n)
        amps[0] = 1 + 0j
        return cls(n_qubits=n, amplitudes=amps)

    @classmethod
    def from_bits(cls, n: int, value: int) -> QuantumRegister:
        """Computational basis state |value>.

        Raises:
            IndexError: if ``value`` is not in ``range(2**n)``.
        """
        size = 2**n
        # A negative index would silently wrap around to the end of the list
        # and select the wrong basis state, so reject it explicitly.
        if not 0 <= value < size:
            raise IndexError(f"basis state {value} out of range for {n} qubit(s)")
        amps: list[complex] = [0 + 0j] * size
        amps[value] = 1 + 0j
        return cls(n_qubits=n, amplitudes=amps)

    @property
    def size(self) -> int:
        """Number of amplitudes in the state vector (= 2**n_qubits)."""
        return len(self.amplitudes)

    def probabilities(self) -> list[float]:
        """List of |amplitude|^2 for each computational basis state."""
        return [abs(a) ** 2 for a in self.amplitudes]

    def normalize(self) -> None:
        """Renormalize the state vector in-place to unit length.

        A vector whose norm is exactly zero is left unchanged.
        """
        # hypot works on the magnitudes without squaring them first, so tiny
        # amplitudes do not underflow to a zero norm.
        norm = math.hypot(*(abs(a) for a in self.amplitudes))
        if norm > 0:
            self.amplitudes = [a / norm for a in self.amplitudes]

    def measure(self, seed: int | None = None) -> int:
        """Measure the register and collapse its state vector.

        Args:
            seed: optional seed for the random generator used for sampling.

        Returns:
            The index of the measured basis state. After the call that state
            has amplitude 1 and every other amplitude is 0.
        """
        rng = random.Random(seed)
        probs = self.probabilities()
        r = rng.random()
        cumulative = 0.0
        outcome: int | None = None
        last_possible: int | None = None
        for i, p in enumerate(probs):
            cumulative += p
            if p > 0:
                last_possible = i
                if r <= cumulative:
                    outcome = i
                    break
        if outcome is None:
            # The draw exceeded the total probability (rounding error or an
            # unnormalized state): pick the last state that can actually
            # occur rather than a zero-probability one.
            outcome = last_possible if last_possible is not None else len(probs) - 1
        # State collapse: measured state becomes 1.0, all others become 0.
        new_amps = [0.0 + 0j] * len(self.amplitudes)
        new_amps[outcome] = 1.0 + 0j
        self.amplitudes = new_amps
        return outcome

    def measure_shots(
        self,
        shots: int = 1000,
        seed: int | None = None,
    ) -> dict[str, int]:
        """Run multiple measurements, return counts as binary strings.

        The register itself is not collapsed; every shot samples the same
        probability distribution.

        Args:
            shots: number of samples to draw.
            seed: optional seed for the random generator used for sampling.

        Returns:
            Mapping from zero-padded binary label to the number of hits.
        """
        rng = random.Random(seed)
        counts: dict[str, int] = {}
        probs = self.probabilities()
        # Used when a draw exceeds the total probability: the last state that
        # can actually occur, or the last index if none can.
        fallback = max(
            (i for i, p in enumerate(probs) if p > 0),
            default=len(probs) - 1,
        )
        for _ in range(shots):
            r = rng.random()
            cumulative = 0.0
            result = fallback
            for i, p in enumerate(probs):
                cumulative += p
                if r < cumulative:
                    result = i
                    break
            label = format(result, f"0{self.n_qubits}b")
            counts[label] = counts.get(label, 0) + 1
        return counts

    def expectation(self) -> float:
        """Expected value treating basis index as eigenvalue."""
        return sum(i * abs(a) ** 2 for i, a in enumerate(self.amplitudes))
|
Attributes
size
property
Number of amplitudes in the state vector (= 2**n_qubits).
Methods:
zeros
classmethod
zeros(n: int) -> QuantumRegister
All qubits in |0> state.
Source code in src\cds\quantum\multi_qubit.py
@classmethod
def zeros(cls, n: int) -> QuantumRegister:
    """Build an ``n``-qubit register with all weight on basis state 0."""
    dimension = 2**n
    state: list[complex] = [0 + 0j] * dimension
    state[0] = 1 + 0j
    register = cls(n_qubits=n, amplitudes=state)
    return register
|
from_bits
classmethod
from_bits(n: int, value: int) -> QuantumRegister
Computational basis state |value>.
Source code in src\cds\quantum\multi_qubit.py
@classmethod
def from_bits(cls, n: int, value: int) -> QuantumRegister:
    """Computational basis state |value>.

    Raises:
        IndexError: if ``value`` is not in ``range(2**n)``.
    """
    size = 2**n
    # A negative index would silently wrap around to the end of the list and
    # select the wrong basis state, so reject it explicitly.
    if not 0 <= value < size:
        raise IndexError(f"basis state {value} out of range for {n} qubit(s)")
    amps: list[complex] = [0 + 0j] * size
    amps[value] = 1 + 0j
    return cls(n_qubits=n, amplitudes=amps)
|
probabilities
probabilities() -> list[float]
List of |amplitude|^2 for each computational basis state.
Source code in src\cds\quantum\multi_qubit.py
def probabilities(self) -> list[float]:
    """Return the squared magnitude of every amplitude, in basis-state order."""
    squared: list[float] = []
    for amplitude in self.amplitudes:
        magnitude = abs(amplitude)
        squared.append(magnitude**2)
    return squared
|
normalize
Renormalize the state vector in-place to unit length.
Source code in src\cds\quantum\multi_qubit.py
def normalize(self) -> None:
    """Renormalize the state vector in-place to unit length.

    A vector whose norm is exactly zero is left unchanged.
    """
    # hypot works on the magnitudes without squaring them first, so tiny
    # amplitudes do not underflow to a zero norm.
    norm = math.hypot(*(abs(a) for a in self.amplitudes))
    if norm > 0:
        self.amplitudes = [a / norm for a in self.amplitudes]
|
measure
measure(seed: int | None = None) -> int
Measure the register and collapse its state vector.
Source code in src\cds\quantum\multi_qubit.py
def measure(self, seed: int | None = None) -> int:
    """Measure the register and collapse its state vector.

    Args:
        seed: optional seed for the random generator used for sampling.

    Returns:
        The index of the measured basis state. After the call that state has
        amplitude 1 and every other amplitude is 0.
    """
    rng = random.Random(seed)
    probs = self.probabilities()
    r = rng.random()
    cumulative = 0.0
    outcome: int | None = None
    last_possible: int | None = None
    for i, p in enumerate(probs):
        cumulative += p
        if p > 0:
            last_possible = i
            if r <= cumulative:
                outcome = i
                break
    if outcome is None:
        # The draw exceeded the total probability (rounding error or an
        # unnormalized state): pick the last state that can actually occur
        # rather than a zero-probability one.
        outcome = last_possible if last_possible is not None else len(probs) - 1
    # State collapse: measured state becomes 1.0, all others become 0.
    new_amps = [0.0 + 0j] * len(self.amplitudes)
    new_amps[outcome] = 1.0 + 0j
    self.amplitudes = new_amps
    return outcome
|
measure_shots
measure_shots(
shots: int = 1000, seed: int | None = None
) -> dict[str, int]
Run multiple measurements, return counts as binary strings.
Source code in src\cds\quantum\multi_qubit.py
def measure_shots(
    self,
    shots: int = 1000,
    seed: int | None = None,
) -> dict[str, int]:
    """Run multiple measurements, return counts as binary strings.

    The register itself is not collapsed; every shot samples the same
    probability distribution.

    Args:
        shots: number of samples to draw.
        seed: optional seed for the random generator used for sampling.

    Returns:
        Mapping from zero-padded binary label to the number of hits.
    """
    rng = random.Random(seed)
    counts: dict[str, int] = {}
    probs = self.probabilities()
    # Used when a draw exceeds the total probability: the last state that can
    # actually occur, or the last index if none can.
    fallback = max(
        (i for i, p in enumerate(probs) if p > 0),
        default=len(probs) - 1,
    )
    for _ in range(shots):
        r = rng.random()
        cumulative = 0.0
        result = fallback
        for i, p in enumerate(probs):
            cumulative += p
            if r < cumulative:
                result = i
                break
        label = format(result, f"0{self.n_qubits}b")
        counts[label] = counts.get(label, 0) + 1
    return counts
|
expectation
Expected value treating basis index as eigenvalue.
Source code in src\cds\quantum\multi_qubit.py
def expectation(self) -> float:
    """Return the probability-weighted mean of the basis-state indices."""
    weights = [abs(amplitude) ** 2 for amplitude in self.amplitudes]
    return sum(index * weight for index, weight in enumerate(weights))
|
Functions:
hadamard
hadamard() -> QuantumGate
Hadamard gate H = (1/sqrt(2)) * [[1, 1], [1, -1]].
Source code in src\cds\quantum\circuit.py
def hadamard() -> QuantumGate:
    """Build the Hadamard gate, H = (1/sqrt(2)) * [[1, 1], [1, -1]]."""
    scale = 1 / math.sqrt(2)
    entries = [scale, scale, scale, -scale]
    return QuantumGate("H", entries)
|
pauli_x
Pauli-X (NOT) gate X = [[0, 1], [1, 0]].
Source code in src\cds\quantum\circuit.py
def pauli_x() -> QuantumGate:
    """Build the Pauli-X (NOT) gate, X = [[0, 1], [1, 0]]."""
    entries = [0, 1, 1, 0]
    return QuantumGate("X", entries)
|
pauli_z
Pauli-Z gate Z = [[1, 0], [0, -1]].
Source code in src\cds\quantum\circuit.py
def pauli_z() -> QuantumGate:
    """Build the Pauli-Z gate, Z = [[1, 0], [0, -1]]."""
    entries = [1, 0, 0, -1]
    return QuantumGate("Z", entries)
|
phase_gate
phase_gate(theta: float) -> QuantumGate
Phase rotation gate P(theta) = diag(1, e^{i*theta}).
Source code in src\cds\quantum\circuit.py
def phase_gate(theta: float) -> QuantumGate:
    """Build the phase rotation gate P(theta) = diag(1, e^{i*theta}).

    The gate's name embeds ``theta`` rounded to two decimals.
    """
    phase = complex(math.cos(theta), math.sin(theta))
    label = f"P({theta:.2f})"
    return QuantumGate(label, [1, 0, 0, phase])
|
bell_state
bell_state(which: int = 0) -> QuantumRegister
Create one of the 4 Bell states (0-3).
0: |Φ+> = (|00> + |11>) / √2
1: |Φ-> = (|00> - |11>) / √2
2: |Ψ+> = (|01> + |10>) / √2
3: |Ψ-> = (|01> - |10>) / √2
Source code in src\cds\quantum\multi_qubit.py
def bell_state(which: int = 0) -> QuantumRegister:
    """Create one of the 4 Bell states (0-3).

    0: |Φ+> = (|00> + |11>) / √2
    1: |Φ-> = (|00> - |11>) / √2
    2: |Ψ+> = (|01> + |10>) / √2
    3: |Ψ-> = (|01> - |10>) / √2

    Raises:
        ValueError: if ``which`` is not one of 0, 1, 2, 3.
    """
    # Without this check any other value silently produced |Φ+>.
    if which not in (0, 1, 2, 3):
        raise ValueError(f"which must be 0, 1, 2 or 3, got {which!r}")
    reg = QuantumRegister.zeros(2)
    if which in (2, 3):
        # The Ψ states start from a register with qubit 1 flipped.
        reg = x_gate(reg, 1)
    reg = h_gate(reg, 0)
    reg = cnot(reg, 0, 1)
    if which in (1, 3):
        # The "minus" states get a relative sign from a Z on qubit 0.
        reg = z_gate(reg, 0)
    return reg
|
cnot
cnot(
reg: QuantumRegister, control: int, target: int
) -> QuantumRegister
Controlled-NOT gate.
Source code in src\cds\quantum\multi_qubit.py
def cnot(
    reg: QuantumRegister,
    control: int,
    target: int,
) -> QuantumRegister:
    """Apply a controlled-NOT and return the result as a new register.

    For every basis index whose ``control`` bit is set, the amplitude is
    exchanged with that of the index differing only in the ``target`` bit.
    The input register is not modified.
    """
    n = reg.n_qubits
    control_mask = 1 << control
    target_mask = 1 << target
    source = reg.amplitudes
    swapped = list(source)
    for idx in range(1 << n):
        if not idx & control_mask:
            continue
        partner = idx ^ target_mask
        # Handle each pair once, from its lower index.
        if partner > idx:
            swapped[idx], swapped[partner] = source[partner], source[idx]
    return QuantumRegister(n_qubits=n, amplitudes=swapped)
|
cz
cz(
reg: QuantumRegister, control: int, target: int
) -> QuantumRegister
Controlled-Z gate.
Source code in src\cds\quantum\multi_qubit.py
def cz(
    reg: QuantumRegister,
    control: int,
    target: int,
) -> QuantumRegister:
    """Apply a controlled-Z and return the result as a new register.

    The amplitude of every basis index with both the ``control`` and the
    ``target`` bit set is negated. The input register is not modified.
    """
    n = reg.n_qubits
    both_bits = (1 << control) | (1 << target)
    result = list(reg.amplitudes)
    for idx in range(1 << n):
        if (idx & both_bits) == both_bits:
            result[idx] = -reg.amplitudes[idx]
    return QuantumRegister(n_qubits=n, amplitudes=result)
|
ghz_state
ghz_state(n: int) -> QuantumRegister
GHZ state: (|00...0> + |11...1>) / √2
Source code in src\cds\quantum\multi_qubit.py
def ghz_state(n: int) -> QuantumRegister:
    """Build the n-qubit GHZ state (|00...0> + |11...1>) / √2.

    Applies a Hadamard to qubit 0, then a CNOT from qubit 0 onto each of the
    remaining qubits in turn.
    """
    state = h_gate(QuantumRegister.zeros(n), 0)
    target = 1
    while target < n:
        state = cnot(state, 0, target)
        target += 1
    return state
|
h_gate
h_gate(
reg: QuantumRegister, target: int
) -> QuantumRegister
Hadamard on qubit target.
Source code in src\cds\quantum\multi_qubit.py
def h_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply a Hadamard to qubit `target` of ``reg``."""
    scale = 1 / math.sqrt(2)
    entries = [scale, scale, scale, -scale]
    return _gate_2x2(reg, target, entries)
|
is_entangled
is_entangled(reg: QuantumRegister) -> bool
Check if a 2-qubit state is entangled (not separable).
Uses concurrence: for |ψ> = a|00> + b|01> + c|10> + d|11>,
concurrence = 2|ad - bc|. If > 0, it's entangled.
Source code in src\cds\quantum\multi_qubit.py
def is_entangled(reg: QuantumRegister) -> bool:
    """Report whether a 2-qubit state is entangled (not separable).

    For |ψ> = a|00> + b|01> + c|10> + d|11> the concurrence is 2|ad - bc|;
    the state counts as entangled when that exceeds CONCURRENCE_THRESHOLD.

    Raises:
        ValueError: if ``reg`` does not hold exactly two qubits.
    """
    if reg.n_qubits != 2:
        raise ValueError("entanglement check only for 2-qubit states")
    amp00, amp01, amp10, amp11 = reg.amplitudes
    determinant = amp00 * amp11 - amp01 * amp10
    return 2 * abs(determinant) > CONCURRENCE_THRESHOLD
|
rz_gate
rz_gate(
reg: QuantumRegister, target: int, theta: float
) -> QuantumRegister
Rotation around Z axis.
Source code in src\cds\quantum\multi_qubit.py
def rz_gate(
    reg: QuantumRegister,
    target: int,
    theta: float,
) -> QuantumRegister:
    """Rotate qubit ``target`` around the Z axis by ``theta``.

    Uses the diagonal matrix diag(e^{-i*theta/2}, e^{+i*theta/2}).
    """
    half_angle = theta / 2
    cos_half = math.cos(half_angle)
    sin_half = math.sin(half_angle)
    phase_minus = complex(cos_half, -sin_half)
    phase_plus = complex(cos_half, sin_half)
    return _gate_2x2(reg, target, [phase_minus, 0, 0, phase_plus])
|
swap
swap(
reg: QuantumRegister, q1: int, q2: int
) -> QuantumRegister
SWAP gate — exchange two qubits.
Source code in src\cds\quantum\multi_qubit.py
def swap(
    reg: QuantumRegister,
    q1: int,
    q2: int,
) -> QuantumRegister:
    """SWAP gate: exchange qubits ``q1`` and ``q2``.

    Implemented as three CNOTs with alternating control and target.
    """
    for control, target in ((q1, q2), (q2, q1), (q1, q2)):
        reg = cnot(reg, control, target)
    return reg
|
toffoli
toffoli(
reg: QuantumRegister, c1: int, c2: int, target: int
) -> QuantumRegister
Toffoli (CCNOT) gate — 3-qubit controlled-controlled-NOT.
Source code in src\cds\quantum\multi_qubit.py
def toffoli(
    reg: QuantumRegister,
    c1: int,
    c2: int,
    target: int,
) -> QuantumRegister:
    """Apply a Toffoli (CCNOT) gate and return the result as a new register.

    For every basis index with both control bits ``c1`` and ``c2`` set, the
    amplitude is exchanged with that of the index differing only in the
    ``target`` bit. The input register is not modified.
    """
    n = reg.n_qubits
    first_mask = 1 << c1
    second_mask = 1 << c2
    target_mask = 1 << target
    source = reg.amplitudes
    swapped = list(source)
    for idx in range(1 << n):
        if not (idx & first_mask and idx & second_mask):
            continue
        partner = idx ^ target_mask
        # Handle each pair once, from its lower index.
        if partner > idx:
            swapped[idx], swapped[partner] = source[partner], source[idx]
    return QuantumRegister(n_qubits=n, amplitudes=swapped)
|
x_gate
x_gate(
reg: QuantumRegister, target: int
) -> QuantumRegister
Pauli-X (NOT) on qubit target.
Source code in src\cds\quantum\multi_qubit.py
def x_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply Pauli-X (NOT) to qubit `target` of ``reg``."""
    entries = [0, 1, 1, 0]
    return _gate_2x2(reg, target, entries)
|
y_gate
y_gate(
reg: QuantumRegister, target: int
) -> QuantumRegister
Pauli-Y on qubit target.
Source code in src\cds\quantum\multi_qubit.py
def y_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply Pauli-Y to qubit `target` of ``reg``."""
    entries = [0, -1j, 1j, 0]
    return _gate_2x2(reg, target, entries)
|
z_gate
z_gate(
reg: QuantumRegister, target: int
) -> QuantumRegister
Pauli-Z on qubit target.
Source code in src\cds\quantum\multi_qubit.py
def z_gate(reg: QuantumRegister, target: int) -> QuantumRegister:
    """Apply Pauli-Z to qubit `target` of ``reg``."""
    entries = [1, 0, 0, -1]
    return _gate_2x2(reg, target, entries)
|
simulate
simulate(
circuit: QuantumCircuit,
shots: int = 1000,
seed: int | None = None,
) -> dict[int, int]
Run a circuit many times and collect measurement statistics.
Optimized to compute the state vector only once, then probabilistically sample.
Source code in src\cds\quantum\simulator.py
def simulate(circuit: QuantumCircuit, shots: int = 1000, seed: int | None = None) -> dict[int, int]:
    """Sample a circuit's measurement outcome ``shots`` times.

    The circuit is run a single time to obtain its final state; each shot
    then draws 0 or 1 from that state's probabilities.

    Returns:
        Mapping from outcome (0 or 1) to the number of times it was drawn.
        Outcomes that never occur are absent.
    """
    sampler = random.Random(seed)
    final_state = circuit.run()
    prob_zero, _prob_one = final_state.probabilities()
    tally: Counter[int] = Counter()
    for _ in range(shots):
        outcome = 0 if sampler.random() < prob_zero else 1
        tally[outcome] += 1
    return dict(tally)
|
Scientific Computing
Curated physical constants and classical physics formulas (mechanics, waves, relativity, thermo).
cds.scientific
Scientific computing utilities.
Functions:
get_constant
get_constant(name: str) -> float
Look up a physical/mathematical constant by short name and return its value.
Raises:
| Type |
Description |
KeyError
|
if name is not in the table (lists valid names in the message).
|
Source code in src\cds\scientific\constants.py
def get_constant(name: str) -> float:
    """Return the value stored for the constant called ``name``.

    The value is the first element of the ``CONSTANTS`` entry.

    Raises:
        KeyError: if `name` is not in the table (the message lists the valid names).
    """
    if name in CONSTANTS:
        entry = CONSTANTS[name]
        return entry[0]
    raise KeyError(f"unknown constant: {name}. available: {list(CONSTANTS.keys())}")
|
de_broglie_wavelength
de_broglie_wavelength(
mass: float, velocity: float
) -> float
lambda = h / (m * v)
Source code in src\cds\scientific\formulas.py
def de_broglie_wavelength(mass: float, velocity: float) -> float:
    """Compute the de Broglie wavelength, lambda = h / (m * v).

    Raises:
        ValueError: if the momentum ``mass * velocity`` is zero.
    """
    planck = get_constant("h")
    momentum = mass * velocity
    if momentum != 0:
        return planck / momentum
    raise ValueError("momentum can't be zero")
|
escape_velocity
escape_velocity(mass: float, radius: float) -> float
v_esc = sqrt(2GM/r)
Source code in src\cds\scientific\formulas.py
def escape_velocity(mass: float, radius: float) -> float:
    """v_esc = sqrt(2GM/r)

    Raises:
        ValueError: if ``radius`` is not positive.
    """
    G = get_constant("G")
    # Validate like the sibling formulas do, instead of surfacing a bare
    # ZeroDivisionError for radius == 0.
    if radius <= 0:
        raise ValueError("radius must be positive")
    return math.sqrt(2 * G * mass / radius)
|
gravitational_force
gravitational_force(
m1: float, m2: float, r: float
) -> float
F = G * m1 * m2 / r^2
Source code in src\cds\scientific\formulas.py
def gravitational_force(m1: float, m2: float, r: float) -> float:
    """Compute Newtonian attraction, F = G * m1 * m2 / r^2.

    Raises:
        ValueError: if the separation ``r`` is zero.
    """
    big_g = get_constant("G")
    if r != 0:
        separation_squared = r**2
        return big_g * m1 * m2 / separation_squared
    raise ValueError("distance can't be zero")
|
ideal_gas_pressure
ideal_gas_pressure(
n_moles: float, temperature: float, volume: float
) -> float
PV = nRT => P = nRT/V
Source code in src\cds\scientific\formulas.py
def ideal_gas_pressure(n_moles: float, temperature: float, volume: float) -> float:
    """Solve the ideal gas law PV = nRT for pressure, P = nRT/V.

    Raises:
        ValueError: if ``volume`` is zero or negative.
    """
    gas_constant = get_constant("R")
    if not volume > 0:
        raise ValueError("volume must be positive")
    return n_moles * gas_constant * temperature / volume
|
kinetic_energy
kinetic_energy(mass: float, velocity: float) -> float
KE = 0.5 * m * v^2
Source code in src\cds\scientific\formulas.py
def kinetic_energy(mass: float, velocity: float) -> float:
    """Compute kinetic energy, KE = 0.5 * m * v^2."""
    speed_squared = velocity**2
    return 0.5 * mass * speed_squared
|
photon_energy
photon_energy(frequency: float) -> float
E = h * f
Source code in src\cds\scientific\formulas.py
def photon_energy(frequency: float) -> float:
    """Compute the energy of a photon, E = h * f."""
    planck = get_constant("h")
    energy = planck * frequency
    return energy
|
schwarzschild_radius
schwarzschild_radius(mass: float) -> float
r_s = 2GM/c^2
Source code in src\cds\scientific\formulas.py
def schwarzschild_radius(mass: float) -> float:
    """Compute the Schwarzschild radius, r_s = 2GM/c^2."""
    big_g = get_constant("G")
    light_speed = get_constant("c")
    numerator = 2 * big_g * mass
    return numerator / light_speed**2
|
wave_frequency
wave_frequency(wavelength: float) -> float
f = c / lambda
Source code in src\cds\scientific\formulas.py
def wave_frequency(wavelength: float) -> float:
    """Compute frequency from wavelength, f = c / lambda.

    Raises:
        ValueError: if ``wavelength`` is zero or negative.
    """
    light_speed = get_constant("c")
    if not wavelength > 0:
        raise ValueError("wavelength must be positive")
    return light_speed / wavelength
|
Graph Theory
BFS, DFS, Dijkstra shortest paths, Kruskal MST, topological sort, cycle detection.
cds.graph
Graph theory algorithms — BFS, DFS, Dijkstra, Kruskal MST.
Classes
Graph
dataclass
Adjacency-list graph representation.
Supports both directed and undirected graphs with weighted edges.
Source code in src\cds\graph\algorithms.py
@dataclass
class Graph:
    """Graph stored as adjacency lists plus a flat edge list.

    Works for directed and undirected graphs; every edge carries a weight.
    """

    n_vertices: int
    directed: bool = False
    adj: dict[int, list[tuple[int, float]]] = field(default_factory=dict)
    edges: list[Edge] = field(default_factory=list)

    def add_edge(self, u: int, v: int, weight: float = 1.0) -> None:
        """Record an edge u -> v; undirected graphs also record v -> u.

        The edge is appended to ``edges`` once, as (u, v, weight), in both
        cases.
        """
        endpoints = [(u, v)] if self.directed else [(u, v), (v, u)]
        for origin, destination in endpoints:
            self.adj.setdefault(origin, []).append((destination, weight))
        self.edges.append(Edge(u, v, weight))
|
Methods:
add_edge
add_edge(u: int, v: int, weight: float = 1.0) -> None
Add an edge from u to v (and v to u if undirected).
Source code in src\cds\graph\algorithms.py
def add_edge(self, u: int, v: int, weight: float = 1.0) -> None:
    """Record an edge u -> v; undirected graphs also record v -> u.

    The edge is appended to ``edges`` once, as (u, v, weight), in both cases.
    """
    endpoints = [(u, v)] if self.directed else [(u, v), (v, u)]
    for origin, destination in endpoints:
        self.adj.setdefault(origin, []).append((destination, weight))
    self.edges.append(Edge(u, v, weight))
|
Functions:
bfs
bfs(graph: Graph, start: int) -> list[int]
Breadth-first search traversal.
Returns vertices in BFS order starting from start.
Time complexity: O(V + E) [CLRS §22.2]
Source code in src\cds\graph\algorithms.py
def bfs(graph: Graph, start: int) -> list[int]:
    """Traverse the graph breadth-first.

    Returns the vertices reachable from `start` in the order they are
    dequeued, beginning with `start` itself.

    Time complexity: O(V + E) [CLRS §22.2]
    """
    seen: set[int] = {start}
    frontier: deque[int] = deque((start,))
    visit_order: list[int] = []
    while frontier:
        current = frontier.popleft()
        visit_order.append(current)
        for target, _weight in graph.adj.get(current, []):
            if target in seen:
                continue
            # Mark on enqueue so a vertex is never queued twice.
            seen.add(target)
            frontier.append(target)
    return visit_order
|
dfs
dfs(graph: Graph, start: int) -> list[int]
Depth-first search traversal (iterative).
Returns vertices in DFS order starting from start.
Time complexity: O(V + E) [CLRS §22.3]
Source code in src\cds\graph\algorithms.py
def dfs(graph: Graph, start: int) -> list[int]:
    """Traverse the graph depth-first using an explicit stack.

    Returns the vertices reachable from `start` in the order they are first
    visited. Neighbors are explored in adjacency-list order.

    Time complexity: O(V + E) [CLRS §22.3]
    """
    pending: list[int] = [start]
    seen: set[int] = set()
    visit_order: list[int] = []
    while pending:
        current = pending.pop()
        if current not in seen:
            seen.add(current)
            visit_order.append(current)
            unvisited = [
                target
                for target, _weight in graph.adj.get(current, [])
                if target not in seen
            ]
            # Push in reverse so the first neighbor is popped first.
            pending.extend(reversed(unvisited))
    return visit_order
|
dijkstra
dijkstra(
graph: Graph, start: int
) -> tuple[dict[int, float], dict[int, int | None]]
Dijkstra's shortest path algorithm.
Returns (distances, predecessors) from start to all reachable vertices.
Time complexity: O((V + E) log V) with binary heap [Dijkstra 1959]
Parameters:
| Name |
Type |
Description |
Default |
graph
|
Graph
|
weighted graph (non-negative weights)
|
required
|
start
|
int
|
source vertex
|
required
|
Returns:
| Name | Type |
Description |
distances |
dict[int, float]
|
dict mapping vertex -> shortest distance from start
|
predecessors |
dict[int, int | None]
|
dict mapping vertex -> previous vertex on shortest path
|
Raises:
| Type |
Description |
ValueError
|
if a negative weight is encountered
|
Source code in src\cds\graph\algorithms.py
def dijkstra(
    graph: Graph,
    start: int,
) -> tuple[dict[int, float], dict[int, int | None]]:
    """Single-source shortest paths via Dijkstra's algorithm.

    Computes distances and predecessors for every vertex reachable from
    `start`.

    Time complexity: O((V + E) log V) with binary heap [Dijkstra 1959]

    Args:
        graph: weighted graph (non-negative weights)
        start: source vertex

    Returns:
        distances: dict mapping vertex -> shortest distance from start
        predecessors: dict mapping vertex -> previous vertex on shortest path
            (``None`` for `start`)

    Raises:
        ValueError: if a negative weight is encountered
    """
    best: dict[int, float] = {start: 0.0}
    came_from: dict[int, int | None] = {start: None}
    frontier: list[tuple[float, int]] = [(0.0, start)]
    settled: set[int] = set()
    while frontier:
        dist_here, here = heapq.heappop(frontier)
        if here in settled:
            # Stale heap entry: this vertex was already finalized.
            continue
        settled.add(here)
        for there, weight in graph.adj.get(here, []):
            if weight < 0:
                raise ValueError("negative edge weights not supported")
            candidate = dist_here + weight
            known = best.get(there)
            if known is None or candidate < known:
                best[there] = candidate
                came_from[there] = here
                heapq.heappush(frontier, (candidate, there))
    return best, came_from
|
has_cycle
has_cycle(graph: Graph) -> bool
Detect if a directed graph has a cycle using DFS coloring.
Time complexity: O(V + E)
WHITE=0 (unvisited), GRAY=1 (in current DFS path), BLACK=2 (finished).
Source code in src\cds\graph\algorithms.py
def has_cycle(graph: Graph) -> bool:
    """Detect if a directed graph has a cycle using DFS coloring.

    Time complexity: O(V + E)

    WHITE=0 (unvisited), GRAY=1 (in current DFS path), BLACK=2 (finished).
    An edge into a GRAY vertex closes a cycle.

    The search uses an explicit stack rather than recursion, so long paths
    cannot exhaust Python's recursion limit.
    """
    WHITE, GRAY, BLACK = 0, 1, 2
    color: dict[int, int] = {i: WHITE for i in range(graph.n_vertices)}

    for root in range(graph.n_vertices):
        if color[root] != WHITE:
            continue
        color[root] = GRAY
        # Each frame pairs a vertex with an iterator over its outgoing edges,
        # so resuming a frame continues where its edge scan left off.
        stack = [(root, iter(graph.adj.get(root, [])))]
        while stack:
            u, edge_iter = stack[-1]
            for v, _ in edge_iter:
                if color[v] == GRAY:
                    return True
                if color[v] == WHITE:
                    color[v] = GRAY
                    stack.append((v, iter(graph.adj.get(v, []))))
                    break
            else:
                # All edges of u explored without descending: u is finished.
                color[u] = BLACK
                stack.pop()
    return False
|
kruskal_mst
kruskal_mst(graph: Graph) -> tuple[list[Edge], float]
Kruskal's minimum spanning tree algorithm.
Time complexity: O(E log E) [Kruskal 1956]
Parameters:
| Name |
Type |
Description |
Default |
graph
|
Graph
|
undirected weighted graph
|
required
|
Returns:
| Name | Type |
Description |
mst_edges |
list[Edge]
|
list of edges in the MST
|
total_weight |
float
|
sum of edge weights in the MST
|
Source code in src\cds\graph\algorithms.py
def kruskal_mst(graph: Graph) -> tuple[list[Edge], float]:
    """Build a minimum spanning tree with Kruskal's algorithm.

    Edges are considered in ascending weight order and kept whenever
    ``_union`` reports that their endpoints were merged.

    Time complexity: O(E log E) [Kruskal 1956]

    Args:
        graph: undirected weighted graph

    Returns:
        mst_edges: list of edges in the MST
        total_weight: sum of edge weights in the MST
    """
    vertex_ids = range(graph.n_vertices)
    parent = {vertex: vertex for vertex in vertex_ids}
    rank = dict.fromkeys(vertex_ids, 0)
    chosen: list[Edge] = []
    weight_sum = 0.0
    target_size = graph.n_vertices - 1
    for candidate in sorted(graph.edges, key=lambda e: e.weight):
        if not _union(parent, rank, candidate.src, candidate.dst):
            continue
        chosen.append(candidate)
        weight_sum += candidate.weight
        # A spanning tree over V vertices has V - 1 edges; stop once complete.
        if len(chosen) == target_size:
            break
    return chosen, weight_sum
|
topological_sort
topological_sort(graph: Graph) -> list[int]
Kahn's algorithm for topological sort of a DAG.
Time complexity: O(V + E) [CLRS §22.4]
Parameters:
| Name |
Type |
Description |
Default |
graph
|
Graph
|
directed acyclic graph
|
required
|
Returns:
| Type |
Description |
list[int]
|
list of vertices in topological order
|
Raises:
| Type |
Description |
ValueError
|
if graph contains a cycle
|
Source code in src\cds\graph\algorithms.py
def topological_sort(graph: Graph) -> list[int]:
    """Order the vertices of a DAG topologically with Kahn's algorithm.

    Time complexity: O(V + E) [CLRS §22.4]

    Args:
        graph: directed acyclic graph

    Returns:
        list of vertices in topological order

    Raises:
        ValueError: if graph contains a cycle
    """
    vertex_count = graph.n_vertices
    in_degree: dict[int, int] = dict.fromkeys(range(vertex_count), 0)
    for outgoing in graph.adj.values():
        for target, _weight in outgoing:
            in_degree[target] = in_degree.get(target, 0) + 1

    ready: deque[int] = deque(
        vertex for vertex in range(vertex_count) if in_degree[vertex] == 0
    )
    order: list[int] = []
    while ready:
        current = ready.popleft()
        order.append(current)
        for target, _weight in graph.adj.get(current, []):
            in_degree[target] -= 1
            if in_degree[target] == 0:
                ready.append(target)

    # Vertices left unprocessed never reached in-degree zero.
    if len(order) != vertex_count:
        raise ValueError("graph contains a cycle")
    return order
|
Mathematical Modeling
Symbolic algebra for equation development: an expression tree (+, -, *, /, **, sin, cos, exp, log, sqrt) with symbolic differentiation, simplification, LaTeX export, named MathModel systems of equations, and numeric solvers (root finding and parameter fitting) built on cds.optimization.
cds.modeling
Mathematical modeling — symbolic expressions, equation systems, and solvers.
Classes
Add
Bases: _Binary
left + right.
Source code in src\cds\modeling\expression.py
class Add(_Binary):
    """Sum node: ``left + right``."""

    def evaluate(self, env: dict[str, float]) -> float:
        lhs = self.left.evaluate(env)
        rhs = self.right.evaluate(env)
        return lhs + rhs

    def diff(self, var: str) -> Expression:
        # Sum rule: (u + v)' = u' + v'
        d_left = self.left.diff(var)
        d_right = self.right.diff(var)
        return Add(d_left, d_right)

    def simplify(self) -> Expression:
        lhs = self.left.simplify()
        rhs = self.right.simplify()
        lhs_is_const = isinstance(lhs, Constant)
        rhs_is_const = isinstance(rhs, Constant)
        # Fold two constants into one.
        if lhs_is_const and rhs_is_const:
            return Constant(lhs.value + rhs.value)
        # Drop an additive zero on either side.
        if lhs_is_const and lhs.value == 0.0:
            return rhs
        if rhs_is_const and rhs.value == 0.0:
            return lhs
        return Add(lhs, rhs)

    def to_str(self) -> str:
        lhs_text = self.left.to_str()
        rhs_text = self.right.to_str()
        return f"({lhs_text} + {rhs_text})"

    def to_latex(self) -> str:
        lhs_tex = self.left.to_latex()
        rhs_tex = self.right.to_latex()
        return f"{lhs_tex} + {rhs_tex}"
|
Constant
Bases: Expression
A literal numeric value in an expression tree.
Source code in src\cds\modeling\expression.py
class Constant(Expression):
    """A literal numeric value in an expression tree."""

    __slots__ = ("value",)

    def __init__(self, value: float) -> None:
        self.value = float(value)

    def evaluate(self, env: dict[str, float]) -> float:
        return self.value

    def diff(self, var: str) -> Expression:
        return Constant(0.0)

    def variables(self) -> set[str]:
        return set()

    def to_str(self) -> str:
        # Render integers without a trailing ".0" for readability.
        # float.is_integer() is False for inf and nan, so those fall through
        # to repr() instead of crashing inside int().
        if self.value.is_integer():
            return str(int(self.value))
        return repr(self.value)

    def to_latex(self) -> str:
        # Non-finite values have no integer form; give them LaTeX spellings.
        if math.isnan(self.value):
            return "\\mathrm{NaN}"
        if math.isinf(self.value):
            return "\\infty" if self.value > 0 else "-\\infty"
        if self.value.is_integer():
            return str(int(self.value))
        return repr(self.value)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Constant) and other.value == self.value

    def __hash__(self) -> int:
        return hash(("Constant", self.value))
|
Cos
Bases: _Unary
cos(operand).
Source code in src\cds\modeling\expression.py
class Cos(_Unary):
    """Cosine node: ``cos(operand)``."""

    def evaluate(self, env: dict[str, float]) -> float:
        angle = self.operand.evaluate(env)
        return math.cos(angle)

    def diff(self, var: str) -> Expression:
        # Chain rule: d/dx cos(u) = -sin(u) * u'
        negative_sine = Mul(Constant(-1.0), Sin(self.operand))
        return Mul(negative_sine, self.operand.diff(var))

    def simplify(self) -> Expression:
        reduced = self.operand.simplify()
        if not isinstance(reduced, Constant):
            return Cos(reduced)
        # Constant argument: fold to a number.
        return Constant(math.cos(reduced.value))

    def to_str(self) -> str:
        argument = self.operand.to_str()
        return f"cos({argument})"

    def to_latex(self) -> str:
        argument = self.operand.to_latex()
        return f"\\cos\\left({argument}\\right)"
|
Div
Bases: _Binary
left / right (quotient rule for differentiation).
Source code in src\cds\modeling\expression.py
class Div(_Binary):
    """``left / right`` (quotient rule for differentiation)."""

    def evaluate(self, env: dict[str, float]) -> float:
        return self.left.evaluate(env) / self.right.evaluate(env)

    def diff(self, var: str) -> Expression:
        # Quotient rule: d(u/v) = (u'v - uv') / v^2
        return Div(
            Sub(
                Mul(self.left.diff(var), self.right),
                Mul(self.left, self.right.diff(var)),
            ),
            Pow(self.right, Constant(2.0)),
        )

    def simplify(self) -> Expression:
        left = self.left.simplify()
        right = self.right.simplify()
        if isinstance(right, Constant) and right.value == 0.0:
            # Division by a literal zero cannot be folded. Keep the node so
            # simplify() never raises and the error surfaces in evaluate().
            return Div(left, right)
        if isinstance(left, Constant) and isinstance(right, Constant):
            return Constant(left.value / right.value)
        if isinstance(left, Constant) and left.value == 0.0:
            return Constant(0.0)
        if isinstance(right, Constant) and right.value == 1.0:
            return left
        return Div(left, right)

    def to_str(self) -> str:
        return f"({self.left.to_str()} / {self.right.to_str()})"

    def to_latex(self) -> str:
        return f"\\frac{{{self.left.to_latex()}}}{{{self.right.to_latex()}}}"
|
Exp
Bases: _Unary
e ** operand (the exponential function).
Source code in src\cds\modeling\expression.py
class Exp(_Unary):
    """Exponential node: ``e ** operand``."""

    def evaluate(self, env: dict[str, float]) -> float:
        exponent = self.operand.evaluate(env)
        return math.exp(exponent)

    def diff(self, var: str) -> Expression:
        # Chain rule: d/dx exp(u) = exp(u) * u'
        outer = Exp(self.operand)
        return Mul(outer, self.operand.diff(var))

    def simplify(self) -> Expression:
        reduced = self.operand.simplify()
        if not isinstance(reduced, Constant):
            return Exp(reduced)
        # Constant exponent: fold to a number.
        return Constant(math.exp(reduced.value))

    def to_str(self) -> str:
        argument = self.operand.to_str()
        return f"exp({argument})"

    def to_latex(self) -> str:
        argument = self.operand.to_latex()
        return f"e^{{{argument}}}"
|
Expression
Abstract base for every node in the symbolic expression tree.
Subclasses implement :meth:evaluate, :meth:diff, :meth:variables,
:meth:simplify, :meth:to_latex, and :meth:to_str. The dunders
(__add__ etc.) live here so arithmetic works uniformly for all nodes
and for mixing nodes with plain Python numbers.
Source code in src\cds\modeling\expression.py
| class Expression:
"""Abstract base for every node in the symbolic expression tree.
Subclasses implement :meth:`evaluate`, :meth:`diff`, :meth:`variables`,
:meth:`simplify`, :meth:`to_latex`, and :meth:`to_str`. The dunders
(``__add__`` etc.) live here so arithmetic works uniformly for all nodes
and for mixing nodes with plain Python numbers.
"""
# ------------------------------------------------------------------ #
# Abstract interface — every subclass overrides these.
# ------------------------------------------------------------------ #
def evaluate(self, env: dict[str, float]) -> float: # pragma: no cover
"""Evaluate this expression to a float using the variable bindings in ``env``."""
raise NotImplementedError
def diff(self, var: str) -> Expression: # pragma: no cover
"""Return the symbolic derivative of this expression w.r.t. ``var``."""
raise NotImplementedError
def variables(self) -> set[str]: # pragma: no cover
"""Return the set of free variable names appearing in this expression."""
raise NotImplementedError
def to_str(self) -> str: # pragma: no cover
"""Render this expression as a human-readable infix string."""
raise NotImplementedError
def to_latex(self) -> str: # pragma: no cover
"""Render this expression as a LaTeX math string."""
raise NotImplementedError
# ------------------------------------------------------------------ #
# Convenience — built on the abstract methods, so shared by all nodes.
# ------------------------------------------------------------------ #
def simplify(self) -> Expression:
"""Constant-fold and apply algebraic identities to simplify this tree."""
return self
def subs(self, **values: float) -> Expression:
"""Substitute the given ``name=value`` pairs, returning a new expression.
Names not present in this expression are ignored, so partial
substitution is safe.
"""
return _subs(self, values)
def to_func(self, *var_names: str) -> Callable[..., float]:
"""Compile this expression into a callable ``f(*args) -> float``.
The positional argument order is ``var_names``; this is the shape
:mod:`cds.optimization` and :mod:`cds.modeling.solver` expect.
"""
unknown = set(var_names) - self.variables()
if unknown:
raise ValueError(f"to_func var_names not in expression: {sorted(unknown)}")
for needed in self.variables():
if needed not in var_names:
raise ValueError(f"to_func missing variable {needed!r} in var_names")
names = list(var_names)
def _f(*args: float) -> float:
if len(args) != len(names):
raise ValueError(f"expected {len(names)} args, got {len(args)}")
env = dict(zip(names, args))
return self.evaluate(env)
return _f
# ------------------------------------------------------------------ #
# Operator overloads — promote numbers to Constant so math reads naturally.
# ------------------------------------------------------------------ #
def __add__(self, other: Expression | float | int) -> Expression:
return Add(self, _coerce(other))
def __radd__(self, other: float | int) -> Expression:
return Add(_coerce(other), self)
def __sub__(self, other: Expression | float | int) -> Expression:
return Sub(self, _coerce(other))
def __rsub__(self, other: float | int) -> Expression:
return Sub(_coerce(other), self)
def __mul__(self, other: Expression | float | int) -> Expression:
return Mul(self, _coerce(other))
def __rmul__(self, other: float | int) -> Expression:
return Mul(_coerce(other), self)
def __truediv__(self, other: Expression | float | int) -> Expression:
return Div(self, _coerce(other))
def __rtruediv__(self, other: float | int) -> Expression:
    """Build ``other / self`` for a left-hand operand passed through ``_coerce``."""
    dividend = _coerce(other)
    return Div(dividend, self)
def __pow__(self, exponent: Expression | float | int) -> Expression:
    """Build ``self ** exponent``; ``exponent`` is passed through ``_coerce``."""
    power = _coerce(exponent)
    return Pow(self, power)
def __rpow__(self, base: float | int) -> Expression:
    """Build ``base ** self`` for a base passed through ``_coerce``."""
    lhs = _coerce(base)
    return Pow(lhs, self)
def __neg__(self) -> Expression:
    """Build ``-self``, represented as the product ``Constant(-1.0) * self``."""
    minus_one = Constant(-1.0)
    return Mul(minus_one, self)
def __pos__(self) -> Expression:
    """Unary plus is a no-op: the same expression object is returned."""
    return self
def __repr__(self) -> str:
    """Return ``ClassName(<infix string>)`` using :meth:`to_str` for the body."""
    label = self.__class__.__name__
    return f"{label}({self.to_str()})"
|
Methods:
evaluate
evaluate(env: dict[str, float]) -> float
Evaluate this expression to a float using the variable bindings in env.
Source code in src\cds\modeling\expression.py
def evaluate(self, env: dict[str, float]) -> float:  # pragma: no cover
    """Compute the numeric value of this expression under the bindings in ``env``.

    This base version always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
diff
diff(var: str) -> Expression
Return the symbolic derivative of this expression w.r.t. var.
Source code in src\cds\modeling\expression.py
def diff(self, var: str) -> Expression:  # pragma: no cover
    """Differentiate this expression symbolically with respect to ``var``.

    This base version always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
variables
Return the set of free variable names appearing in this expression.
Source code in src\cds\modeling\expression.py
def variables(self) -> set[str]:  # pragma: no cover
    """Collect the names of the free variables appearing in this expression.

    This base version always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
to_str
Render this expression as a human-readable infix string.
Source code in src\cds\modeling\expression.py
def to_str(self) -> str:  # pragma: no cover
    """Format this expression as a human-readable infix string.

    This base version always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
to_latex
Render this expression as a LaTeX math string.
Source code in src\cds\modeling\expression.py
def to_latex(self) -> str:  # pragma: no cover
    """Format this expression as a LaTeX math string.

    This base version always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
simplify
Constant-fold and apply algebraic identities to simplify this tree.
Source code in src\cds\modeling\expression.py
def simplify(self) -> Expression:
    """Return a simplified form of this tree.

    This base implementation performs no rewriting: it hands back ``self``
    unchanged.
    """
    return self
|
subs
subs(**values: float) -> Expression
Substitute the given name=value pairs, returning a new expression.
Names not present in this expression are ignored, so partial
substitution is safe.
Source code in src\cds\modeling\expression.py
def subs(self, **values: float) -> Expression:
    """Return a new expression with ``name=value`` pairs substituted.

    Names not present in this expression are ignored, so partial
    substitution is safe. The work is delegated to the module-level
    ``_subs`` helper.
    """
    bindings = values
    return _subs(self, bindings)
|
to_func
to_func(*var_names: str) -> Callable[..., float]
Compile this expression into a callable f(*args) -> float.
The positional argument order is var_names; this is the shape
:mod:cds.optimization and :mod:cds.modeling.solver expect.
Source code in src\cds\modeling\expression.py
def to_func(self, *var_names: str) -> Callable[..., float]:
    """Build a positional callable ``f(*args) -> float`` from this expression.

    Arguments are bound to ``var_names`` in the order given; this is the
    shape :mod:`cds.optimization` and :mod:`cds.modeling.solver` expect.

    Raises:
        ValueError: if ``var_names`` lists a name that is not a variable of
            this expression, or omits one that is. The returned callable
            raises ``ValueError`` when called with the wrong argument count.
    """
    free = self.variables()
    extra = set(var_names).difference(free)
    if extra:
        raise ValueError(f"to_func var_names not in expression: {sorted(extra)}")
    for needed in free:
        if needed not in var_names:
            raise ValueError(f"to_func missing variable {needed!r} in var_names")
    ordered = list(var_names)
    arity = len(ordered)

    def _f(*args: float) -> float:
        if len(args) != arity:
            raise ValueError(f"expected {arity} args, got {len(args)}")
        return self.evaluate(dict(zip(ordered, args)))

    return _f
|
Log
Bases: _Unary
ln(operand) (the natural logarithm, base e).
Source code in src\cds\modeling\expression.py
class Log(_Unary):
    """``ln(operand)`` (the natural logarithm, base *e*)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Return ``math.log`` of the evaluated operand."""
        return math.log(self.operand.evaluate(env))

    def diff(self, var: str) -> Expression:
        """Return the derivative w.r.t. ``var``."""
        # Chain rule: d/dx ln(u) = u' / u
        return Div(self.operand.diff(var), self.operand)

    def simplify(self) -> Expression:
        """Simplify the operand and fold ``ln`` of a positive constant.

        A non-positive constant is left as ``Log(constant)`` instead of
        being folded, so simplifying never raises a math domain error; the
        error still surfaces from :meth:`evaluate`.
        """
        inner = self.operand.simplify()
        # Fold only inside the real domain of ln (this also skips NaN).
        if isinstance(inner, Constant) and inner.value > 0:
            return Constant(math.log(inner.value))
        return Log(inner)

    def to_str(self) -> str:
        """Render as ``log(<operand>)``."""
        return f"log({self.operand.to_str()})"

    def to_latex(self) -> str:
        """Render as ``\\ln\\left(<operand>\\right)``."""
        return f"\\ln\\left({self.operand.to_latex()}\\right)"
|
Mul
Bases: _Binary
left * right (product rule for differentiation).
Source code in src\cds\modeling\expression.py
class Mul(_Binary):
    """``left * right`` (product rule for differentiation)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Return the product of both evaluated operands."""
        return self.left.evaluate(env) * self.right.evaluate(env)

    def diff(self, var: str) -> Expression:
        """Return the derivative w.r.t. ``var``."""
        # Product rule: d(uv) = u'v + uv'
        return Add(
            Mul(self.left.diff(var), self.right),
            Mul(self.left, self.right.diff(var)),
        )

    def simplify(self) -> Expression:
        """Simplify both operands, fold constants, and apply ``0*x`` / ``1*x``."""
        left = self.left.simplify()
        right = self.right.simplify()
        if isinstance(left, Constant) and isinstance(right, Constant):
            return Constant(left.value * right.value)
        if isinstance(left, Constant):
            if left.value == 0.0:
                return Constant(0.0)
            if left.value == 1.0:
                return right
        if isinstance(right, Constant):
            if right.value == 0.0:
                return Constant(0.0)
            if right.value == 1.0:
                return left
        return Mul(left, right)

    def to_str(self) -> str:
        """Render as a fully parenthesised infix product."""
        return f"({self.left.to_str()} * {self.right.to_str()})"

    def to_latex(self) -> str:
        """Render as ``left \\cdot right``, grouping sum/difference operands.

        Without grouping, ``(a - b) * c`` would print as ``a - b \\cdot c``,
        which reads as a different expression.
        """

        def grouped(node: Expression) -> str:
            text = node.to_latex()
            # NOTE(review): assumes Add.to_latex, like Sub.to_latex, emits no
            # outer parentheses of its own — confirm against Add.
            if isinstance(node, (Add, Sub)):
                return f"\\left({text}\\right)"
            return text

        return f"{grouped(self.left)} \\cdot {grouped(self.right)}"
|
Pow
Bases: _Binary
base ** exponent.
Differentiation handles two useful cases: a constant exponent
(d/dx u^c = c * u^(c-1) * u') and a constant base
(d/dx c^u = c^u * ln(c) * u'). The fully general case
u^v is handled via logarithmic differentiation:
u^v * (v' * ln(u) + v * u'/u).
Source code in src\cds\modeling\expression.py
class Pow(_Binary):
    """``base ** exponent``.

    Differentiation distinguishes an exponent that does not depend on the
    variable (``d/dx u^c = c * u^(c-1) * u'``), a base that does not depend
    on it (``d/dx c^u = c^u * ln(c) * u'``), and the fully general case
    ``u^v``, handled via logarithmic differentiation:
    ``u^v * (v' * ln(u) + v * u'/u)``.
    """

    def evaluate(self, env: dict[str, float]) -> float:
        """Return ``left ** right`` of the evaluated operands as a float."""
        return float(self.left.evaluate(env) ** self.right.evaluate(env))

    def diff(self, var: str) -> Expression:
        """Return the derivative w.r.t. ``var`` (see the class docstring)."""
        base = self.left
        exp = self.right
        base_has = var in base.variables()
        exp_has = var in exp.variables()
        if not base_has and not exp_has:
            return Constant(0.0)
        if exp_has and not base_has:
            # d/dx c^u = c^u * ln(c) * u'
            return Mul(
                Mul(Pow(base, exp), Log(base)),
                exp.diff(var),
            )
        if base_has and not exp_has:
            # d/dx u^c = c * u^(c-1) * u'
            return Mul(
                Mul(exp, Pow(base, Sub(exp, Constant(1.0)))),
                base.diff(var),
            )
        # General case u^v: u^v * (v' * ln(u) + v * u'/u)
        return Mul(
            Pow(base, exp),
            Add(
                Mul(exp.diff(var), Log(base)),
                Div(Mul(exp, base.diff(var)), base),
            ),
        )

    def simplify(self) -> Expression:
        """Simplify both operands, fold constants, and apply ``x**0`` / ``x**1``.

        A constant power is folded only when Python yields a real number:
        ``0 ** -1`` (ZeroDivisionError), overflowing powers, and negative
        bases with fractional exponents (complex result) are left as an
        unevaluated ``Pow`` so that simplifying never raises and never
        stores a complex value in a ``Constant``.
        """
        base = self.left.simplify()
        exp = self.right.simplify()
        if isinstance(base, Constant) and isinstance(exp, Constant):
            try:
                folded = base.value**exp.value
            except (ZeroDivisionError, OverflowError):
                folded = None
            if folded is not None and not isinstance(folded, complex):
                return Constant(folded)
        if isinstance(exp, Constant):
            if exp.value == 0.0:
                return Constant(1.0)
            if exp.value == 1.0:
                return base
        return Pow(base, exp)

    def to_str(self) -> str:
        """Render as a fully parenthesised infix power."""
        return f"({self.left.to_str()} ** {self.right.to_str()})"

    def to_latex(self) -> str:
        """Render as ``base^{exponent}``, grouping a compound base.

        A binary base such as ``a - b`` is wrapped in ``\\left( \\right)``;
        otherwise ``(a - b)^2`` would print as ``a - b^{2}``.
        """
        base_text = self.left.to_latex()
        if isinstance(self.left, _Binary):
            base_text = f"\\left({base_text}\\right)"
        return f"{base_text}^{{{self.right.to_latex()}}}"
|
Sin
Bases: _Unary
sin(operand).
Source code in src\cds\modeling\expression.py
class Sin(_Unary):
    """``sin(operand)``."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Return ``math.sin`` of the evaluated operand."""
        angle = self.operand.evaluate(env)
        return math.sin(angle)

    def diff(self, var: str) -> Expression:
        """Return the derivative w.r.t. ``var``."""
        # Chain rule: d/dx sin(u) = cos(u) * u'
        outer = Cos(self.operand)
        inner = self.operand.diff(var)
        return Mul(outer, inner)

    def simplify(self) -> Expression:
        """Simplify the operand; fold to a ``Constant`` when it is constant."""
        reduced = self.operand.simplify()
        if not isinstance(reduced, Constant):
            return Sin(reduced)
        return Constant(math.sin(reduced.value))

    def to_str(self) -> str:
        """Render as ``sin(<operand>)``."""
        argument = self.operand.to_str()
        return f"sin({argument})"

    def to_latex(self) -> str:
        """Render as ``\\sin\\left(<operand>\\right)``."""
        argument = self.operand.to_latex()
        return f"\\sin\\left({argument}\\right)"
|
Sqrt
Bases: _Unary
sqrt(operand) (the principal square root).
Source code in src\cds\modeling\expression.py
class Sqrt(_Unary):
    """``sqrt(operand)`` (the principal square root)."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Return ``math.sqrt`` of the evaluated operand."""
        return math.sqrt(self.operand.evaluate(env))

    def diff(self, var: str) -> Expression:
        """Return the derivative w.r.t. ``var``."""
        # Chain rule: d/dx sqrt(u) = u' / (2 * sqrt(u))
        return Div(
            self.operand.diff(var),
            Mul(Constant(2.0), Sqrt(self.operand)),
        )

    def simplify(self) -> Expression:
        """Simplify the operand and fold the root of a non-negative constant.

        A negative constant is left as ``Sqrt(constant)`` instead of being
        folded, so simplifying never raises a math domain error; the error
        still surfaces from :meth:`evaluate`.
        """
        inner = self.operand.simplify()
        # Fold only inside the real domain of sqrt (this also skips NaN).
        if isinstance(inner, Constant) and inner.value >= 0:
            return Constant(math.sqrt(inner.value))
        return Sqrt(inner)

    def to_str(self) -> str:
        """Render as ``sqrt(<operand>)``."""
        return f"sqrt({self.operand.to_str()})"

    def to_latex(self) -> str:
        """Render as ``\\sqrt{<operand>}``."""
        return f"\\sqrt{{{self.operand.to_latex()}}}"
|
Sub
Bases: _Binary
left - right.
Source code in src\cds\modeling\expression.py
class Sub(_Binary):
    """``left - right``."""

    def evaluate(self, env: dict[str, float]) -> float:
        """Return the difference of both evaluated operands."""
        return self.left.evaluate(env) - self.right.evaluate(env)

    def diff(self, var: str) -> Expression:
        """Return the derivative w.r.t. ``var`` (difference of derivatives)."""
        return Sub(self.left.diff(var), self.right.diff(var))

    def simplify(self) -> Expression:
        """Simplify both operands, fold constants, and apply ``x - 0 = x``."""
        left = self.left.simplify()
        right = self.right.simplify()
        if isinstance(left, Constant) and isinstance(right, Constant):
            return Constant(left.value - right.value)
        if isinstance(right, Constant) and right.value == 0.0:
            return left
        return Sub(left, right)

    def to_str(self) -> str:
        """Render as a fully parenthesised infix difference."""
        return f"({self.left.to_str()} - {self.right.to_str()})"

    def to_latex(self) -> str:
        """Render as ``left - right``, grouping a sum/difference on the right.

        Subtraction is not associative: without grouping, ``a - (b - c)``
        would print as ``a - b - c``.
        """
        right_text = self.right.to_latex()
        # NOTE(review): assumes Add.to_latex, like this method, emits no
        # outer parentheses of its own — confirm against Add.
        if isinstance(self.right, (Add, Sub)):
            right_text = f"\\left({right_text}\\right)"
        return f"{self.left.to_latex()} - {right_text}"
|
Variable
Bases: Expression
A named symbolic variable (e.g. x, theta).
Source code in src\cds\modeling\expression.py
class Variable(Expression):
    """A named symbolic variable (e.g. ``x``, ``theta``)."""

    __slots__ = ("name",)

    def __init__(self, name: str) -> None:
        """Store the variable's name."""
        self.name = name

    def evaluate(self, env: dict[str, float]) -> float:
        """Look this variable up in ``env``.

        Raises:
            ValueError: if ``env`` has no entry for this variable's name.
        """
        key = self.name
        if key in env:
            return env[key]
        raise ValueError(f"no value bound for variable {self.name!r}")

    def diff(self, var: str) -> Expression:
        """Return ``Constant(1.0)`` when ``var`` is this variable, else ``Constant(0.0)``."""
        if var == self.name:
            return Constant(1.0)
        return Constant(0.0)

    def variables(self) -> set[str]:
        """Return a one-element set holding this variable's name."""
        return {self.name}

    def to_str(self) -> str:
        """Render as the bare variable name."""
        return self.name

    def to_latex(self) -> str:
        """Render as the bare variable name."""
        return self.name

    def __eq__(self, other: object) -> bool:
        """Two variables are equal when their names match."""
        if not isinstance(other, Variable):
            return False
        return other.name == self.name

    def __hash__(self) -> int:
        """Hash on the name, consistent with :meth:`__eq__`."""
        return hash(("Variable", self.name))
|
MathModel
dataclass
A named system of symbolic equations sharing parameters and variables.
Attributes:
| Name |
Type |
Description |
name |
str
|
human-readable model title (used in :meth:to_markdown).
|
equations |
list[tuple[str, Expression]]
|
ordered (label, expression) pairs. Labels are the
equation names callers refer to (e.g. "velocity");
expressions are symbolic :class:Expression trees.
|
parameters |
dict[str, float]
|
constant values substituted during evaluation. They
shadow variables of the same name, mirroring how a physicist
treats g vs t.
|
variables |
list[str]
|
the declared free variables. Recorded explicitly so the
model is self-describing even before any equation is inspected.
|
description |
str | None
|
optional one-line summary.
|
Source code in src\cds\modeling\model.py
@dataclass
class MathModel:
    """A named system of symbolic equations sharing parameters and variables.

    Attributes:
        name: human-readable model title (used in :meth:`to_markdown`).
        equations: ordered ``(label, expression)`` pairs. Labels are the
            names callers use to refer to an equation (e.g. ``"velocity"``);
            expressions are symbolic :class:`Expression` trees.
        parameters: constant values merged into the bindings during
            evaluation. They take precedence over a variable binding of the
            same name.
        variables: the declared free variables, recorded explicitly so the
            model is self-describing even before any equation is inspected.
        description: optional one-line summary.
    """

    name: str
    equations: list[tuple[str, Expression]] = field(default_factory=list)
    parameters: dict[str, float] = field(default_factory=dict)
    variables: list[str] = field(default_factory=list)
    description: str | None = None

    # ------------------------------------------------------------------ #
    # Construction helpers
    # ------------------------------------------------------------------ #
    def add_equation(self, label: str, expr: Expression) -> None:
        """Append the ``(label, expr)`` pair to :attr:`equations`."""
        entry = (label, expr)
        self.equations.append(entry)

    def set_parameter(self, name: str, value: float) -> None:
        """Bind or overwrite parameter ``name``; the value is stored as a float."""
        numeric = float(value)
        self.parameters[name] = numeric

    # ------------------------------------------------------------------ #
    # Analysis
    # ------------------------------------------------------------------ #
    def evaluate(self, env: dict[str, float]) -> dict[str, float]:
        """Evaluate every equation under ``env`` merged with the parameters.

        Args:
            env: values for the free variables. A parameter of the same
                name overrides the value given here.

        Returns:
            mapping of equation label to its evaluated numeric value.

        Raises:
            ValueError: if a free variable has no binding (propagated from
                :meth:`Expression.evaluate`).
        """
        bindings: dict[str, float] = dict(env)
        bindings.update(self.parameters)
        results: dict[str, float] = {}
        for label, expr in self.equations:
            results[label] = expr.evaluate(bindings)
        return results

    def equation(self, label: str) -> Expression:
        """Return the expression of the first equation labelled ``label``.

        Raises:
            KeyError: if ``label`` is not in this model.
        """
        matches = [expr for name, expr in self.equations if name == label]
        if not matches:
            raise KeyError(f"no equation labelled {label!r} in model {self.name!r}")
        return matches[0]

    def gradient(self, label: str, var: str) -> Expression:
        """Return the symbolic derivative of equation ``label`` w.r.t. ``var``."""
        target = self.equation(label)
        return target.diff(var)

    def jacobian(self, var: str) -> dict[str, Expression]:
        """Differentiate *every* equation symbolically w.r.t. ``var``.

        Returns:
            mapping of equation label to its derivative expression.
        """
        derivatives: dict[str, Expression] = {}
        for label, expr in self.equations:
            derivatives[label] = expr.diff(var)
        return derivatives

    def free_variables(self) -> set[str]:
        """Return the variable names used by the equations, minus parameter names."""
        used: set[str] = set().union(*(expr.variables() for _, expr in self.equations))
        return used.difference(self.parameters)

    # ------------------------------------------------------------------ #
    # Rendering
    # ------------------------------------------------------------------ #
    def to_markdown(self) -> str:
        """Render the model as Markdown: title, optional sections, equations."""
        out: list[str] = [f"# Model: {self.name}", ""]
        if self.description:
            out.extend((self.description, ""))
        if self.parameters:
            out.append("## Parameters")
            out.extend(f"- `{name}` = {value}" for name, value in self.parameters.items())
            out.append("")
        if self.variables:
            listed = ", ".join(f"`{v}`" for v in self.variables)
            out.extend(("## Variables", listed, ""))
        out.append("## Equations")
        out.extend(f"- **{label}**: `{expr.to_str()}`" for label, expr in self.equations)
        out.append("")
        return "\n".join(out)

    def to_latex(self) -> str:
        """Render the equations as one LaTeX ``align*`` block."""
        body = " \\\\ ".join(
            f"{label} &= {expr.to_latex()}" for label, expr in self.equations
        )
        return "\\begin{align*}\n" + body + "\n\\end{align*}"
|
Methods:
add_equation
add_equation(label: str, expr: Expression) -> None
Append a named equation to the system.
Source code in src\cds\modeling\model.py
def add_equation(self, label: str, expr: Expression) -> None:
    """Append the ``(label, expr)`` pair to this model's equation list."""
    entry = (label, expr)
    self.equations.append(entry)
|
set_parameter
set_parameter(name: str, value: float) -> None
Bind or update a named parameter value.
Source code in src\cds\modeling\model.py
def set_parameter(self, name: str, value: float) -> None:
    """Bind or overwrite parameter ``name``; the value is stored as a float."""
    numeric = float(value)
    self.parameters[name] = numeric
|
evaluate
evaluate(env: dict[str, float]) -> dict[str, float]
Evaluate every equation, merging parameters into the bindings.
Parameters:
| Name |
Type |
Description |
Default |
env
|
dict[str, float]
|
values for the free variables (parameters override these
if a name collides).
|
required
|
Returns:
| Type |
Description |
dict[str, float]
|
mapping of equation label to its evaluated numeric value.
|
Raises:
| Type |
Description |
ValueError
|
if a free variable has no binding (propagated from
:meth:Expression.evaluate).
|
Source code in src\cds\modeling\model.py
def evaluate(self, env: dict[str, float]) -> dict[str, float]:
    """Evaluate every equation under ``env`` merged with the parameters.

    Args:
        env: values for the free variables. A parameter of the same name
            overrides the value given here.

    Returns:
        mapping of equation label to its evaluated numeric value.

    Raises:
        ValueError: if a free variable has no binding (propagated from
            :meth:`Expression.evaluate`).
    """
    bindings: dict[str, float] = dict(env)
    bindings.update(self.parameters)
    results: dict[str, float] = {}
    for label, expr in self.equations:
        results[label] = expr.evaluate(bindings)
    return results
|
equation
equation(label: str) -> Expression
Return the expression for a named equation.
Raises:
| Type |
Description |
KeyError
|
if label is not in this model.
|
Source code in src\cds\modeling\model.py
def equation(self, label: str) -> Expression:
    """Return the expression of the first equation labelled ``label``.

    Raises:
        KeyError: if ``label`` is not in this model.
    """
    matches = [expr for name, expr in self.equations if name == label]
    if not matches:
        raise KeyError(f"no equation labelled {label!r} in model {self.name!r}")
    return matches[0]
|
gradient
gradient(label: str, var: str) -> Expression
Symbolic partial derivative of one equation w.r.t. one variable.
Source code in src\cds\modeling\model.py
def gradient(self, label: str, var: str) -> Expression:
    """Return the symbolic derivative of equation ``label`` w.r.t. ``var``."""
    target = self.equation(label)
    return target.diff(var)
|
jacobian
jacobian(var: str) -> dict[str, Expression]
Symbolic partial derivative of every equation w.r.t. var.
Returns:
| Type |
Description |
dict[str, Expression]
|
mapping of equation label to its derivative expression.
|
Source code in src\cds\modeling\model.py
def jacobian(self, var: str) -> dict[str, Expression]:
    """Differentiate *every* equation symbolically w.r.t. ``var``.

    Returns:
        mapping of equation label to its derivative expression.
    """
    derivatives: dict[str, Expression] = {}
    for label, expr in self.equations:
        derivatives[label] = expr.diff(var)
    return derivatives
|
free_variables
free_variables() -> set[str]
All variable names that actually appear in some equation.
Source code in src\cds\modeling\model.py
def free_variables(self) -> set[str]:
    """Return the variable names used by the equations, minus parameter names."""
    used: set[str] = set().union(*(expr.variables() for _, expr in self.equations))
    return used.difference(self.parameters)
|
to_markdown
Render this model as a structured Markdown document.
Source code in src\cds\modeling\model.py
def to_markdown(self) -> str:
    """Render the model as Markdown: title, optional sections, equations."""
    out: list[str] = [f"# Model: {self.name}", ""]
    if self.description:
        out.extend((self.description, ""))
    if self.parameters:
        out.append("## Parameters")
        out.extend(f"- `{name}` = {value}" for name, value in self.parameters.items())
        out.append("")
    if self.variables:
        listed = ", ".join(f"`{v}`" for v in self.variables)
        out.extend(("## Variables", listed, ""))
    out.append("## Equations")
    out.extend(f"- **{label}**: `{expr.to_str()}`" for label, expr in self.equations)
    out.append("")
    return "\n".join(out)
|
to_latex
Render the model's equations as a LaTeX align block.
Source code in src\cds\modeling\model.py
def to_latex(self) -> str:
    """Render the equations as one LaTeX ``align*`` block."""
    body = " \\\\ ".join(
        f"{label} &= {expr.to_latex()}" for label, expr in self.equations
    )
    return "\\begin{align*}\n" + body + "\n\\end{align*}"
|
FitResult
dataclass
Outcome of :func:fit_parameters (least-squares fitting).
Attributes:
| Name |
Type |
Description |
parameters |
dict[str, float]
|
fitted values, keyed by parameter name.
|
residual |
float
|
final sum-of-squared-residuals objective value.
|
iterations |
int
|
number of gradient-descent steps taken.
|
converged |
bool
|
whether the gradient magnitude dropped below tolerance.
|
Source code in src\cds\modeling\solver.py
@dataclass
class FitResult:
    """Result record returned by :func:`fit_parameters` (least-squares fitting).

    Attributes:
        parameters: the fitted values, keyed by parameter name.
        residual: final value of the sum-of-squared-residuals objective.
        iterations: how many gradient-descent steps were taken.
        converged: ``True`` when the gradient magnitude fell below tolerance.
    """

    parameters: dict[str, float]
    residual: float
    iterations: int
    converged: bool
|
SolveResult
dataclass
Outcome of :func:solve_equation (root finding).
Attributes:
| Name |
Type |
Description |
x |
float
|
the root found.
|
residual |
float
|
|f(x)| at the root (should be near zero on convergence).
|
iterations |
int
|
number of Newton steps taken.
|
converged |
bool
|
whether the residual dropped below tolerance.
|
Source code in src\cds\modeling\solver.py
@dataclass
class SolveResult:
    """Result record returned by :func:`solve_equation` (root finding).

    Attributes:
        x: the root found.
        residual: ``|f(x)|`` at the root (near zero on convergence).
        iterations: how many Newton steps were taken.
        converged: ``True`` when the residual fell below tolerance.
    """

    x: float
    residual: float
    iterations: int
    converged: bool
|
Functions:
fit_parameters
fit_parameters(
model: MathModel,
observed: Sequence[tuple[dict[str, float], float]],
parameter_names: Sequence[str],
x0: Sequence[float] | None = None,
*,
target_label: str | None = None,
lr: float = GD_DEFAULT_LR,
tol: float = DEFAULT_TOLERANCE,
max_iter: int = 10000
) -> FitResult
Fit a model's parameters to observed data via least squares.
The objective minimised is the residual sum of squares between the
model's prediction and the observed values, summed over all observations::
L(p) = Σ_i (model.evaluate(obs_env_i)[target] - observed_value_i) ** 2
where the target equation is either target_label or, if omitted,
the model's first equation.
Parameters:
| Name |
Type |
Description |
Default |
model
|
MathModel
|
the :class:MathModel whose parameters are tuned.
|
required
|
observed
|
Sequence[tuple[dict[str, float], float]]
|
a sequence of (env, value) pairs; each env provides
the free-variable values for one observation and value the
measured outcome to fit.
|
required
|
parameter_names
|
Sequence[str]
|
parameter names to fit (order matches x0 and the
returned :attr:FitResult.parameters).
|
required
|
x0
|
Sequence[float] | None
|
starting guesses, positionally aligned with parameter_names.
Defaults to all-zeros.
|
None
|
target_label
|
str | None
|
which equation's output to fit. If None, the first
equation in model.equations is used.
|
None
|
lr
|
float
|
gradient-descent learning rate.
|
GD_DEFAULT_LR
|
tol
|
float
|
convergence tolerance on gradient magnitude.
|
DEFAULT_TOLERANCE
|
max_iter
|
int
|
iteration cap.
|
10000
|
Returns:
| Type |
Description |
FitResult
|
a :class:FitResult with the fitted parameters.
|
Raises:
| Type |
Description |
ValueError
|
if parameter_names is empty, if target_label is
unknown, or if there are no observations.
|
Source code in src\cds\modeling\solver.py
def fit_parameters(
    model: MathModel,
    observed: Sequence[tuple[dict[str, float], float]],
    parameter_names: Sequence[str],
    x0: Sequence[float] | None = None,
    *,
    target_label: str | None = None,
    lr: float = GD_DEFAULT_LR,
    tol: float = DEFAULT_TOLERANCE,
    max_iter: int = 10000,
) -> FitResult:
    """Fit a model's parameters to observed data via least squares.

    The objective minimised is the residual sum of squares between the
    target equation's prediction and the observed values, summed over all
    observations::

        L(p) = Σ_i (target(obs_env_i, p) - observed_value_i) ** 2

    where the target equation is either ``target_label`` or, if omitted,
    the model's first equation. ``model.parameters`` is read but not
    modified; the fitted values are only returned.

    Args:
        model: the :class:`MathModel` whose parameters are tuned.
        observed: a sequence of ``(env, value)`` pairs; each ``env`` provides
            the free-variable values for one observation and ``value`` the
            measured outcome to fit.
        parameter_names: parameter names to fit (order matches ``x0`` and the
            returned :attr:`FitResult.parameters`).
        x0: starting guesses, positionally aligned with ``parameter_names``.
            Defaults to all-zeros.
        target_label: which equation's output to fit. If ``None``, the first
            equation in ``model.equations`` is used.
        lr: gradient-descent learning rate.
        tol: convergence tolerance on gradient magnitude.
        max_iter: iteration cap.

    Returns:
        The fitted parameters together with the final residual, the
        iteration count and the convergence flag.

    Raises:
        ValueError: if ``parameter_names`` is empty, if there are no
            observations, if ``x0`` does not have one entry per parameter
            name, or if ``target_label`` is ``None`` and the model has no
            equations.
        KeyError: if ``target_label`` names no equation (propagated from
            :meth:`MathModel.equation`).
    """
    names = list(parameter_names)
    if not names:
        raise ValueError("parameter_names must list at least one parameter to fit")
    observations = list(observed)
    if not observations:
        raise ValueError("observed must contain at least one (env, value) pair")

    if x0 is None:
        start = [0.0] * len(names)
    else:
        start = list(x0)
        # zip() below would silently drop the surplus names or guesses.
        if len(start) != len(names):
            raise ValueError(
                f"x0 has {len(start)} values but parameter_names has {len(names)}"
            )

    # Resolve the target equation once.
    if target_label is None:
        if not model.equations:
            raise ValueError("model has no equations to fit")
        target_label = model.equations[0][0]
    target_expr = model.equation(target_label)
    base_params = dict(model.parameters)

    def objective(params: list[float]) -> float:
        # Candidate values override the model's stored parameters, and the
        # combined parameters override each observation's own bindings.
        params_full = {**base_params, **dict(zip(names, params))}
        total = 0.0
        for env, observed_value in observations:
            predicted = target_expr.evaluate({**env, **params_full})
            residual = predicted - observed_value
            total += residual * residual
        return total

    opt = gradient_descent(objective, x0=start, lr=lr, tol=tol, max_iter=max_iter)
    # gradient_descent's list-input overload returns OptResult[list[float]], so
    # opt.x is statically a list[float] — no runtime narrowing needed.
    return FitResult(
        parameters=dict(zip(names, opt.x)),
        residual=opt.value,
        iterations=opt.iterations,
        converged=opt.converged,
    )
|
solve_equation
solve_equation(
expr: Expression,
variable: str,
x0: float = 1.0,
tol: float = NEWTON_TOLERANCE,
max_iter: int = 1000,
) -> SolveResult
Find a root of expr (i.e. solve expr = 0) for one variable.
Compiles expr to a callable and hands it to Newton-Raphson.
Parameters:
| Name |
Type |
Description |
Default |
expr
|
Expression
|
the symbolic expression whose root to find.
|
required
|
variable
|
str
|
the single free variable to solve for.
|
required
|
x0
|
float
|
starting guess.
|
1.0
|
tol
|
float
|
convergence tolerance on |expr(x)|.
|
NEWTON_TOLERANCE
|
max_iter
|
int
|
iteration cap.
|
1000
|
Returns:
| Type |
Description |
SolveResult
|
a :class:SolveResult describing the root found.
|
Raises:
| Type |
Description |
ValueError
|
if variable is not free in expr (propagated from
:meth:Expression.to_func).
|
Source code in src\cds\modeling\solver.py
def solve_equation(
    expr: Expression,
    variable: str,
    x0: float = 1.0,
    tol: float = NEWTON_TOLERANCE,
    max_iter: int = 1000,
) -> SolveResult:
    """Solve ``expr = 0`` for a single variable.

    ``expr`` is compiled to a one-argument callable with
    :meth:`Expression.to_func` and passed to ``newton_method``.

    Args:
        expr: the symbolic expression whose root to find.
        variable: the single free variable to solve for.
        x0: starting guess.
        tol: convergence tolerance on ``|expr(x)|``.
        max_iter: iteration cap.

    Returns:
        The root found, with its absolute residual, the iteration count and
        the convergence flag.

    Raises:
        ValueError: if ``variable`` is not free in ``expr`` (propagated from
            :meth:`Expression.to_func`).
    """
    residual_fn = expr.to_func(variable)
    outcome = newton_method(residual_fn, x0=x0, tol=tol, max_iter=max_iter)
    # newton_method returns OptResult[float] (scalar root-finder), so
    # outcome.x is statically a float — no runtime narrowing needed.
    root = outcome.x
    magnitude = abs(outcome.value)
    return SolveResult(
        x=root,
        residual=magnitude,
        iterations=outcome.iterations,
        converged=outcome.converged,
    )
|
Knowledge Organization
A knowledge graph of named concepts connected by typed, directed relations (is-a, depends-on, …) with undirected traversal (shortest path, transitive closure, cycle detection), a notebook of research notes linked to concept names, and ranked structured retrieval across both. Persistence is JSON via the stdlib.
cds.knowledge
Knowledge organization — concept graphs, research notes, and structured retrieval.
A self-contained subsystem for organising research knowledge:
- :class:KnowledgeGraph of named :class:Concept nodes connected by typed,
  directed :class:Relation edges ("is-a", "depends-on", …).
- :class:Notebook of :class:Note records linked to concept names.
- :func:search for structured retrieval across both, ranked by relevance.
All of it is pure Python (stdlib :mod:json for persistence) and decoupled
from :mod:cds.graph, whose dense integer-vertex, untyped-edge model is a
poor fit for named concepts with typed relationships.
Classes
Concept
dataclass
A named node in the knowledge graph.
Attributes:
| Name |
Type |
Description |
name |
str
|
the unique, human-readable concept identifier (also its
dictionary key inside a :class:KnowledgeGraph).
|
description |
str | None
|
optional one- or two-line summary of the concept.
|
tags |
list[str]
|
free-form labels for grouping and retrieval (e.g.
["physics", "mechanics"]).
|
metadata |
dict[str, str]
|
additional string-valued properties (e.g. source URLs,
units) that don't fit the structured fields above.
|
Source code in src\cds\knowledge\graph.py
@dataclass
class Concept:
    """A named node in the knowledge graph.

    Equality and hashing consider only :attr:`name`.

    Attributes:
        name: the unique, human-readable concept identifier (also its
            dictionary key inside a :class:`KnowledgeGraph`).
        description: optional one- or two-line summary of the concept.
        tags: free-form labels for grouping and retrieval (e.g.
            ``["physics", "mechanics"]``).
        metadata: additional string-valued properties (e.g. source URLs,
            units) that don't fit the structured fields above.
    """

    name: str
    description: str | None = None
    tags: list[str] = field(default_factory=list)
    metadata: dict[str, str] = field(default_factory=dict)

    def __eq__(self, other: object) -> bool:
        """Two concepts are equal when their names match."""
        return isinstance(other, Concept) and other.name == self.name

    def __hash__(self) -> int:
        """Hash on the name, consistent with :meth:`__eq__`."""
        return hash(("Concept", self.name))

    def to_dict(self) -> dict[str, object]:
        """Serialize this concept to a JSON-friendly dict.

        ``tags`` and ``metadata`` are shallow-copied.
        """
        return {
            "name": self.name,
            "description": self.description,
            "tags": list(self.tags),
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> Concept:
        """Reconstruct a :class:`Concept` from :meth:`to_dict` output.

        Raises:
            ValueError: if ``data`` is missing keys or has the wrong types.
        """
        # Report absent keys as ValueError, as documented, rather than
        # letting the lookups below fail with KeyError.
        missing = [
            key for key in ("name", "description", "tags", "metadata") if key not in data
        ]
        if missing:
            raise ValueError(f"Concept data is missing keys: {missing}")
        name = data["name"]
        if not isinstance(name, str):
            raise ValueError(f"Invalid concept name: {name!r}")
        description = data["description"]
        if description is not None and not isinstance(description, str):
            raise ValueError(f"Invalid concept description: {description!r}")
        tags_raw = data["tags"]
        if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
            raise ValueError(f"Invalid concept tags: {tags_raw!r}")
        metadata_raw = data["metadata"]
        if not isinstance(metadata_raw, dict) or not all(
            isinstance(k, str) and isinstance(v, str) for k, v in metadata_raw.items()
        ):
            raise ValueError(f"Invalid concept metadata: {metadata_raw!r}")
        return cls(
            name=name,
            description=description,
            tags=list(tags_raw),
            metadata=dict(metadata_raw),
        )
|
Methods:
to_dict
to_dict() -> dict[str, object]
Serialize this concept to a JSON-friendly dict.
Source code in src\cds\knowledge\graph.py
def to_dict(self) -> dict[str, object]:
    """Serialize this concept to a JSON-friendly dict.

    ``tags`` and ``metadata`` are shallow-copied.
    """
    payload: dict[str, object] = {"name": self.name}
    payload["description"] = self.description
    payload["tags"] = list(self.tags)
    payload["metadata"] = dict(self.metadata)
    return payload
|
from_dict
classmethod
from_dict(data: dict[str, object]) -> Concept
Reconstruct a :class:Concept from :meth:to_dict output.
Raises:
| Type |
Description |
ValueError
|
if data is missing keys or has the wrong types.
|
Source code in src\cds\knowledge\graph.py
| @classmethod
def from_dict(cls, data: dict[str, object]) -> Concept:
"""Reconstruct a :class:`Concept` from :meth:`to_dict` output.
Raises:
ValueError: if ``data`` is missing keys or has the wrong types.
"""
name = data["name"]
if not isinstance(name, str):
raise ValueError(f"Invalid concept name: {name!r}")
description = data["description"]
if description is not None and not isinstance(description, str):
raise ValueError(f"Invalid concept description: {description!r}")
tags_raw = data["tags"]
if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
raise ValueError(f"Invalid concept tags: {tags_raw!r}")
metadata_raw = data["metadata"]
if not isinstance(metadata_raw, dict) or not all(
isinstance(k, str) and isinstance(v, str) for k, v in metadata_raw.items()
):
raise ValueError(f"Invalid concept metadata: {metadata_raw!r}")
return cls(
name=name,
description=description,
tags=list(tags_raw),
metadata=dict(metadata_raw),
)
|
KnowledgeGraph
dataclass
A knowledge graph of named concepts and typed relations.
Attributes:
| Name |
Type |
Description |
name |
str
|
human-readable graph title (used in to_markdown()).
|
concepts |
dict[str, Concept]
|
mapping of concept name to its Concept node.
|
relations |
list[Relation]
|
ordered list of Relation edges.
|
Source code in src\cds\knowledge\graph.py
| @dataclass
class KnowledgeGraph:
"""A knowledge graph of named concepts and typed relations.
Attributes:
name: human-readable graph title (used in :meth:`to_markdown`).
concepts: mapping of concept name to :class:`Concept` node.
relations: ordered list of :class:`Relation` edges.
"""
name: str
concepts: dict[str, Concept] = field(default_factory=dict)
relations: list[Relation] = field(default_factory=list)
# ------------------------------------------------------------------ #
# Construction
# ------------------------------------------------------------------ #
def add_concept(
self,
name: str,
description: str | None = None,
tags: list[str] | None = None,
metadata: dict[str, str] | None = None,
) -> Concept:
"""Add a concept, returning the stored node.
If ``name`` already exists, the existing concept is returned
unchanged (idempotent) rather than overwritten.
Args:
name: unique concept identifier.
description: optional summary.
tags: optional grouping labels.
metadata: optional string-valued properties.
Returns:
the stored :class:`Concept` (newly created or the pre-existing one).
"""
if name in self.concepts:
return self.concepts[name]
concept = Concept(
name=name,
description=description,
tags=list(tags) if tags else [],
metadata=dict(metadata) if metadata else {},
)
self.concepts[name] = concept
return concept
def add_relation(
self,
source: str,
target: str,
kind: str,
weight: float = 1.0,
) -> Relation:
"""Add a typed, directed relation ``source -> target``.
Both endpoints must already exist as concepts (use
:meth:`link_concepts` to auto-create them).
Raises:
KeyError: if ``source`` or ``target`` is not a known concept.
"""
for endpoint, label in ((source, "source"), (target, "target")):
if endpoint not in self.concepts:
raise KeyError(f"unknown {label} concept: {endpoint!r}")
relation = Relation(source=source, target=target, kind=kind, weight=weight)
self.relations.append(relation)
return relation
def link_concepts(
self,
source: str,
target: str,
kind: str,
weight: float = 1.0,
) -> Relation:
"""Auto-create both concepts (if missing) and add a relation between them."""
self.add_concept(source)
self.add_concept(target)
return self.add_relation(source, target, kind, weight)
# ------------------------------------------------------------------ #
# Queries
# ------------------------------------------------------------------ #
def neighbors(self, name: str, kind: str | None = None) -> list[str]:
"""Undirected neighbors of ``name`` — every concept directly linked.
A relation touching ``name`` at either endpoint contributes its
*other* endpoint. ``kind`` optionally restricts to one relation type.
Raises:
KeyError: if ``name`` is not a known concept.
"""
self._require_concept(name)
found: list[str] = []
for relation in self.relations:
if kind is not None and relation.kind != kind:
continue
if relation.source == name and relation.target not in found:
found.append(relation.target)
elif relation.target == name and relation.source not in found:
found.append(relation.source)
return found
def neighbors_out(self, name: str, kind: str | None = None) -> list[str]:
"""Concepts that ``name`` points to via outgoing relations.
Raises:
KeyError: if ``name`` is not a known concept.
"""
self._require_concept(name)
found: list[str] = []
for relation in self.relations:
if kind is not None and relation.kind != kind:
continue
if relation.source == name and relation.target not in found:
found.append(relation.target)
return found
def neighbors_in(self, name: str, kind: str | None = None) -> list[str]:
"""Concepts that point at ``name`` via incoming relations.
Raises:
KeyError: if ``name`` is not a known concept.
"""
self._require_concept(name)
found: list[str] = []
for relation in self.relations:
if kind is not None and relation.kind != kind:
continue
if relation.target == name and relation.source not in found:
found.append(relation.source)
return found
def find_path(self, source: str, target: str) -> list[str] | None:
"""Shortest undirected path (by hop count) from ``source`` to ``target``.
Returns the sequence of concept names ``[source, ..., target]``, or
``None`` if no path exists or the endpoints are unknown. A path of
length 1 (``source == target``) returns ``[source]``.
Uses BFS following the edges in either direction, so the returned
path may traverse relations against their direction.
"""
if source not in self.concepts or target not in self.concepts:
return None
if source == target:
return [source]
predecessor: dict[str, str] = {source: source}
queue: deque[str] = deque([source])
while queue:
node = queue.popleft()
for neighbor in self.neighbors(node):
if neighbor in predecessor:
continue
predecessor[neighbor] = node
if neighbor == target:
return _reconstruct_path(predecessor, target)
queue.append(neighbor)
return None
def reachable(self, start: str) -> set[str]:
"""All concepts reachable from ``start`` over undirected edges (incl. itself).
Returns an empty set if ``start`` is not a known concept.
"""
if start not in self.concepts:
return set()
seen: set[str] = set()
queue: deque[str] = deque([start])
seen.add(start)
while queue:
node = queue.popleft()
for neighbor in self.neighbors(node):
if neighbor not in seen:
seen.add(neighbor)
queue.append(neighbor)
return seen
def find_cycles(self) -> list[list[str]]:
"""Return every directed cycle in the graph as a list of concept names.
Each cycle is reported once, normalized to start at its
lexicographically smallest member so the same cycle is not reported
from every starting rotation. Self-loops (a relation whose source and
target are equal) are returned as ``[name]``.
Uses DFS back-edge detection with an explicit recursion-emulating
stack so deep graphs do not hit Python's recursion limit.
"""
adj: dict[str, list[str]] = {name: [] for name in self.concepts}
for relation in self.relations:
adj[relation.source].append(relation.target)
found: set[tuple[str, ...]] = set()
WHITE, GRAY, BLACK = 0, 1, 2
color: dict[str, int] = {name: WHITE for name in self.concepts}
for root in sorted(self.concepts):
if color[root] != WHITE:
continue
# Each stack frame: the node plus an iterator position over its successors.
stack: list[tuple[str, list[str]]] = [(root, list(adj[root]))]
color[root] = GRAY
path: list[str] = [root]
while stack:
node, succs = stack[-1]
advanced = False
while succs:
nxt = succs.pop()
if color[nxt] == GRAY:
# Back edge: a cycle from nxt back along the current
# DFS path. A node is GRAY iff it is on ``path`` (we
# always append to ``path`` in lockstep with marking
# GRAY below), so ``nxt`` is guaranteed to be present.
cycle = path[path.index(nxt) :]
found.add(_normalize_cycle(cycle))
# Do not descend into the gray node; keep scanning successors.
continue
# The only remaining color is WHITE: descend into it.
color[nxt] = GRAY
path.append(nxt)
stack.append((nxt, list(adj[nxt])))
advanced = True
break
if not advanced:
# Exhausted this node's successors: mark black and pop.
color[node] = BLACK
path.pop()
stack.pop()
return [list(cycle) for cycle in sorted(found)]
def _require_concept(self, name: str) -> None:
if name not in self.concepts:
raise KeyError(f"unknown concept: {name!r}")
# ------------------------------------------------------------------ #
# Rendering & serialization
# ------------------------------------------------------------------ #
def to_markdown(self) -> str:
"""Render this graph as a structured Markdown document."""
lines: list[str] = [f"# Knowledge Graph: {self.name}", ""]
if not self.concepts:
lines += ["_No concepts._", ""]
else:
lines += ["## Concepts", ""]
for name in sorted(self.concepts):
lines.append(f"- **{name}**")
lines.append("")
if not self.relations:
lines += ["_No relations._", ""]
else:
lines += ["## Relations", ""]
for relation in self.relations:
lines.append(f"- `{relation.source}` --{relation.kind}--> `{relation.target}`")
lines.append("")
return "\n".join(lines)
def to_dict(self) -> dict[str, object]:
"""Serialize the whole graph to a JSON-friendly dict."""
return {
"name": self.name,
"concepts": [concept.to_dict() for concept in self.concepts.values()],
"relations": [relation.to_dict() for relation in self.relations],
}
@classmethod
def from_dict(cls, data: dict[str, object]) -> KnowledgeGraph:
"""Reconstruct a :class:`KnowledgeGraph` from :meth:`to_dict` output.
Raises:
ValueError: if ``data`` is missing keys or has the wrong types.
"""
name = data["name"]
if not isinstance(name, str):
raise ValueError(f"Invalid graph name: {name!r}")
concepts_raw = data["concepts"]
relations_raw = data["relations"]
if not isinstance(concepts_raw, list):
raise ValueError(f"Invalid concepts list: {concepts_raw!r}")
if not isinstance(relations_raw, list):
raise ValueError(f"Invalid relations list: {relations_raw!r}")
graph = cls(name=name)
for item in concepts_raw:
if not isinstance(item, dict):
raise ValueError(f"Invalid concept entry: {item!r}")
concept = Concept.from_dict(item)
graph.concepts[concept.name] = concept
for item in relations_raw:
if not isinstance(item, dict):
raise ValueError(f"Invalid relation entry: {item!r}")
graph.relations.append(Relation.from_dict(item))
return graph
def save(self, path: str | Path) -> None:
"""Write this graph to ``path`` as indented UTF-8 JSON."""
Path(path).write_text(
json.dumps(self.to_dict(), ensure_ascii=False, indent=2),
encoding="utf-8",
)
@classmethod
def load(cls, path: str | Path) -> KnowledgeGraph:
"""Read a graph previously written by :meth:`save`.
Raises:
ValueError: if the file does not contain valid graph JSON.
"""
data = json.loads(Path(path).read_text(encoding="utf-8"))
if not isinstance(data, dict):
raise ValueError(f"Invalid graph file (expected object): {data!r}")
return cls.from_dict(data)
|
Methods:
add_concept
add_concept(
name: str,
description: str | None = None,
tags: list[str] | None = None,
metadata: dict[str, str] | None = None,
) -> Concept
Add a concept, returning the stored node.
If name already exists, the existing concept is returned
unchanged (idempotent) rather than overwritten.
Parameters:
| Name |
Type |
Description |
Default |
name
|
str
|
unique concept identifier.
|
required
|
description
|
str | None
|
optional summary.
|
None
|
tags
|
list[str] | None
|
optional grouping labels.
|
None
|
metadata
|
dict[str, str] | None
|
optional string-valued properties.
|
None
|
Returns:
| Type |
Description |
Concept
|
the stored Concept (newly created or the pre-existing one).
|
Source code in src\cds\knowledge\graph.py
| def add_concept(
self,
name: str,
description: str | None = None,
tags: list[str] | None = None,
metadata: dict[str, str] | None = None,
) -> Concept:
"""Add a concept, returning the stored node.
If ``name`` already exists, the existing concept is returned
unchanged (idempotent) rather than overwritten.
Args:
name: unique concept identifier.
description: optional summary.
tags: optional grouping labels.
metadata: optional string-valued properties.
Returns:
the stored :class:`Concept` (newly created or the pre-existing one).
"""
if name in self.concepts:
return self.concepts[name]
concept = Concept(
name=name,
description=description,
tags=list(tags) if tags else [],
metadata=dict(metadata) if metadata else {},
)
self.concepts[name] = concept
return concept
|
add_relation
add_relation(
source: str, target: str, kind: str, weight: float = 1.0
) -> Relation
Add a typed, directed relation source -> target.
Both endpoints must already exist as concepts (use
link_concepts() to auto-create them).
Raises:
| Type |
Description |
KeyError
|
if source or target is not a known concept.
|
Source code in src\cds\knowledge\graph.py
| def add_relation(
self,
source: str,
target: str,
kind: str,
weight: float = 1.0,
) -> Relation:
"""Add a typed, directed relation ``source -> target``.
Both endpoints must already exist as concepts (use
:meth:`link_concepts` to auto-create them).
Raises:
KeyError: if ``source`` or ``target`` is not a known concept.
"""
for endpoint, label in ((source, "source"), (target, "target")):
if endpoint not in self.concepts:
raise KeyError(f"unknown {label} concept: {endpoint!r}")
relation = Relation(source=source, target=target, kind=kind, weight=weight)
self.relations.append(relation)
return relation
|
link_concepts
link_concepts(
source: str, target: str, kind: str, weight: float = 1.0
) -> Relation
Auto-create both concepts (if missing) and add a relation between them.
Source code in src\cds\knowledge\graph.py
| def link_concepts(
self,
source: str,
target: str,
kind: str,
weight: float = 1.0,
) -> Relation:
"""Auto-create both concepts (if missing) and add a relation between them."""
self.add_concept(source)
self.add_concept(target)
return self.add_relation(source, target, kind, weight)
|
neighbors
neighbors(name: str, kind: str | None = None) -> list[str]
Undirected neighbors of name — every concept directly linked.
A relation touching name at either endpoint contributes its
other endpoint. kind optionally restricts to one relation type.
Raises:
| Type |
Description |
KeyError
|
if name is not a known concept.
|
Source code in src\cds\knowledge\graph.py
| def neighbors(self, name: str, kind: str | None = None) -> list[str]:
"""Undirected neighbors of ``name`` — every concept directly linked.
A relation touching ``name`` at either endpoint contributes its
*other* endpoint. ``kind`` optionally restricts to one relation type.
Raises:
KeyError: if ``name`` is not a known concept.
"""
self._require_concept(name)
found: list[str] = []
for relation in self.relations:
if kind is not None and relation.kind != kind:
continue
if relation.source == name and relation.target not in found:
found.append(relation.target)
elif relation.target == name and relation.source not in found:
found.append(relation.source)
return found
|
neighbors_out
neighbors_out(
name: str, kind: str | None = None
) -> list[str]
Concepts that name points to via outgoing relations.
Raises:
| Type |
Description |
KeyError
|
if name is not a known concept.
|
Source code in src\cds\knowledge\graph.py
| def neighbors_out(self, name: str, kind: str | None = None) -> list[str]:
"""Concepts that ``name`` points to via outgoing relations.
Raises:
KeyError: if ``name`` is not a known concept.
"""
self._require_concept(name)
found: list[str] = []
for relation in self.relations:
if kind is not None and relation.kind != kind:
continue
if relation.source == name and relation.target not in found:
found.append(relation.target)
return found
|
neighbors_in
neighbors_in(
name: str, kind: str | None = None
) -> list[str]
Concepts that point at name via incoming relations.
Raises:
| Type |
Description |
KeyError
|
if name is not a known concept.
|
Source code in src\cds\knowledge\graph.py
| def neighbors_in(self, name: str, kind: str | None = None) -> list[str]:
"""Concepts that point at ``name`` via incoming relations.
Raises:
KeyError: if ``name`` is not a known concept.
"""
self._require_concept(name)
found: list[str] = []
for relation in self.relations:
if kind is not None and relation.kind != kind:
continue
if relation.target == name and relation.source not in found:
found.append(relation.source)
return found
|
find_path
find_path(source: str, target: str) -> list[str] | None
Shortest undirected path (by hop count) from source to target.
Returns the sequence of concept names [source, ..., target], or
None if no path exists or the endpoints are unknown. When
source == target, the single-node path [source] (zero hops) is returned.
Uses BFS following the edges in either direction, so the returned
path may traverse relations against their direction.
Source code in src\cds\knowledge\graph.py
| def find_path(self, source: str, target: str) -> list[str] | None:
"""Shortest undirected path (by hop count) from ``source`` to ``target``.
Returns the sequence of concept names ``[source, ..., target]``, or
``None`` if no path exists or the endpoints are unknown. A path of
length 1 (``source == target``) returns ``[source]``.
Uses BFS following the edges in either direction, so the returned
path may traverse relations against their direction.
"""
if source not in self.concepts or target not in self.concepts:
return None
if source == target:
return [source]
predecessor: dict[str, str] = {source: source}
queue: deque[str] = deque([source])
while queue:
node = queue.popleft()
for neighbor in self.neighbors(node):
if neighbor in predecessor:
continue
predecessor[neighbor] = node
if neighbor == target:
return _reconstruct_path(predecessor, target)
queue.append(neighbor)
return None
|
reachable
reachable(start: str) -> set[str]
All concepts reachable from start over undirected edges (incl. itself).
Returns an empty set if start is not a known concept.
Source code in src\cds\knowledge\graph.py
| def reachable(self, start: str) -> set[str]:
"""All concepts reachable from ``start`` over undirected edges (incl. itself).
Returns an empty set if ``start`` is not a known concept.
"""
if start not in self.concepts:
return set()
seen: set[str] = set()
queue: deque[str] = deque([start])
seen.add(start)
while queue:
node = queue.popleft()
for neighbor in self.neighbors(node):
if neighbor not in seen:
seen.add(neighbor)
queue.append(neighbor)
return seen
|
find_cycles
find_cycles() -> list[list[str]]
Return every directed cycle in the graph as a list of concept names.
Each cycle is reported once, normalized to start at its
lexicographically smallest member so the same cycle is not reported
from every starting rotation. Self-loops (a relation whose source and
target are equal) are returned as [name].
Uses DFS back-edge detection with an explicit recursion-emulating
stack so deep graphs do not hit Python's recursion limit.
Source code in src\cds\knowledge\graph.py
| def find_cycles(self) -> list[list[str]]:
"""Return every directed cycle in the graph as a list of concept names.
Each cycle is reported once, normalized to start at its
lexicographically smallest member so the same cycle is not reported
from every starting rotation. Self-loops (a relation whose source and
target are equal) are returned as ``[name]``.
Uses DFS back-edge detection with an explicit recursion-emulating
stack so deep graphs do not hit Python's recursion limit.
"""
adj: dict[str, list[str]] = {name: [] for name in self.concepts}
for relation in self.relations:
adj[relation.source].append(relation.target)
found: set[tuple[str, ...]] = set()
WHITE, GRAY, BLACK = 0, 1, 2
color: dict[str, int] = {name: WHITE for name in self.concepts}
for root in sorted(self.concepts):
if color[root] != WHITE:
continue
# Each stack frame: the node plus an iterator position over its successors.
stack: list[tuple[str, list[str]]] = [(root, list(adj[root]))]
color[root] = GRAY
path: list[str] = [root]
while stack:
node, succs = stack[-1]
advanced = False
while succs:
nxt = succs.pop()
if color[nxt] == GRAY:
# Back edge: a cycle from nxt back along the current
# DFS path. A node is GRAY iff it is on ``path`` (we
# always append to ``path`` in lockstep with marking
# GRAY below), so ``nxt`` is guaranteed to be present.
cycle = path[path.index(nxt) :]
found.add(_normalize_cycle(cycle))
# Do not descend into the gray node; keep scanning successors.
continue
# The only remaining color is WHITE: descend into it.
color[nxt] = GRAY
path.append(nxt)
stack.append((nxt, list(adj[nxt])))
advanced = True
break
if not advanced:
# Exhausted this node's successors: mark black and pop.
color[node] = BLACK
path.pop()
stack.pop()
return [list(cycle) for cycle in sorted(found)]
|
to_markdown
to_markdown() -> str
Render this graph as a structured Markdown document.
Source code in src\cds\knowledge\graph.py
| def to_markdown(self) -> str:
"""Render this graph as a structured Markdown document."""
lines: list[str] = [f"# Knowledge Graph: {self.name}", ""]
if not self.concepts:
lines += ["_No concepts._", ""]
else:
lines += ["## Concepts", ""]
for name in sorted(self.concepts):
lines.append(f"- **{name}**")
lines.append("")
if not self.relations:
lines += ["_No relations._", ""]
else:
lines += ["## Relations", ""]
for relation in self.relations:
lines.append(f"- `{relation.source}` --{relation.kind}--> `{relation.target}`")
lines.append("")
return "\n".join(lines)
|
to_dict
to_dict() -> dict[str, object]
Serialize the whole graph to a JSON-friendly dict.
Source code in src\cds\knowledge\graph.py
| def to_dict(self) -> dict[str, object]:
"""Serialize the whole graph to a JSON-friendly dict."""
return {
"name": self.name,
"concepts": [concept.to_dict() for concept in self.concepts.values()],
"relations": [relation.to_dict() for relation in self.relations],
}
|
from_dict
classmethod
from_dict(data: dict[str, object]) -> KnowledgeGraph
Reconstruct a KnowledgeGraph from to_dict() output.
Raises:
| Type |
Description |
ValueError
|
if data is missing keys or has the wrong types.
|
Source code in src\cds\knowledge\graph.py
| @classmethod
def from_dict(cls, data: dict[str, object]) -> KnowledgeGraph:
"""Reconstruct a :class:`KnowledgeGraph` from :meth:`to_dict` output.
Raises:
ValueError: if ``data`` is missing keys or has the wrong types.
"""
name = data["name"]
if not isinstance(name, str):
raise ValueError(f"Invalid graph name: {name!r}")
concepts_raw = data["concepts"]
relations_raw = data["relations"]
if not isinstance(concepts_raw, list):
raise ValueError(f"Invalid concepts list: {concepts_raw!r}")
if not isinstance(relations_raw, list):
raise ValueError(f"Invalid relations list: {relations_raw!r}")
graph = cls(name=name)
for item in concepts_raw:
if not isinstance(item, dict):
raise ValueError(f"Invalid concept entry: {item!r}")
concept = Concept.from_dict(item)
graph.concepts[concept.name] = concept
for item in relations_raw:
if not isinstance(item, dict):
raise ValueError(f"Invalid relation entry: {item!r}")
graph.relations.append(Relation.from_dict(item))
return graph
|
save
save(path: str | Path) -> None
Write this graph to path as indented UTF-8 JSON.
Source code in src\cds\knowledge\graph.py
| def save(self, path: str | Path) -> None:
"""Write this graph to ``path`` as indented UTF-8 JSON."""
Path(path).write_text(
json.dumps(self.to_dict(), ensure_ascii=False, indent=2),
encoding="utf-8",
)
|
load
classmethod
load(path: str | Path) -> KnowledgeGraph
Read a graph previously written by save().
Raises:
| Type |
Description |
ValueError
|
if the file does not contain valid graph JSON.
|
Source code in src\cds\knowledge\graph.py
| @classmethod
def load(cls, path: str | Path) -> KnowledgeGraph:
"""Read a graph previously written by :meth:`save`.
Raises:
ValueError: if the file does not contain valid graph JSON.
"""
data = json.loads(Path(path).read_text(encoding="utf-8"))
if not isinstance(data, dict):
raise ValueError(f"Invalid graph file (expected object): {data!r}")
return cls.from_dict(data)
|
Relation
dataclass
A typed, directed edge source -> target between two concepts.
Attributes:
| Name |
Type |
Description |
source |
str
|
name of the origin concept.
|
target |
str
|
name of the destination concept.
|
kind |
str
|
the relationship type (e.g. "is-a", "depends-on",
"related-to"). Semantics are caller-defined; the graph does
not interpret kinds beyond using them for filtering.
|
weight |
float
|
optional numeric strength (default 1.0). Higher is stronger;
used by callers for ranking, not by the core traversal.
|
Source code in src\cds\knowledge\graph.py
| @dataclass
class Relation:
"""A typed, directed edge ``source -> target`` between two concepts.
Attributes:
source: name of the origin concept.
target: name of the destination concept.
kind: the relationship type (e.g. ``"is-a"``, ``"depends-on"``,
``"related-to"``). Semantics are caller-defined; the graph does
not interpret kinds beyond using them for filtering.
weight: optional numeric strength (default 1.0). Higher is stronger;
used by callers for ranking, not by the core traversal.
"""
source: str
target: str
kind: str
weight: float = 1.0
def to_dict(self) -> dict[str, object]:
"""Serialize this relation to a JSON-friendly dict."""
return {
"source": self.source,
"target": self.target,
"kind": self.kind,
"weight": self.weight,
}
@classmethod
def from_dict(cls, data: dict[str, object]) -> Relation:
"""Reconstruct a :class:`Relation` from :meth:`to_dict` output.
Raises:
ValueError: if ``data`` is missing keys or has the wrong types.
"""
source = data["source"]
target = data["target"]
kind = data["kind"]
if not isinstance(source, str):
raise ValueError(f"Invalid relation source: {source!r}")
if not isinstance(target, str):
raise ValueError(f"Invalid relation target: {target!r}")
if not isinstance(kind, str):
raise ValueError(f"Invalid relation kind: {kind!r}")
weight = data["weight"]
if not isinstance(weight, int | float) or isinstance(weight, bool):
raise ValueError(f"Invalid relation weight: {weight!r}")
return cls(source=source, target=target, kind=kind, weight=float(weight))
|
Methods:
to_dict
to_dict() -> dict[str, object]
Serialize this relation to a JSON-friendly dict.
Source code in src\cds\knowledge\graph.py
| def to_dict(self) -> dict[str, object]:
"""Serialize this relation to a JSON-friendly dict."""
return {
"source": self.source,
"target": self.target,
"kind": self.kind,
"weight": self.weight,
}
|
from_dict
classmethod
from_dict(data: dict[str, object]) -> Relation
Reconstruct a Relation from to_dict() output.
Raises:
| Type |
Description |
ValueError
|
if data is missing keys or has the wrong types.
|
Source code in src\cds\knowledge\graph.py
| @classmethod
def from_dict(cls, data: dict[str, object]) -> Relation:
"""Reconstruct a :class:`Relation` from :meth:`to_dict` output.
Raises:
ValueError: if ``data`` is missing keys or has the wrong types.
"""
source = data["source"]
target = data["target"]
kind = data["kind"]
if not isinstance(source, str):
raise ValueError(f"Invalid relation source: {source!r}")
if not isinstance(target, str):
raise ValueError(f"Invalid relation target: {target!r}")
if not isinstance(kind, str):
raise ValueError(f"Invalid relation kind: {kind!r}")
weight = data["weight"]
if not isinstance(weight, int | float) or isinstance(weight, bool):
raise ValueError(f"Invalid relation weight: {weight!r}")
return cls(source=source, target=target, kind=kind, weight=float(weight))
|
Note
dataclass
A single research note linked to zero or more concept names.
Attributes:
| Name |
Type |
Description |
id |
str
|
unique note identifier within a Notebook.
|
title |
str
|
short human-readable heading.
|
body |
str
|
the note's free-form text content.
|
tags |
list[str]
|
grouping labels (e.g. ["experiment", "failed"]).
|
linked_concepts |
list[str]
|
names of the concepts this note refers to. These are
plain strings, not object references — they need not exist in any
particular KnowledgeGraph.
|
created |
str | None
|
optional ISO-8601 timestamp (or any caller-defined marker).
|
Source code in src\cds\knowledge\notes.py
| @dataclass
class Note:
"""A single research note linked to zero or more concept names.
Attributes:
id: unique note identifier within a :class:`Notebook`.
title: short human-readable heading.
body: the note's free-form text content.
tags: grouping labels (e.g. ``["experiment", "failed"]``).
linked_concepts: names of concepts this note references. These are
plain strings, not references — they need not exist in any
particular :class:`KnowledgeGraph`.
created: optional ISO-8601 timestamp (or any caller-defined marker).
"""
id: str
title: str
body: str
tags: list[str] = field(default_factory=list)
linked_concepts: list[str] = field(default_factory=list)
created: str | None = None
def to_dict(self) -> dict[str, object]:
"""Serialize this note to a JSON-friendly dict."""
return {
"id": self.id,
"title": self.title,
"body": self.body,
"tags": list(self.tags),
"linked_concepts": list(self.linked_concepts),
"created": self.created,
}
@classmethod
def from_dict(cls, data: dict[str, object]) -> Note:
"""Reconstruct a :class:`Note` from :meth:`to_dict` output.
Raises:
ValueError: if ``data`` is missing keys or has the wrong types.
"""
note_id = data["id"]
title = data["title"]
body = data["body"]
if not isinstance(note_id, str):
raise ValueError(f"Invalid note id: {note_id!r}")
if not isinstance(title, str):
raise ValueError(f"Invalid note title: {title!r}")
if not isinstance(body, str):
raise ValueError(f"Invalid note body: {body!r}")
tags_raw = data["tags"]
if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
raise ValueError(f"Invalid note tags: {tags_raw!r}")
linked_raw = data["linked_concepts"]
if not isinstance(linked_raw, list) or not all(isinstance(c, str) for c in linked_raw):
raise ValueError(f"Invalid note linked_concepts: {linked_raw!r}")
created = data["created"]
if created is not None and not isinstance(created, str):
raise ValueError(f"Invalid note created: {created!r}")
return cls(
id=note_id,
title=title,
body=body,
tags=list(tags_raw),
linked_concepts=list(linked_raw),
created=created,
)
def to_markdown(self) -> str:
"""Render this note as a self-contained Markdown document."""
lines: list[str] = [f"# {self.title}", ""]
if self.created:
lines += [f"_Created: {self.created}_", ""]
if self.tags:
lines += ["Tags: " + ", ".join(f"`{tag}`" for tag in self.tags), ""]
lines += [self.body, ""]
if self.linked_concepts:
lines += ["## Linked concepts", ""]
for concept in self.linked_concepts:
lines.append(f"- `{concept}`")
lines.append("")
return "\n".join(lines)
|
Methods:
to_dict
to_dict() -> dict[str, object]
Serialize this note to a JSON-friendly dict.
Source code in src\cds\knowledge\notes.py
| def to_dict(self) -> dict[str, object]:
"""Serialize this note to a JSON-friendly dict."""
return {
"id": self.id,
"title": self.title,
"body": self.body,
"tags": list(self.tags),
"linked_concepts": list(self.linked_concepts),
"created": self.created,
}
|
from_dict
classmethod
from_dict(data: dict[str, object]) -> Note
Reconstruct a Note from to_dict() output.
Raises:
| Type |
Description |
ValueError
|
if data is missing keys or has the wrong types.
|
Source code in src\cds\knowledge\notes.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> Note:
    """Reconstruct a :class:`Note` from :meth:`to_dict` output.

    Args:
        data: mapping holding the keys ``id``, ``title``, ``body``, ``tags``,
            ``linked_concepts`` and ``created``.

    Returns:
        Note: a new note; the ``tags`` and ``linked_concepts`` lists are copied.

    Raises:
        ValueError: if ``data`` is missing keys or has the wrong types.
    """
    required = ("id", "title", "body", "tags", "linked_concepts", "created")
    missing = [key for key in required if key not in data]
    if missing:
        # A plain data[...] lookup would raise KeyError here; the documented
        # contract for absent keys is ValueError.
        raise ValueError(f"Missing note keys: {missing!r}")

    note_id = data["id"]
    title = data["title"]
    body = data["body"]
    if not isinstance(note_id, str):
        raise ValueError(f"Invalid note id: {note_id!r}")
    if not isinstance(title, str):
        raise ValueError(f"Invalid note title: {title!r}")
    if not isinstance(body, str):
        raise ValueError(f"Invalid note body: {body!r}")
    tags_raw = data["tags"]
    if not isinstance(tags_raw, list) or not all(isinstance(t, str) for t in tags_raw):
        raise ValueError(f"Invalid note tags: {tags_raw!r}")
    linked_raw = data["linked_concepts"]
    if not isinstance(linked_raw, list) or not all(isinstance(c, str) for c in linked_raw):
        raise ValueError(f"Invalid note linked_concepts: {linked_raw!r}")
    created = data["created"]
    # ``created`` is optional in value (None allowed) but the key must exist.
    if created is not None and not isinstance(created, str):
        raise ValueError(f"Invalid note created: {created!r}")
    return cls(
        id=note_id,
        title=title,
        body=body,
        tags=list(tags_raw),
        linked_concepts=list(linked_raw),
        created=created,
    )
|
to_markdown
Render this note as a self-contained Markdown document.
Source code in src\cds\knowledge\notes.py
def to_markdown(self) -> str:
    """Format this note as a standalone Markdown page.

    Layout: an H1 with the title, an optional italic creation line, an
    optional back-ticked tag line, the body, and an optional
    "Linked concepts" bullet list. Sections are separated by blank lines.
    """
    parts: list[str] = [f"# {self.title}", ""]
    if self.created:
        parts.extend((f"_Created: {self.created}_", ""))
    if self.tags:
        tag_line = ", ".join(f"`{tag}`" for tag in self.tags)
        parts.extend(("Tags: " + tag_line, ""))
    parts.extend((self.body, ""))
    if self.linked_concepts:
        parts.extend(("## Linked concepts", ""))
        parts.extend(f"- `{concept}`" for concept in self.linked_concepts)
        parts.append("")
    return "\n".join(parts)
|
Notebook
dataclass
An ordered collection of research notes keyed by id.
Attributes:
| Name |
Type |
Description |
name |
str
|
human-readable notebook title (used in :meth:to_markdown).
|
notes |
dict[str, Note]
|
mapping of note id to :class:Note.
|
Source code in src\cds\knowledge\notes.py
@dataclass
class Notebook:
    """An ordered collection of research notes keyed by id.

    Attributes:
        name: human-readable notebook title (used in :meth:`to_markdown`).
        notes: mapping of note id to :class:`Note`.
    """

    name: str
    notes: dict[str, Note] = field(default_factory=dict)

    # ------------------------------------------------------------------ #
    # Construction & lookup
    # ------------------------------------------------------------------ #

    def add_note(
        self,
        note_id: str,
        title: str,
        body: str,
        tags: list[str] | None = None,
        linked_concepts: list[str] | None = None,
        created: str | None = None,
    ) -> Note:
        """Add a note, returning the stored :class:`Note`.

        If ``note_id`` already exists it is overwritten (last-write-wins),
        matching how a researcher edits a numbered entry in place.

        Returns:
            Note: the stored note.
        """
        note = Note(
            id=note_id,
            title=title,
            body=body,
            # Copy the caller's lists so later mutation does not leak in.
            tags=list(tags) if tags else [],
            linked_concepts=list(linked_concepts) if linked_concepts else [],
            created=created,
        )
        self.notes[note_id] = note
        return note

    def get_note(self, note_id: str) -> Note:
        """Return the note with ``note_id``.

        Raises:
            KeyError: if ``note_id`` is not in this notebook.
        """
        if note_id not in self.notes:
            raise KeyError(f"unknown note: {note_id!r}")
        return self.notes[note_id]

    def notes_for_concept(self, concept: str) -> list[Note]:
        """All notes that reference ``concept`` (by linked_concepts membership)."""
        return [note for note in self.notes.values() if concept in note.linked_concepts]

    def notes_by_tag(self, tag: str) -> list[Note]:
        """All notes carrying ``tag``."""
        return [note for note in self.notes.values() if tag in note.tags]

    # ------------------------------------------------------------------ #
    # Rendering & serialization
    # ------------------------------------------------------------------ #

    def to_markdown(self) -> str:
        """Render a compact index of this notebook's notes as Markdown.

        Notes are listed in sorted id order, one bullet per note.
        """
        lines: list[str] = [f"# Notebook: {self.name}", ""]
        if not self.notes:
            lines += ["_No notes._", ""]
        else:
            lines += ["## Notes", ""]
            for note_id in sorted(self.notes):
                note = self.notes[note_id]
                tags = f" ({', '.join(note.tags)})" if note.tags else ""
                lines.append(f"- **{note_id}**: {note.title}{tags}")
            lines.append("")
        return "\n".join(lines)

    def to_dict(self) -> dict[str, object]:
        """Serialize the notebook to a JSON-friendly dict."""
        return {
            "name": self.name,
            "notes": [note.to_dict() for note in self.notes.values()],
        }

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> Notebook:
        """Reconstruct a :class:`Notebook` from :meth:`to_dict` output.

        Raises:
            ValueError: if ``data`` is missing keys or has the wrong types.
        """
        missing = [key for key in ("name", "notes") if key not in data]
        if missing:
            # A bare data[...] lookup would surface as KeyError, which the
            # documented contract does not allow.
            raise ValueError(f"Missing notebook keys: {missing!r}")
        name = data["name"]
        if not isinstance(name, str):
            raise ValueError(f"Invalid notebook name: {name!r}")
        notes_raw = data["notes"]
        if not isinstance(notes_raw, list):
            raise ValueError(f"Invalid notes list: {notes_raw!r}")
        notebook = cls(name=name)
        for item in notes_raw:
            if not isinstance(item, dict):
                raise ValueError(f"Invalid note entry: {item!r}")
            try:
                note = Note.from_dict(item)
            except KeyError as exc:
                # Keep the single documented failure type even when a note
                # entry lacks a key.
                raise ValueError(f"Invalid note entry (missing key {exc}): {item!r}") from exc
            notebook.notes[note.id] = note
        return notebook

    def save(self, path: str | Path) -> None:
        """Write this notebook to ``path`` as indented UTF-8 JSON."""
        Path(path).write_text(
            json.dumps(self.to_dict(), ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    @classmethod
    def load(cls, path: str | Path) -> Notebook:
        """Read a notebook previously written by :meth:`save`.

        Raises:
            ValueError: if the file does not contain valid notebook JSON.
        """
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            raise ValueError(f"Invalid notebook file (expected object): {data!r}")
        return cls.from_dict(data)
|
Methods:
add_note
add_note(
note_id: str,
title: str,
body: str,
tags: list[str] | None = None,
linked_concepts: list[str] | None = None,
created: str | None = None,
) -> Note
Add a note, returning the stored :class:Note.
If note_id already exists it is overwritten (last-write-wins),
matching how a researcher edits a numbered entry in place.
Returns:
The stored Note.
Source code in src\cds\knowledge\notes.py
def add_note(
    self,
    note_id: str,
    title: str,
    body: str,
    tags: list[str] | None = None,
    linked_concepts: list[str] | None = None,
    created: str | None = None,
) -> Note:
    """Store a new :class:`Note` under ``note_id`` and hand it back.

    An existing entry with the same id is replaced (last write wins).
    ``tags`` and ``linked_concepts`` are copied; a missing or empty value
    becomes an empty list.

    Returns:
        Note: the note object now stored in ``self.notes``.
    """
    tag_list = [*tags] if tags else []
    concept_list = [*linked_concepts] if linked_concepts else []
    stored = Note(
        id=note_id,
        title=title,
        body=body,
        tags=tag_list,
        linked_concepts=concept_list,
        created=created,
    )
    self.notes[note_id] = stored
    return stored
|
get_note
get_note(note_id: str) -> Note
Return the note with note_id.
Raises:
| Type |
Description |
KeyError
|
if note_id is not in this notebook.
|
Source code in src\cds\knowledge\notes.py
def get_note(self, note_id: str) -> Note:
    """Look up a note by its id.

    Raises:
        KeyError: if ``note_id`` is not in this notebook.
    """
    if note_id in self.notes:
        return self.notes[note_id]
    raise KeyError(f"unknown note: {note_id!r}")
|
notes_for_concept
notes_for_concept(concept: str) -> list[Note]
All notes that reference concept (by linked_concepts membership).
Source code in src\cds\knowledge\notes.py
def notes_for_concept(self, concept: str) -> list[Note]:
    """Collect the notes whose ``linked_concepts`` contain ``concept``."""
    matches: list[Note] = []
    for candidate in self.notes.values():
        if concept in candidate.linked_concepts:
            matches.append(candidate)
    return matches
|
notes_by_tag
notes_by_tag(tag: str) -> list[Note]
All notes carrying tag.
Source code in src\cds\knowledge\notes.py
def notes_by_tag(self, tag: str) -> list[Note]:
    """Collect the notes whose ``tags`` contain ``tag``."""
    tagged: list[Note] = []
    for candidate in self.notes.values():
        if tag in candidate.tags:
            tagged.append(candidate)
    return tagged
|
to_markdown
Render a compact index of this notebook's notes as Markdown.
Source code in src\cds\knowledge\notes.py
def to_markdown(self) -> str:
    """Build a short Markdown index of the notebook.

    The output is an H1 with the notebook name followed by either a
    "_No notes._" placeholder or one bullet per note in sorted id order,
    with the note's tags appended in parentheses when it has any.
    """
    out: list[str] = [f"# Notebook: {self.name}", ""]
    if self.notes:
        out.extend(("## Notes", ""))
        for key in sorted(self.notes):
            entry = self.notes[key]
            suffix = ""
            if entry.tags:
                suffix = f" ({', '.join(entry.tags)})"
            out.append(f"- **{key}**: {entry.title}{suffix}")
        out.append("")
    else:
        out.extend(("_No notes._", ""))
    return "\n".join(out)
|
to_dict
to_dict() -> dict[str, object]
Serialize the notebook to a JSON-friendly dict.
Source code in src\cds\knowledge\notes.py
def to_dict(self) -> dict[str, object]:
    """Return the notebook as a JSON-friendly dict.

    The result has a ``name`` string and a ``notes`` list holding each
    note's own ``to_dict()`` output, in the notebook's iteration order.
    """
    serialized_notes = []
    for entry in self.notes.values():
        serialized_notes.append(entry.to_dict())
    return {"name": self.name, "notes": serialized_notes}
|
from_dict
classmethod
from_dict(data: dict[str, object]) -> Notebook
Reconstruct a `Notebook` from `to_dict` output.
Raises:
| Type |
Description |
ValueError
|
if data is missing keys or has the wrong types.
|
Source code in src\cds\knowledge\notes.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> Notebook:
    """Reconstruct a :class:`Notebook` from :meth:`to_dict` output.

    Args:
        data: mapping with a ``name`` string and a ``notes`` list of
            per-note dicts.

    Returns:
        Notebook: a new notebook whose notes are keyed by each note's id.

    Raises:
        ValueError: if ``data`` is missing keys or has the wrong types.
    """
    missing = [key for key in ("name", "notes") if key not in data]
    if missing:
        # A bare data[...] lookup would surface as KeyError, which the
        # documented contract does not allow.
        raise ValueError(f"Missing notebook keys: {missing!r}")
    name = data["name"]
    if not isinstance(name, str):
        raise ValueError(f"Invalid notebook name: {name!r}")
    notes_raw = data["notes"]
    if not isinstance(notes_raw, list):
        raise ValueError(f"Invalid notes list: {notes_raw!r}")
    notebook = cls(name=name)
    for item in notes_raw:
        if not isinstance(item, dict):
            raise ValueError(f"Invalid note entry: {item!r}")
        try:
            note = Note.from_dict(item)
        except KeyError as exc:
            # Keep the single documented failure type even when a note entry
            # lacks a key.
            raise ValueError(f"Invalid note entry (missing key {exc}): {item!r}") from exc
        notebook.notes[note.id] = note
    return notebook
|
save
save(path: str | Path) -> None
Write this notebook to path as indented UTF-8 JSON.
Source code in src\cds\knowledge\notes.py
def save(self, path: str | Path) -> None:
    """Serialize the notebook to ``path`` as UTF-8 JSON with 2-space indent.

    Non-ASCII characters are written as-is rather than ``\\u``-escaped.
    """
    target = Path(path)
    payload = json.dumps(self.to_dict(), ensure_ascii=False, indent=2)
    target.write_text(payload, encoding="utf-8")
|
load
classmethod
load(path: str | Path) -> Notebook
Read a notebook previously written by :meth:save.
Raises:
| Type |
Description |
ValueError
|
if the file does not contain valid notebook JSON.
|
Source code in src\cds\knowledge\notes.py
@classmethod
def load(cls, path: str | Path) -> Notebook:
    """Load a notebook from a UTF-8 JSON file written by :meth:`save`.

    The decoded JSON must be an object; it is then passed to
    :meth:`from_dict`.

    Raises:
        ValueError: if the file does not contain valid notebook JSON.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    data = json.loads(raw_text)
    if isinstance(data, dict):
        return cls.from_dict(data)
    raise ValueError(f"Invalid notebook file (expected object): {data!r}")
|
SearchResult
dataclass
A single ranked retrieval hit.
Attributes:
| Name |
Type |
Description |
concept_name |
str | None
|
the matched concept name, if the hit is a concept;
None otherwise.
|
note_id |
str | None
|
the matched note id, if the hit is a note; None otherwise.
|
score |
float
|
relevance in [0, 1] — higher is better.
|
matched_on |
str
|
short label of the field that matched
(e.g. "name", "description", "title").
|
Source code in src\cds\knowledge\retrieval.py
@dataclass
class SearchResult:
    """One ranked hit produced by a search.

    A hit identifies either a concept or a note, so one of ``concept_name``
    and ``note_id`` is expected to be ``None``.

    Attributes:
        concept_name: name of the matched concept, or ``None`` when the hit
            is a note.
        note_id: id of the matched note, or ``None`` when the hit is a
            concept.
        score: relevance in ``[0, 1]``; larger means a better match.
        matched_on: short label for the field that matched
            (e.g. ``"name"``, ``"description"``, ``"title"``).
    """

    concept_name: str | None
    note_id: str | None
    score: float
    matched_on: str
|
Functions:
search
search(
graph: KnowledgeGraph,
notebook: Notebook,
query: str,
tag: str | None = None,
) -> list[SearchResult]
Combined ranked search over both a graph's concepts and a notebook's notes.
Results from :func:search_concepts and :func:search_notes are merged
and re-ranked by score (desc) then by identifier (asc).
Parameters:
| Name |
Type |
Description |
Default |
graph
|
KnowledgeGraph
|
the :class:KnowledgeGraph whose concepts to search.
|
required
|
notebook
|
Notebook
|
the :class:Notebook whose notes to search.
|
required
|
query
|
str
|
case-insensitive search text.
|
required
|
tag
|
str | None
|
optional tag filter applied to both concepts and notes.
|
None
|
Returns:
| Name | Type |
Description |
ranked |
list[SearchResult]
|
Ranked list of SearchResult (best first, ties alphabetical).
|
Source code in src\cds\knowledge\retrieval.py
def search(
    graph: KnowledgeGraph,
    notebook: Notebook,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]:
    """Search a graph's concepts and a notebook's notes in one call.

    The hits from :func:`search_concepts` and :func:`search_notes` are
    concatenated and ordered by descending score, then ascending
    identifier (the concept name, or the note id for note hits).

    Args:
        graph: the :class:`KnowledgeGraph` whose concepts to search.
        notebook: the :class:`Notebook` whose notes to search.
        query: search text, forwarded unchanged to both searches.
        tag: optional tag filter, forwarded unchanged to both searches.

    Returns:
        list[SearchResult]: merged hits, best first.
    """

    def rank(hit: SearchResult) -> tuple[float, str]:
        return (-hit.score, hit.concept_name or hit.note_id or "")

    concept_hits = search_concepts(graph, query, tag)
    note_hits = search_notes(notebook, query, tag)
    return sorted(concept_hits + note_hits, key=rank)
|
search_concepts
search_concepts(
graph: KnowledgeGraph,
query: str,
tag: str | None = None,
) -> list[SearchResult]
Find concepts in graph matching query.
A concept matches if its name matches the query exactly (score 1.0) or
its name or description contains the query as a substring (score 0.5).
When tag is given, only concepts carrying that tag are considered.
Parameters:
| Name |
Type |
Description |
Default |
graph
|
KnowledgeGraph
|
the :class:KnowledgeGraph to search.
|
required
|
query
|
str
|
case-insensitive search text.
|
required
|
tag
|
str | None
|
optional tag filter; None disables filtering.
|
None
|
Returns:
| Name | Type |
Description |
ranked |
list[SearchResult]
|
Ranked list of SearchResult (best first, ties alphabetical).
|
Source code in src\cds\knowledge\retrieval.py
def search_concepts(
    graph: KnowledgeGraph,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]:
    """Find the concepts in ``graph`` that match ``query``.

    Matching is done on case-folded text. Each concept yields at most one
    hit, chosen by the first rule that applies:

    1. name equals the query: scored ``NAME_TAG_SCORE``, matched on ``"name"``;
    2. name contains the query: scored ``SUBSTRING_SCORE``, matched on ``"name"``;
    3. description contains the query: scored ``SUBSTRING_SCORE``, matched
       on ``"description"``.

    Args:
        graph: the :class:`KnowledgeGraph` to search.
        query: case-insensitive search text.
        tag: when not ``None``, concepts without this tag are skipped.

    Returns:
        list[SearchResult]: hits ordered by descending score, then concept name.
    """
    needle = query.casefold()
    hits: list[SearchResult] = []
    for name in sorted(graph.concepts):
        concept = graph.concepts[name]
        if tag is not None and tag not in concept.tags:
            continue
        folded_name = name.casefold()
        if folded_name == needle:
            score, label = NAME_TAG_SCORE, "name"
        elif needle in folded_name:
            score, label = SUBSTRING_SCORE, "name"
        elif concept.description is not None and needle in concept.description.casefold():
            score, label = SUBSTRING_SCORE, "description"
        else:
            continue
        hits.append(
            SearchResult(concept_name=name, note_id=None, score=score, matched_on=label)
        )
    hits.sort(key=lambda hit: (-hit.score, hit.concept_name or ""))
    return hits
|
search_notes
search_notes(
notebook: Notebook, query: str, tag: str | None = None
) -> list[SearchResult]
Find notes in notebook matching query.
A note matches if its title matches exactly (score 1.0) or its title or
body contains the query as a substring (score 0.5). When tag is
given, only notes carrying that tag are considered.
Parameters:
| Name |
Type |
Description |
Default |
notebook
|
Notebook
|
the :class:Notebook to search.
|
required
|
query
|
str
|
case-insensitive search text.
|
required
|
tag
|
str | None
|
optional tag filter; None disables filtering.
|
None
|
Returns:
| Name | Type |
Description |
ranked |
list[SearchResult]
|
Ranked list of SearchResult (best first, ties alphabetical).
|
Source code in src\cds\knowledge\retrieval.py
def search_notes(
    notebook: Notebook,
    query: str,
    tag: str | None = None,
) -> list[SearchResult]:
    """Find the notes in ``notebook`` that match ``query``.

    Matching is done on case-folded text. Each note yields at most one hit,
    chosen by the first rule that applies:

    1. title equals the query: scored ``NAME_TAG_SCORE``, matched on ``"title"``;
    2. title contains the query: scored ``SUBSTRING_SCORE``, matched on ``"title"``;
    3. body contains the query: scored ``SUBSTRING_SCORE``, matched on ``"body"``.

    Args:
        notebook: the :class:`Notebook` to search.
        query: case-insensitive search text.
        tag: when not ``None``, notes without this tag are skipped.

    Returns:
        list[SearchResult]: hits ordered by descending score, then note id.
    """
    needle = query.casefold()
    hits: list[SearchResult] = []
    for note_id in sorted(notebook.notes):
        note = notebook.notes[note_id]
        if tag is not None and tag not in note.tags:
            continue
        folded_title = note.title.casefold()
        if folded_title == needle:
            score, label = NAME_TAG_SCORE, "title"
        elif needle in folded_title:
            score, label = SUBSTRING_SCORE, "title"
        elif needle in note.body.casefold():
            score, label = SUBSTRING_SCORE, "body"
        else:
            continue
        hits.append(
            SearchResult(concept_name=None, note_id=note_id, score=score, matched_on=label)
        )
    hits.sort(key=lambda hit: (-hit.score, hit.note_id or ""))
    return hits
|
Data Analysis
CSV loading, normalisation, smoothing, and ASCII visualisation.
cds.data_analysis
Data loading, analysis and visualization helpers.
Classes
DataSet
A lightweight, pure Python 'DataFrame' for structured data.
Data is stored internally as a list of dictionaries where keys are column names.
Source code in src\cds\data_analysis\dataset.py
class DataSet:
    """A lightweight, pure Python 'DataFrame' for structured data.

    Data is stored internally as a list of dictionaries where keys are
    column names. The column list is taken from the first row only.
    """

    def __init__(self, data: list[Row]):
        self.data = data
        self._columns = list(data[0].keys()) if data else []

    @property
    def columns(self) -> list[str]:
        """Return the list of column names."""
        return self._columns

    @property
    def shape(self) -> tuple[int, int]:
        """Return (rows, columns) tuple."""
        return len(self.data), len(self._columns)

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx: int) -> Row:
        return self.data[idx]

    def column(self, name: str) -> list[Scalar]:
        """Extract a single column as a list.

        Raises:
            ValueError: if ``name`` is not a known column.
        """
        if name not in self._columns:
            raise ValueError(f"Column '{name}' not found. Available: {self._columns}")
        return [row[name] for row in self.data]

    def filter(self, predicate: Callable[[Row], bool]) -> DataSet:
        """Return a new dataset holding the rows for which ``predicate`` is truthy."""
        filtered_data = [row for row in self.data if predicate(row)]
        return DataSet(filtered_data)

    def head(self, n: int = 5) -> DataSet:
        """Return the first n rows."""
        return DataSet(self.data[:n])

    def tail(self, n: int = 5) -> DataSet:
        """Return the last n rows.

        ``n == 0`` yields an empty dataset. A negative ``n`` keeps the
        original slice semantics (everything except the first ``|n|`` rows).
        """
        # data[-0:] is data[0:], i.e. the whole list, so zero needs its own case.
        return DataSet(self.data[-n:] if n != 0 else [])

    def select(self, *names: str) -> DataSet:
        """Select a subset of columns.

        Raises:
            ValueError: if any requested name is not a known column.
        """
        for name in names:
            if name not in self._columns:
                raise ValueError(f"Column '{name}' not found.")
        new_data = [{name: row[name] for name in names} for row in self.data]
        return DataSet(new_data)

    def group_by(self, column_name: str) -> DataGroup:
        """Group data by a specific column for aggregation.

        Raises:
            ValueError: if ``column_name`` is not a known column.
        """
        if column_name not in self._columns:
            raise ValueError(f"Column '{column_name}' not found.")
        groups: dict[Scalar, list[Row]] = {}
        for row in self.data:
            groups.setdefault(row[column_name], []).append(row)
        return DataGroup(groups, column_name)

    def to_list(self) -> list[Row]:
        """Export data as a list of (shallow-copied) dictionaries."""
        return [row.copy() for row in self.data]

    def __repr__(self) -> str:
        if not self.data:
            return "DataSet(empty)"
        return f"DataSet(rows={len(self.data)}, cols={len(self._columns)})"
|
Attributes
columns
property
Return the list of column names.
shape
property
Return (rows, columns) tuple.
Methods:
column
column(name: str) -> list[Scalar]
Extract a single column as a list.
Source code in src\cds\data_analysis\dataset.py
def column(self, name: str) -> list[Scalar]:
    """Return the values stored under ``name``, one per row, in row order.

    Raises:
        ValueError: if ``name`` is not a known column.
    """
    if name in self._columns:
        return [record[name] for record in self.data]
    raise ValueError(f"Column '{name}' not found. Available: {self._columns}")
|
filter
filter(predicate: Callable[[Row], bool]) -> DataSet
Filter the dataset based on a predicate function.
Source code in src\cds\data_analysis\dataset.py
| def filter(self, predicate: Callable[[Row], bool]) -> DataSet:
"""Filter the dataset based on a predicate function."""
filtered_data = [row for row in self.data if predicate(row)]
return DataSet(filtered_data)
|
head
head(n: int = 5) -> DataSet
Return the first n rows.
Source code in src\cds\data_analysis\dataset.py
| def head(self, n: int = 5) -> DataSet:
"""Return the first n rows."""
return DataSet(self.data[:n])
|
tail
tail(n: int = 5) -> DataSet
Return the last n rows.
Source code in src\cds\data_analysis\dataset.py
| def tail(self, n: int = 5) -> DataSet:
"""Return the last n rows."""
return DataSet(self.data[-n:])
|
select
select(*names: str) -> DataSet
Select a subset of columns.
Source code in src\cds\data_analysis\dataset.py
| def select(self, *names: str) -> DataSet:
"""Select a subset of columns."""
for name in names:
if name not in self._columns:
raise ValueError(f"Column '{name}' not found.")
new_data = [{name: row[name] for name in names} for row in self.data]
return DataSet(new_data)
|
group_by
group_by(column_name: str) -> DataGroup
Group data by a specific column for aggregation.
Source code in src\cds\data_analysis\dataset.py
def group_by(self, column_name: str) -> DataGroup:
    """Bucket the rows by their value in ``column_name``.

    Buckets are created in first-seen order and keep the rows in their
    original order. The mapping and the column name are handed to
    ``DataGroup``.

    Raises:
        ValueError: if ``column_name`` is not a known column.
    """
    if column_name not in self._columns:
        raise ValueError(f"Column '{column_name}' not found.")
    buckets: dict[Scalar, list[Row]] = {}
    for row in self.data:
        buckets.setdefault(row[column_name], []).append(row)
    return DataGroup(buckets, column_name)
|
to_list
Export data as a list of dictionaries.
Source code in src\cds\data_analysis\dataset.py
def to_list(self) -> list[Row]:
    """Return the rows as a new list of shallow-copied dictionaries."""
    exported = []
    for row in self.data:
        exported.append(row.copy())
    return exported
|
DataTable
dataclass
In-memory tabular data: a header row plus a list of string rows.
Source code in src\cds\data_analysis\loader.py
@dataclass
class DataTable:
    """In-memory tabular data: a header row plus a list of string rows."""

    headers: list[str] = field(default_factory=list)
    rows: list[list[str]] = field(default_factory=list)

    @property
    def n_rows(self) -> int:
        """Number of data rows (excluding the header)."""
        return len(self.rows)

    @property
    def n_cols(self) -> int:
        """Number of columns (i.e. number of header entries)."""
        return len(self.headers)

    def column(self, name: str) -> list[str]:
        """Return all values in the column identified by `name`.

        Raises:
            ValueError: if `name` is not one of the headers.
        """
        try:
            idx = self.headers.index(name)
        except ValueError:
            # list.index() only says "'x' is not in list"; name the column and
            # the available headers instead. The exception type is unchanged.
            raise ValueError(
                f"Column '{name}' not found. Available: {self.headers}"
            ) from None
        return [row[idx] for row in self.rows]

    def column_as_float(self, name: str) -> list[float]:
        """Return a column as floats; raises ValueError if a cell is non-numeric."""
        return [float(v) for v in self.column(name)]

    def head(self, n: int = 5) -> list[list[str]]:
        """Return the first `n` rows (default 5) for quick inspection."""
        return self.rows[:n]

    def describe(self) -> dict[str, dict[str, float]]:
        """Quick summary stats for numeric columns.

        Columns whose conversion or aggregation raises ``ValueError`` or
        ``TypeError`` are left out of the result.
        """
        from cds.stats.descriptive import mean, median, stdev

        result: dict[str, dict[str, float]] = {}
        for h in self.headers:
            try:
                vals = self.column_as_float(h)
                result[h] = {
                    "count": len(vals),
                    "mean": mean(vals),
                    "std": stdev(vals),
                    "min": min(vals),
                    "median": median(vals),
                    "max": max(vals),
                }
            except (ValueError, TypeError):
                # Non-numeric column — skip aggregation, keep prior result.
                pass
        return result
|
Attributes
n_rows
property
Number of data rows (excluding the header).
n_cols
property
Number of columns (i.e. number of header entries).
Methods:
column
column(name: str) -> list[str]
Return all values in the column identified by name.
Source code in src\cds\data_analysis\loader.py
def column(self, name: str) -> list[str]:
    """Return all values in the column identified by `name`.

    Raises:
        ValueError: if `name` is not one of the headers.
    """
    try:
        idx = self.headers.index(name)
    except ValueError:
        # list.index() only says "'x' is not in list"; name the column and the
        # available headers instead. The exception type is unchanged.
        raise ValueError(
            f"Column '{name}' not found. Available: {self.headers}"
        ) from None
    return [row[idx] for row in self.rows]
|
column_as_float
column_as_float(name: str) -> list[float]
Return a column as floats; raises ValueError if a cell is non-numeric.
Source code in src\cds\data_analysis\loader.py
def column_as_float(self, name: str) -> list[float]:
    """Return the named column with every cell converted by ``float()``.

    Raises:
        ValueError: if a cell cannot be parsed as a float.
    """
    converted: list[float] = []
    for cell in self.column(name):
        converted.append(float(cell))
    return converted
|
head
head(n: int = 5) -> list[list[str]]
Return the first n rows (default 5) for quick inspection.
Source code in src\cds\data_analysis\loader.py
def head(self, n: int = 5) -> list[list[str]]:
    """Return the leading `n` data rows (slice ``[:n]``; default 5)."""
    leading_rows = self.rows[:n]
    return leading_rows
|
describe
describe() -> dict[str, dict[str, float]]
Quick summary stats for numeric columns.
Source code in src\cds\data_analysis\loader.py
def describe(self) -> dict[str, dict[str, float]]:
    """Summarise each numeric column.

    For every header the column is converted to floats and its count,
    mean, std, min, median and max are recorded. A column is left out of
    the result when the conversion or any statistic raises ``ValueError``
    or ``TypeError``.
    """
    from cds.stats.descriptive import mean, median, stdev

    summary: dict[str, dict[str, float]] = {}
    for header in self.headers:
        try:
            values = self.column_as_float(header)
            stats = {
                "count": len(values),
                "mean": mean(values),
                "std": stdev(values),
                "min": min(values),
                "median": median(values),
                "max": max(values),
            }
        except (ValueError, TypeError):
            # Non-numeric column: leave it out of the summary.
            continue
        summary[header] = stats
    return summary
|
Functions:
load_csv
load_csv(path: str | Path) -> DataTable
Load a CSV file into a DataTable.
The first row is treated as headers; remaining rows are stored as strings.
Raises FileNotFoundError if path does not exist.
Source code in src\cds\data_analysis\loader.py
def load_csv(path: str | Path) -> DataTable:
    """Load a CSV file into a DataTable.

    The first row is treated as headers; remaining rows are stored as
    strings. A completely empty file yields a table with no headers and
    no rows.

    Raises:
        FileNotFoundError: if `path` does not exist.
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"no such file: {p}")
    with open(p, newline="") as f:
        reader = csv.reader(f)
        # An empty file has no header row; next() without a default would
        # leak StopIteration to the caller.
        headers = next(reader, [])
        rows = list(reader)
    return DataTable(headers=headers, rows=rows)
|
moving_average
moving_average(
data: list[float], window: int = 3
) -> list[float]
Trailing moving average with the given window size (>= 1).
Source code in src\cds\data_analysis\transform.py
def moving_average(data: list[float], window: int = 3) -> list[float]:
    """Compute a trailing moving average.

    Output element ``i`` is the mean of up to ``window`` values ending at
    ``data[i]``; the first ``window - 1`` elements average over the shorter
    prefix that is available.

    Raises:
        ValueError: if ``window`` is less than 1.
    """
    if window < 1:
        raise ValueError("window must be >= 1")
    averages = []
    for end in range(1, len(data) + 1):
        begin = end - window if end > window else 0
        segment = data[begin:end]
        averages.append(sum(segment) / len(segment))
    return averages
|
normalize
normalize(data: list[float]) -> list[float]
Min-max normalization to [0, 1].
Source code in src\cds\data_analysis\transform.py
def normalize(data: list[float]) -> list[float]:
    """Min-max normalization to [0, 1].

    Returns an empty list for empty input, and all zeros when every value
    is identical (zero range).
    """
    if not data:
        # min()/max() raise on an empty sequence; nothing to scale anyway.
        return []
    lo, hi = min(data), max(data)
    rng = hi - lo
    if rng == 0:
        return [0.0] * len(data)
    return [(x - lo) / rng for x in data]
|
z_score
z_score(data: list[float]) -> list[float]
Standardize to mean=0, std=1.
Source code in src\cds\data_analysis\transform.py
def z_score(data: list[float]) -> list[float]:
    """Standardize to mean=0, std=1.

    Returns an empty list for empty input, and all zeros when the standard
    deviation is zero.
    """
    if not data:
        # Nothing to standardize; skip the statistics helpers entirely.
        return []
    m = mean(data)
    s = stdev(data)
    if s == 0:
        return [0.0] * len(data)
    return [(x - m) / s for x in data]
|
plot_bar
plot_bar(
data: dict[str, float],
title: str = "Bar Chart",
width: int = 50,
) -> str
Generate an ASCII bar chart from a dictionary.
Parameters:
| Name |
Type |
Description |
Default |
data
|
dict[str, float]
|
Mapping from label to numeric value.
|
required
|
title
|
str
|
Chart title.
|
'Bar Chart'
|
width
|
int
|
Maximum bar width in characters.
|
50
|
Source code in src\cds\data_analysis\viz.py
def plot_bar(data: dict[str, float], title: str = "Bar Chart", width: int = 50) -> str:
    """Render a horizontal ASCII bar chart for a label-to-value mapping.

    Bars are scaled against the largest absolute value in ``data``.
    Non-negative values are drawn with solid blocks and a ``+`` sign in the
    suffix; negative values use a lighter shade.

    Args:
        data: Mapping from label to numeric value.
        title: Chart title.
        width: Maximum bar width in characters.

    Returns:
        The chart as one newline-joined string, or ``"No data to plot."``
        when ``data`` is empty.
    """
    if not data:
        return "No data to plot."

    values = list(data.values())
    largest, smallest = max(values), min(values)
    # The tiny floor keeps the division defined when every value is zero.
    scale = max(abs(largest), abs(smallest), 1e-10)

    rendered = [f"\n[bold]{title}[/]", "─" * len(title)]
    for label, val in data.items():
        bar_len = int((abs(val) / scale) * width)
        if val < 0:
            glyph, shown = "░", f"{val:.2f}"
        else:
            glyph, shown = "█", f"+{val:.2f}"
        rendered.append(f"{label:<15} | {glyph * bar_len} ({shown})")
    return "\n".join(rendered)
|
plot_line
plot_line(
y_values: list[float],
title: str = "Line Plot",
height: int = 10,
width: int = 60,
) -> str
Generate a simple ASCII line plot.
Parameters:
| Name |
Type |
Description |
Default |
y_values
|
list[float]
|
List of numeric values.
|
required
|
title
|
str
|
Plot title.
|
'Line Plot'
|
height
|
int
|
Number of rows.
|
10
|
width
|
int
|
Number of columns (will sample data to fit).
|
60
|
Source code in src\cds\data_analysis\viz.py
def plot_line(
    y_values: list[float], title: str = "Line Plot", height: int = 10, width: int = 60
) -> str:
    """Generate a simple ASCII line plot.

    Args:
        y_values: List of numeric values.
        title: Plot title.
        height: Number of rows (values below 1 are treated as 1).
        width: Number of columns (will sample data to fit; values below 2
            are treated as 2).

    Returns:
        The plot as one newline-joined string, or ``"No data to plot."``
        when ``y_values`` is empty.
    """
    if not y_values:
        return "No data to plot."

    # Guard: width must be at least 2; otherwise stride below divides by zero.
    eff_width = max(2, width)
    # Guard: a non-positive height would leave the grid empty and make the
    # row assignment below raise IndexError.
    eff_height = max(1, height)

    # Sample to fit width
    if len(y_values) > eff_width:
        last = len(y_values) - 1
        indices = [int(i * last / (eff_width - 1)) for i in range(eff_width)]
        sampled = [y_values[i] for i in indices]
    else:
        sampled = y_values
        eff_width = len(y_values)

    max_y = max(sampled)
    min_y = min(sampled)
    # A flat series would give a zero range; use 1.0 so the division is defined.
    y_range = max_y - min_y if max_y != min_y else 1.0

    # Create grid
    grid = [[" " for _ in range(eff_width)] for _ in range(eff_height)]
    for x, y in enumerate(sampled):
        # Calculate row (inverted because row 0 is top)
        norm_y = (y - min_y) / y_range
        row = eff_height - 1 - int(norm_y * (eff_height - 1))
        grid[row][x] = "•"

    lines = [f"\n[bold]{title}[/]", "─" * len(title)]
    lines.extend("".join(grid_row) for grid_row in grid)

    min_y_str = f"{min_y:.2f}"
    max_y_str = f"{max_y:.2f}"
    lines.append(f"min: {min_y_str:<{eff_width // 2}}max: {max_y_str:>{eff_width // 2}}")
    return "\n".join(lines)
|
Educational NLP
From-scratch transformer primitives: BPE tokeniser, sinusoidal embeddings, attention, autograd, MiniGPT.
cds.nlp
Natural language processing primitives in pure Python.
Educational, from-scratch implementations of the building blocks used in
modern language models — byte-pair encoding (BPE) tokenisation, the
sinusoidal token / positional embeddings from the original Transformer
paper, scaled dot-product and multi-head self-attention, the
Transformer encoder block (GeLU FFN, LayerNorm, residual), a
scalar-valued reverse-mode autograd engine with SGD/Adam optimisers,
and a high-level training helper.
Designed for teaching, prototyping, and small-model experiments, not
for production-scale training (no NumPy, no BLAS — performance is
deliberately the trade-off for full transparency). The optional
cds[fast-jit] extra brings in Numba for the matmul hot-path
without changing the public surface.
Why this module exists inside CDS:
- Demonstrates that the core ideas of transformer-era NLP are short,
readable pure-Python code.
- Provides a reproducible playground for tokenisation experiments
without pulling in tokenizers / sentencepiece / torch.
- Closes the loop on the educational NLP track: BPE + embeddings +
attention + autograd = a learner can now train a tiny GPT and
see every gradient flowing back through the graph because the
engine is ~250 lines, not 50,000.
Scope (v0.10.0b1):
- `cds.nlp.bpe.train_bpe` — train a BPE vocabulary from a corpus
- `cds.nlp.bpe.BPETokenizer` — encode / decode / save / load
- `cds.nlp.embed.TokenEmbedding` — token lookup table
- `cds.nlp.embed.PositionalEncoding` — sinusoidal positions
- `cds.nlp.attention.scaled_dot_product_attention`
- `cds.nlp.attention.multi_head_attention`
- `cds.nlp.attention.causal_mask` — decoder self-attention mask
- `cds.nlp.layers.gelu` / `cds.nlp.layers.layer_norm`
- `cds.nlp.layers.feed_forward`
- `cds.nlp.layers.transformer_block`
- `cds.nlp.autograd.Tensor` / `Parameter` — scalar autograd
- `cds.nlp.autograd.matmul` — nested-Tensor matmul
- `cds.nlp.optim.SGD` / `Adam` — optimisers
- `cds.nlp.training.cross_entropy` / `train_step` — loss + loop
- `cds.nlp.viz.render_attention_heatmap` — ASCII attention heatmap
- `cds.nlp.viz.render_embedding_projection` — ASCII 2-D PCA scatter
- `cds.nlp.viz.render_training_curve` — ASCII loss curve
Out of scope for the educational track:
- Mixed precision (FP16 / bfloat16) — meaningful only with the
Numba backend, deferred for now.
- Dropout, ALiBi, RoPE, GQA / MQA — modern attention refinements
tracked for a later educational add-on.
- Subword sampling tricks (BPE-Dropout, Unigram LM).
- WordPiece / SentencePiece alternatives.
References
- Sennrich, R., Haddow, B., & Birch, A. (2016). "Neural Machine
Translation of Rare Words with Subword Units." ACL.
- Vaswani, A. et al. (2017). "Attention Is All You Need." NeurIPS.
- Gage, P. (1994). "A New Algorithm for Data Compression." C
Users Journal.
- Kingma, D. P., & Ba, J. (2014). "Adam: A Method for Stochastic
Optimization." arXiv:1412.6980.
- Karpathy, A. (2020). micrograd — the scalar autograd engine
this module imitates.
Classes
Parameter
Bases: Tensor
A :class:Tensor that's a trainable weight.
Subclass of :class:Tensor with requires_grad=True by default.
Use these for everything a model should learn (embeddings, attention
projections, FFN weights, biases, etc.). The optimizer sees them
via :func:cds.nlp.optim.parameters.
Initial values should be small and zero-centred; the simplest
default is to wrap an existing :class:Tensor via
Parameter(tensor.data).
Source code in src\cds\nlp\autograd\tensor.py
class Parameter(Tensor):
    """A trainable :class:`Tensor`.

    Construction coerces ``value`` to ``float`` and always enables gradient
    tracking (``requires_grad=True``). Use it for anything a model should
    learn, such as embeddings, attention projections, FFN weights and biases.

    Initial values should be small and zero-centred; an existing
    :class:`Tensor` can be wrapped with ``Parameter(tensor.data)``.
    """

    def __init__(self, value: Scalar) -> None:
        initial = float(value)
        super().__init__(data=initial, requires_grad=True)
|
Tensor
dataclass
A scalar value with optional gradient tracking.
Attributes:
| Name |
Type |
Description |
data |
Scalar
|
The numeric value (always a Python float — the
educational track stays in scalars; vector ops are
expressed as nested Tensor lists).
|
requires_grad |
bool
|
If True, backward() will populate grad.
|
grad |
Scalar
|
The running gradient (initialised to 0.0 on first
backward()).
|
_backward |
BackwardFn
|
A closure set by each op that propagates grad
to _prev. None for leaf nodes.
|
_prev |
set[Tensor]
|
The set of :class:Tensor nodes that produced this
node (the parents in the dep graph).
|
Source code in src\cds\nlp\autograd\tensor.py
@dataclass(eq=False)
class Tensor:
    """A scalar value with optional gradient tracking.

    ``eq=False`` keeps identity-based ``__eq__``/``__hash__`` so nodes can
    be stored in the ``_prev`` sets and in the traversal bookkeeping.

    Attributes:
        data: The numeric value (expected to be a Python ``float`` — the
            educational track stays in scalars; vector ops are
            expressed as nested ``Tensor`` lists).
        requires_grad: If True, ``backward()`` may be called on this node.
        grad: The running gradient (starts at 0.0; ``backward()`` seeds
            the root with 1.0 and the op closures accumulate into it).
        _backward: A closure set by each op that propagates ``grad``
            to ``_prev``. ``None`` for leaf nodes.
        _prev: The set of :class:`Tensor` nodes that produced this
            node (its inputs in the dependency graph).
    """

    data: Scalar
    requires_grad: bool = False
    grad: Scalar = 0.0
    _backward: BackwardFn = field(default=None, repr=False)
    _prev: set[Tensor] = field(default_factory=set, repr=False)

    def __repr__(self) -> str:
        # Only show the gradient when this node actually tracks one.
        grad_str = f", grad={self.grad}" if self.requires_grad else ""
        return f"Tensor(data={self.data}{grad_str})"

    # ------------------------------------------------------------------ #
    # Operator overloads — implemented inline so mypy strict sees them.
    # ------------------------------------------------------------------ #
    def __add__(self, other: Tensor | float | int) -> Tensor:
        return _binop("+", self, other)

    def __radd__(self, other: float | int) -> Tensor:
        return _binop("+", other, self)

    def __sub__(self, other: Tensor | float | int) -> Tensor:
        return _binop("-", self, other)

    def __rsub__(self, other: float | int) -> Tensor:
        return _binop("-", other, self)

    def __mul__(self, other: Tensor | float | int) -> Tensor:
        return _binop("*", self, other)

    def __rmul__(self, other: float | int) -> Tensor:
        return _binop("*", other, self)

    def __truediv__(self, other: Tensor | float | int) -> Tensor:
        return _binop("/", self, other)

    def __rtruediv__(self, other: float | int) -> Tensor:
        return _binop("/", other, self)

    def __neg__(self) -> Tensor:
        return neg(self)

    def __pos__(self) -> Tensor:
        # Unary plus is the identity; no new graph node is created.
        return self

    def __pow__(self, exponent: float) -> Tensor:
        """Raise this tensor to a constant numeric power.

        Only ``int``/``float`` exponents are supported; anything else
        yields ``NotImplemented`` so Python can try the reflected
        operation and raise ``TypeError`` itself if that fails too.
        """
        # Returning NotImplemented (rather than raising) is the contract
        # for arithmetic dunders: the binary-operator dispatch consumes
        # the value, so it never reaches user code even though the
        # declared return type is Tensor.
        if not isinstance(exponent, (int, float)):
            return NotImplemented
        c = float(exponent)

        def _backward() -> None:
            # d/dx x**0 == 0 everywhere. Skip the update so that x == 0
            # does not evaluate 0.0 ** -1.0 (ZeroDivisionError).
            if c == 0.0:
                return
            self.grad += c * (self.data ** (c - 1.0)) * out.grad

        out = Tensor(data=self.data**c)
        return _track(out, (self,), _backward)

    # ------------------------------------------------------------------ #
    # Gradient propagation
    # ------------------------------------------------------------------ #
    def backward(self) -> None:
        """Compute gradients via reverse-mode autodiff.

        Builds a post-order traversal of the graph rooted at this node
        (inputs before the nodes that consume them), then walks it in
        reverse, calling each ``_backward`` closure so the gradient is
        chained back to the leaves.

        Raises:
            RuntimeError: if this node has ``requires_grad=False``.
        """
        if not self.requires_grad:
            raise RuntimeError("backward() called on a Tensor with requires_grad=False")
        # Iterative post-order DFS. We push ``(node, processed)``
        # tuples; on the first visit we re-push the node with
        # ``processed=True`` after scheduling its inputs, so the
        # node only gets appended to ``topo`` after everything it
        # depends on is already in place.
        topo: list[Tensor] = []
        visited: set[Tensor] = set()
        work: list[tuple[Tensor, bool]] = [(self, False)]
        while work:
            node, processed = work.pop()
            if processed:
                topo.append(node)
                continue
            if node in visited:
                # A shared input can be scheduled by several consumers
                # before its first pop; later pops land here.
                continue
            visited.add(node)
            work.append((node, True))
            for child in node._prev:
                if child not in visited:
                    work.append((child, False))
        # ``topo`` is post-order. Reverse it so every node runs its
        # closure only after all of its consumers have contributed.
        topo.reverse()
        # Seed the output gradient.
        self.grad = 1.0
        for node in topo:
            if node._backward is not None:
                node._backward()

    def zero_grad(self) -> None:
        """Reset ``grad`` to 0 on this node and every node reachable via ``_prev``.

        Call between training steps so gradients don't accumulate
        across batches (PyTorch's ``optim.zero_grad()`` semantics).
        """
        visited: set[Tensor] = set()
        stack: list[Tensor] = [self]
        while stack:
            node = stack.pop()
            # This guard IS reachable: the push filter only tests
            # ``visited``, so a node shared by two consumers can be
            # pushed twice before it is popped for the first time.
            if node in visited:
                continue
            visited.add(node)
            node.grad = 0.0
            stack.extend(child for child in node._prev if child not in visited)
|
Methods:
backward
Compute gradients via reverse-mode autodiff.
Builds a post-order traversal of the graph rooted at this
node (children before parents) then walks it in reverse,
calling each _backward closure to chain the gradient
back to leaves. Sets every visited leaf's .grad to the
accumulated value.
Source code in src\cds\nlp\autograd\tensor.py
def backward(self) -> None:
    """Run reverse-mode autodiff starting from this node.

    A post-order walk of the graph reachable through ``_prev`` is
    collected first (inputs ahead of the nodes that consume them).
    The walk is then replayed backwards, invoking each node's
    ``_backward`` closure so the seed gradient of 1.0 placed on this
    node is chained down to the leaves.

    Raises:
        RuntimeError: if this node has ``requires_grad=False``.
    """
    if not self.requires_grad:
        raise RuntimeError("backward() called on a Tensor with requires_grad=False")

    # Explicit-stack DFS. Each entry is ``(node, expanded)``: a node is
    # first popped un-expanded, re-queued as expanded beneath its
    # inputs, and only recorded once it resurfaces — by which time
    # everything it depends on has been recorded.
    order: list[Tensor] = []
    seen: set[Tensor] = set()
    pending: list[tuple[Tensor, bool]] = [(self, False)]
    while pending:
        current, expanded = pending.pop()
        if expanded:
            order.append(current)
        elif current not in seen:
            seen.add(current)
            pending.append((current, True))
            pending.extend((dep, False) for dep in current._prev if dep not in seen)

    # Seed the root, then visit consumers before their inputs.
    self.grad = 1.0
    for current in reversed(order):
        hook = current._backward
        if hook is not None:
            hook()
|
zero_grad
Reset grad to 0 on this node and on every node reachable from it through _prev (intermediate nodes as well as leaves).
Call between training steps so gradients don't accumulate
across batches (PyTorch's optim.zero_grad() semantics).
Source code in src\cds\nlp\autograd\tensor.py
def zero_grad(self) -> None:
    """Reset ``grad`` to 0 on this node and every node reachable via ``_prev``.

    Call between training steps so gradients don't accumulate
    across batches (PyTorch's ``optim.zero_grad()`` semantics).
    """
    visited: set[Tensor] = set()
    stack: list[Tensor] = [self]
    while stack:
        node = stack.pop()
        # This guard IS reachable: the push filter only tests
        # ``visited``, so a node shared by two consumers can be pushed
        # twice before it is popped for the first time.
        if node in visited:
            continue
        visited.add(node)
        # Resetting during the walk is safe — traversal never reads grad.
        node.grad = 0.0
        stack.extend(child for child in node._prev if child not in visited)
|
BPEMerge
dataclass
A single BPE merge rule.
Attributes:
| Name |
Type |
Description |
pair |
tuple[str, str]
|
The adjacent symbol pair that was merged.
|
rank |
int
|
Priority — lower rank = applied earlier. When two merges
could both apply, the lower-rank one wins. (Greedy encoding
relies on this.)
|
new_token |
str
|
The merged token string (= pair[0] + pair[1]).
|
Source code in src\cds\nlp\bpe.py
@dataclass
class BPEMerge:
    """A single BPE merge rule.

    Attributes:
        pair: The adjacent symbol pair that was merged.
        rank: Priority — lower rank = applied earlier. When two merges
            could both apply, the lower-rank one wins. (Greedy encoding
            relies on this.)
        new_token: The merged token string (= ``pair[0] + pair[1]``).
    """

    pair: tuple[str, str]
    rank: int
    new_token: str

    def to_dict(self) -> dict[str, object]:
        """Serialize this merge to a JSON-friendly dict."""
        # The tuple becomes a list because JSON has no tuple type.
        return {"pair": list(self.pair), "rank": self.rank, "new_token": self.new_token}

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> BPEMerge:
        """Reconstruct a :class:`BPEMerge` from :meth:`to_dict` output.

        Raises:
            ValueError: if ``data`` is missing keys, has the wrong types,
                or the ``pair`` does not contain exactly two strings.
        """
        # A missing key must surface as the documented ValueError, not
        # as a bare KeyError from the subscript.
        try:
            pair_raw = data["pair"]
            rank_raw = data["rank"]
            token_raw = data["new_token"]
        except KeyError as exc:
            raise ValueError(f"Missing key in BPE merge: {exc.args[0]!r}") from exc
        if not isinstance(pair_raw, list) or len(pair_raw) != 2:
            raise ValueError(f"Invalid pair in BPE merge: {pair_raw!r}")
        a_raw, b_raw = pair_raw[0], pair_raw[1]
        if not isinstance(a_raw, str) or not isinstance(b_raw, str):
            raise ValueError(f"Invalid pair components: {pair_raw!r}")
        # bool is a subclass of int; reject it explicitly.
        if not isinstance(rank_raw, int) or isinstance(rank_raw, bool):
            raise ValueError(f"Invalid rank: {rank_raw!r}")
        if not isinstance(token_raw, str):
            raise ValueError(f"Invalid new_token: {token_raw!r}")
        return cls(pair=(a_raw, b_raw), rank=rank_raw, new_token=token_raw)
|
Methods:
to_dict
to_dict() -> dict[str, object]
Serialize this merge to a JSON-friendly dict.
Source code in src\cds\nlp\bpe.py
def to_dict(self) -> dict[str, object]:
    """Return this merge rule as a plain dict suitable for JSON output."""
    serialised: dict[str, object] = {}
    # JSON has no tuple type, so the pair is emitted as a list.
    serialised["pair"] = list(self.pair)
    serialised["rank"] = self.rank
    serialised["new_token"] = self.new_token
    return serialised
|
from_dict
classmethod
from_dict(data: dict[str, object]) -> BPEMerge
Reconstruct a :class:BPEMerge from :meth:to_dict output.
Raises:
| Type |
Description |
ValueError
|
if data is missing keys, has the wrong types,
or the pair does not contain exactly two strings.
|
Source code in src\cds\nlp\bpe.py
@classmethod
def from_dict(cls, data: dict[str, object]) -> BPEMerge:
    """Reconstruct a :class:`BPEMerge` from :meth:`to_dict` output.

    Raises:
        ValueError: if ``data`` is missing keys, has the wrong types,
            or the ``pair`` does not contain exactly two strings.
    """
    # A missing key must surface as the documented ValueError, not as a
    # bare KeyError from the subscript.
    try:
        pair_raw = data["pair"]
        rank_raw = data["rank"]
        token_raw = data["new_token"]
    except KeyError as exc:
        raise ValueError(f"Missing key in BPE merge: {exc.args[0]!r}") from exc
    if not isinstance(pair_raw, list) or len(pair_raw) != 2:
        raise ValueError(f"Invalid pair in BPE merge: {pair_raw!r}")
    a_raw, b_raw = pair_raw[0], pair_raw[1]
    if not isinstance(a_raw, str) or not isinstance(b_raw, str):
        raise ValueError(f"Invalid pair components: {pair_raw!r}")
    # bool is a subclass of int; reject it explicitly.
    if not isinstance(rank_raw, int) or isinstance(rank_raw, bool):
        raise ValueError(f"Invalid rank: {rank_raw!r}")
    if not isinstance(token_raw, str):
        raise ValueError(f"Invalid new_token: {token_raw!r}")
    return cls(pair=(a_raw, b_raw), rank=rank_raw, new_token=token_raw)
|
BPETokenizer
dataclass
A trained byte-pair encoding tokenizer.
Construct one with :func:train_bpe, or load a previously saved one
with :meth:load. Encoding is greedy: scan input left-to-right,
find the longest contiguous substring that is in the vocab, emit
its id, advance. Unknown characters fall back to <unk> (id 0).
Attributes:
| Name |
Type |
Description |
vocab |
dict[str, int]
|
Mapping from token string → integer id.
|
id_to_token |
dict[int, str]
|
Inverse of vocab (built once in __post_init__; the property returns a copy).
|
merges |
list[BPEMerge]
|
List of BPE merge rules, in training order. The index
of a rule is its priority (lower = earlier).
|
eow |
str
|
End-of-word marker symbol appended during training.
|
Source code in src\cds\nlp\bpe.py
@dataclass
class BPETokenizer:
    """A trained byte-pair encoding tokenizer.

    Construct one with :func:`train_bpe`, or load a previously saved one
    with :meth:`load`. Encoding is greedy: scan input left-to-right,
    find the longest contiguous substring that is in the vocab, emit
    its id, advance. Unknown characters fall back to ``<unk>`` (id 0).

    Attributes:
        vocab: Mapping from token string → integer id.
        id_to_token: Inverse of ``vocab``. Built once in
            ``__post_init__``; it is not refreshed if ``vocab`` is
            mutated afterwards.
        merges: List of BPE merge rules, in training order. The index
            of a rule is its priority (lower = earlier).
        eow: End-of-word marker symbol appended during training.
    """

    vocab: dict[str, int] = field(default_factory=dict)
    merges: list[BPEMerge] = field(default_factory=list)
    eow: str = _END_OF_WORD

    def __post_init__(self) -> None:
        # Cache the inverse vocabulary so decode() does one dict lookup
        # per id instead of searching ``vocab`` for each one.
        self._id_to_token: dict[int, str] = {i: t for t, i in self.vocab.items()}

    @property
    def vocab_size(self) -> int:
        """Number of tokens in the vocabulary (including specials)."""
        return len(self.vocab)

    @property
    def id_to_token(self) -> dict[int, str]:
        """Copy of the id → token mapping (mutating it does not affect the tokenizer)."""
        return dict(self._id_to_token)

    # ------------------------------------------------------------------ #
    # Encoding
    # ------------------------------------------------------------------ #
    def encode(self, text: str, add_specials: bool = False) -> list[int]:
        """Encode ``text`` into a list of token ids.

        Greedy longest-match: scan the input, at each position take the
        longest substring that exists in the vocabulary. If no character
        matches, emit ``<unk>`` and advance by one.

        Args:
            text: Raw input string; split into words by
                ``_pre_tokenize`` before encoding.
            add_specials: If True, prepend the ``<bos>`` id and append
                the ``<eos>`` id when those tokens are present in the
                vocabulary. Off by default — the educational pipeline
                wants raw token streams.

        Raises:
            ValueError: if the vocabulary is empty.
        """
        if not self.vocab:
            raise ValueError("Vocabulary is empty. Train or load a tokenizer first.")
        words = _pre_tokenize(text)
        ids: list[int] = []
        if add_specials:
            bos_id = self.vocab.get(BOS)
            if bos_id is not None:
                ids.append(bos_id)
        for word in words:
            ids.extend(self._encode_word(word))
        if add_specials:
            eos_id = self.vocab.get(EOS)
            if eos_id is not None:
                ids.append(eos_id)
        return ids

    def _encode_word(self, word: str) -> list[int]:
        """Greedy-encode a single pre-tokenised word.

        At each cursor position every contiguous run of symbols is
        tried, longest first, and the first one found in the vocab
        wins. That is O(L²) candidate substrings per word in the worst
        case, which is fine because words are short.
        """
        symbols = _word_to_symbols(word)
        result: list[int] = []
        i = 0
        n = len(symbols)
        while i < n:
            matched = False
            # Try end positions from n down to i + 1, i.e. the longest
            # candidate symbols[i:n] first and the single symbol last.
            for j in range(n, i, -1):
                candidate = "".join(symbols[i:j])
                token_id = self.vocab.get(candidate)
                if token_id is not None:
                    result.append(token_id)
                    i = j
                    matched = True
                    break
            if not matched:
                # Not even the single symbol is in the vocab: emit the
                # <unk> id (0 if <unk> itself is missing) and skip it.
                unk_id = self.vocab.get(UNK, 0)
                result.append(unk_id)
                i += 1
        return result

    # ------------------------------------------------------------------ #
    # Decoding
    # ------------------------------------------------------------------ #
    def decode(self, ids: list[int], strip_eow: bool = True) -> str:
        """Decode a list of ids back to a string.

        Args:
            ids: Token id sequence.
            strip_eow: If True (default), drop this tokenizer's
                end-of-word marker (``self.eow``, ``</w>`` by default)
                from the end of each word and join the words with
                spaces. Works for both the literal marker token *and*
                merged tokens that end in it (e.g. ``"low</w>"``).
                If False, concatenate the raw token strings; ids that
                are not in the vocabulary are silently skipped.

        Raises:
            ValueError: if ``strip_eow`` is True and an id is not in
                the vocabulary.
        """
        if not strip_eow:
            # Raw concatenation — useful for inspecting token boundaries.
            return "".join(self._id_to_token.get(tid, "") for tid in ids)

        # Use the instance's marker rather than the module default so a
        # tokenizer saved with a custom ``eow`` decodes correctly. An
        # empty marker would match every token, so treat it as "none".
        eow = self.eow
        words: list[str] = []
        current: list[str] = []
        for tid in ids:
            tok = self._id_to_token.get(tid)
            if tok is None:
                raise ValueError(f"Unknown token id: {tid}")
            if tok == UNK:
                current.append("�")
            elif eow and tok.endswith(eow):
                # Either the bare marker or a merged token ending in
                # it: append the stem and close the current word.
                current.append(tok[: -len(eow)])
                words.append("".join(current))
                current = []
            else:
                current.append(tok)
        if current:
            words.append("".join(current))
        # Drop empty words (e.g. a bare marker with nothing before it).
        return " ".join(w for w in words if w)

    # ------------------------------------------------------------------ #
    # Persistence
    # ------------------------------------------------------------------ #
    def save(self, path: str | Path) -> None:
        """Save the tokenizer to a JSON file.

        Format::

            {
              "vocab": {"<unk>": 0, "a": 1, ...},
              "merges": [{"pair": ["a", "b"], "rank": 0, "new_token": "ab"}, ...],
              "eow": "</w>"
            }
        """
        payload = {
            "vocab": self.vocab,
            "merges": [m.to_dict() for m in self.merges],
            "eow": self.eow,
        }
        Path(path).write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    @classmethod
    def load(cls, path: str | Path) -> BPETokenizer:
        """Load a tokenizer previously saved with :meth:`save`.

        Raises:
            ValueError: if the file's top-level value is not an object,
                ``vocab`` is missing or not an object, ``merges`` is not
                a list, or a merge entry is malformed.
        """
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            raise ValueError("Invalid tokenizer file: top-level JSON value must be an object")
        vocab_raw = data.get("vocab")
        if not isinstance(vocab_raw, dict):
            raise ValueError("Invalid vocab field in tokenizer file")
        merges_raw = data.get("merges", [])
        if not isinstance(merges_raw, list):
            raise ValueError("Invalid merges field in tokenizer file")
        merges: list[BPEMerge] = []
        for raw_merge in merges_raw:
            if not isinstance(raw_merge, dict):
                raise ValueError(f"Invalid merge entry: {raw_merge!r}")
            merges.append(BPEMerge.from_dict(raw_merge))
        return cls(
            vocab=dict(vocab_raw),
            merges=merges,
            eow=str(data.get("eow", _END_OF_WORD)),
        )
|
Attributes
vocab_size
property
Number of tokens in the vocabulary (including specials).
id_to_token
property
id_to_token: dict[int, str]
Read-only view of id → token mapping.
Methods:
encode
encode(text: str, add_specials: bool = False) -> list[int]
Encode text into a list of token ids.
Greedy longest-match: scan the input, at each position take the
longest substring that exists in the vocabulary. If no character
matches, emit <unk> and advance by one.
Parameters:
| Name |
Type |
Description |
Default |
text
|
str
|
Raw input string. Will be pre-tokenised on whitespace
and punctuation.
|
required
|
add_specials
|
bool
|
If True, prepend <bos> (id 2) and append
<eos> (id 3). Off by default — the educational
pipeline wants raw token streams.
|
False
|
Source code in src\cds\nlp\bpe.py
def encode(self, text: str, add_specials: bool = False) -> list[int]:
    """Turn ``text`` into a flat list of token ids.

    The text is split into words by ``_pre_tokenize`` and every word is
    encoded with the greedy longest-match routine ``_encode_word``; the
    per-word id lists are concatenated in order.

    Args:
        text: Raw input string.
        add_specials: When True, the ``<bos>`` id is placed first and
            the ``<eos>`` id last, provided those tokens exist in the
            vocabulary. Defaults to False because the educational
            pipeline wants raw token streams.

    Raises:
        ValueError: if the vocabulary is empty.
    """
    if not self.vocab:
        raise ValueError("Vocabulary is empty. Train or load a tokenizer first.")

    pieces = _pre_tokenize(text)
    token_ids: list[int] = []

    # Optional leading marker — skipped quietly if the vocab lacks it.
    if add_specials and (start_id := self.vocab.get(BOS)) is not None:
        token_ids.append(start_id)

    for piece in pieces:
        token_ids += self._encode_word(piece)

    # Optional trailing marker, same rule as above.
    if add_specials and (end_id := self.vocab.get(EOS)) is not None:
        token_ids.append(end_id)

    return token_ids
|
decode
decode(ids: list[int], strip_eow: bool = True) -> str
Decode a list of ids back to a string.
Parameters:
| Name |
Type |
Description |
Default |
ids
|
list[int]
|
Token id sequence (must be non-negative integers in
[0, vocab_size)).
|
required
|
strip_eow
|
bool
|
If True (default), drop the </w> marker from
the end of each word and join with spaces. Works for
both the literal </w> token and merged tokens
that end in </w> (e.g. "low</w>"). If False,
concatenate the raw token strings.
|
True
|
Source code in src\cds\nlp\bpe.py
def decode(self, ids: list[int], strip_eow: bool = True) -> str:
    """Decode a list of ids back to a string.

    Args:
        ids: Token id sequence.
        strip_eow: If True (default), drop this tokenizer's
            end-of-word marker (``self.eow``, ``</w>`` by default) from
            the end of each word and join the words with spaces. Works
            for both the literal marker token *and* merged tokens that
            end in it (e.g. ``"low</w>"``). If False, concatenate the
            raw token strings; ids that are not in the vocabulary are
            silently skipped.

    Raises:
        ValueError: if ``strip_eow`` is True and an id is not in the
            vocabulary.
    """
    if not strip_eow:
        # Raw concatenation — useful for inspecting token boundaries.
        return "".join(self._id_to_token.get(tid, "") for tid in ids)

    # Use the instance's marker rather than the module default so a
    # tokenizer saved with a custom ``eow`` decodes correctly. An empty
    # marker would match every token, so treat it as "no marker".
    eow = self.eow
    words: list[str] = []
    current: list[str] = []
    for tid in ids:
        tok = self._id_to_token.get(tid)
        if tok is None:
            raise ValueError(f"Unknown token id: {tid}")
        if tok == UNK:
            current.append("�")
        elif eow and tok.endswith(eow):
            # Either the bare marker or a merged token ending in it:
            # append the stem and close the current word.
            current.append(tok[: -len(eow)])
            words.append("".join(current))
            current = []
        else:
            current.append(tok)
    if current:
        words.append("".join(current))
    # Drop empty words (e.g. a bare marker with nothing before it).
    return " ".join(w for w in words if w)
|
save
save(path: str | Path) -> None
Save the tokenizer to a JSON file.
Format::
{
"vocab": {"<unk>": 0, "a": 1, ...},
"merges": [{"pair": ["a", "b"], "rank": 0, "new_token": "ab"}, ...],
"eow": "</w>"
}
Source code in src\cds\nlp\bpe.py
| def save(self, path: str | Path) -> None:
"""Save the tokenizer to a JSON file.
Format::
{
"vocab": {"<unk>": 0, "a": 1, ...},
"merges": [{"pair": ["a", "b"], "rank": 0, "new_token": "ab"}, ...],
"eow": "</w>"
}
"""
payload = {
"vocab": self.vocab,
"merges": [m.to_dict() for m in self.merges],
"eow": self.eow,
}
Path(path).write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
load
classmethod
load(path: str | Path) -> BPETokenizer
Load a tokenizer previously saved with :meth:save.
Source code in src\cds\nlp\bpe.py
@classmethod
def load(cls, path: str | Path) -> BPETokenizer:
    """Load a tokenizer previously saved with :meth:`save`.

    Raises:
        ValueError: if the file's top-level value is not an object,
            ``vocab`` is missing or not an object, ``merges`` is not a
            list, or a merge entry is malformed.
    """
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    # Validate the container shapes up front so a malformed file fails
    # with ValueError like the other checks here, rather than with an
    # AttributeError/KeyError from deep inside the loader.
    if not isinstance(data, dict):
        raise ValueError("Invalid tokenizer file: top-level JSON value must be an object")
    vocab_raw = data.get("vocab")
    if not isinstance(vocab_raw, dict):
        raise ValueError("Invalid vocab field in tokenizer file")
    merges_raw = data.get("merges", [])
    if not isinstance(merges_raw, list):
        raise ValueError("Invalid merges field in tokenizer file")
    merges: list[BPEMerge] = []
    for raw_merge in merges_raw:
        if not isinstance(raw_merge, dict):
            raise ValueError(f"Invalid merge entry: {raw_merge!r}")
        merges.append(BPEMerge.from_dict(raw_merge))
    return cls(
        vocab=dict(vocab_raw),
        merges=merges,
        eow=str(data.get("eow", _END_OF_WORD)),
    )
|
PositionalEncoding
dataclass
Sinusoidal positional encoding from Vaswani et al. (2017).
PE(pos, 2i) = sin(pos / 10000^(2i / d_model))
PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
where pos is the zero-based position and i indexes the
embedding dimension. The matrix is computed once at construction
and reused for every forward pass.
Attributes:
| Name |
Type |
Description |
max_len |
int
|
Maximum sequence length the encoding supports.
|
d_model |
int
|
Embedding dimensionality (must match the token
embedding it's added to).
|
matrix |
list[list[float]]
|
The precomputed max_len × d_model encoding matrix.
|
Source code in src\cds\nlp\embed.py
@dataclass
class PositionalEncoding:
    """Fixed sinusoidal position table after Vaswani et al. (2017).

        PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
        PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))

    ``pos`` is the zero-based position and ``i`` indexes sin/cos column
    pairs. The table is filled once when the object is constructed and
    only read afterwards.

    Attributes:
        max_len: Maximum sequence length the encoding supports.
        d_model: Embedding dimensionality (must match the token
            embedding it's added to).
        matrix: The precomputed ``max_len × d_model`` encoding matrix.
    """

    max_len: int
    d_model: int
    matrix: list[list[float]] = field(init=False)

    def __post_init__(self) -> None:
        """Validate the dimensions and fill ``matrix``.

        Raises:
            ValueError: if ``max_len`` or ``d_model`` is not positive.
        """
        if self.max_len <= 0:
            raise ValueError(f"max_len must be > 0, got {self.max_len}")
        if self.d_model <= 0:
            raise ValueError(f"d_model must be > 0, got {self.d_model}")
        self.matrix = _make_matrix(self.max_len, self.d_model)

        # The divisor depends only on the column, so compute one per
        # column up front. Columns 2k and 2k+1 share 10000^(2k/d_model).
        divisors = [
            10000.0 ** ((2 * (col // 2)) / self.d_model) for col in range(self.d_model)
        ]
        for pos in range(self.max_len):
            row = self.matrix[pos]
            for col, divisor in enumerate(divisors):
                angle = pos / divisor
                # Even columns carry the sine, odd columns the cosine.
                row[col] = math.cos(angle) if col % 2 else math.sin(angle)

    def forward(self, length: int) -> list[list[float]]:
        """Return copies of the first ``length`` rows of the table.

        Args:
            length: Number of positions wanted; must satisfy
                ``0 <= length <= max_len``.

        Raises:
            ValueError: if ``length`` is negative or exceeds ``max_len``.
        """
        if length < 0:
            raise ValueError(f"length must be >= 0, got {length}")
        if length > self.max_len:
            raise ValueError(f"length {length} exceeds max_len {self.max_len}")
        # Each row is copied so callers cannot mutate the cached table.
        return [list(self.matrix[row_idx]) for row_idx in range(length)]

    @property
    def shape(self) -> tuple[int, int]:
        """The table dimensions as ``(max_len, d_model)``."""
        return (self.max_len, self.d_model)
|
Attributes
shape
property
Returns (max_len, d_model).
Methods:
forward
forward(length: int) -> list[list[float]]
Return the first length rows of the encoding matrix.
Parameters:
| Name |
Type |
Description |
Default |
length
|
int
|
Desired output length (must be <= max_len).
|
required
|
Source code in src\cds\nlp\embed.py
def forward(self, length: int) -> list[list[float]]:
    """Return copies of the first ``length`` rows of the encoding table.

    Args:
        length: Number of positions wanted; must satisfy
            ``0 <= length <= max_len``.

    Raises:
        ValueError: if ``length`` is negative or exceeds ``max_len``.
    """
    if length < 0:
        raise ValueError(f"length must be >= 0, got {length}")
    if length > self.max_len:
        raise ValueError(f"length {length} exceeds max_len {self.max_len}")
    # Each row is copied so callers cannot mutate the cached table.
    rows: list[list[float]] = []
    for row_idx in range(length):
        rows.append(list(self.matrix[row_idx]))
    return rows
|
TokenEmbedding
dataclass
A token-id → dense-vector lookup table.
Initialised with small random values from a fixed RNG seed so the
educational pipeline is reproducible. Training (gradient updates)
is handled by the autograd module; this layer is read-only until
then — call :meth:forward to embed, then call :meth:set_value
to hand-write weights.
Attributes:
| Name |
Type |
Description |
vocab_size |
int
|
Number of rows in the embedding table.
|
d_model |
int
|
Embedding dimensionality (output width).
|
matrix |
list[list[float]]
|
The vocab_size × d_model weight matrix as nested
lists. matrix[id][j] is the j-th component of the
embedding for token id id.
|
Source code in src\cds\nlp\embed.py
@dataclass
class TokenEmbedding:
    """A token-id → dense-vector lookup table.

    Initialised with small random values from a fixed RNG seed so the
    educational pipeline is reproducible. Training (gradient updates)
    is handled by the autograd module; this layer itself only offers
    :meth:`forward` to look embeddings up and :meth:`set_value` to
    hand-write a row.

    Attributes:
        vocab_size: Number of rows in the embedding table.
        d_model: Embedding dimensionality (output width).
        matrix: The ``vocab_size × d_model`` weight matrix as nested
            lists. ``matrix[id][j]`` is the j-th component of the
            embedding for token id ``id``.
    """

    vocab_size: int
    d_model: int
    matrix: list[list[float]] = field(init=False)

    def __post_init__(self) -> None:
        """Validate the dimensions and fill ``matrix`` deterministically.

        Raises:
            ValueError: if ``vocab_size`` or ``d_model`` is not positive.
        """
        if self.vocab_size <= 0:
            raise ValueError(f"vocab_size must be > 0, got {self.vocab_size}")
        if self.d_model <= 0:
            raise ValueError(f"d_model must be > 0, got {self.d_model}")
        # Deterministic init via a fixed seed so test runs are
        # reproducible. Weights are drawn uniformly from
        # [-1/sqrt(d_model), +1/sqrt(d_model)]. This is a simple scaled
        # uniform scheme, not the Glorot bound sqrt(6 / (fan_in + fan_out)).
        import random

        rng = random.Random(0xC0FFEE)
        bound = 1.0 / math.sqrt(self.d_model)
        self.matrix = [
            [rng.uniform(-bound, bound) for _ in range(self.d_model)]
            for _ in range(self.vocab_size)
        ]

    def forward(self, ids: list[int]) -> list[list[float]]:
        """Look up embeddings for a sequence of token ids.

        Args:
            ids: List of token ids (must be in ``[0, vocab_size)``).

        Returns:
            A ``len(ids) × d_model`` matrix (nested list).

        Raises:
            IndexError: if any id is outside ``[0, vocab_size)``.
        """
        out: list[list[float]] = []
        for tid in ids:
            if tid < 0 or tid >= self.vocab_size:
                raise IndexError(f"Token id {tid} out of range [0, {self.vocab_size})")
            # Defensive copy — callers can mutate the result without
            # poisoning the table.
            out.append(list(self.matrix[tid]))
        return out

    def set_value(self, token_id: int, values: list[float]) -> None:
        """Overwrite the embedding for ``token_id`` (used by tests).

        Raises:
            ValueError: if ``values`` does not have ``d_model`` entries.
            IndexError: if ``token_id`` is outside ``[0, vocab_size)``.
        """
        if len(values) != self.d_model:
            raise ValueError(f"values length {len(values)} != d_model {self.d_model}")
        # Mirror forward()'s range check: without it a negative id would
        # silently overwrite a row counted from the end of the table.
        if token_id < 0 or token_id >= self.vocab_size:
            raise IndexError(f"Token id {token_id} out of range [0, {self.vocab_size})")
        # Store a copy so later changes to ``values`` don't leak in.
        self.matrix[token_id] = list(values)

    @property
    def shape(self) -> tuple[int, int]:
        """Returns ``(vocab_size, d_model)``."""
        return (self.vocab_size, self.d_model)
|
Attributes
shape
property
Returns (vocab_size, d_model).
Methods:
forward
forward(ids: list[int]) -> list[list[float]]
Look up embeddings for a sequence of token ids.
Parameters:
| Name |
Type |
Description |
Default |
ids
|
list[int]
|
List of token ids (must be in [0, vocab_size)).
|
required
|
Returns:
| Type |
Description |
list[list[float]]
|
A len(ids) × d_model matrix (nested list).
|
Source code in src\cds\nlp\embed.py
def forward(self, ids: list[int]) -> list[list[float]]:
    """Fetch the embedding row for every id in ``ids``.

    Args:
        ids: Token ids, each required to lie in ``[0, vocab_size)``.

    Returns:
        One freshly copied row per id, in input order — a
        ``len(ids) × d_model`` nested list.

    Raises:
        IndexError: on the first id outside ``[0, vocab_size)``.
    """
    limit = self.vocab_size
    table = self.matrix
    rows: list[list[float]] = []
    for token_id in ids:
        if token_id < 0 or token_id >= limit:
            raise IndexError(f"Token id {token_id} out of range [0, {limit})")
        # Copy the row so edits by the caller never reach the table.
        rows.append(list(table[token_id]))
    return rows
|
set_value
set_value(token_id: int, values: list[float]) -> None
Overwrite the embedding for token_id (used by tests).
Source code in src\cds\nlp\embed.py
def set_value(self, token_id: int, values: list[float]) -> None:
    """Overwrite the embedding for ``token_id`` (used by tests).

    Raises:
        ValueError: if ``values`` does not have ``d_model`` entries.
        IndexError: if ``token_id`` is outside ``[0, vocab_size)``.
    """
    if len(values) != self.d_model:
        raise ValueError(f"values length {len(values)} != d_model {self.d_model}")
    # Apply the same range check as forward(): without it a negative id
    # would silently overwrite a row counted from the end of the table.
    if token_id < 0 or token_id >= self.vocab_size:
        raise IndexError(f"Token id {token_id} out of range [0, {self.vocab_size})")
    # Store a copy so later changes to ``values`` don't leak in.
    self.matrix[token_id] = list(values)
|
SGD
dataclass
Stochastic gradient descent with optional momentum.
Parameters:
| Name |
Type |
Description |
Default |
params
|
list[Tensor]
|
Iterable of :class:Parameter (or any
:class:cds.nlp.autograd.Tensor with requires_grad=True)
to update.
|
required
|
lr
|
float
|
Learning rate. Must be positive.
|
SGD_DEFAULT_LR
|
momentum
|
float
|
Momentum factor in [0, 1). 0 reduces to
vanilla SGD. > 0 updates each parameter with
v = momentum * v + grad; p -= lr * v.
|
0.0
|
weight_decay
|
float
|
Optional L2 penalty coefficient. Adds
weight_decay * p.data to the gradient at every step.
|
0.0
|
Source code in src\cds\nlp\optim.py
@dataclass
class SGD:
    """Plain stochastic gradient descent, optionally with momentum.

    Args:
        params: The :class:`Parameter` objects (or any
            :class:`cds.nlp.autograd.Tensor` with ``requires_grad=True``)
            to update. Must support ``len()``.
        lr: Learning rate. Must be positive.
        momentum: Momentum factor in ``[0, 1)``. ``0`` gives vanilla
            SGD; a positive value updates each parameter with
            ``v = momentum * v + grad; p -= lr * v``.
        weight_decay: Optional L2 penalty coefficient. When non-zero,
            ``weight_decay * p.data`` is added to the gradient at every
            step.
    """

    params: list[Tensor]
    lr: float = SGD_DEFAULT_LR
    momentum: float = 0.0
    weight_decay: float = 0.0
    _velocities: list[float] = field(init=False, default_factory=list)

    def __post_init__(self) -> None:
        """Validate the hyper-parameters and allocate momentum state.

        Raises:
            ValueError: if ``lr`` is not positive, ``momentum`` is
                outside ``[0, 1)``, or ``weight_decay`` is negative.
        """
        if self.lr <= 0:
            raise ValueError(f"lr must be > 0, got {self.lr}")
        if not 0.0 <= self.momentum < 1.0:
            raise ValueError(f"momentum must be in [0, 1), got {self.momentum}")
        if self.weight_decay < 0:
            raise ValueError(f"weight_decay must be >= 0, got {self.weight_decay}")
        # One velocity accumulator per parameter, all starting at rest.
        self._velocities = [0.0 for _ in range(len(self.params))]

    def step(self) -> None:
        """Apply one update to every parameter from its current ``grad``.

        Call this after ``loss.backward()`` has filled the gradients
        and before ``zero_grad()`` clears them.
        """
        for idx, param in enumerate(self.params):
            # Fold the L2 penalty into the gradient only when enabled.
            if self.weight_decay:
                effective = param.grad + self.weight_decay * param.data
            else:
                effective = param.grad

            if self.momentum == 0.0:
                param.data -= self.lr * effective
                continue

            # Momentum path: decay the stored velocity, add the fresh
            # gradient, then step along the velocity.
            velocity = self.momentum * self._velocities[idx] + effective
            self._velocities[idx] = velocity
            param.data -= self.lr * velocity

    def zero_grad(self) -> None:
        """Set ``grad`` to 0.0 on every parameter. Call between batches."""
        for param in self.params:
            param.grad = 0.0
|
Methods:
step
Apply one update to each parameter.
Must be called after loss.backward() and before
zero_grad() — otherwise the gradient buffer will be
overwritten on the next forward pass.
Source code in src\cds\nlp\optim.py
def step(self) -> None:
    """Apply one SGD update to every managed parameter.

    Reads each parameter's ``.grad``, so call it *after*
    ``loss.backward()`` and *before* ``zero_grad()``.

    With ``momentum == 0`` the update is ``p -= lr * grad``; otherwise the
    per-parameter velocity is refreshed as ``v = momentum * v + grad`` and
    the parameter moves by ``lr * v``. A non-zero ``weight_decay`` adds
    ``weight_decay * p.data`` to the gradient first.
    """
    decay = self.weight_decay
    with_momentum = self.momentum != 0.0
    for idx, param in enumerate(self.params):
        g = param.grad
        if decay:
            g = g + decay * param.data
        if with_momentum:
            velocity = self.momentum * self._velocities[idx] + g
            self._velocities[idx] = velocity
            param.data -= self.lr * velocity
        else:
            param.data -= self.lr * g
|
zero_grad
Reset all parameter gradients to 0. Call between batches.
Source code in src\cds\nlp\optim.py
def zero_grad(self) -> None:
    """Clear the gradient of every managed parameter.

    Sets ``.grad`` to ``0.0`` on each entry of ``self.params``. Call
    between batches.
    """
    for param in self.params:
        param.grad = 0.0
|
Adam
dataclass
Adam optimiser (Kingma & Ba 2014).
Maintains per-parameter first and second moment estimates with
bias correction. Defaults match the paper (betas=(0.9, 0.999),
eps=1e-8).
Parameters:
| Name |
Type |
Description |
Default |
params
|
list[Tensor]
|
Trainable parameters.
|
required
|
lr
|
float
|
Learning rate. Typical values for transformer training
are in the 3e-4 to 1e-3 range.
|
ADAM_DEFAULT_LR
|
betas
|
tuple[float, float]
|
Coefficients for the first and second moment moving
averages.
|
ADAM_DEFAULT_BETAS
|
eps
|
float
|
Epsilon for numerical stability in the denominator.
|
ADAM_DEFAULT_EPS
|
weight_decay
|
float
|
Optional L2 penalty coefficient.
|
0.0
|
Source code in src\cds\nlp\optim.py
@dataclass
class Adam:
    """Adam optimiser (Kingma & Ba 2014).

    Keeps a first- and second-moment running average per parameter and
    applies bias correction to both. Defaults match the paper
    (``betas=(0.9, 0.999)``, ``eps=1e-8``).

    Args:
        params: Trainable parameters.
        lr: Learning rate. Typical values for transformer training
            are in the ``3e-4`` to ``1e-3`` range.
        betas: Coefficients for the first and second moment moving
            averages; each must lie in ``[0, 1)``.
        eps: Epsilon for numerical stability in the denominator.
        weight_decay: Optional L2 penalty coefficient.

    Raises:
        ValueError: If ``lr``, ``betas``, ``eps`` or ``weight_decay`` is
            out of its allowed range.
    """

    params: list[Tensor]
    lr: float = ADAM_DEFAULT_LR
    betas: tuple[float, float] = ADAM_DEFAULT_BETAS
    eps: float = ADAM_DEFAULT_EPS
    weight_decay: float = 0.0
    # Number of ``step`` calls so far; drives the bias correction.
    _t: int = field(init=False, default=0)
    # Per-parameter first / second moment estimates.
    _m: list[float] = field(init=False, default_factory=list)
    _v: list[float] = field(init=False, default_factory=list)

    def __post_init__(self) -> None:
        """Validate the hyper-parameters and zero both moment buffers."""
        if self.lr <= 0:
            raise ValueError(f"lr must be > 0, got {self.lr}")
        betas_valid = 0.0 <= self.betas[0] < 1.0 and 0.0 <= self.betas[1] < 1.0
        if not betas_valid:
            raise ValueError(f"betas must each be in [0, 1), got {self.betas}")
        if self.eps <= 0:
            raise ValueError(f"eps must be > 0, got {self.eps}")
        if self.weight_decay < 0:
            raise ValueError(f"weight_decay must be >= 0, got {self.weight_decay}")
        slot_count = len(self.params)
        self._m = [0.0] * slot_count
        self._v = [0.0] * slot_count

    def step(self) -> None:
        """Apply one Adam update and advance the internal step counter."""
        self._t += 1
        beta1, beta2 = self.betas
        # Bias-correction denominators depend only on the step count, so
        # they are shared by every parameter in this step.
        correction1 = 1.0 - beta1**self._t
        correction2 = 1.0 - beta2**self._t
        for idx, param in enumerate(self.params):
            g = param.grad
            if self.weight_decay:
                g = g + self.weight_decay * param.data
            first = beta1 * self._m[idx] + (1.0 - beta1) * g
            second = beta2 * self._v[idx] + (1.0 - beta2) * (g * g)
            self._m[idx] = first
            self._v[idx] = second
            m_hat = first / correction1
            v_hat = second / correction2
            param.data -= self.lr * m_hat / (math.sqrt(v_hat) + self.eps)

    def zero_grad(self) -> None:
        """Reset ``.grad`` to ``0.0`` on every managed parameter."""
        for param in self.params:
            param.grad = 0.0
|
Methods:
step
Apply one update. Increment step counter internally.
Source code in src\cds\nlp\optim.py
def step(self) -> None:
    """Apply one Adam update and advance the internal step counter.

    For each parameter the moment estimates are refreshed as
    ``m = b1 * m + (1 - b1) * grad`` and ``v = b2 * v + (1 - b2) * grad²``,
    bias-corrected by ``1 - b**t``, and the parameter moves by
    ``lr * m_hat / (sqrt(v_hat) + eps)``. A non-zero ``weight_decay`` adds
    ``weight_decay * p.data`` to the gradient first.
    """
    self._t += 1
    beta1, beta2 = self.betas
    # Bias correction matters most in early steps, while the moving
    # averages are still warming up. The denominators depend only on the
    # step count, so they are computed once per step.
    correction1 = 1.0 - beta1**self._t
    correction2 = 1.0 - beta2**self._t
    for idx, param in enumerate(self.params):
        g = param.grad
        if self.weight_decay:
            g = g + self.weight_decay * param.data
        first = beta1 * self._m[idx] + (1.0 - beta1) * g
        second = beta2 * self._v[idx] + (1.0 - beta2) * (g * g)
        self._m[idx] = first
        self._v[idx] = second
        m_hat = first / correction1
        v_hat = second / correction2
        param.data -= self.lr * m_hat / (math.sqrt(v_hat) + self.eps)
|
zero_grad
Reset .grad to 0.0 on every managed parameter.
Source code in src\cds\nlp\optim.py
def zero_grad(self) -> None:
    """Reset ``.grad`` to ``0.0`` on every managed parameter.

    Iterates ``self.params`` and overwrites each gradient in place.
    """
    for param in self.params:
        param.grad = 0.0
|
Functions:
causal_mask
causal_mask(n: int) -> list[list[float]]
Upper-triangular -inf mask for decoder self-attention.
Position i may attend to positions 0..=i and nothing else.
The mask is added to the pre-softmax scores, so the -inf entries
become zero probability after softmax.
Source code in src\cds\nlp\attention.py
def causal_mask(n: int) -> list[list[float]]:
    """Build the additive causal mask for decoder self-attention.

    Row ``i`` holds ``0.0`` in columns ``0`` through ``i`` (the positions
    that may be attended to) and ``-inf`` in every later column. The mask
    is added to the pre-softmax scores, so the ``-inf`` entries become
    zero probability after softmax.

    Args:
        n: Sequence length.

    Returns:
        An ``n × n`` nested list; ``[]`` when ``n`` is ``0``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be >= 0, got {n}")
    mask: list[list[float]] = []
    for i in range(n):
        visible = i + 1
        mask.append([0.0] * visible + [_NEG_INF] * (n - visible))
    return mask
|
merge_heads
merge_heads(
heads: list[list[list[float]]],
) -> list[list[float]]
Inverse of :func:split_heads: (n_heads, n, d_head) → (n, d_model).
Source code in src\cds\nlp\attention.py
def merge_heads(
    heads: list[list[list[float]]],
) -> list[list[float]]:
    """Inverse of :func:`split_heads`: ``(n_heads, n, d_head) → (n, d_model)``.

    Row ``i`` of the result is row ``i`` of every head concatenated in head
    order, so ``d_model = n_heads * d_head``.

    Args:
        heads: Per-head matrices. The sequence length ``n`` and the head
            width ``d_head`` are taken from ``heads[0]``.

    Returns:
        The merged ``(n, d_model)`` matrix, or ``[]`` when there are no
        heads or the sequence is empty.
    """
    # No heads, or heads over an empty sequence: nothing to merge.
    if not heads or not heads[0]:
        return []
    n = len(heads[0])
    d_head = len(heads[0][0])
    merged: list[list[float]] = []
    for i in range(n):
        row: list[float] = []
        for head in heads:
            # Copy exactly ``d_head`` values per head so every output row
            # has width ``n_heads * d_head``.
            row.extend(head[i][j] for j in range(d_head))
        merged.append(row)
    return merged
|
multi_head_attention
multi_head_attention(
x: list[list[float]],
w_q: list[list[float]],
w_k: list[list[float]],
w_v: list[list[float]],
w_o: list[list[float]],
n_heads: int,
mask: list[list[float]] | None = None,
) -> list[list[float]]
Multi-head self-attention (Vaswani 2017 §3.2.2).
Parameters:
| Name |
Type |
Description |
Default |
x
|
list[list[float]]
|
Input sequence, shape (n, d_model).
|
required
|
w_q
|
list[list[float]]
|
Projection matrix for queries, shape (d_model, d_model).
|
required
|
w_k
|
list[list[float]]
|
Projection matrix for keys, shape (d_model, d_model).
|
required
|
w_v
|
list[list[float]]
|
Projection matrix for values, shape (d_model, d_model).
|
required
|
w_o
|
list[list[float]]
|
Output projection, shape (d_model, d_model).
|
required
|
n_heads
|
int
|
Number of attention heads. Must divide d_model.
|
required
|
mask
|
list[list[float]] | None
|
Optional additive mask broadcast across all heads,
shape (n, n).
|
None
|
Returns:
| Type |
Description |
list[list[float]]
|
Output sequence, shape (n, d_model).
|
Source code in src\cds\nlp\attention.py
| def multi_head_attention(
x: list[list[float]],
w_q: list[list[float]],
w_k: list[list[float]],
w_v: list[list[float]],
w_o: list[list[float]],
n_heads: int,
mask: list[list[float]] | None = None,
) -> list[list[float]]:
"""Multi-head self-attention (Vaswani 2017 §3.2.2).
Args:
x: Input sequence, shape ``(n, d_model)``.
w_q: Projection matrix for queries, shape ``(d_model, d_model)``.
w_k: Projection matrix for keys, shape ``(d_model, d_model)``.
w_v: Projection matrix for values, shape ``(d_model, d_model)``.
w_o: Output projection, shape ``(d_model, d_model)``.
n_heads: Number of attention heads. Must divide ``d_model``.
mask: Optional additive mask broadcast across all heads,
shape ``(n, n)``.
Returns:
Output sequence, shape ``(n, d_model)``.
"""
if not x:
return []
d_model = len(x[0])
if d_model % n_heads != 0:
raise ValueError(f"d_model {d_model} not divisible by n_heads {n_heads}")
# 1. Project to Q, K, V.
q = matmul(x, w_q)
k = matmul(x, w_k)
v = matmul(x, w_v)
# 2. Split into heads, run per-head attention.
qh = split_heads(q, n_heads)
kh = split_heads(k, n_heads)
vh = split_heads(v, n_heads)
head_outputs = [scaled_dot_product_attention(qh[h], kh[h], vh[h], mask) for h in range(n_heads)]
# 3. Concatenate heads, then project to the output space.
merged = merge_heads(head_outputs)
return matmul(merged, w_o)
|
scaled_dot_product_attention
scaled_dot_product_attention(
q: list[list[float]],
k: list[list[float]],
v: list[list[float]],
mask: list[list[float]] | None = None,
) -> list[list[float]]
Compute softmax(Q K^T / sqrt(d_k) + mask) V.
Parameters:
| Name |
Type |
Description |
Default |
q
|
list[list[float]]
|
Query matrix of shape (n_q, d_k).
|
required
|
k
|
list[list[float]]
|
Key matrix of shape (n_k, d_k).
|
required
|
v
|
list[list[float]]
|
Value matrix of shape (n_k, d_v). n_k must equal
n_q for self-attention; cross-attention uses a
different n_k.
|
required
|
mask
|
list[list[float]] | None
|
Optional additive mask of shape (n_q, n_k). Use
0.0 to keep a position and -inf to suppress it.
The mask is added to the scaled scores before softmax.
|
None
|
Returns:
| Type |
Description |
list[list[float]]
|
A matrix of shape (n_q, d_v).
|
Source code in src\cds\nlp\attention.py
| def scaled_dot_product_attention(
q: list[list[float]],
k: list[list[float]],
v: list[list[float]],
mask: list[list[float]] | None = None,
) -> list[list[float]]:
"""Compute ``softmax(Q K^T / sqrt(d_k) + mask) V``.
Args:
q: Query matrix of shape ``(n_q, d_k)``.
k: Key matrix of shape ``(n_k, d_k)``.
v: Value matrix of shape ``(n_k, d_v)``. ``n_k`` must equal
``n_q`` for self-attention; cross-attention uses a
different ``n_k``.
mask: Optional additive mask of shape ``(n_q, n_k)``. Use
``0.0`` to keep a position and ``-inf`` to suppress it.
The mask is *added* to the scaled scores before softmax.
Returns:
A matrix of shape ``(n_q, d_v)``.
"""
if not q or not k or not v:
return []
d_k = len(q[0])
if d_k == 0:
raise ValueError("q has zero width (d_k = 0)")
n_q = len(q)
n_k = len(k)
if len(v) != n_k:
raise ValueError(f"k has {n_k} rows but v has {len(v)}")
if len(k[0]) != d_k:
raise ValueError(f"q and k widths differ: {d_k} vs {len(k[0])}")
if mask is not None and (len(mask) != n_q or len(mask[0]) != n_k):
raise ValueError(
f"mask shape {len(mask)}x{len(mask[0]) if mask else 0} "
f"does not match attention shape {n_q}x{n_k}"
)
# scores = Q K^T / sqrt(d_k) — the scaling keeps the dot products
# in a regime where softmax gradients are well-behaved.
scale = 1.0 / math.sqrt(d_k)
k_t = transpose(k)
scores = matmul(q, k_t)
for i in range(n_q):
row = scores[i]
for j in range(n_k):
row[j] = row[j] * scale + (mask[i][j] if mask is not None else 0.0)
# softmax row-wise, then multiply by V.
attn_weights = [softmax(row) for row in scores]
return matmul(attn_weights, v)
|
softmax
softmax(x: list[float]) -> list[float]
Numerically stable softmax for a 1-D list.
Subtracts the max before exp to avoid overflow on large
inputs; the resulting distribution is invariant to the shift.
Empty input returns []; the result always sums to 1.0
(within float precision).
Source code in src\cds\nlp\attention.py
def softmax(x: list[float]) -> list[float]:
    """Numerically stable softmax for a 1-D list.

    The maximum is subtracted before ``exp`` so large inputs cannot
    overflow; the resulting distribution is invariant to that shift.

    Args:
        x: Input scores.

    Returns:
        ``[]`` for empty input; otherwise a list of the same length that
        sums to 1.0 (within float precision). When the exponentials sum to
        zero or NaN — e.g. every input is ``-inf`` — a uniform
        distribution is returned so downstream matmuls stay finite.
    """
    if not x:
        return []
    peak = max(x)
    shifted = [math.exp(value - peak) for value in x]
    denom = sum(shifted)
    if denom != 0.0 and not math.isnan(denom):
        return [term / denom for term in shifted]
    # Degenerate case: ``-inf - -inf`` is NaN, so an all-``-inf`` row ends
    # up with a NaN (or zero) sum. Fall back to a uniform distribution.
    count = len(x)
    return [1.0 / count] * count
|
split_heads
split_heads(
x: list[list[float]], n_heads: int
) -> list[list[list[float]]]
Split the last dim of (n, d_model) into n_heads slices.
Returns a list [n_heads][n][d_head] where d_head = d_model /
n_heads. Equivalent to x.view(n, n_heads, d_head).transpose(0, 1)
in PyTorch's convention.
Source code in src\cds\nlp\attention.py
def split_heads(
    x: list[list[float]],
    n_heads: int,
) -> list[list[list[float]]]:
    """Split the last dim of ``(n, d_model)`` into ``n_heads`` slices.

    Head ``h`` receives columns ``h * d_head`` through
    ``(h + 1) * d_head - 1`` of every row, where
    ``d_head = d_model // n_heads``. Equivalent to
    ``x.view(n, n_heads, d_head).transpose(0, 1)`` in PyTorch's convention.

    Args:
        x: Input matrix; ``d_model`` is read from the first row.
        n_heads: Number of heads. Must be positive and divide ``d_model``.

    Returns:
        A nested list indexed ``[n_heads][n][d_head]``. For empty ``x``,
        ``n_heads`` empty lists.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide
            ``d_model``.
    """
    if n_heads <= 0:
        raise ValueError(f"n_heads must be > 0, got {n_heads}")
    if not x:
        return [[] for _ in range(n_heads)]
    d_model = len(x[0])
    if d_model % n_heads != 0:
        raise ValueError(f"d_model {d_model} is not divisible by n_heads {n_heads}")
    d_head = d_model // n_heads
    heads: list[list[list[float]]] = []
    for h in range(n_heads):
        start = h * d_head
        heads.append([[row[start + j] for j in range(d_head)] for row in x])
    return heads
|
transpose
transpose(m: list[list[float]]) -> list[list[float]]
Transpose a nested-list matrix.
Source code in src\cds\nlp\attention.py
def transpose(m: list[list[float]]) -> list[list[float]]:
    """Transpose a nested-list matrix.

    The column count is taken from the first row. An empty matrix, or
    one whose first row is empty, yields ``[]``.
    """
    if not m:
        return []
    col_count = len(m[0])
    flipped: list[list[float]] = []
    for c in range(col_count):
        flipped.append([m[r][c] for r in range(len(m))])
    return flipped
|
add
add(a: Tensor, b: Tensor) -> Tensor
a + b with reverse-mode grad ∂/∂a = ∂/∂b = out.grad.
Source code in src\cds\nlp\autograd\tensor.py
def add(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a + b`` as a new tracked :class:`Tensor`.

    Both local derivatives are 1, so the backward closure accumulates
    ``out.grad`` unchanged into ``a.grad`` and ``b.grad``.
    """
    out = Tensor(data=a.data + b.data)

    def _backward() -> None:
        a.grad += out.grad
        b.grad += out.grad

    return _track(out, (a, b), _backward)
|
div
div(a: Tensor, b: Tensor) -> Tensor
a / b with reverse-mode grad via the quotient rule.
Source code in src\cds\nlp\autograd\tensor.py
def div(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a / b`` as a new tracked :class:`Tensor`.

    The backward closure applies the quotient rule:
    ``∂/∂a = out.grad / b`` and ``∂/∂b = -a * out.grad / b²``.
    """
    out = Tensor(data=a.data / b.data)

    def _backward() -> None:
        a.grad += out.grad / b.data
        b.grad -= a.data * out.grad / (b.data * b.data)

    return _track(out, (a, b), _backward)
|
exp
exp(a) with reverse-mode grad ∂/∂a = exp(a) * out.grad.
Source code in src\cds\nlp\autograd\ops.py
def exp(a: Tensor) -> Tensor:
    """Return ``exp(a)`` as a new tracked :class:`Tensor`.

    The backward closure accumulates ``exp(a.data) * out.grad`` into
    ``a.grad``.
    """
    out = Tensor(data=math.exp(a.data))

    def _backward() -> None:
        a.grad += math.exp(a.data) * out.grad

    return _track(out, (a,), _backward)
|
log
Natural log. ∂/∂a = out.grad / a.data.
Raises :class:ValueError for non-positive input — the gradient
is undefined there.
Source code in src\cds\nlp\autograd\ops.py
def log(a: Tensor) -> Tensor:
    """Return the natural log of ``a`` as a new tracked :class:`Tensor`.

    The backward closure accumulates ``out.grad / a.data`` into ``a.grad``.

    Raises:
        ValueError: If ``a.data`` is not positive — the gradient is
            undefined there.
    """
    # Forward guard: rejects non-positive input before any node is built.
    if a.data <= 0:
        raise ValueError(f"log requires positive input, got {a.data}")
    out = Tensor(data=math.log(a.data))

    def _backward() -> None:
        # The forward guard above already rejected ``a.data <= 0``, so this
        # check is a defensive assertion for callers that bypass the public
        # ``log`` entrypoint.
        if a.data == 0.0:  # pragma: no cover
            raise ValueError("log(0) gradient is undefined")
        a.grad += out.grad / a.data

    return _track(out, (a,), _backward)
|
matmul
matmul(
a: list[list[Tensor]], b: list[list[Tensor]]
) -> list[list[Tensor]]
Matrix multiply for nested :class:Tensor matrices.
a has shape (m, p); b has shape (p, n). The result
is an (m, n) matrix whose entries are :class:Tensor nodes
connected to the inputs via the dep graph.
Implementation is the textbook triple loop. Each inner product
uses scalar autograd (one multiply + accumulate) so every entry
in the result gets a backward fn that propagates to the
contributing a and b entries.
For a 50K-param model this is the hot path — the pure-Python
implementation stays as-is; the optional cds[fast-jit] Numba
backend wraps the inner loop for ~10x speed-up without
changing the autograd semantics.
Source code in src\cds\nlp\autograd\ops.py
def matmul(a: list[list[Tensor]], b: list[list[Tensor]]) -> list[list[Tensor]]:
    """Matrix multiply for nested :class:`Tensor` matrices.

    ``a`` has shape ``(m, p)``; ``b`` has shape ``(p, n)``. The result
    is an ``(m, n)`` matrix whose entries are :class:`Tensor` nodes
    connected to the inputs via the dep graph.

    Implementation is the textbook triple loop. Each inner product
    uses scalar autograd (one multiply + accumulate) so every entry
    in the result gets a backward fn that propagates to the
    contributing ``a`` and ``b`` entries.

    For a 50K-param model this is the hot path — the pure-Python
    implementation stays as-is; the optional ``cds[fast-jit]`` Numba
    backend wraps the inner loop for ~10x speed-up without
    changing the autograd semantics.

    Args:
        a: Left operand, ``m`` rows of ``p`` tensors.
        b: Right operand, ``p`` rows of ``n`` tensors.

    Returns:
        The ``(m, n)`` product, or ``[]`` when either operand or its
        first row is empty.

    Raises:
        ValueError: If the column count of ``a`` differs from the row
            count of ``b``.
    """
    if not a or not b or not a[0] or not b[0]:
        return []
    p = len(a[0])
    if len(b) != p:
        raise ValueError(f"matmul shape mismatch: a has {p} cols, b has {len(b)} rows")
    n = len(b[0])
    # Rows are built directly. Pre-filling the result with one shared
    # placeholder Tensor would only create aliased cells that are all
    # replaced before the function returns.
    out: list[list[Tensor]] = []
    for a_row in a:
        out_row: list[Tensor] = []
        for j in range(n):
            # Each cell starts from its own constant zero accumulator.
            acc = Tensor(data=0.0, requires_grad=False)
            for k in range(p):
                # acc += a[i][k] * b[k][j]  (scalar autograd chain)
                acc = _tracked_add(acc, _tracked_mul(a_row[k], b[k][j]))
            out_row.append(acc)
        out.append(out_row)
    return out
|
mul
mul(a: Tensor, b: Tensor) -> Tensor
a * b with reverse-mode grad via the product rule.
Source code in src\cds\nlp\autograd\tensor.py
def mul(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a * b`` as a new tracked :class:`Tensor`.

    The backward closure applies the product rule: each operand's
    gradient receives the other operand's value times ``out.grad``.
    """
    out = Tensor(data=a.data * b.data)

    def _backward() -> None:
        a.grad += b.data * out.grad
        b.grad += a.data * out.grad

    return _track(out, (a, b), _backward)
|
neg
Unary negation.
Source code in src\cds\nlp\autograd\tensor.py
def neg(a: Tensor) -> Tensor:
    """Return ``-a`` as a new tracked :class:`Tensor`.

    The backward closure subtracts ``out.grad`` from ``a.grad``.
    """
    out = Tensor(data=-a.data)

    def _backward() -> None:
        a.grad -= out.grad

    return _track(out, (a,), _backward)
|
no_grad
Return a context manager that disables grad tracking.
Source code in src\cds\nlp\autograd\_grad.py
def no_grad() -> _NoGrad:
    """Return a context manager that disables grad tracking.

    Each call creates a fresh ``_NoGrad`` instance.
    """
    guard = _NoGrad()
    return guard
|
relu
relu(a: Tensor) -> Tensor
Rectified linear unit. ∂/∂a = out.grad if a > 0 else 0.
Source code in src\cds\nlp\autograd\ops.py
def relu(a: Tensor) -> Tensor:
    """Rectified linear unit: ``max(0, a)`` as a new tracked :class:`Tensor`.

    The backward closure passes ``out.grad`` through to ``a.grad`` only
    where ``a.data > 0``; otherwise it contributes nothing.
    """
    out = Tensor(data=max(0.0, a.data))

    def _backward() -> None:
        if a.data > 0:
            a.grad += out.grad

    return _track(out, (a,), _backward)
|
sub
sub(a: Tensor, b: Tensor) -> Tensor
a - b with reverse-mode grad ∂/∂a = +out.grad, ∂/∂b = -out.grad.
Source code in src\cds\nlp\autograd\tensor.py
def sub(a: Tensor, b: Tensor) -> Tensor:
    """Return ``a - b`` as a new tracked :class:`Tensor`.

    The backward closure adds ``out.grad`` to ``a.grad`` and subtracts it
    from ``b.grad``.
    """
    out = Tensor(data=a.data - b.data)

    def _backward() -> None:
        a.grad += out.grad
        b.grad -= out.grad

    return _track(out, (a, b), _backward)
|
train_bpe
train_bpe(
corpus: str,
vocab_size: int = 1000,
min_frequency: int = 2,
show_progress: bool = False,
) -> BPETokenizer
Train a BPE tokenizer on a raw text corpus.
The training procedure follows Sennrich et al. (2016) — start with
a base vocabulary of every distinct character in the corpus, then
repeatedly merge the most frequent adjacent symbol pair until the
vocabulary reaches vocab_size.
Parameters:
| Name |
Type |
Description |
Default |
corpus
|
str
|
Raw training text. Python's str handling performs no implicit Unicode
normalisation; normalise to NFC beforehand if the corpus mixes composed and decomposed forms.
|
required
|
vocab_size
|
int
|
Target vocabulary size (including the four reserved
special tokens). The actual vocabulary may be smaller if
the corpus runs out of mergeable pairs — training stops early once
no pair occurs at least min_frequency times.
|
1000
|
min_frequency
|
int
|
Stop merging when the most frequent pair has
count < this (pairs seen exactly min_frequency times are still merged). Prevents pathological merges from rare noise.
|
2
|
show_progress
|
bool
|
If True, print each merge as it happens. Off by
default for clean test output.
|
False
|
Returns:
| Type |
Description |
BPETokenizer
|
A fully populated :class:BPETokenizer.
|
Raises:
| Type |
Description |
ValueError
|
If corpus is empty or vocab_size < 5 (must
fit the four reserved tokens + at least one real token).
|
Example
tk = train_bpe("low low low lower lower newest newest newest", vocab_size=20)
"low" in tk.vocab and "est" in tk.vocab
True
Source code in src\cds\nlp\bpe.py
def train_bpe(
    corpus: str,
    vocab_size: int = 1000,
    min_frequency: int = 2,
    show_progress: bool = False,
) -> BPETokenizer:
    """Train a BPE tokenizer on a raw text corpus.

    The training procedure follows Sennrich et al. (2016) — start with
    a base vocabulary of every distinct character in the corpus, then
    repeatedly merge the most frequent adjacent symbol pair until the
    vocabulary reaches ``vocab_size``.

    Args:
        corpus: Raw training text. This function applies no Unicode
            normalisation of its own — Python's ``str`` does not normalise
            implicitly — so normalise beforehand (e.g. to NFC) if needed.
        vocab_size: Target vocabulary size (including the four reserved
            special tokens). The actual vocabulary may be smaller:
            training stops early when no adjacent pair is left or the
            most frequent pair occurs fewer than ``min_frequency`` times.
            No merges are learned if the base vocabulary already reaches
            ``vocab_size``.
        min_frequency: Minimum count a pair needs in order to be merged.
            Merging stops once the most frequent pair has count < this.
            Prevents pathological merges from rare noise.
        show_progress: If True, print each merge as it happens. Off by
            default for clean test output.

    Returns:
        A fully populated :class:`BPETokenizer`.

    Raises:
        ValueError: If ``corpus`` is empty or ``vocab_size < 5`` (must
            fit the four reserved tokens + at least one real token).

    Example:
        >>> tk = train_bpe("low low low lower lower newest newest newest", vocab_size=20)
        >>> "low" in tk.vocab and "est" in tk.vocab
        True
    """
    if not corpus:
        raise ValueError("Cannot train BPE on an empty corpus")
    if vocab_size < 5:
        raise ValueError(f"vocab_size must be >= 5 (4 specials + 1 real), got {vocab_size}")
    # 1. Base vocabulary: the special tokens first, then every distinct
    #    character of the corpus in sorted order.
    base_chars = sorted(set(corpus))
    vocab: dict[str, int] = {tok: idx for idx, tok in enumerate(SPECIAL_TOKENS)}
    next_id = len(SPECIAL_TOKENS)
    for ch in base_chars:
        # SPECIAL_TOKENS are all multi-char strings, so a single ``ch`` can
        # never collide with them — the False branch is unreachable. Kept as a
        # defensive guard against a future single-char special token.
        if ch not in vocab:  # pragma: no branch
            vocab[ch] = next_id
            next_id += 1
    # 2. Tokenise corpus into per-word symbol sequences.
    words = _pre_tokenize(corpus)
    if not words:
        # Corpus contains only whitespace / punctuation.
        return BPETokenizer(vocab=vocab, merges=[])
    corpus_symbols: list[tuple[str, ...]] = [_word_to_symbols(w) for w in words]
    # 3. Iteratively merge the most frequent pair.
    merges: list[BPEMerge] = []
    while len(vocab) < vocab_size:
        stats = _get_pair_stats(corpus_symbols)
        if not stats:
            break
        best_pair, best_count = stats.most_common(1)[0]
        # Pairs seen exactly ``min_frequency`` times are still merged.
        if best_count < min_frequency:
            break
        new_token = best_pair[0] + best_pair[1]
        # The merged token is expected to be new to the vocabulary, so the
        # False branch is treated as unreachable and kept defensively.
        # NOTE(review): two different pairs can concatenate to the same
        # string — confirm this really cannot happen.
        if new_token not in vocab:  # pragma: no branch
            vocab[new_token] = next_id
            next_id += 1
        merges.append(BPEMerge(pair=best_pair, rank=len(merges), new_token=new_token))
        if show_progress:
            print(f"merge {len(merges):>4}: {best_pair!r} -> {new_token!r} (count={best_count})")
        corpus_symbols = _merge_pair(corpus_symbols, best_pair)
    return BPETokenizer(vocab=vocab, merges=merges)
|
add_positional
add_positional(
token_embeddings: list[list[float]],
positional: PositionalEncoding,
) -> list[list[float]]
Add a positional encoding to a sequence of token embeddings.
Element-wise: out[i][j] = token[i][j] + pos[i][j]. Used in the
Transformer encoder block to inject position information.
Parameters:
| Name |
Type |
Description |
Default |
token_embeddings
|
list[list[float]]
|
An n × d_model matrix (nested list).
|
required
|
positional
|
PositionalEncoding
|
A :class:PositionalEncoding whose d_model
matches the token embedding width.
|
required
|
Returns:
| Type |
Description |
list[list[float]]
|
A new n × d_model matrix (input is not mutated).
|
Source code in src\cds\nlp\embed.py
def add_positional(
    token_embeddings: list[list[float]],
    positional: PositionalEncoding,
) -> list[list[float]]:
    """Add a positional encoding to a sequence of token embeddings.

    Element-wise: ``out[i][j] = token[i][j] + pos[i][j]``, where ``pos``
    is ``positional.forward(n)``. Used in the Transformer encoder block to
    inject position information.

    Args:
        token_embeddings: An ``n × d_model`` matrix (nested list); the
            width is read from the first row.
        positional: A :class:`PositionalEncoding` whose ``d_model``
            matches the token embedding width.

    Returns:
        A new ``n × d_model`` matrix (input is not mutated); ``[]`` for
        empty input.

    Raises:
        ValueError: If the embedding width differs from
            ``positional.d_model``.
    """
    if not token_embeddings:
        return []
    seq_len = len(token_embeddings)
    width = len(token_embeddings[0])
    if width != positional.d_model:
        raise ValueError(
            f"d_model mismatch: token embedding {width} != positional {positional.d_model}"
        )
    pos = positional.forward(seq_len)
    combined: list[list[float]] = []
    for i in range(seq_len):
        token_row = token_embeddings[i]
        pos_row = pos[i]
        combined.append([token_row[j] + pos_row[j] for j in range(width)])
    return combined
|
feed_forward
feed_forward(
x: list[list[float]],
w1: list[list[float]],
b1: list[float],
w2: list[list[float]],
b2: list[float],
) -> list[list[float]]
Two-layer position-wise FFN with GeLU.
FFN(x) = (GeLU(x W1 + b1)) W2 + b2
Parameters:
| Name |
Type |
Description |
Default |
x
|
list[list[float]]
|
Input, shape (n, d_model).
|
required
|
w1
|
list[list[float]]
|
First weight matrix, shape (d_model, d_ff).
|
required
|
b1
|
list[float]
|
First bias, length d_ff.
|
required
|
w2
|
list[list[float]]
|
Second weight matrix, shape (d_ff, d_model).
|
required
|
b2
|
list[float]
|
Second bias, length d_model.
|
required
|
Source code in src\cds\nlp\layers.py
def feed_forward(
    x: list[list[float]],
    w1: list[list[float]],
    b1: list[float],
    w2: list[list[float]],
    b2: list[float],
) -> list[list[float]]:
    """Two-layer position-wise FFN with GeLU.

    ``FFN(x) = (GeLU(x W1 + b1)) W2 + b2``

    Args:
        x: Input, shape ``(n, d_model)``.
        w1: First weight matrix, shape ``(d_model, d_ff)``.
        b1: First bias, length ``d_ff``.
        w2: Second weight matrix, shape ``(d_ff, d_model)``.
        b2: Second bias, length ``d_model``.

    Returns:
        Output, shape ``(n, d_model)``; ``[]`` for empty ``x``.

    Raises:
        ValueError: If ``b2``, ``w1`` or ``w2`` does not have the shape
            implied by ``x`` and ``b1``. Matrix widths are read from the
            first row only.
    """
    if not x:
        return []
    d_model = len(x[0])
    if len(b2) != d_model:
        raise ValueError(f"b2 length {len(b2)} != d_model {d_model}")
    d_ff = len(b1)
    # Read widths defensively so an empty matrix yields a shape error
    # instead of an IndexError on ``w[0]``.
    w1_cols = len(w1[0]) if w1 else 0
    if len(w1) != d_model or w1_cols != d_ff:
        raise ValueError(f"w1 shape {len(w1)}x{w1_cols} != expected ({d_model}, {d_ff})")
    # Validate the second layer up front as well, so a bad ``w2`` is
    # reported in the same style rather than failing after the first layer.
    w2_cols = len(w2[0]) if w2 else 0
    if len(w2) != d_ff or (len(w2) > 0 and w2_cols != d_model):
        raise ValueError(f"w2 shape {len(w2)}x{w2_cols} != expected ({d_ff}, {d_model})")
    # First layer: affine map followed by GeLU, applied in place on ``h``.
    h = matmul(x, w1)
    for row in h:
        for j in range(d_ff):
            row[j] = gelu(row[j] + b1[j])
    # Second layer: affine map back to ``d_model``.
    out = matmul(h, w2)
    for row in out:
        for j in range(d_model):
            row[j] = row[j] + b2[j]
    return out
|
gelu
Exact Gaussian Error Linear Unit activation.
GELU(x) = x * Phi(x) where Phi is the standard normal CDF.
Computed via 0.5 * x * (1 + erf(x / sqrt(2))) for numerical
accuracy. The tanh approximation used in some papers
(0.5 x (1 + tanh(...))) is faster but slightly inexact; speed
is not a concern for educational use, so the exact form is used
here.
Source code in src\cds\nlp\layers.py
def gelu(x: float) -> float:
    """Exact Gaussian Error Linear Unit activation.

    ``GELU(x) = x * Phi(x)`` where ``Phi`` is the standard normal CDF,
    evaluated as ``0.5 * x * (1 + erf(x / sqrt(2)))``. The tanh
    approximation found in some papers is faster but slightly inexact;
    the exact ``erf`` form is used here.
    """
    half_x = 0.5 * x
    cdf_term = 1.0 + math.erf(x / math.sqrt(2.0))
    return half_x * cdf_term
|
layer_norm
layer_norm(
x: list[list[float]],
gamma: list[float],
beta: list[float],
eps: float = LAYERNORM_EPS,
) -> list[list[float]]
Layer normalisation over the last dimension.
For each row of x:
mean = E[x]
var = E[(x - mean)^2]
y = gamma * (x - mean) / sqrt(var + eps) + beta
Parameters:
| Name |
Type |
Description |
Default |
x
|
list[list[float]]
|
Input, shape (n, d).
|
required
|
gamma
|
list[float]
|
Per-feature scale, length d.
|
required
|
beta
|
list[float]
|
Per-feature shift, length d.
|
required
|
eps
|
float
|
Variance floor for numerical stability.
|
LAYERNORM_EPS
|
Source code in src\cds\nlp\layers.py
def layer_norm(
    x: list[list[float]],
    gamma: list[float],
    beta: list[float],
    eps: float = LAYERNORM_EPS,
) -> list[list[float]]:
    """Layer normalisation over the last dimension.

    For each row of ``x``:
        mean = E[x]
        var = E[(x - mean)^2]
        y = gamma * (x - mean) / sqrt(var + eps) + beta

    Args:
        x: Input, shape ``(n, d)``; ``d`` is read from the first row.
        gamma: Per-feature scale, length ``d``.
        beta: Per-feature shift, length ``d``.
        eps: Variance floor for numerical stability.

    Returns:
        A new ``(n, d)`` matrix; ``[]`` for empty ``x``. Zero-width rows
        are returned as empty rows.

    Raises:
        ValueError: If ``gamma`` or ``beta`` does not have length ``d``.
    """
    if not x:
        return []
    d = len(x[0])
    if len(gamma) != d or len(beta) != d:
        raise ValueError(f"gamma/beta length {len(gamma)}/{len(beta)} != feature dim {d}")
    if d == 0:
        # Nothing to normalise; also avoids dividing by a zero feature count.
        return [[] for _ in x]
    # The reciprocal of the feature count is the same for every row.
    inv_d = 1.0 / d
    out: list[list[float]] = []
    for row in x:
        mean = sum(row) * inv_d
        var = sum((xi - mean) ** 2 for xi in row) * inv_d
        std = math.sqrt(var + eps)
        out.append([gamma[j] * (row[j] - mean) / std + beta[j] for j in range(d)])
    return out
|
transformer_block
transformer_block(
x: list[list[float]],
attn_weights: AttentionWeights,
ffn_weights: FeedForwardWeights,
n_heads: int,
mask: list[list[float]] | None = None,
prenorm: bool = True,
) -> list[list[float]]
One Transformer encoder block.
Parameters:
| Name |
Type |
Description |
Default |
x
|
list[list[float]]
|
Input sequence, shape (n, d_model).
|
required
|
attn_weights
|
AttentionWeights
|
AttentionWeights dict (the w_q, w_k, w_v, w_o projections plus the ln1/ln2 gamma and beta LayerNorm parameters).
|
required
|
ffn_weights
|
FeedForwardWeights
|
FeedForwardWeights dict (w1, b1, w2, b2).
|
required
|
n_heads
|
int
|
Number of attention heads.
|
required
|
mask
|
list[list[float]] | None
|
Optional additive attention mask.
|
None
|
prenorm
|
bool
|
If True (default), apply LayerNorm before attention
and FFN (pre-norm, the default in most modern code). If False, use
post-norm as in the original Vaswani et al. 2017 paper; pre-norm trains more stably.
|
True
|
Returns:
| Type |
Description |
list[list[float]]
|
Output sequence, shape (n, d_model).
|
Source code in src\cds\nlp\layers.py
def transformer_block(
    x: list[list[float]],
    attn_weights: AttentionWeights,
    ffn_weights: FeedForwardWeights,
    n_heads: int,
    mask: list[list[float]] | None = None,
    prenorm: bool = True,
) -> list[list[float]]:
    """Apply a single Transformer encoder block to a sequence.

    The block is two residual sub-layers in order: multi-head attention,
    then the feed-forward network. ``prenorm`` decides where LayerNorm sits
    relative to each sub-layer.

    Args:
        x: Input sequence, shape ``(n, d_model)``.
        attn_weights: :class:`AttentionWeights` dict. Supplies the
            ``w_q``/``w_k``/``w_v``/``w_o`` projections and both LayerNorm
            parameter pairs (``ln1_*`` and ``ln2_*``).
        ffn_weights: :class:`FeedForwardWeights` dict (``w1``, ``b1``,
            ``w2``, ``b2``).
        n_heads: Number of attention heads.
        mask: Optional additive attention mask.
        prenorm: If True (default), normalise the *input* of each sub-layer
            and add the sub-layer output back onto the un-normalised
            residual (pre-norm). If False, normalise the *sum* of residual
            and sub-layer output (post-norm, the original Vaswani et al.
            2017 arrangement, which is less stable for deep stacks).

    Returns:
        Output sequence, shape ``(n, d_model)``. An empty ``x`` returns ``[]``.
    """
    if not x:
        return []

    # Both LayerNorm parameter pairs live in ``attn_weights``; read them
    # before any sub-layer runs.
    norm1 = (attn_weights["ln1_gamma"], attn_weights["ln1_beta"])
    norm2 = (attn_weights["ln2_gamma"], attn_weights["ln2_beta"])

    def attend(seq: list[list[float]]) -> list[list[float]]:
        # Self-attention sub-layer over ``seq``.
        return multi_head_attention(
            seq,
            attn_weights["w_q"],
            attn_weights["w_k"],
            attn_weights["w_v"],
            attn_weights["w_o"],
            n_heads,
            mask,
        )

    def feed(seq: list[list[float]]) -> list[list[float]]:
        # Feed-forward sub-layer over ``seq``.
        return feed_forward(
            seq,
            ffn_weights["w1"],
            ffn_weights["b1"],
            ffn_weights["w2"],
            ffn_weights["b2"],
        )

    if not prenorm:
        # Post-norm: LayerNorm wraps each residual sum. Kept for completeness
        # in the educational track.
        after_attn = layer_norm(_add(x, attend(x)), *norm1)
        return layer_norm(_add(after_attn, feed(after_attn)), *norm2)

    # Pre-norm: each sub-layer sees a normalised copy; the residual stream
    # itself is never normalised.
    after_attn = _add(x, attend(layer_norm(x, *norm1)))
    return _add(after_attn, feed(layer_norm(after_attn, *norm2)))
|
parameters
parameters(items: Iterable[Tensor]) -> list[Tensor]
Collect trainable tensors from a model.
Convenience helper — many models store their weights in
dictionaries or lists; this filters to requires_grad=True
in one call.
Source code in src\cds\nlp\optim.py
def parameters(items: Iterable[Tensor]) -> list[Tensor]:
    """Return the trainable tensors found in ``items``.

    Walks ``items`` once and keeps, in their original order, the entries
    whose ``requires_grad`` attribute is truthy. Handy when a model keeps
    its weights in a list or in a dict's values.
    """
    trainable: list[Tensor] = []
    for candidate in items:
        if candidate.requires_grad:
            trainable.append(candidate)
    return trainable
|
cross_entropy
cross_entropy(
logits: list[float] | list[Tensor], target: int
) -> Tensor
Softmax + negative log-likelihood for one example.
Computes -log(softmax(logits)[target]) in a numerically stable
form (the max logit is subtracted before exponentiating). The result
is a cds.nlp.autograd.Tensor so the optimiser can
backpropagate through it.
Parameters:
| Name |
Type |
Description |
Default |
logits
|
list[float] | list[Tensor]
|
Output of the model's final linear layer (unnormalised
log-probabilities), length V for vocab size V. May
be a list of Python floats (no autograd — useful for
sanity checks) or cds.nlp.autograd.Tensor values
(loss is connected to the autograd graph).
|
required
|
target
|
int
|
Index of the correct next token in [0, V).
|
required
|
Returns:
| Type |
Description |
Tensor
|
Scalar Tensor — the cross-entropy loss for this example. backward() on it populates gradients on every model parameter that contributed (only if the logits were Tensors).
|
Source code in src\cds\nlp\training.py
def cross_entropy(
    logits: list[float] | list[Tensor],
    target: int,
) -> Tensor:
    """Cross-entropy loss (softmax + negative log-likelihood) for one example.

    Evaluates ``-log(softmax(logits)[target])`` as
    ``logsumexp(logits) - logits[target]``, shifting by the largest logit
    before exponentiating so the exponentials cannot overflow.

    Args:
        logits: Output of the model's final linear layer, length ``V`` for
            vocab size ``V``. Entries may be plain Python floats (the
            result is then built only from ``requires_grad=False``
            constants) or :class:`cds.nlp.autograd.Tensor` values (the
            result stays connected to their autograd graph).
        target: Index of the correct next token in ``[0, V)``.

    Returns:
        Scalar :class:`Tensor` holding the loss for this example.

    Raises:
        ValueError: if ``logits`` is empty or ``target`` is out of range.
    """
    # Imported at call time so the autograd module is not pulled into the
    # public ``cds.nlp`` namespace through this file.
    from cds.nlp.autograd import Tensor, exp, log

    if not logits:
        raise ValueError("cross_entropy: logits is empty")
    target_in_range = 0 <= target < len(logits)
    if not target_in_range:
        raise ValueError(f"cross_entropy: target {target} out of range [0, {len(logits)})")

    # Plain-float view of the logits: Tensors contribute ``.data``, anything
    # else is coerced with ``float``.
    values = [item.data if isinstance(item, Tensor) else float(item) for item in logits]
    peak = max(values)
    peak_const = Tensor(data=peak, requires_grad=False)

    # Accumulate sum(exp(logit - peak)). Tensor logits go through the
    # operator overload so the graph is kept; float logits become no-grad
    # constants.
    total = Tensor(data=0.0, requires_grad=False)
    for item, value in zip(logits, values):
        if isinstance(item, Tensor):
            term = item - peak_const
        else:
            term = Tensor(data=value - peak, requires_grad=False)
        total = total + exp(term)
    log_sum_exp = peak_const + log(total)

    # loss = logsumexp - logit[target]; a float target logit is wrapped as a
    # constant so the subtraction is still Tensor arithmetic.
    picked = logits[target]
    if not isinstance(picked, Tensor):
        picked = Tensor(data=values[target], requires_grad=False)
    return log_sum_exp - picked
|
train_step
train_step(
model_fn: Callable[
[list[int]], list[float] | list[Tensor]
],
x: list[int],
y: int,
optimiser: SGD | Adam,
) -> float
Run one training step on a single example.
Performs:
1. logits = model_fn(x) (the user-supplied forward pass)
2. loss = cross_entropy(logits, y)
3. optimiser.zero_grad(); loss.backward(); optimiser.step()
The model's parameters must be exposed somewhere the optimiser
can see them — typically by collecting them into a list at
construction time and passing that list to the optimiser.
Parameters:
| Name |
Type |
Description |
Default |
model_fn
|
Callable[[list[int]], list[float] | list[Tensor]]
|
Pure function x -> logits for one example. The
autograd graph is built inside this function (when the
return type is list[Tensor]); the function should
return the model's pre-softmax output for the next-token
prediction.
|
required
|
x
|
list[int]
|
Input token ids (length T for a T-token context).
|
required
|
y
|
int
|
Target next-token id.
|
required
|
optimiser
|
SGD | Adam
|
A cds.nlp.optim.SGD or Adam optimiser whose
params list contains every Parameter reachable
from model_fn(x).
|
required
|
Returns:
| Type |
Description |
float
|
The loss as a plain Python float (snapshot of loss.data).
|
Source code in src\cds\nlp\training.py
def train_step(
    model_fn: Callable[[list[int]], list[float] | list[Tensor]],
    x: list[int],
    y: int,
    optimiser: SGD | Adam,
) -> float:
    """Run a single-example optimisation step and report its loss.

    Sequence of operations:

    1. ``optimiser.zero_grad()`` — clear gradients from any previous step.
    2. ``model_fn(x)`` — the caller-supplied forward pass.
    3. ``cross_entropy(logits, y)`` — scalar loss.
    4. ``loss.backward()`` then ``optimiser.step()``.

    The optimiser must already hold the model's parameters; this function
    never discovers them on its own.

    Args:
        model_fn: Function ``x -> logits`` for one example, returning the
            model's pre-softmax output for the next-token prediction. It
            must return ``Tensor`` logits for training to be possible.
        x: Input token ids (length ``T`` for a ``T``-token context).
        y: Target next-token id.
        optimiser: :class:`cds.nlp.optim.SGD` or :class:`Adam` whose
            ``params`` list contains every :class:`Parameter` reachable
            from ``model_fn(x)``.

    Returns:
        The loss as a plain Python float (snapshot of ``loss.data``).

    Raises:
        RuntimeError: if the loss does not require grad, i.e. ``model_fn``
            returned plain floats. Gradients have already been zeroed by
            then, but no parameter update is applied.
    """
    optimiser.zero_grad()
    objective = cross_entropy(model_fn(x), y)
    if objective.requires_grad:
        objective.backward()
        optimiser.step()
        return float(objective.data)
    # A loss detached from the graph cannot update anything; fail loudly.
    raise RuntimeError(
        "train_step: model_fn returned plain floats — autograd "
        "needs the forward pass to produce Tensor logits so the "
        "loss can chain back to model parameters."
    )
|
render_attention_heatmap
render_attention_heatmap(
attn_weights: Sequence[Sequence[float]],
row_tokens: Sequence[str],
col_tokens: Sequence[str],
) -> str
Render an attention matrix as an ASCII heatmap.
Parameters:
| Name |
Type |
Description |
Default |
attn_weights
|
Sequence[Sequence[float]]
|
[rows][cols] matrix of attention weights. Values are
min-max normalised per render, so any real range works; rows are
expected to sum to ~1 (softmaxed) but this is not enforced.
|
required
|
row_tokens
|
Sequence[str]
|
one label per row (e.g. query tokens).
|
required
|
col_tokens
|
Sequence[str]
|
one label per column (e.g. key tokens).
|
required
|
Returns:
| Type |
Description |
str
|
A multi-line str: a header row of column tokens, then one line per row token followed by its shaded cells.
|
Raises:
| Type |
Description |
ValueError
|
if the matrix / label shapes do not line up.
|
Source code in src\cds\nlp\viz.py
def render_attention_heatmap(
    attn_weights: Sequence[Sequence[float]],
    row_tokens: Sequence[str],
    col_tokens: Sequence[str],
) -> str:
    """Render an attention matrix as an ASCII heatmap.

    Args:
        attn_weights: ``[rows][cols]`` matrix of attention weights. The
            global minimum and range of the matrix are passed to the
            shading helper, so any real range works; rows are expected to
            sum to ~1 (softmaxed) but this is not enforced.
        row_tokens: one label per row (e.g. query tokens).
        col_tokens: one label per column (e.g. key tokens).

    Returns:
        A multi-line ``str`` ending in a newline: a header row of column
        tokens, a separator rule, then one line per row token followed by
        its shaded cells.

    Raises:
        ValueError: if the matrix is empty, if the label counts do not
            match the matrix dimensions, or if any row's length differs
            from the first row's.
    """
    if not attn_weights or not attn_weights[0]:
        raise ValueError("attn_weights must be a non-empty [rows][cols] matrix")
    rows = len(attn_weights)
    cols = len(attn_weights[0])
    if len(row_tokens) != rows:
        raise ValueError(f"row_tokens length {len(row_tokens)} != rows {rows}")
    if len(col_tokens) != cols:
        raise ValueError(f"col_tokens length {len(col_tokens)} != cols {cols}")
    # Only row 0 fixes ``cols`` above; a ragged matrix would otherwise render
    # with cells that no longer line up under the header.
    for i, r in enumerate(attn_weights):
        if len(r) != cols:
            raise ValueError(f"attn_weights row {i} has length {len(r)} != cols {cols}")
    flat = [w for r in attn_weights for w in r]
    lo, hi = min(flat), max(flat)
    span = hi - lo
    # Column widths: row labels are right-aligned to the longest row token,
    # cells are sized to the longest column token.
    label_w = max(len(t) for t in row_tokens)
    header_w = max(len(t) for t in col_tokens)
    header = " " * label_w + " | " + " ".join(f"{t:>{header_w}}" for t in col_tokens)
    sep = "-" * label_w + "-+-" + "-" * (cols * (header_w + 1) - 1)
    lines = [header, sep]
    for label, weights in zip(row_tokens, attn_weights):
        # ``_shade`` is defined elsewhere in this module; it receives the
        # weight, the matrix minimum and the range, and its result is centred
        # in the column.
        cells = " ".join(_shade(w, lo, span).center(header_w) for w in weights)
        lines.append(f"{label:>{label_w}} | {cells}")
    return "\n".join(lines) + "\n"
|
render_embedding_projection
render_embedding_projection(
embeddings: Sequence[Sequence[float]],
labels: Sequence[str] | None = None,
top_n: int = 10,
width: int = 50,
height: int = 12,
) -> str
Render a 2-D PCA scatter of embedding vectors as ASCII.
Parameters:
| Name |
Type |
Description |
Default |
embeddings
|
Sequence[Sequence[float]]
|
[n_vectors][dim] matrix.
|
required
|
labels
|
Sequence[str] | None
|
optional per-vector label. If None, the row index is used.
|
None
|
top_n
|
int
|
render at most this many points (highest-variance first along PC1)
so large vocabularies stay readable. <= 0 renders all.
|
10
|
width
|
int
|
canvas width in characters.
|
50
|
height
|
int
|
canvas height in characters.
|
12
|
Returns:
| Type |
Description |
str
|
A multi-line str with x/y axis labels and one character per point.
|
Source code in src\cds\nlp\viz.py
| def render_embedding_projection(
embeddings: Sequence[Sequence[float]],
labels: Sequence[str] | None = None,
top_n: int = 10,
width: int = 50,
height: int = 12,
) -> str:
"""Render a 2-D PCA scatter of embedding vectors as ASCII.
Args:
embeddings: ``[n_vectors][dim]`` matrix.
labels: optional per-vector label. If ``None``, the row index is used.
top_n: render at most this many points (highest-variance first along PC1)
so large vocabularies stay readable. ``<= 0`` renders all.
width: canvas width in characters.
height: canvas height in characters.
Returns:
A multi-line ``str`` with x/y axis labels and one character per point.
"""
if not embeddings or not embeddings[0]:
raise ValueError("embeddings must be a non-empty [n][d] matrix")
if top_n <= 0:
top_n = len(embeddings)
if labels is not None and len(labels) != len(embeddings):
raise ValueError("labels length must match number of embeddings")
pts = _pca_2d(embeddings)
# Keep the ``top_n`` points with the largest |PC1| so the spread is visible.
# Tag each projected point with its original row index so a legend can map
# back to ``labels`` after the descending-|PC1| sort reorders them.
indexed = sorted(
((p, k) for k, p in enumerate(pts)),
key=lambda item: abs(item[0][0]),
reverse=True,
)[:top_n]
if labels is not None:
labels = list(labels)
else:
labels = [str(i) for i in range(len(embeddings))]
xs = [p[0] for p, _ in indexed]
ys = [p[1] for p, _ in indexed]
xlo, xhi = min(xs), max(xs)
ylo, yhi = min(ys), max(ys)
xspan = xhi - xlo if xhi > xlo else 1.0
yspan = yhi - ylo if yhi > ylo else 1.0
grid: list[list[str]] = [[" "] * width for _ in range(height)]
marks = "o*+x#@%&123456789abcdefghijklmnopqrstuvwxyz"
legend: list[str] = []
for k, ((x, y), orig_idx) in enumerate(indexed):
col = int((x - xlo) / xspan * (width - 1))
# Invert y so larger PC2 is at the top.
row = int((yhi - y) / yspan * (height - 1))
col = max(0, min(width - 1, col))
row = max(0, min(height - 1, row))
mark = marks[k % len(marks)]
grid[row][col] = mark
legend.append(f"{mark}={labels[orig_idx]}")
lines: list[str] = []
lines.append(f"PC2 {yhi:.3g} |" + "".join(grid[0]))
for r in range(1, height):
lines.append(" " * (3 + len(f"{yhi:.3g}")) + "|" + "".join(grid[r]))
lines.append(" " * (3 + len(f"{yhi:.3g}")) + "+" + "-" * width)
# Right-align the x-axis hi label. pad = width - (label widths already on
# the line); clamp at 0 so a tiny width never produces a negative format
# width ("Sign not allowed in string format specifier").
left = f"{'PC1':>{3 + len(f'{yhi:.3g}')}} {xlo:.3g}"
pad = max(0, width - len(left) - len(f"{xhi:.3g}"))
lines.append(f"{left}{'':>{pad}}{xhi:.3g}")
# Mark→label legend so caller-supplied ``labels`` are actually surfaced.
# ``legend`` gets one entry per plotted point, and ``indexed`` is always
# non-empty (top_n falls back to len(embeddings) ≥ 1), so the False branch
# is unreachable — kept defensively against a future top_n==0 path.
if legend: # pragma: no branch
lines.append(" legend: " + " ".join(legend))
return "\n".join(lines) + "\n"
|
render_training_curve
render_training_curve(
losses: Sequence[float],
width: int = 50,
height: int = 10,
) -> str
Render an ASCII loss curve.
Parameters:
| Name |
Type |
Description |
Default |
losses
|
Sequence[float]
|
per-step training losses (monotonic-decreasing looks best,
but any sequence is accepted; a single point renders as one cell).
|
required
|
width
|
int
|
plot width in characters (>= 1).
|
50
|
height
|
int
|
plot height in characters (>= 1).
|
10
|
Returns:
| Type |
Description |
str
|
A multi-line str with y-axis label, the curve, and an x-axis showing the step range. Always ends with a trailing newline so it composes cleanly under print().
|
Source code in src\cds\nlp\viz.py
def render_training_curve(
    losses: Sequence[float],
    width: int = 50,
    height: int = 10,
) -> str:
    """Draw a sequence of loss values as an ASCII plot.

    The series is resampled to exactly ``width`` columns, and each column
    gets one ``*`` at the row matching its loss. Because every column is
    filled, a single-value or flat series draws a full row of ``*`` along
    the top of the plot.

    Args:
        losses: per-step training losses; any non-empty sequence works.
        width: plot width in characters (>= 1).
        height: plot height in characters (>= 1).

    Returns:
        A multi-line ``str``: the plot rows (the first carries the maximum
        loss as its y-axis label), an axis rule, and a ``step 0 ... N`` line
        with the last step index. Always ends with a newline so it composes
        cleanly under ``print()``.

    Raises:
        ValueError: if ``width`` or ``height`` is below 1, or ``losses`` is
            empty.
    """
    if width < 1 or height < 1:
        raise ValueError("width and height must be >= 1")
    if not losses:
        raise ValueError("losses must contain at least one value")

    bottom, top = min(losses), max(losses)
    # A flat curve has zero range; fall back to 1.0 so the row formula never
    # divides by zero.
    scale = top - bottom if top > bottom else 1.0
    count = len(losses)
    last_row = height - 1
    col_divisor = max(1, width - 1)

    canvas: list[list[str]] = [[" "] * width for _ in range(height)]
    for c in range(width):
        # Map this column to a position in the series, then to a plot row.
        src = int(c * (count - 1) / col_divisor) if count > 1 else 0
        # High loss lands on row 0 (top), low loss on the bottom row.
        r = int((top - losses[src]) / scale * last_row)
        canvas[max(0, min(last_row, r))][c] = "*"

    y_label = f"{top:.4g}"
    margin = " " * len(y_label)
    out: list[str] = [f"{y_label} |" + "".join(canvas[0])]
    out.extend(margin + " |" + "".join(cells) for cells in canvas[1:])
    out.append(margin + " +" + "-" * width)

    # Pad between "step 0" and the final step index so the whole line is
    # ``width`` characters wide when that fits; the padding is clamped at 0
    # for narrow plots.
    final_step = max(0, count - 1)
    fill = max(0, width - len(f"step 0{final_step}"))
    out.append("step 0" + " " * fill + str(final_step))
    return "\n".join(out) + "\n"
|