# Source code for agent_urban_planning.core.run_metadata

"""Per-run metadata tracking and cost estimation.

Captures everything needed to make a simulation run reproducible from the
output JSON: scenario / policy / seed, LLM provider details, performance
counters (call count, cache hit rate, wall-clock time), clustering
configuration (algorithm + k + assignments), and an estimated USD cost
based on a static per-model price table.

Saved automatically alongside results when LLM mode or clustering is used,
or on demand via `--save-metadata`.
"""

from __future__ import annotations

import json
import os
import time
import uuid
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional


# ------------------------------------------------------------------
# Cost table
# ------------------------------------------------------------------

# Approximate USD price per 1,000 tokens, keyed by (provider, model).
# The figures are rough public list prices and should be treated as
# order-of-magnitude estimates only. Each value is a pair:
#   (input_per_1k, output_per_1k)
# Keys must be lowercase: compute_cost() lowercases the provider and model
# before looking them up, and returns 0.0 for any pair not listed here.
LLM_COST_TABLE: dict[tuple[str, str], tuple[float, float]] = {
    # Z.ai GLM family — community/coding tier estimates
    ("zai-coding", "glm-4.7"): (0.0006, 0.0022),
    ("zai-coding", "glm-4.6"): (0.0006, 0.0022),
    ("zai-coding", "glm-4.5"): (0.0005, 0.0015),
    ("zai-coding", "glm-4.5-air"): (0.0002, 0.0006),
    # Anthropic
    ("anthropic", "claude-haiku-4-5-20251001"): (0.001, 0.005),
    ("anthropic", "claude-sonnet-4-6"): (0.003, 0.015),
    ("anthropic", "claude-opus-4-6"): (0.015, 0.075),
    # OpenAI
    ("openai", "gpt-4o-mini"): (0.00015, 0.0006),
    ("openai", "gpt-4o"): (0.0025, 0.010),
}


def compute_cost(
    provider: Optional[str],
    model: Optional[str],
    input_tokens: int,
    output_tokens: int,
) -> float:
    """Estimate the USD cost of a run from its token counts.

    The (provider, model) pair is lowercased and looked up in
    ``LLM_COST_TABLE``; the per-1k-token rates found there are applied to
    the input and output token counts separately and summed.

    Args:
        provider: LLM provider name, matched case-insensitively.
        model: Model identifier, matched case-insensitively.
        input_tokens: Number of prompt tokens consumed.
        output_tokens: Number of completion tokens produced.

    Returns:
        Estimated cost in USD, or ``0.0`` when the provider or model is
        missing/empty or the pair has no entry in the price table.
    """
    # Nothing to price without both halves of the lookup key.
    if not (provider and model):
        return 0.0

    rates = LLM_COST_TABLE.get((provider.lower(), model.lower()))
    if rates is None:
        return 0.0

    input_rate, output_rate = rates
    input_cost = (input_tokens / 1000.0) * input_rate
    output_cost = (output_tokens / 1000.0) * output_rate
    return input_cost + output_cost


# ------------------------------------------------------------------
# RunMetadata dataclass
# ------------------------------------------------------------------


def _new_run_id() -> str:
    """Generate a unique run id: timestamp + short random suffix."""
    ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    suffix = uuid.uuid4().hex[:8]
    return f"{ts}_{suffix}"



@dataclass
class RunMetadata:
    """Reproducibility metadata for a single simulation run.

    Captures everything needed to make a run reproducible from the
    output JSON: scenario / policy / seed, LLM provider details,
    performance counters (call count, cache hit rate, wall-clock time),
    clustering configuration (algorithm + ``k`` + assignments), and an
    estimated USD cost based on a static per-model price table. Every
    field has a default so this can be incrementally populated during a
    run. The result is JSON-serializable and saved alongside the
    :class:`SimulationResults`.

    The derived fields (``estimated_cost_usd``, ``cache_hit_rate``,
    ``llm_success_rate``) are NOT kept in sync automatically; call the
    matching ``update_*`` method after changing the counters they
    depend on.

    Examples:
        >>> from agent_urban_planning import RunMetadata
        >>> md = RunMetadata(scenario_name="berlin", policy_name="counterfactual")
        >>> md.to_dict()["scenario_name"]
        'berlin'
    """

    run_id: str = field(default_factory=_new_run_id)
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    scenario_name: Optional[str] = None
    policy_name: Optional[str] = None
    seed: Optional[int] = None

    # LLM info
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    llm_temperature: Optional[float] = None
    llm_concurrency: Optional[int] = None

    # Performance counters
    total_llm_calls: int = 0
    cached_llm_calls: int = 0
    cache_hit_rate: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    wall_clock_seconds: float = 0.0
    estimated_cost_usd: float = 0.0

    # Reliability counters (pure LLM mode)
    llm_retry_count: int = 0          # total retries triggered by transient errors
    llm_failed_calls: int = 0          # calls that failed after all retries exhausted
    llm_success_rate: float = 1.0      # successful / (successful + failed_final)

    # Clustering
    clustering_algo: str = "none"
    num_archetypes: Optional[int] = None
    samples_per_archetype: int = 1
    within_cluster_assignment: str = "deterministic"
    cluster_features: list[str] = field(default_factory=list)
    cluster_assignments: Optional[dict[int, int]] = None

    # Market clearing
    price_elasticity_used: Optional[float] = None
    damping_final: Optional[float] = None
    market_iterations_actual: Optional[int] = None
    convergence_achieved: Optional[bool] = None

    # Other
    decision_engine_name: Optional[str] = None
    notes: Optional[str] = None

    # ------------------------------------------------------------------
    # Computed helpers
    # ------------------------------------------------------------------

    def update_cost(self) -> None:
        """Recompute ``estimated_cost_usd`` from the static cost table.

        Uses ``llm_provider`` + ``llm_model`` + token counters to look
        up per-1k-token rates and compute the rough USD cost. The
        result is ``0.0`` when the provider/model pair is unset or has
        no entry in the price table.

        Returns:
            None. Mutates ``self.estimated_cost_usd``.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(llm_provider="openai", llm_model="gpt-4o-mini",
            ...                  total_input_tokens=1000, total_output_tokens=500)
            >>> md.update_cost()
            >>> md.estimated_cost_usd > 0
            True
        """
        self.estimated_cost_usd = compute_cost(
            self.llm_provider,
            self.llm_model,
            self.total_input_tokens,
            self.total_output_tokens,
        )

    def update_cache_hit_rate(self) -> None:
        """Recompute ``cache_hit_rate`` from cached and uncached call counters.

        The denominator is ``total_llm_calls + cached_llm_calls``, i.e.
        ``total_llm_calls`` is treated as the count of calls that were
        NOT served from cache. With no calls at all the rate is ``0.0``.

        Returns:
            None. Mutates ``self.cache_hit_rate``.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(total_llm_calls=8, cached_llm_calls=2)
            >>> md.update_cache_hit_rate()
            >>> round(md.cache_hit_rate, 1)
            0.2
        """
        total = self.total_llm_calls + self.cached_llm_calls
        # Guard the division: an empty run has no meaningful hit rate.
        self.cache_hit_rate = self.cached_llm_calls / total if total > 0 else 0.0

    def update_llm_success_rate(self) -> None:
        """Compute LLM success rate from successful and failed counters.

        In pure LLM mode this SHOULD be ``1.0`` — any value less than 1
        means some agents' decisions could not be made by the LLM. By
        design the simulation aborts rather than falling back to
        utility, so a < 1 value only occurs from manual failure
        injection or an incomplete run.

        ``total_llm_calls`` is used as the successful-call count and
        ``llm_failed_calls`` as the failed-call count; with neither the
        rate defaults to ``1.0``.

        Returns:
            None. Mutates ``self.llm_success_rate``.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(total_llm_calls=99, llm_failed_calls=1)
            >>> md.update_llm_success_rate()
            >>> round(md.llm_success_rate, 2)
            0.99
        """
        total = self.total_llm_calls + self.llm_failed_calls
        # No attempts recorded counts as "nothing failed".
        self.llm_success_rate = self.total_llm_calls / total if total > 0 else 1.0

    # ------------------------------------------------------------------
    # JSON I/O
    # ------------------------------------------------------------------

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict of every field.

        Converts int keys in ``cluster_assignments`` to strings so the
        result round-trips through ``json.dumps``.

        Returns:
            ``dict`` ready for serialization.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> md.to_dict()["scenario_name"]
            'x'
        """
        d = asdict(self)
        # JSON object keys must be strings; values are coerced to plain
        # ints so non-builtin integer types do not leak into the output.
        if self.cluster_assignments is not None:
            d["cluster_assignments"] = {
                str(k): int(v) for k, v in self.cluster_assignments.items()
            }
        return d

    def to_json(self, indent: int = 2) -> str:
        """Serialize to an indented JSON string.

        Values that ``json`` cannot encode natively are converted with
        ``str`` rather than raising.

        Args:
            indent: Number of spaces of indentation. Defaults to ``2``.

        Returns:
            JSON string.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> '"scenario_name"' in md.to_json()
            True
        """
        return json.dumps(self.to_dict(), indent=indent, default=str)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "RunMetadata":
        """Build a :class:`RunMetadata` from its serialized dict form.

        The input dict is never mutated; a shallow copy is made when the
        ``cluster_assignments`` keys need converting back to ints.

        Args:
            data: Dict shaped like the output of :meth:`to_dict`.

        Returns:
            A new :class:`RunMetadata` with ``cluster_assignments`` int
            keys restored.

        Raises:
            TypeError: If ``data`` contains a key that is not a field of
                this class.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> RunMetadata.from_dict(md.to_dict()).scenario_name
            'x'
        """
        ca = data.get("cluster_assignments")
        if ca is not None:
            # Copy first so the caller's dict keeps its string keys.
            data = dict(data)
            data["cluster_assignments"] = {int(k): int(v) for k, v in ca.items()}
        return cls(**data)

    @classmethod
    def from_json(cls, s: str) -> "RunMetadata":
        """Build a :class:`RunMetadata` from a JSON string.

        Args:
            s: JSON string previously produced by :meth:`to_json`.

        Returns:
            A new :class:`RunMetadata`.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> RunMetadata.from_json(md.to_json()).scenario_name
            'x'
        """
        return cls.from_dict(json.loads(s))

    def save(self, path: str | os.PathLike[str]) -> None:
        """Write this metadata to ``path`` as JSON.

        Creates parent directories as needed. The file is written as
        UTF-8 so the result does not depend on the platform's default
        encoding.

        Args:
            path: Path-like target.

        Returns:
            None.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> # md.save("output/run.json")
        """
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(self.to_json(), encoding="utf-8")

    @classmethod
    def load(cls, path: str | os.PathLike[str]) -> "RunMetadata":
        """Load a :class:`RunMetadata` from a JSON file.

        The file is read as UTF-8, matching what :meth:`save` writes.

        Args:
            path: Path-like file source.

        Returns:
            The deserialized :class:`RunMetadata`.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> # md = RunMetadata.load("output/run.json")
        """
        return cls.from_json(Path(path).read_text(encoding="utf-8"))




# ------------------------------------------------------------------
# Convenience: a small timer helper
# ------------------------------------------------------------------


class WallClock:
    """Context manager that measures elapsed wall-clock time in seconds.

    The interval is taken from ``time.perf_counter()``, a monotonic
    high-resolution clock, so adjustments to the system clock during
    the run cannot produce a negative or skewed duration. Sleep time is
    included in the measurement.

    Attributes are set as follows: ``elapsed`` is ``0.0`` until the
    ``with`` block exits, then holds the duration of the block. The
    context manager never suppresses exceptions.

    Examples:
        >>> with WallClock() as wc:
        ...     pass
        >>> wc.elapsed >= 0.0
        True
    """

    def __init__(self) -> None:
        self.elapsed = 0.0
        self._start: Optional[float] = None

    def __enter__(self) -> "WallClock":
        self._start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        # Compare against None explicitly: a start reading of 0.0 is a
        # valid value, and exiting without entering should report zero
        # rather than the difference of two unrelated clock reads.
        if self._start is None:
            self.elapsed = 0.0
        else:
            self.elapsed = time.perf_counter() - self._start
        return False  # propagate any exception raised inside the block