Source code for agent_urban_planning.core.run_metadata

"""Per-run metadata tracking and cost estimation.

Captures everything needed to make a simulation run reproducible from the
output JSON: scenario / policy / seed, LLM provider details, performance
counters (call count, cache hit rate, wall-clock time), clustering
configuration (algorithm + k + assignments), and an estimated USD cost
based on a static per-model price table.

Saved automatically alongside results when LLM mode or clustering is used,
or on demand via `--save-metadata`.
"""

from __future__ import annotations

import json
import os
import time
import uuid
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional


# ------------------------------------------------------------------
# Cost table
# ------------------------------------------------------------------

# Approximate USD price per 1,000 tokens. The numbers are rough public list
# prices and should be treated as order-of-magnitude estimates only.
#
# Format:
#   {(provider, model): (input_per_1k, output_per_1k)}
#
# Keys are written in lowercase because ``compute_cost`` lowercases the
# provider and model before looking them up. A (provider, model) pair that is
# missing from this table is costed at 0.0 by ``compute_cost``.
LLM_COST_TABLE: dict[tuple[str, str], tuple[float, float]] = {
    # Z.ai GLM family — community/coding tier estimates
    ("zai-coding", "glm-4.7"): (0.0006, 0.0022),
    ("zai-coding", "glm-4.6"): (0.0006, 0.0022),
    ("zai-coding", "glm-4.5"): (0.0005, 0.0015),
    ("zai-coding", "glm-4.5-air"): (0.0002, 0.0006),
    # Anthropic
    ("anthropic", "claude-haiku-4-5-20251001"): (0.001, 0.005),
    ("anthropic", "claude-sonnet-4-6"): (0.003, 0.015),
    ("anthropic", "claude-opus-4-6"): (0.015, 0.075),
    # OpenAI
    ("openai", "gpt-4o-mini"): (0.00015, 0.0006),
    ("openai", "gpt-4o"): (0.0025, 0.010),
}


def compute_cost(
    provider: Optional[str],
    model: Optional[str],
    input_tokens: int,
    output_tokens: int,
) -> float:
    """Estimate the USD cost of a run from its token counts.

    The provider and model are lowercased and looked up together in
    ``LLM_COST_TABLE``; the per-1k-token input and output rates found there
    are applied to the respective token counts.

    Args:
        provider: LLM provider name, or ``None``/empty when not applicable.
        model: Model name, or ``None``/empty when not applicable.
        input_tokens: Number of prompt tokens consumed.
        output_tokens: Number of completion tokens produced.

    Returns:
        The estimated cost in USD, or ``0.0`` when the provider or model is
        missing or the pair is not present in the price table.
    """
    if not (provider and model):
        return 0.0

    rates = LLM_COST_TABLE.get((provider.lower(), model.lower()))
    if rates is None:
        return 0.0

    input_rate, output_rate = rates
    input_cost = (input_tokens / 1000.0) * input_rate
    output_cost = (output_tokens / 1000.0) * output_rate
    return input_cost + output_cost


# ------------------------------------------------------------------
# RunMetadata dataclass
# ------------------------------------------------------------------


def _new_run_id() -> str:
    """Build a unique run id: a UTC timestamp followed by a random hex tag.

    Returns:
        A string of the form ``YYYYMMDD_HHMMSS_xxxxxxxx`` where the last part
        is the first eight hex digits of a freshly generated UUID4.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    random_tag = uuid.uuid4().hex[:8]
    return "_".join((stamp, random_tag))


@dataclass
class RunMetadata:
    """Reproducibility metadata for a single simulation run.

    Captures everything needed to make a run reproducible from the output
    JSON: scenario / policy / seed, LLM provider details, performance
    counters (call count, cache hit rate, wall-clock time), clustering
    configuration (algorithm + ``k`` + assignments), and an estimated USD
    cost based on a static per-model price table.

    Every field has a default so the object can be populated incrementally
    during a run. The result is JSON-serializable and saved alongside the
    :class:`SimulationResults`.

    Examples:
        >>> from agent_urban_planning import RunMetadata
        >>> md = RunMetadata(scenario_name="berlin", policy_name="counterfactual")
        >>> md.to_dict()["scenario_name"]
        'berlin'
    """

    run_id: str = field(default_factory=_new_run_id)
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    scenario_name: Optional[str] = None
    policy_name: Optional[str] = None
    seed: Optional[int] = None

    # LLM info
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    llm_temperature: Optional[float] = None
    llm_concurrency: Optional[int] = None

    # Performance counters
    total_llm_calls: int = 0
    cached_llm_calls: int = 0
    cache_hit_rate: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    wall_clock_seconds: float = 0.0
    estimated_cost_usd: float = 0.0

    # Reliability counters (pure LLM mode)
    llm_retry_count: int = 0  # total retries triggered by transient errors
    llm_failed_calls: int = 0  # calls that failed after all retries exhausted
    llm_success_rate: float = 1.0  # successful / (successful + failed_final)

    # Clustering
    clustering_algo: str = "none"
    num_archetypes: Optional[int] = None
    samples_per_archetype: int = 1
    within_cluster_assignment: str = "deterministic"
    cluster_features: list[str] = field(default_factory=list)
    cluster_assignments: Optional[dict[int, int]] = None

    # Market clearing
    price_elasticity_used: Optional[float] = None
    damping_final: Optional[float] = None
    market_iterations_actual: Optional[int] = None
    convergence_achieved: Optional[bool] = None

    # Other
    decision_engine_name: Optional[str] = None
    notes: Optional[str] = None

    # ------------------------------------------------------------------
    # Computed helpers
    # ------------------------------------------------------------------

    def update_cost(self) -> None:
        """Recompute ``estimated_cost_usd`` from the static cost table.

        Uses ``llm_provider`` + ``llm_model`` + token counters to look up
        per-1k-token rates and compute the rough USD cost.

        Returns:
            None. Mutates ``self.estimated_cost_usd``.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(llm_provider="openai", llm_model="gpt-4o-mini",
            ...                  total_input_tokens=1000, total_output_tokens=500)
            >>> md.update_cost()
            >>> md.estimated_cost_usd > 0
            True
        """
        self.estimated_cost_usd = compute_cost(
            self.llm_provider,
            self.llm_model,
            self.total_input_tokens,
            self.total_output_tokens,
        )

    def update_cache_hit_rate(self) -> None:
        """Recompute ``cache_hit_rate`` from cached and uncached call counters.

        The rate is ``cached_llm_calls / (total_llm_calls + cached_llm_calls)``
        and falls back to ``0.0`` when both counters are zero.

        Returns:
            None. Mutates ``self.cache_hit_rate``.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(total_llm_calls=8, cached_llm_calls=2)
            >>> md.update_cache_hit_rate()
            >>> round(md.cache_hit_rate, 1)
            0.2
        """
        total = self.total_llm_calls + self.cached_llm_calls
        # Guard against division by zero when no call was recorded at all.
        self.cache_hit_rate = self.cached_llm_calls / total if total > 0 else 0.0

    def update_llm_success_rate(self) -> None:
        """Compute LLM success rate from successful and failed counters.

        In pure LLM mode this SHOULD be ``1.0`` — any value less than 1
        means some agents' decisions could not be made by the LLM. By design
        the simulation aborts rather than falling back to utility, so a < 1
        value only occurs from manual failure injection or an incomplete run.

        The rate is ``total_llm_calls / (total_llm_calls + llm_failed_calls)``
        and defaults to ``1.0`` when both counters are zero.

        Returns:
            None. Mutates ``self.llm_success_rate``.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(total_llm_calls=99, llm_failed_calls=1)
            >>> md.update_llm_success_rate()
            >>> round(md.llm_success_rate, 2)
            0.99
        """
        total = self.total_llm_calls + self.llm_failed_calls
        # With no recorded calls there is nothing that failed, hence 1.0.
        self.llm_success_rate = self.total_llm_calls / total if total > 0 else 1.0

    # ------------------------------------------------------------------
    # JSON I/O
    # ------------------------------------------------------------------

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict of every field.

        Converts int keys in ``cluster_assignments`` to strings so the
        result round-trips through ``json.dumps``.

        Returns:
            ``dict`` ready for serialization.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> md.to_dict()["scenario_name"]
            'x'
        """
        d = asdict(self)
        # Convert int keys in cluster_assignments to strings for JSON
        if self.cluster_assignments is not None:
            d["cluster_assignments"] = {
                str(k): int(v) for k, v in self.cluster_assignments.items()
            }
        return d

    def to_json(self, indent: int = 2) -> str:
        """Serialize to an indented JSON string.

        Values that ``json`` cannot encode natively are stringified via
        ``default=str``.

        Args:
            indent: Number of spaces of indentation. Defaults to ``2``.

        Returns:
            JSON string.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> '"scenario_name"' in md.to_json()
            True
        """
        return json.dumps(self.to_dict(), indent=indent, default=str)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "RunMetadata":
        """Build a :class:`RunMetadata` from its serialized dict form.

        The input mapping is not modified; it is copied before the
        ``cluster_assignments`` keys are converted back to ints.

        Args:
            data: Dict shaped like the output of :meth:`to_dict`.

        Returns:
            A new :class:`RunMetadata` with ``cluster_assignments`` int keys
            restored.

        Raises:
            TypeError: If ``data`` contains a key that is not a field of
                this dataclass.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> RunMetadata.from_dict(md.to_dict()).scenario_name
            'x'
        """
        ca = data.get("cluster_assignments")
        if ca is not None:
            # Shallow-copy so the caller's dict is left untouched.
            data = dict(data)
            data["cluster_assignments"] = {int(k): int(v) for k, v in ca.items()}
        return cls(**data)

    @classmethod
    def from_json(cls, s: str) -> "RunMetadata":
        """Build a :class:`RunMetadata` from a JSON string.

        Args:
            s: JSON string previously produced by :meth:`to_json`.

        Returns:
            A new :class:`RunMetadata`.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> md = RunMetadata(scenario_name="x")
            >>> RunMetadata.from_json(md.to_json()).scenario_name
            'x'
        """
        return cls.from_dict(json.loads(s))

    def save(self, path: "str | os.PathLike[str]") -> None:
        """Write this metadata to ``path`` as UTF-8 encoded JSON.

        Creates parent directories as needed.

        Args:
            path: Path-like target.

        Returns:
            None.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> # md.save("output/run.json")
        """
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so the file does not depend on the platform locale.
        p.write_text(self.to_json(), encoding="utf-8")

    @classmethod
    def load(cls, path: "str | os.PathLike[str]") -> "RunMetadata":
        """Load a :class:`RunMetadata` from a UTF-8 encoded JSON file.

        Args:
            path: Path-like file source.

        Returns:
            The deserialized :class:`RunMetadata`.

        Examples:
            >>> from agent_urban_planning import RunMetadata
            >>> # md = RunMetadata.load("output/run.json")
        """
        # Decode as UTF-8 explicitly instead of relying on the locale default,
        # so files with non-ASCII text load the same way on every platform.
        return cls.from_json(Path(path).read_text(encoding="utf-8"))
# ------------------------------------------------------------------
# Convenience: a small timer helper
# ------------------------------------------------------------------


class WallClock:
    """Context manager that measures elapsed real time in seconds.

    After the ``with`` block exits, ``elapsed`` holds the number of seconds
    spent inside it. Exceptions raised inside the block are never suppressed.

    Examples:
        >>> with WallClock() as clock:
        ...     pass
        >>> clock.elapsed >= 0.0
        True
    """

    def __init__(self) -> None:
        self.elapsed = 0.0
        self._start: Optional[float] = None

    def __enter__(self) -> "WallClock":
        # perf_counter is monotonic, so the measured duration cannot go
        # backwards or jump when the system clock is adjusted mid-run.
        self._start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        if self._start is None:
            # __exit__ called without a matching __enter__: nothing was timed.
            self.elapsed = 0.0
        else:
            self.elapsed = time.perf_counter() - self._start
        # Returning False lets any exception from the block propagate.
        return False