Configuration as Code: TOML and Templates - prealpha; Formal Verification Agent Recipes

Hardcoded prompts and magic numbers are a sin. They make code difficult to read, maintain, and test. That’s why our plain Python agent uses a configuration-driven design. All of the agent’s settings, including the system prompt, are stored in a TOML file. ## TODO: audit the prose to make sure it’s not too LLMy

The config.toml file is divided into four sections:

[evaluation]: Loop limits, timeouts, and the model to use.
[logging]: The directory for artifacts and the log level.
[prompt]: The system prompt and any templates.
[dataset]: The Hugging Face dataset to use.

The PlainConfig dataclass in config.py is used to load the configuration from the TOML file.

config.py

"""Configuration loading for DafnyBench plain implementation."""

from dataclasses import dataclass
from pathlib import Path

try:
    import tomllib  # Python 3.11+
except ImportError:
    import tomli as tomllib  # type: ignore  # Fallback for Python < 3.11


@dataclass
class EvaluationConfig:
    """Typed view of the ``[evaluation]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Upper bound on loop iterations.
    max_iterations: int
    # Token limit; NOTE(review): presumably per model response -- confirm.
    max_tokens: int
    # Timeout applied to verification; units are not visible here -- confirm.
    verification_timeout: int
    # Model name to use; presumably the fallback when none is given -- confirm.
    default_model: str


@dataclass
class LoggingConfig:
    """Typed view of the ``[logging]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Log level, stored as a string.
    level: str
    # Flag for saving artifacts; how it is consumed is not visible here.
    save_artifacts: bool
    # Directory for artifacts.
    artifacts_dir: str
    # Directory for logs.
    logs_dir: str


@dataclass
class PromptConfig:
    """Typed view of the ``[prompt]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Full text of the system prompt.
    system_prompt: str
    # Template text; presumably rendered for the first state message -- confirm.
    initial_state_template: str
    # Template text; presumably rendered on later state updates -- confirm.
    state_update_template: str


@dataclass
class DatasetConfig:
    """Typed view of the ``[dataset]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Dataset identifier.
    name: str
    # Dataset split to use.
    split: str


@dataclass
class PlainConfig:
    """Complete configuration for plain evaluation.

    Bundles the four config.toml sections (``[evaluation]``, ``[logging]``,
    ``[prompt]`` and ``[dataset]``) as typed dataclasses.
    """

    evaluation: EvaluationConfig
    logging: LoggingConfig
    prompt: PromptConfig
    dataset: DatasetConfig

    @classmethod
    def from_file(cls, config_path: Path | str | None = None) -> "PlainConfig":
        """Load configuration from TOML file.

        Args:
            config_path: Path to config.toml. If None, uses default location
                        (same directory as this module)

        Returns:
            PlainConfig instance

        Raises:
            FileNotFoundError: If the config file does not exist
            KeyError: If one or more required sections are missing
            TypeError: If a section has missing or unexpected keys
        """
        if config_path is None:
            # Default: config.toml in same directory as this module
            config_path = Path(__file__).parent / "config.toml"
        else:
            config_path = Path(config_path)

        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")

        # tomllib requires a binary file handle.
        with open(config_path, "rb") as f:
            data = tomllib.load(f)

        # Section name -> dataclass; insertion order matches the field order,
        # so sections are built in the same sequence as before.
        sections = {
            "evaluation": EvaluationConfig,
            "logging": LoggingConfig,
            "prompt": PromptConfig,
            "dataset": DatasetConfig,
        }

        # Report every missing section with the file path instead of a bare
        # KeyError('evaluation') that gives no hint where the problem is.
        missing = [name for name in sections if name not in data]
        if missing:
            raise KeyError(
                f"Missing required section(s) {missing} in config file: {config_path}"
            )

        return cls(
            **{name: section_cls(**data[name]) for name, section_cls in sections.items()}
        )

    @classmethod
    def default(cls) -> "PlainConfig":
        """Get default configuration (loads from default config.toml location).

        Returns:
            PlainConfig instance with default settings
        """
        return cls.from_file()


# Singleton instance: module-level cache of the shared PlainConfig. It stays
# None until get_config() or load_config() assigns it.
_config: PlainConfig | None = None


def get_config(reload: bool = False) -> PlainConfig:
    """Return the shared module-level configuration, loading it on demand.

    The configuration is read via ``PlainConfig.default()`` the first time
    this is called, and again whenever ``reload`` is true. A reload always
    reads the default location, even if ``load_config`` previously set the
    global from another path.

    Args:
        reload: If True, discard the cached instance and load again

    Returns:
        PlainConfig instance
    """
    global _config
    needs_load = reload or _config is None
    if needs_load:
        _config = PlainConfig.default()
    return _config


def load_config(config_path: Path | str) -> PlainConfig:
    """Read the configuration at ``config_path`` and make it the global one.

    Args:
        config_path: Path to config.toml

    Returns:
        The freshly loaded PlainConfig instance, which is also stored as the
        module-level singleton
    """
    global _config
    loaded = PlainConfig.from_file(config_path)
    _config = loaded
    return loaded


def normalize_model_name(model: str) -> str:
    """Strip a leading 'anthropic/' prefix from model name (inspect-ai format).

    Only the prefix is removed; a later occurrence of 'anthropic/' inside
    the name is left untouched.

    Args:
        model: Model name that may have 'anthropic/' prefix

    Returns:
        Model name without the leading 'anthropic/' prefix; unchanged if the
        prefix is absent
    """
    # removeprefix() drops at most one leading occurrence, whereas
    # str.replace() would also delete matches in the middle of the name.
    return model.removeprefix("anthropic/")

The PlainConfig dataclass

The get_config function provides a global singleton for accessing the configuration.

config.py

"""Configuration loading for DafnyBench plain implementation."""

from dataclasses import dataclass
from pathlib import Path

try:
    import tomllib  # Python 3.11+
except ImportError:
    import tomli as tomllib  # type: ignore  # Fallback for Python < 3.11


@dataclass
class EvaluationConfig:
    """Typed view of the ``[evaluation]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Upper bound on loop iterations.
    max_iterations: int
    # Token limit; NOTE(review): presumably per model response -- confirm.
    max_tokens: int
    # Timeout applied to verification; units are not visible here -- confirm.
    verification_timeout: int
    # Model name to use; presumably the fallback when none is given -- confirm.
    default_model: str


@dataclass
class LoggingConfig:
    """Typed view of the ``[logging]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Log level, stored as a string.
    level: str
    # Flag for saving artifacts; how it is consumed is not visible here.
    save_artifacts: bool
    # Directory for artifacts.
    artifacts_dir: str
    # Directory for logs.
    logs_dir: str


@dataclass
class PromptConfig:
    """Typed view of the ``[prompt]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Full text of the system prompt.
    system_prompt: str
    # Template text; presumably rendered for the first state message -- confirm.
    initial_state_template: str
    # Template text; presumably rendered on later state updates -- confirm.
    state_update_template: str


@dataclass
class DatasetConfig:
    """Typed view of the ``[dataset]`` table in config.toml.

    The generated ``__init__`` takes the fields in the order declared
    below, so that order is part of the interface.
    """

    # Dataset identifier.
    name: str
    # Dataset split to use.
    split: str


@dataclass
class PlainConfig:
    """Complete configuration for plain evaluation.

    Bundles the four config.toml sections (``[evaluation]``, ``[logging]``,
    ``[prompt]`` and ``[dataset]``) as typed dataclasses.
    """

    evaluation: EvaluationConfig
    logging: LoggingConfig
    prompt: PromptConfig
    dataset: DatasetConfig

    @classmethod
    def from_file(cls, config_path: Path | str | None = None) -> "PlainConfig":
        """Load configuration from TOML file.

        Args:
            config_path: Path to config.toml. If None, uses default location
                        (same directory as this module)

        Returns:
            PlainConfig instance

        Raises:
            FileNotFoundError: If the config file does not exist
            KeyError: If one or more required sections are missing
            TypeError: If a section has missing or unexpected keys
        """
        if config_path is None:
            # Default: config.toml in same directory as this module
            config_path = Path(__file__).parent / "config.toml"
        else:
            config_path = Path(config_path)

        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")

        # tomllib requires a binary file handle.
        with open(config_path, "rb") as f:
            data = tomllib.load(f)

        # Section name -> dataclass; insertion order matches the field order,
        # so sections are built in the same sequence as before.
        sections = {
            "evaluation": EvaluationConfig,
            "logging": LoggingConfig,
            "prompt": PromptConfig,
            "dataset": DatasetConfig,
        }

        # Report every missing section with the file path instead of a bare
        # KeyError('evaluation') that gives no hint where the problem is.
        missing = [name for name in sections if name not in data]
        if missing:
            raise KeyError(
                f"Missing required section(s) {missing} in config file: {config_path}"
            )

        return cls(
            **{name: section_cls(**data[name]) for name, section_cls in sections.items()}
        )

    @classmethod
    def default(cls) -> "PlainConfig":
        """Get default configuration (loads from default config.toml location).

        Returns:
            PlainConfig instance with default settings
        """
        return cls.from_file()


# Singleton instance: module-level cache of the shared PlainConfig. It stays
# None until get_config() or load_config() assigns it.
_config: PlainConfig | None = None


def get_config(reload: bool = False) -> PlainConfig:
    """Return the shared module-level configuration, loading it on demand.

    The configuration is read via ``PlainConfig.default()`` the first time
    this is called, and again whenever ``reload`` is true. A reload always
    reads the default location, even if ``load_config`` previously set the
    global from another path.

    Args:
        reload: If True, discard the cached instance and load again

    Returns:
        PlainConfig instance
    """
    global _config
    needs_load = reload or _config is None
    if needs_load:
        _config = PlainConfig.default()
    return _config


def load_config(config_path: Path | str) -> PlainConfig:
    """Read the configuration at ``config_path`` and make it the global one.

    Args:
        config_path: Path to config.toml

    Returns:
        The freshly loaded PlainConfig instance, which is also stored as the
        module-level singleton
    """
    global _config
    loaded = PlainConfig.from_file(config_path)
    _config = loaded
    return loaded


def normalize_model_name(model: str) -> str:
    """Strip a leading 'anthropic/' prefix from model name (inspect-ai format).

    Only the prefix is removed; a later occurrence of 'anthropic/' inside
    the name is left untouched.

    Args:
        model: Model name that may have 'anthropic/' prefix

    Returns:
        Model name without the leading 'anthropic/' prefix; unchanged if the
        prefix is absent
    """
    # removeprefix() drops at most one leading occurrence, whereas
    # str.replace() would also delete matches in the middle of the name.
    return model.removeprefix("anthropic/")

The get_config function

The system prompt itself is a 185-line behemoth that explains the agent’s task in excruciating detail. It includes descriptions of all the tools, examples of how to use them, and important rules and reminders about Dafny syntax.

This configuration-driven approach makes the agent much more flexible and reusable. We can easily change the agent’s behavior by simply editing the TOML file, without having to touch the code.

Dafny part 2: plain Python

State Management via Message History

Dafny part 2: plain Python

Running the Plain Implementation