Hardcoded prompts and magic numbers are a sin. They make code difficult to read, maintain, and test. That’s why our plain Python agent uses a configuration-driven design. All of the agent’s settings, including the system prompt, are stored in a TOML file. ## TODO: audit the prose to make sure it's not too LLMy
The config.toml file is divided into four sections:
- [evaluation]: Loop limits, timeouts, and the model to use.
- [logging]: The directory for artifacts and the log level.
- [prompt]: The system prompt and any templates.
- [dataset]: The Hugging Face dataset to use.
The PlainConfig dataclass in config.py is used to load the configuration from the TOML file.
"""Configuration loading for DafnyBench plain implementation."""
from dataclasses import dataclass
from pathlib import Path
try:
import tomllib # Python 3.11+
except ImportError:
import tomli as tomllib # type: ignore # Fallback for Python < 3.11
@dataclass
class EvaluationConfig:
    """Values of the ``[evaluation]`` table of the config file.

    Attributes:
        max_iterations: Loop limit for the agent (presumably the maximum
            number of iterations per problem; confirm against the caller).
        max_tokens: Token limit (presumably per model response; confirm
            against the caller).
        verification_timeout: Timeout applied to verification (units are
            not shown here; presumably seconds, confirm against the caller).
        default_model: Name of the model to use by default.
    """

    max_iterations: int
    max_tokens: int
    verification_timeout: int
    default_model: str
@dataclass
class LoggingConfig:
    """Values of the ``[logging]`` table of the config file.

    Attributes:
        level: Log level name.
        save_artifacts: Flag controlling artifact saving (presumably True
            means artifacts are written; confirm against the caller).
        artifacts_dir: Directory for artifacts.
        logs_dir: Directory for log output (presumably log files; confirm
            against the caller).
    """

    level: str
    save_artifacts: bool
    artifacts_dir: str
    logs_dir: str
@dataclass
class PromptConfig:
    """Values of the ``[prompt]`` table of the config file.

    Attributes:
        system_prompt: The system prompt text.
        initial_state_template: Template text (presumably rendered for the
            first state message; confirm against the caller).
        state_update_template: Template text (presumably rendered for
            subsequent state updates; confirm against the caller).
    """

    system_prompt: str
    initial_state_template: str
    state_update_template: str
@dataclass
class DatasetConfig:
    """Values of the ``[dataset]`` table of the config file.

    Attributes:
        name: Name of the dataset to use (a Hugging Face dataset, per the
            accompanying article).
        split: Dataset split to use (presumably a split name such as
            "test"; confirm against the caller).
    """

    name: str
    split: str
@dataclass
class PlainConfig:
    """Top-level configuration for plain evaluation, one field per TOML table.

    Attributes:
        evaluation: Values of the ``[evaluation]`` table.
        logging: Values of the ``[logging]`` table.
        prompt: Values of the ``[prompt]`` table.
        dataset: Values of the ``[dataset]`` table.
    """

    evaluation: EvaluationConfig
    logging: LoggingConfig
    prompt: PromptConfig
    dataset: DatasetConfig

    @classmethod
    def from_file(cls, config_path: Path | str | None = None) -> "PlainConfig":
        """Build a configuration from a TOML file.

        Args:
            config_path: Location of the TOML file. ``None`` selects
                ``config.toml`` in the directory containing this module.

        Returns:
            PlainConfig instance populated from the file's four tables

        Raises:
            FileNotFoundError: If the resolved path does not exist.
            KeyError: If one of the four tables is absent from the file.
            TypeError: If a table's keys do not match the fields of the
                dataclass it is unpacked into.
        """
        path = (
            Path(__file__).parent / "config.toml"
            if config_path is None
            else Path(config_path)
        )
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        # tomllib requires a binary file handle.
        with path.open("rb") as handle:
            tables = tomllib.load(handle)

        # Each table is unpacked straight into its dataclass, so the TOML
        # keys must match the dataclass field names exactly.
        evaluation_cfg = EvaluationConfig(**tables["evaluation"])
        logging_cfg = LoggingConfig(**tables["logging"])
        prompt_cfg = PromptConfig(**tables["prompt"])
        dataset_cfg = DatasetConfig(**tables["dataset"])
        return cls(
            evaluation=evaluation_cfg,
            logging=logging_cfg,
            prompt=prompt_cfg,
            dataset=dataset_cfg,
        )

    @classmethod
    def default(cls) -> "PlainConfig":
        """Load the configuration from the default ``config.toml`` location.

        Returns:
            PlainConfig instance read from ``config.toml`` next to this module
        """
        return cls.from_file()
# Module-wide cached configuration; stays None until first loaded.
_config: PlainConfig | None = None


def get_config(reload: bool = False) -> PlainConfig:
    """Return the shared configuration, loading it on first use.

    Args:
        reload: When True, replace any cached instance by reading the
            default config.toml again (via PlainConfig.default()), even if
            a different file was previously installed with load_config().

    Returns:
        The module-wide PlainConfig instance
    """
    global _config
    must_load = reload or _config is None
    if must_load:
        _config = PlainConfig.default()
    return _config
def load_config(config_path: Path | str) -> PlainConfig:
    """Read the configuration at ``config_path`` and install it globally.

    The loaded instance replaces whatever the module-level ``_config``
    previously held.

    Args:
        config_path: Path to config.toml

    Returns:
        The freshly loaded PlainConfig instance (also stored as the global)
    """
    global _config
    loaded = PlainConfig.from_file(config_path)
    _config = loaded
    return loaded
def normalize_model_name(model: str) -> str:
    """Strip a leading 'anthropic/' from a model name (inspect-ai format).

    Only the prefix is removed; any later occurrence of 'anthropic/' inside
    the name is left intact.

    Args:
        model: Model name that may have 'anthropic/' prefix

    Returns:
        Model name without the leading 'anthropic/' prefix; returned
        unchanged when the prefix is absent
    """
    # str.replace would also delete later occurrences of the substring, so
    # strip strictly at the start of the string.
    return model.removeprefix("anthropic/")
The PlainConfig dataclass
The get_config function provides a global singleton for accessing the configuration.
"""Configuration loading for DafnyBench plain implementation."""
from dataclasses import dataclass
from pathlib import Path
try:
import tomllib # Python 3.11+
except ImportError:
import tomli as tomllib # type: ignore # Fallback for Python < 3.11
@dataclass
class EvaluationConfig:
    """Values of the ``[evaluation]`` table of the config file.

    Attributes:
        max_iterations: Loop limit for the agent (presumably the maximum
            number of iterations per problem; confirm against the caller).
        max_tokens: Token limit (presumably per model response; confirm
            against the caller).
        verification_timeout: Timeout applied to verification (units are
            not shown here; presumably seconds, confirm against the caller).
        default_model: Name of the model to use by default.
    """

    max_iterations: int
    max_tokens: int
    verification_timeout: int
    default_model: str
@dataclass
class LoggingConfig:
    """Values of the ``[logging]`` table of the config file.

    Attributes:
        level: Log level name.
        save_artifacts: Flag controlling artifact saving (presumably True
            means artifacts are written; confirm against the caller).
        artifacts_dir: Directory for artifacts.
        logs_dir: Directory for log output (presumably log files; confirm
            against the caller).
    """

    level: str
    save_artifacts: bool
    artifacts_dir: str
    logs_dir: str
@dataclass
class PromptConfig:
    """Values of the ``[prompt]`` table of the config file.

    Attributes:
        system_prompt: The system prompt text.
        initial_state_template: Template text (presumably rendered for the
            first state message; confirm against the caller).
        state_update_template: Template text (presumably rendered for
            subsequent state updates; confirm against the caller).
    """

    system_prompt: str
    initial_state_template: str
    state_update_template: str
@dataclass
class DatasetConfig:
    """Values of the ``[dataset]`` table of the config file.

    Attributes:
        name: Name of the dataset to use (a Hugging Face dataset, per the
            accompanying article).
        split: Dataset split to use (presumably a split name such as
            "test"; confirm against the caller).
    """

    name: str
    split: str
@dataclass
class PlainConfig:
    """Top-level configuration for plain evaluation, one field per TOML table.

    Attributes:
        evaluation: Values of the ``[evaluation]`` table.
        logging: Values of the ``[logging]`` table.
        prompt: Values of the ``[prompt]`` table.
        dataset: Values of the ``[dataset]`` table.
    """

    evaluation: EvaluationConfig
    logging: LoggingConfig
    prompt: PromptConfig
    dataset: DatasetConfig

    @classmethod
    def from_file(cls, config_path: Path | str | None = None) -> "PlainConfig":
        """Build a configuration from a TOML file.

        Args:
            config_path: Location of the TOML file. ``None`` selects
                ``config.toml`` in the directory containing this module.

        Returns:
            PlainConfig instance populated from the file's four tables

        Raises:
            FileNotFoundError: If the resolved path does not exist.
            KeyError: If one of the four tables is absent from the file.
            TypeError: If a table's keys do not match the fields of the
                dataclass it is unpacked into.
        """
        path = (
            Path(__file__).parent / "config.toml"
            if config_path is None
            else Path(config_path)
        )
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        # tomllib requires a binary file handle.
        with path.open("rb") as handle:
            tables = tomllib.load(handle)

        # Each table is unpacked straight into its dataclass, so the TOML
        # keys must match the dataclass field names exactly.
        evaluation_cfg = EvaluationConfig(**tables["evaluation"])
        logging_cfg = LoggingConfig(**tables["logging"])
        prompt_cfg = PromptConfig(**tables["prompt"])
        dataset_cfg = DatasetConfig(**tables["dataset"])
        return cls(
            evaluation=evaluation_cfg,
            logging=logging_cfg,
            prompt=prompt_cfg,
            dataset=dataset_cfg,
        )

    @classmethod
    def default(cls) -> "PlainConfig":
        """Load the configuration from the default ``config.toml`` location.

        Returns:
            PlainConfig instance read from ``config.toml`` next to this module
        """
        return cls.from_file()
# Module-wide cached configuration; stays None until first loaded.
_config: PlainConfig | None = None


def get_config(reload: bool = False) -> PlainConfig:
    """Return the shared configuration, loading it on first use.

    Args:
        reload: When True, replace any cached instance by reading the
            default config.toml again (via PlainConfig.default()), even if
            a different file was previously installed with load_config().

    Returns:
        The module-wide PlainConfig instance
    """
    global _config
    must_load = reload or _config is None
    if must_load:
        _config = PlainConfig.default()
    return _config
def load_config(config_path: Path | str) -> PlainConfig:
    """Read the configuration at ``config_path`` and install it globally.

    The loaded instance replaces whatever the module-level ``_config``
    previously held.

    Args:
        config_path: Path to config.toml

    Returns:
        The freshly loaded PlainConfig instance (also stored as the global)
    """
    global _config
    loaded = PlainConfig.from_file(config_path)
    _config = loaded
    return loaded
def normalize_model_name(model: str) -> str:
    """Strip a leading 'anthropic/' from a model name (inspect-ai format).

    Only the prefix is removed; any later occurrence of 'anthropic/' inside
    the name is left intact.

    Args:
        model: Model name that may have 'anthropic/' prefix

    Returns:
        Model name without the leading 'anthropic/' prefix; returned
        unchanged when the prefix is absent
    """
    # str.replace would also delete later occurrences of the substring, so
    # strip strictly at the start of the string.
    return model.removeprefix("anthropic/")
The get_config function
The system prompt itself is a 185-line behemoth that explains the agent’s task in excruciating detail. It includes descriptions of all the tools, examples of how to use them, and important rules and reminders about Dafny syntax.
This configuration-driven approach makes the agent much more flexible and reusable. We can easily change the agent’s behavior by simply editing the TOML file, without having to touch the code.