from trusttest.probes.dataset import DatasetProbe
from trusttest.dataset_builder.base import Dataset
from trusttest.targets.http import HttpTarget, PayloadConfig
from trusttest.evaluators.llm_judges import CorrectnessEvaluator
from trusttest.evaluator_suite import EvaluatorSuite
from trusttest.evaluation_scenarios import EvaluationScenario
# --- 1. Target under test --------------------------------------------------
# The same placeholder literal is used twice below: once inside the request
# body template and once as the `message_regex` value.
# NOTE(review): presumably trusttest replaces this placeholder with each test
# message before sending the request -- confirm against the PayloadConfig docs.
_placeholder = "{{ test }}"
_request_headers = {"Content-Type": "application/json"}
_request_body = {"messages": [{"role": "user", "content": _placeholder}]}

target = HttpTarget(
    url="https://your-model-endpoint.com/chat",
    headers=_request_headers,
    payload_config=PayloadConfig(
        format=_request_body,
        message_regex=_placeholder,
    ),
)

# --- 2. Test data ----------------------------------------------------------
# Test cases are read from a YAML file; the relative path is resolved by
# Dataset.from_yaml.
dataset = Dataset.from_yaml("functional_tests.yaml")

# --- 3. Probe and test set -------------------------------------------------
# The probe is given both the target and the dataset, then asked for a test set.
probe = DatasetProbe(target=target, dataset=dataset)
test_set = probe.get_test_set()

# --- 4. Evaluation ---------------------------------------------------------
# A single CorrectnessEvaluator is wrapped in a suite, and the suite in a
# scenario, which is what evaluates the test set.
evaluator = CorrectnessEvaluator()
suite = EvaluatorSuite(evaluators=[evaluator])
scenario = EvaluationScenario(evaluator_suite=suite)

results = scenario.evaluate(test_set)
results.display_summary()