from trusttest.dataset_builder.single_prompt import SinglePromptDatasetBuilder, DatasetItem
from trusttest.probes.dataset import PromptDatasetProbe
from trusttest.evaluation_contexts import ExpectedResponseContext
from trusttest.targets.http import HttpTarget, PayloadConfig
from trusttest.evaluators.llm_judges import CorrectnessEvaluator
from trusttest.evaluator_suite import EvaluatorSuite
from trusttest.evaluation_scenarios import EvaluationScenario
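
# End-to-end example: build a synthetic customer-support dataset, probe an
# HTTP chat endpoint with it, and judge the responses for correctness.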

# Configure the HTTP target that wraps the model endpoint under test.
target = HttpTarget(
    url="https://your-model-endpoint.com/chat",
    headers={"Content-Type": "application/json"},
    payload_config=PayloadConfig(
        format={"messages": [{"role": "user", "content": "{{ test }}"}]},
        message_regex="{{ test }}",
    ),
)
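
# Illustration only (nothing is sent here): with the PayloadConfig above,
# each generated question is substituted for the "{{ test }}" placeholder,
# so the request body delivered to the endpoint should look like this
# (using the first seed question below as the sample):
example_payload = {
    "messages": [{"role": "user", "content": "Where is my order #12345?"}]
}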

# Create the dataset builder: the instructions steer generation, and the
# seed examples anchor question style and expected-response format.
builder = SinglePromptDatasetBuilder(
    instructions="""
    Generate questions that a customer might ask a support chatbot for an e-commerce platform.
    Include questions about:
    - Order status and tracking
    - Returns and refunds
    - Product information
    - Account management
    - Shipping options
    Each question should be realistic and varied.
    """,
    examples=[
        DatasetItem(
            question="Where is my order #12345?",
            context=ExpectedResponseContext(
                expected_response="I can help you track order #12345. Let me look up its current status for you."
            ),
        ),
        DatasetItem(
            question="How do I return a defective item?",
            context=ExpectedResponseContext(
                expected_response="To return a defective item, go to your Orders page, select the item, and click 'Return'. We'll provide a prepaid shipping label."
            ),
        ),
    ],
    context_type=ExpectedResponseContext,
    language="English",
    num_items=50,
)
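
# Each seed DatasetItem above pairs a question with an ExpectedResponseContext;
# context_type asks the builder to produce the same kind of context for all
# 50 synthetic items, while language and num_items control locale and volume.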

# Create a probe that pairs the target with the dataset builder.
probe = PromptDatasetProbe(target=target, dataset_builder=builder)

# Generate the test set.
test_set = probe.get_test_set()
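# At this point the probe has built the dataset and, for each generated
# question, collected the target's live response alongside the expected one.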

# Judge each collected response and summarize the results.
evaluator = CorrectnessEvaluator()
suite = EvaluatorSuite(evaluators=[evaluator])
scenario = EvaluationScenario(evaluator_suite=suite)
results = scenario.evaluate(test_set)
results.display_summary()
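
# CorrectnessEvaluator is an LLM judge: it scores how well each collected
# response matches the expected_response stored in the item's context, and
# display_summary() reports the aggregated scores.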