from trusttest.knowledge_base import InMemoryKnowledgeBase
from trusttest.probes.rag import RAGProbe
from trusttest.targets.http import HttpTarget, PayloadConfig
from trusttest.evaluators.llm_judges import CorrectnessEvaluator
from trusttest.evaluator_suite import EvaluatorSuite
from trusttest.evaluation_scenarios import EvaluationScenario
# --- Knowledge base -------------------------------------------------------
# Seed document chunks the RAG probe will draw its questions from.
corpus = [
    "TrustTest is a framework for testing AI models for safety and reliability.",
    "TrustTest supports multiple knowledge base connectors including Azure, Neo4j, and PostgreSQL.",
    "Probes in TrustTest generate test cases to evaluate model behavior.",
]
knowledge_base = InMemoryKnowledgeBase(documents=corpus)

# --- Target under test ----------------------------------------------------
# HTTP endpoint of the RAG system; "{{ test }}" is the placeholder the
# framework substitutes with each generated question.
rag_target = HttpTarget(
    url="https://your-rag-endpoint.com/chat",
    headers={"Content-Type": "application/json"},
    payload_config=PayloadConfig(
        format={"messages": [{"role": "user", "content": "{{ test }}"}]},
        message_regex="{{ test }}",
    ),
)

# --- Probe ----------------------------------------------------------------
# Generate a test set of questions grounded in the knowledge base.
rag_probe = RAGProbe(
    target=rag_target,
    knowledge_base=knowledge_base,
    num_questions=20,
)
generated_tests = rag_probe.get_test_set()

# --- Evaluation -----------------------------------------------------------
# Judge each answer for correctness and print an aggregate summary.
judge_suite = EvaluatorSuite(evaluators=[CorrectnessEvaluator()])
eval_results = EvaluationScenario(evaluator_suite=judge_suite).evaluate(generated_tests)
eval_results.display_summary()