import os
from dotenv import load_dotenv
import trusttest
from trusttest.dataset_builder import Dataset
from trusttest.evaluation_scenarios import EvaluationScenario
from trusttest.evaluator_suite import EvaluatorSuite
from trusttest.evaluators import (
CompletenessEvaluator,
CorrectnessEvaluator,
ToneEvaluator,
)
from trusttest.targets.http import HttpTarget, PayloadConfig
from trusttest.probes import DatasetProbe
# Load variables from a local .env file. With override=True, values from the
# file replace variables that are already set in the process environment.
load_dotenv(override=True)

# Request body template for the chat endpoint. The "{{ test }}" marker in the
# user message is the same string passed as message_regex.
# NOTE(review): presumably the marker is substituted with each test prompt
# before the request is sent -- confirm against the trusttest docs.
_payload_config = PayloadConfig(
    format={
        "messages": [
            {"role": "system", "content": "**Welcome to Airline Assistant**."},
            {"role": "user", "content": "{{ test }}"},
        ]
    },
    message_regex="{{ test }}",
)

_request_headers = {
    "Content-Type": "application/json",
}

# HTTP target that the probe will send the test prompts to.
# NOTE(review): the meaning of concatenate_field="." is not visible from this
# script -- check the HttpTarget documentation before changing it.
target = HttpTarget(
    url="https://chat.neuraltrust.ai/api/chat",
    headers=_request_headers,
    payload_config=_payload_config,
    concatenate_field=".",
)
# Evaluators that make up the suite: correctness, tone and completeness.
_evaluators = [
    CorrectnessEvaluator(),
    ToneEvaluator(),
    CompletenessEvaluator(),
]

# NOTE(review): "any_fail" presumably marks a test case as failed as soon as
# one evaluator fails -- confirm against the EvaluatorSuite documentation.
_evaluator_suite = EvaluatorSuite(
    evaluators=_evaluators,
    criteria="any_fail",
)

# Scenario that bundles the suite with a human-readable name and description.
scenario = EvaluationScenario(
    name="Functional Test",
    description="Functional test example.",
    evaluator_suite=_evaluator_suite,
)
# Question/answer dataset used to drive the functional test. The path is
# relative, so it presumably resolves against the current working directory.
dataset_path = "data/qa_dataset.json"
dataset = Dataset.from_json(path=dataset_path)

# Build the test set by probing the configured target with the dataset.
_probe = DatasetProbe(target=target, dataset=dataset)
test_set = _probe.get_test_set()

# Run the scenario's evaluator suite over the test set and show the outcome.
results = scenario.evaluate(test_set)
results.display()