The Correctness Evaluator is a specialized tool designed to assess the accuracy of responses by comparing them against expected or ground truth responses. It uses an LLM (Large Language Model) as a judge to determine how well an actual response matches the expected response.
```python
import asyncio

from trusttest.evaluation_contexts import ExpectedResponseContext
from trusttest.evaluators import CorrectnessEvaluator


async def evaluate():
    evaluator = CorrectnessEvaluator()
    result = await evaluator.evaluate(
        response="What is the capital of Osona?",
        context=ExpectedResponseContext(
            expected_response="The capital of Osona is Vic."
        ),
    )
    print(result)


if __name__ == "__main__":
    asyncio.run(evaluate())
```
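If you want to score several responses against their ground truths, the same evaluator instance can be reused. The sketch below is a minimal example, assuming only the `evaluate` call and `ExpectedResponseContext` shown above; the `CASES` list and the `evaluate_all` helper are hypothetical names introduced for illustration, and the cases are fanned out concurrently with `asyncio.gather`.

```python
import asyncio

from trusttest.evaluation_contexts import ExpectedResponseContext
from trusttest.evaluators import CorrectnessEvaluator

# Hypothetical (response, expected_response) pairs for illustration.
CASES = [
    ("The capital of Osona is Vic.", "The capital of Osona is Vic."),
    ("The capital of Osona is Manlleu.", "The capital of Osona is Vic."),
]


async def evaluate_all():
    evaluator = CorrectnessEvaluator()
    # Reuse one evaluator and score all cases concurrently.
    results = await asyncio.gather(
        *(
            evaluator.evaluate(
                response=response,
                context=ExpectedResponseContext(expected_response=expected),
            )
            for response, expected in CASES
        )
    )
    for (response, _), result in zip(CASES, results):
        print(response, "->", result)


if __name__ == "__main__":
    asyncio.run(evaluate_all())
```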