from agensight.eval.test_case import ModelTestCase
from agensight.eval.metrics import ContextualRecallMetric
# --- Example: contextual recall on a product-description query ---

# Passages the retriever surfaced for this query; the metric checks whether
# the expected output is attributable to these chunks.
_retrieved_chunks = [
    "Display: 15.6-inch Full HD (1920x1080) IPS panel",
    "Weight: 2.5kg",
    "Battery life: Up to 8 hours",
]

# A single test case: the user's question, the model's answer, the ideal
# answer, and the retrieval context the answer should be grounded in.
test_case = ModelTestCase(
    input="What are the display size and weight of this laptop?",
    actual_output="The laptop has a 15.6-inch display and weighs 2.5kg.",
    expected_output="This laptop features a 15.6-inch Full HD display and weighs 2.5 kilograms.",
    retrieval_context=_retrieved_chunks,
)

# Configure the metric: pass when recall >= 0.7, judged by gpt-4o,
# with a textual rationale included alongside the numeric score.
metric = ContextualRecallMetric(
    threshold=0.7,
    model="gpt-4o",
    include_reason=True,
)

# Score the test case, then inspect the results on the metric object.
metric.measure(test_case)
print(metric.score)   # e.g. 1.0
print(metric.reason)  # e.g. "All expected information is covered in the context."