from agensight.eval.test_case import ModelTestCase
from agensight.eval.metrics import ContextualRelevancyMetric
# Example: checking how relevant the retrieved context is to a question
# about the return policy.
user_question = "What is your return policy?"
model_answer = "You can return items within 30 days if unused."

# Snippets handed to the test case as its retrieval context. The first three
# talk about returns; the last one is about shipping instead.
retrieved_snippets = [
    "Return Policy: Items must be returned within 30 days of purchase",
    "Condition: Products must be unused and in original packaging",
    "Requirements: Original receipt must be presented",
    "Shipping Policy: Free shipping on orders over $50",
]

# Bundle the question, the produced answer, and the retrieved snippets into
# one test case object.
test_case = ModelTestCase(
    input=user_question,
    actual_output=model_answer,
    retrieval_context=retrieved_snippets,
)
# Build the contextual-relevancy metric. The settings are gathered in a dict
# first so they are easy to scan and adjust, then unpacked as keyword
# arguments.
# NOTE(review): `threshold` is presumably the pass/fail cutoff for the score
# and `model` the LLM used as judge -- confirm against the agensight docs.
metric_settings = {
    "threshold": 0.7,
    "model": "gpt-4o",
    "include_reason": True,
}
metric = ContextualRelevancyMetric(**metric_settings)
# Run the metric against the test case, then print the results it exposes.
metric.measure(test_case)

# Printed in order:
#   score  -> e.g. 0.75
#   reason -> e.g. "Most statements are relevant except for shipping info."
for result_field in ("score", "reason"):
    print(getattr(metric, result_field))