from agensight.eval.test_case import ModelTestCase
from agensight.eval.metrics import ContextualPrecisionMetric
# Example: Refund policy case
test_case = ModelTestCase(
    input="What if these shoes don't fit?",
    actual_output="We offer a 30-day full refund at no extra cost.",  # the model's answer
    expected_output="You are eligible for a 30 day full refund at no extra cost.",  # the ideal answer
    retrieval_context=["All customers are eligible for a 30 day full refund at no extra cost."]  # retrieved chunks, in ranked order
)
# Initialize the metric
metric = ContextualPrecisionMetric(
    threshold=0.7,         # minimum score for the test case to pass
    model="gpt-4o",        # judge model used to score the retrieval context
    include_reason=True    # attach a natural-language explanation to the result
)
# Run the evaluation
metric.measure(test_case)
print(metric.score) # e.g. 1.0
print(metric.reason) # e.g. "The context was directly relevant and well-ranked."
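# A minimal extension (sketch): reuse the same metric across additional test
# cases and apply the 0.7 threshold configured above as a pass/fail check.
# This relies only on the calls already shown (ModelTestCase, metric.measure,
# metric.score); the extra test case data below is illustrative.
more_cases = [
    ModelTestCase(
        input="Do you ship internationally?",
        actual_output="Yes, we ship to over 50 countries.",
        expected_output="We ship internationally to more than 50 countries.",
        retrieval_context=["We currently ship to 50+ countries worldwide."]
    ),
]
for case in more_cases:
    metric.measure(case)
    passed = metric.score >= 0.7  # compare against the threshold set above
    print(f"score={metric.score:.2f} passed={passed}")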