import sys
import os
from agensight.eval.metrics import ImageReferenceMetric
from agensight.eval.test_case import MLLMTestCase
from agensight.eval.test_case import MLLMImage
# Example script: build one multimodal test case, run ImageReferenceMetric on
# it, and print the metric's score and reason.

# Input side of the test case: a single text segment.
input_data = ["This is a report with an image."]

# Output side of the test case: a text segment followed by an image object.
# NOTE(review): "/image-path/cat.jpeg" looks like a placeholder path — point
# it at a real image before running.
cat_image = MLLMImage(url="/image-path/cat.jpeg")
actual_output = [
    "The following image shows a cat on a windowsill.",
    cat_image,
]

# Metric configured with the "gpt-4o" model and a 0.5 threshold.
# NOTE(review): presumably the threshold is the pass/fail cutoff for the
# score — confirm against the ImageReferenceMetric documentation.
metric = ImageReferenceMetric(
    model="gpt-4o",
    threshold=0.5,
)

test_case = MLLMTestCase(
    input=input_data,
    actual_output=actual_output,
)

# measure() is called for its effect on the metric object; the results are
# read back from metric.score and metric.reason below.
metric.measure(test_case)

print(f"Score: {metric.score}")
print(f"Reason: {metric.reason}")