rag_app / evaluate.py
iwashuman0405's picture
Upload 9 files
1a1949a verified
from recommendation_engine import prepare_input, scrape_url, get_recommendations
import numpy as np
import streamlit as st
def precision_at_k(preds, relevant, k):
preds_k = preds[:k]
return sum([1 for p in preds_k if p in relevant]) / k
def recall_at_k(preds, relevant, k):
preds_k = preds[:k]
return sum([1 for p in preds_k if p in relevant]) / len(relevant)
def average_precision(preds, relevant, k):
ap = 0
num_relevant = 0
for i in range(min(k, len(preds))):
if preds[i] in relevant:
num_relevant += 1
ap += num_relevant / (i + 1)
return ap / min(len(relevant), k) if relevant else 0
def clean_names(name):
return name.replace("Java Script", "JavaScript")
def evaluate(test_queries, k=3):
recalls, maps = [], []
for item in test_queries:
jd_text = scrape_url(item["url"]) if item["url"] else ""
input_text = prepare_input(item["query"], item["duration"], jd_text)
recommendations = get_recommendations(input_text, top_k=k)
pred_names = [clean_names(rec["name"]) for rec in recommendations]
gt = [clean_names(g) for g in item["relevant_assessments"]]
r = recall_at_k(pred_names, gt, k)
ap = average_precision(pred_names, gt, k)
recalls.append(r)
maps.append(ap)
st.markdown(f"""
**Query:** {item['query']}
**Recall@{k}:** {r:.3f}
**AP@{k}:** {ap:.3f}
---
""")
st.success(f"πŸ“Š Mean Recall@{k}: {np.mean(recalls):.3f}")
st.success(f"πŸ“Š MAP@{k}: {np.mean(maps):.3f}")