# NOTE(review): the lines below were non-Python scrape residue (a Hugging Face
# Spaces page header and a diff-viewer's commit-hash / line-number gutter).
# They were not valid Python and broke the file; preserved here as comments.
# Spaces: Sleeping | File size: 3,223 Bytes
# bde8fd2 c2eb3af bde8fd2 7396d90 9ab69d9 bde8fd2 b698d05 bde8fd2 b698d05 bde8fd2 5342147 bde8fd2 c2eb3af
import os
import json
import requests
import streamlit as st
import pandas as pd
from sentence_transformers import CrossEncoder
import numpy as np
np.set_printoptions(suppress=True, precision=4)
model = CrossEncoder('vectara/hallucination_evaluation_model')
pd.set_option('display.width', 100)
pd.set_option('display.max_colwidth', None) # Use None to display full content without truncation
def vectara_query(query: str, config: dict) -> tuple:
    """Run a search + summarization query against the Vectara v1 query API.

    Args:
        query: Natural-language query string.
        config: Dict with keys "api_key", "customer_id", "corpus_id",
            "lambda_val" (lexical interpolation weight) and "top_k".

    Returns:
        A ``(results, summary)`` tuple where ``results`` is a list of
        ``[text, score]`` pairs and ``summary`` is the generated summary
        text. On any non-200 HTTP response returns ``([], "")`` so callers
        can always unpack two values.
    """
    corpus_key = [
        {
            "customerId": config["customer_id"],
            "corpusId": config["corpus_id"],
            "lexicalInterpolationConfig": {"lambda": config["lambda_val"]},
        }
    ]
    data = {
        "query": [
            {
                "query": query,
                "start": 0,
                "numResults": config["top_k"],
                "contextConfig": {
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                },
                "corpusKey": corpus_key,
                "summary": [
                    {
                        "responseLang": "eng",
                        "maxSummarizedResults": 5,
                    }
                ],
            }
        ]
    }
    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }
    # json= serializes the payload for us; a timeout keeps the Streamlit app
    # from hanging indefinitely on a stuck connection.
    response = requests.post(
        url="https://api.vectara.io/v1/query",
        headers=headers,
        json=data,
        timeout=30,
    )
    if response.status_code != 200:
        # The original print("Query failed %s", ...) passed "%s" as a plain
        # argument and printed it literally; build the message properly.
        print(
            f"Query failed (code {response.status_code}, "
            f"reason {response.reason}, details {response.text})"
        )
        # Return a 2-tuple so `results, summary = vectara_query(...)` does
        # not raise on the failure path (old code returned a bare []).
        return [], ""
    result = response.json()
    responses = result["responseSet"][0]["response"]
    summary = result["responseSet"][0]["summary"][0]["text"]
    res = [[r["text"], r["score"]] for r in responses]
    return res, summary
# --- Configuration ---------------------------------------------------------
# SECURITY: a live API key is hard-coded below. Rotate this key and load it
# from a secrets manager or deployment environment instead of committing it
# to source control.
# setdefault preserves any values already present in the real environment
# (the original unconditionally overwrote them).
os.environ.setdefault('VECTARA_API_KEY', 'zwt_MD0gpPStP7DARQICFDZ4XIolYlRvi7qYm61HcA')
os.environ.setdefault('VECTARA_CORPUS_ID', '5')
os.environ.setdefault('VECTARA_CUSTOMER_ID', '809312420')

# Load config from environment variables; env values are already strings,
# so no str() wrapping is needed.
api_key = os.environ.get("VECTARA_API_KEY", "")
customer_id = os.environ.get("VECTARA_CUSTOMER_ID", "")
corpus_id = os.environ.get("VECTARA_CORPUS_ID", "")
config = {
    "api_key": api_key,
    "customer_id": customer_id,
    "corpus_id": corpus_id,
    "lambda_val": 0.025,  # hybrid lexical/semantic interpolation weight
    "top_k": 10,          # number of search results to request
}
# --- Streamlit UI ----------------------------------------------------------
st.title("KitchenCreators App")

# Input for the query
query = st.text_input("Enter your query:", "What does Kitchen Creators do?")

# Button to trigger the query
if st.button("Run Query"):
    results, summary = vectara_query(query, config)

    # Display summary
    st.header("Summary")
    st.write(summary)

    # Score top facts against the summary with the HHEM cross-encoder.
    st.header("Additional Processing")
    if results:
        texts = [r[0] for r in results[:5]]
        # Batch all (fact, summary) pairs into a single predict() call
        # instead of one model call per text; the cross-encoder expects a
        # list of sentence pairs.
        scores = model.predict([[text, summary] for text in texts])
        df = pd.DataFrame({'fact': texts, 'HHEM score': scores})
        st.write(df)
    else:
        # Guard the DataFrame build when the query returned nothing
        # (e.g. empty corpus or a failed request).
        st.warning("No results returned for this query.")
|