# explainbility_benchmark/pages/2_batch_evaluation.py
# Zekun Wu
# update
# b0eb8db
# raw
# history blame
# 3.18 kB
import os
import pandas as pd
import streamlit as st
from util.evaluator import evaluator, write_evaluation_commentary
# Function to check password
def check_password():
    """Render the password prompt and record a successful login.

    Draws a masked text input and a "Submit" button. When the button is
    clicked, the entered text is compared with the ``PASSWORD`` environment
    variable. On a match ``st.session_state['password_correct']`` is set to
    ``True``; otherwise an error message is shown.
    """
    import hmac  # local import: only needed for the constant-time comparison

    input_key = 'password_input'

    def password_entered():
        # Read the widget value from session state instead of a closed-over
        # local: Streamlit invokes on_click callbacks before the script
        # reruns, so a captured local would hold the previous run's value.
        entered = st.session_state.get(input_key, "")
        expected = os.getenv('PASSWORD')
        # An unset PASSWORD can never match; compare_digest avoids leaking
        # how many leading characters were correct through timing.
        if expected is not None and hmac.compare_digest(
            entered.encode('utf-8'), expected.encode('utf-8')
        ):
            st.session_state['password_correct'] = True
        else:
            st.error("Incorrect Password, please try again.")

    st.text_input("Enter Password:", type="password", key=input_key)
    submit_button = st.button("Submit", on_click=password_entered)
    if submit_button and not st.session_state.get('password_correct', False):
        st.error("Please enter a valid password to access the demo.")
# Function to batch evaluate explanations
def batch_evaluate(uploaded_file):
    """Score every question/explanation pair in an uploaded CSV.

    Args:
        uploaded_file: Anything ``pd.read_csv`` accepts (path or file-like
            object). The CSV must contain ``question`` and ``explanation``
            columns.

    Returns:
        A DataFrame with one row per input row, holding ``'Question'``,
        ``'Explanation'`` and one column per ``'Principle'`` entry returned
        by ``write_evaluation_commentary`` (valued with its ``'Score'``).

    Raises:
        ValueError: If a required column is missing from the CSV.
    """
    df = pd.read_csv(uploaded_file)

    # Fail fast with a clear message, before constructing the evaluator,
    # instead of an AttributeError from the row access inside the loop.
    missing = [col for col in ('question', 'explanation') if col not in df.columns]
    if missing:
        raise ValueError(
            f"Uploaded CSV is missing required column(s): {', '.join(missing)}"
        )

    eval_instance = evaluator('gpt4-1106')  # Assuming fixed model name for simplicity
    total_rows = len(df)
    results = []

    # Progress bar is advanced once per evaluated row.
    progress_bar = st.progress(0)

    for index, row in enumerate(df.itertuples(index=False), start=1):
        question = row.question
        explanation = row.explanation
        scores = eval_instance(question, explanation)  # Evaluate using the evaluator
        commentary_details = write_evaluation_commentary(scores)  # Commentary based on scores
        results.append({
            'Question': question,
            'Explanation': explanation,
            **{detail['Principle']: detail['Score'] for detail in commentary_details},
        })
        # index starts at 1, so the final row brings the bar to 1.0.
        progress_bar.progress(index / total_rows)

    return pd.DataFrame(results)
# Title of the application
st.title('Natural Language Explanation Demo')

# Gate the page: show the password prompt until a login has been recorded
if not st.session_state.get('password_correct', False):
    check_password()
else:
    st.sidebar.success("Password Verified. Proceed with the demo.")
    st.write("""
### Instructions for Uploading CSV
Please upload a CSV file with the following columns:
- `question`: The question you want evaluated.
- `explanation`: The explanation corresponding to the question.
**Example CSV Format:**
```
question,explanation
"What causes rainbows to appear in the sky?","Rainbows appear when sunlight is refracted, dispersed, and reflected inside water droplets in the atmosphere, resulting in a spectrum of light appearing in the sky."
"Why is the sky blue?","The sky is blue because molecules in the air scatter blue light from the sun more than they scatter red light."
```
""")
    uploaded_file = st.file_uploader("Upload CSV file with 'question' and 'explanation' columns", type=['csv'])
    if uploaded_file is not None:
        # Identify the upload so stored results are only shown for the file
        # they were computed from.
        file_signature = (uploaded_file.name, uploaded_file.size)

        if st.button('Evaluate Explanations'):
            # st.button is True only for the single rerun after the click, so
            # keep the results in session state; otherwise they would vanish
            # on the next rerun (e.g. when the download button is pressed).
            st.session_state['batch_results'] = batch_evaluate(uploaded_file)
            st.session_state['batch_results_source'] = file_signature

        result_df = st.session_state.get('batch_results')
        if (
            result_df is not None
            and st.session_state.get('batch_results_source') == file_signature
        ):
            st.write('### Evaluated Results')
            st.dataframe(result_df)
            # Create a CSV download link
            csv_data = result_df.to_csv(index=False)
            st.download_button(
                label="Download evaluation results as CSV",
                data=csv_data,
                file_name='evaluated_results.csv',
                mime='text/csv',
            )