Spaces:

holistic-ai
/

explainbility_benchmark

Sleeping

File size: 2,212 Bytes

0c2bd43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0da3235
 
0c2bd43
0da3235
0c2bd43
 
0da3235
0c2bd43
0da3235
 
 
 
 
 
 
 
 
0c2bd43
 
 
0da3235
0c2bd43
0da3235
 
 
 
 
0c2bd43
0da3235
 
 
0c2bd43
 
 
0da3235
 
0c2bd43
0da3235
0c2bd43
0da3235
0c2bd43

import pandas as pd
import streamlit as st
from util.evaluator import evaluator, write_evaluation_commentary
import os

def check_password():
    """Render a sidebar password prompt and unlock the session on a match.

    The submitted value is compared with the ``PASSWORD`` environment
    variable. On a match, ``st.session_state['password_verified']`` is set to
    ``True`` and the script is rerun; otherwise an error message is shown in
    the sidebar. If ``PASSWORD`` is not set, no input is accepted.
    """
    import hmac  # local import: only needed for the constant-time comparison

    with st.sidebar:
        password_input = st.text_input("Enter Password:", type="password")
        if not st.button("Submit"):
            return

        expected = os.getenv('PASSWORD')
        # compare_digest does not short-circuit on the first differing byte,
        # so response timing does not reveal how much of a guess was right.
        # Both sides are encoded because the str form only accepts ASCII.
        matches = expected is not None and hmac.compare_digest(
            password_input.encode('utf-8'), expected.encode('utf-8')
        )
        if not matches:
            st.error("Incorrect Password, please try again.")
            return

        st.session_state['password_verified'] = True
        # st.experimental_rerun is the deprecated name of st.rerun; prefer the
        # current API and fall back for Streamlit versions that predate it.
        rerun = getattr(st, 'rerun', None) or st.experimental_rerun
        rerun()

def batch_evaluate(uploaded_file):
    """Score every question/explanation pair in an uploaded CSV.

    Args:
        uploaded_file: Anything ``pandas.read_csv`` accepts (here, the object
            returned by the Streamlit file uploader). The CSV must contain
            ``question`` and ``explanation`` columns.

    Returns:
        A ``pandas.DataFrame`` with one row per input row, holding
        ``Question``, ``Explanation`` and one column per ``Principle``
        reported by ``write_evaluation_commentary`` (valued with its
        ``Score``).

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    df = pd.read_csv(uploaded_file)

    # Fail before creating the evaluator, and say which column is absent,
    # instead of surfacing a bare KeyError('question') from inside the loop.
    missing = [col for col in ('question', 'explanation') if col not in df.columns]
    if missing:
        raise KeyError(
            "Uploaded CSV is missing required column(s): " + ", ".join(missing)
        )

    eval_instance = evaluator('gpt4-1106')  # Using fixed model name for simplicity
    results = []

    # Walk the two needed columns directly rather than building a Series
    # for every row with iterrows().
    for question, explanation in zip(df['question'], df['explanation']):
        scores = eval_instance(question, explanation)
        commentary_details = write_evaluation_commentary(scores)
        results.append({
            'Question': question,
            'Explanation': explanation,
            **{detail['Principle']: detail['Score'] for detail in commentary_details},
        })

    return pd.DataFrame(results)

st.title('Natural Language Explanation Demo')

# Gate the whole demo behind the sidebar password prompt.
if not st.session_state.get('password_verified'):
    check_password()
else:
    st.sidebar.success("Password Verified. Proceed with the demo.")
    uploaded_file = st.file_uploader("Upload CSV file with 'question' and 'explanation' columns", type=['csv'])

    if uploaded_file is not None:
        # Approximate identity of the current upload (name + size), used so
        # that stored results are not shown against a different file.
        upload_key = (uploaded_file.name, uploaded_file.size)

        if st.button('Evaluate Explanations'):
            st.session_state['evaluation_results'] = (upload_key, batch_evaluate(uploaded_file))

        # st.button is True only on the run triggered by its own click, and
        # clicking the download button triggers another run. Rendering from
        # session_state keeps the table and the download button on screen
        # without re-running the evaluation.
        cached = st.session_state.get('evaluation_results')
        if cached is not None and cached[0] == upload_key:
            result_df = cached[1]
            st.write('### Evaluated Results')
            st.dataframe(result_df)

            # Offer the results as a CSV download
            csv = result_df.to_csv(index=False)
            st.download_button(
                label="Download evaluation results as CSV",
                data=csv,
                file_name='evaluated_results.csv',
                mime='text/csv',
            )