Spaces:

hertogateis
/

Table_QandA_v2

Sleeping

App Files Files Community

hertogateis committed on Jan 3

Commit

bc8fae9

verified ·

1 Parent(s): d622429

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -52

app.py CHANGED Viewed

@@ -1,57 +1,92 @@
 import streamlit as st
 import pandas as pd
-import openpyxl
-from io import BytesIO
-from fetaqa import question_answering  # Hypothetical module for FeTaQA logic
-# Cache the DataFrame for performance
-@st.cache(allow_output_mutation=True)
-def load_data(uploaded_file):
-    if uploaded_file.name.endswith('.csv'):
-        df = pd.read_csv(uploaded_file)
-    elif uploaded_file.name.endswith(('.xlsx', '.xls')):
-        df = pd.read_excel(uploaded_file, engine='openpyxl')
-    else:
-        st.error("Unsupported file format. Please upload a CSV or XLSX file.")
-        return None
-    return df
-def main():
-    st.title("FeTaQA Table Question Answering")
-    # File uploader
-    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=["csv", "xlsx", "xls"])
-    if uploaded_file is not None:
-        df = load_data(uploaded_file)
         if df is not None:
-            st.write("Uploaded Table:")
-            st.dataframe(df)
-            # Question input
-            question = st.text_input("Ask a question about the table:")
-            # Question history
-            if 'question_history' not in st.session_state:
-                st.session_state['question_history'] = []
-            if st.button('Ask'):
-                if question:
-                    answer = question_answering(df, question)
-                    st.write(f"Answer: {answer}")
-                    st.session_state['question_history'].append((question, answer))
-                    # Displaying history
-                    st.write("Question History:")
-                    for q, a in st.session_state['question_history'][-5:]:  # Show last 5 questions
-                        st.write(f"**Q:** {q}")
-                        st.write(f"**A:** {a}")
-                        st.write("---")
-            # Reset history
-            if st.button('Clear History'):
-                st.session_state['question_history'] = []
-if __name__ == "__main__":
-    main()

+import os
 import streamlit as st
+from st_aggrid import AgGrid
 import pandas as pd
+from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
+# Set the page layout for Streamlit
+st.set_page_config(layout="wide")
+# CSS styling
+# ... (keep your existing CSS code)
+# Initialize TAPAS pipeline
+tqa = pipeline(task="table-question-answering",
+              model="google/tapas-large-finetuned-wtq",
+              device="cpu")
+# Initialize T5 tokenizer and model for text generation
+t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
+t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
+# File uploader in the sidebar
+file_name = st.sidebar.file_uploader("Upload file:", type=['csv', 'xlsx'])
+# File processing and question answering
+if file_name is None:
+    st.markdown('<p class="font">Please upload an excel or csv file </p>', unsafe_allow_html=True)
+else:
+    try:
+        # Check file type and handle reading accordingly
+        if file_name.name.endswith('.csv'):
+            df = pd.read_csv(file_name, sep=';', encoding='ISO-8859-1')  # Adjust encoding if needed
+        elif file_name.name.endswith('.xlsx'):
+            df = pd.read_excel(file_name, engine='openpyxl')  # Use openpyxl to read .xlsx files
+        else:
+            st.error("Unsupported file type")
+            df = None
         if df is not None:
+            numeric_columns = df.select_dtypes(include=['object']).columns
+            for col in numeric_columns:
+                df[col] = pd.to_numeric(df[col], errors='ignore')
+            st.write("Original Data:")
+            st.write(df)
+            df_numeric = df.copy()
+            df = df.astype(str)
+            # Display the first 5 rows of the dataframe in an editable grid
+            grid_response = AgGrid(
+                df.head(5),
+                columns_auto_size_mode='FIT_CONTENTS',
+                editable=True,
+                height=300,
+                width='100%',
+            )
+    except Exception as e:
+        st.error(f"Error reading file: {str(e)}")
+    # User input for the question
+    question = st.text_input('Type your question')
+    # Process the answer using TAPAS and T5
+    with st.spinner():
+        if st.button('Answer'):
+            try:
+                raw_answer = tqa(table=df, query=question, truncation=True)
+                st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>",
+                           unsafe_allow_html=True)
+                st.success(raw_answer)
+                answer = raw_answer['answer']
+                aggregator = raw_answer.get('aggregator', '')
+                coordinates = raw_answer.get('coordinates', [])
+                cells = raw_answer.get('cells', [])
+                if aggregator == 'SUM':
+                    # Convert cell values to numbers and sum them
+                    values = [float(cell) for cell in cells if cell.replace('.', '').isdigit()]
+                    total_sum = sum(values)
+                    base_sentence = f"The sum for '{question}' is {total_sum}."
+                else:
+                    # Construct a base sentence for other aggregators or no aggregation
+                    base_sentence = f"The answer from TAPAS for '{question}' is {answer}."
+                    if coordinates and cells:
+                        rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
+                                     for coordinate, cell in zip(coordinates, cells)]
+                        rows_description = " and ".join(rows_info)
+                        base_sentence += f" This includes the following data: