Spaces:

hertogateis
/

Table_QandA_v2

Sleeping

App Files Community

hertogateis committed on Jan 3

Commit

4665c84

verified ·

1 Parent(s): 7797cc9

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -18

app.py CHANGED Viewed

@@ -1,14 +1,19 @@
-import os
 import pandas as pd
 import streamlit as st
-from tapas import tqa, t5_tokenizer, t5_model
 # Assuming 'df' is the DataFrame you are using and has numeric columns
 df_numeric = df.select_dtypes(include='number')
-# Ensure that `column_name` is defined and valid
-column_name = None  # Make sure this is defined later from TAPAS response
 # User input for the question
 question = st.text_input('Type your question')
@@ -17,26 +22,24 @@ with st.spinner():
     if st.button('Answer'):
         try:
             # Get the raw answer from TAPAS
-            raw_answer = tqa(table=df, query=question, truncation=True)
             st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
             st.success(raw_answer)
             # Extract relevant information from the TAPAS result
-            answer = raw_answer['answer']
-            aggregator = raw_answer.get('aggregator', '')
-            coordinates = raw_answer.get('coordinates', [])
-            cells = raw_answer.get('cells', [])
-            # Extract the column name based on coordinates
-            if coordinates:
-                row, col = coordinates[0]  # assuming single cell result
-                column_name = df.columns[col]  # Get the column name
             # Construct a base sentence replacing 'SUM' with the query term
             base_sentence = f"The {question.lower()} of the selected data is {answer}."
             if coordinates and cells:
-                rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
                              for coordinate, cell in zip(coordinates, cells)]
                 rows_description = " and ".join(rows_info)
                 base_sentence += f" This includes the following data: {rows_description}."
@@ -44,11 +47,9 @@ with st.spinner():
             # Generate a fluent response using the T5 model, rephrasing the base sentence
             input_text = f"Given the question: '{question}', generate a more human-readable response: {base_sentence}"
-            # Tokenize the input and generate a fluent response using T5
             inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
             summary_ids = t5_model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
-            # Decode the generated text
             generated_text = t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
             # Display the final generated response

 import pandas as pd
 import streamlit as st
+from transformers import TapasForQuestionAnswering, TapasTokenizer, T5ForConditionalGeneration, T5Tokenizer
+import torch
+# Load TAPAS model and tokenizer
+tqa_model = TapasForQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
+tqa_tokenizer = TapasTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
+# Load T5 model and tokenizer for rephrasing
+t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
+t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
 # Assuming 'df' is the DataFrame you are using and has numeric columns
 df_numeric = df.select_dtypes(include='number')
 # User input for the question
 question = st.text_input('Type your question')
     if st.button('Answer'):
         try:
             # Get the raw answer from TAPAS
+            inputs = tqa_tokenizer(table=df, query=question, return_tensors="pt")
+            with torch.no_grad():
+                outputs = tqa_model(**inputs)
+                raw_answer = tqa_tokenizer.decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
             st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
             st.success(raw_answer)
             # Extract relevant information from the TAPAS result
+            answer = raw_answer
+            aggregator = "average"  # Example aggregator, adjust based on raw_answer if needed
+            coordinates = []  # Example, adjust based on raw_answer
+            cells = []  # Example, adjust based on raw_answer
             # Construct a base sentence replacing 'SUM' with the query term
             base_sentence = f"The {question.lower()} of the selected data is {answer}."
             if coordinates and cells:
+                rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
                              for coordinate, cell in zip(coordinates, cells)]
                 rows_description = " and ".join(rows_info)
                 base_sentence += f" This includes the following data: {rows_description}."
             # Generate a fluent response using the T5 model, rephrasing the base sentence
             input_text = f"Given the question: '{question}', generate a more human-readable response: {base_sentence}"
             inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
             summary_ids = t5_model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
             generated_text = t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
             # Display the final generated response