Update app.py
app.py CHANGED
@@ -1,14 +1,19 @@
-import os
 import pandas as pd
 import streamlit as st
-from
+from transformers import TapasForQuestionAnswering, TapasTokenizer, T5ForConditionalGeneration, T5Tokenizer
+import torch
+
+# Load TAPAS model and tokenizer
+tqa_model = TapasForQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
+tqa_tokenizer = TapasTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
+
+# Load T5 model and tokenizer for rephrasing
+t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
+t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
 
 # Assuming 'df' is the DataFrame you are using and has numeric columns
 df_numeric = df.select_dtypes(include='number')
 
-# Ensure that `column_name` is defined and valid
-column_name = None  # Make sure this is defined later from TAPAS response
-
 # User input for the question
 question = st.text_input('Type your question')
 
@@ -17,26 +22,24 @@ with st.spinner():
     if st.button('Answer'):
         try:
             # Get the raw answer from TAPAS
-
+            inputs = tqa_tokenizer(table=df, query=question, return_tensors="pt")
+            with torch.no_grad():
+                outputs = tqa_model(**inputs)
+            raw_answer = tqa_tokenizer.decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
 
             st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
             st.success(raw_answer)
 
             # Extract relevant information from the TAPAS result
-            answer = raw_answer
-            aggregator =
-            coordinates =
-            cells =
-
-            # Extract the column name based on coordinates
-            if coordinates:
-                row, col = coordinates[0]  # assuming single cell result
-                column_name = df.columns[col]  # Get the column name
+            answer = raw_answer
+            aggregator = "average"  # Example aggregator, adjust based on raw_answer if needed
+            coordinates = []  # Example, adjust based on raw_answer
+            cells = []  # Example, adjust based on raw_answer
 
             # Construct a base sentence replacing 'SUM' with the query term
             base_sentence = f"The {question.lower()} of the selected data is {answer}."
             if coordinates and cells:
-                rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
+                rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
                              for coordinate, cell in zip(coordinates, cells)]
                 rows_description = " and ".join(rows_info)
                 base_sentence += f" This includes the following data: {rows_description}."
@@ -44,11 +47,9 @@ with st.spinner():
             # Generate a fluent response using the T5 model, rephrasing the base sentence
             input_text = f"Given the question: '{question}', generate a more human-readable response: {base_sentence}"
 
-            # Tokenize the input and generate a fluent response using T5
             inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
             summary_ids = t5_model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
 
-            # Decode the generated text
             generated_text = t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
             # Display the final generated response
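A note on the new TAPAS block: the `logits` returned by `TapasForQuestionAnswering` are cell-selection scores rather than token ids, so decoding their argmax (as the committed `raw_answer` line does) does not produce a table answer, and `coordinates`, `cells`, and `aggregator` are left as placeholders. A minimal sketch of the usual post-processing with `TapasTokenizer.convert_logits_to_predictions` is below; it assumes the same `df`, `question`, `tqa_model`, and `tqa_tokenizer` as in the diff, and the `aggregation_map`, the `str` cast of `df`, and the final `raw_answer` formatting are illustrative additions, not part of the commit.

    import torch

    # TapasTokenizer expects a table of string cells
    table = df.astype(str)
    inputs = tqa_tokenizer(table=table, queries=[question], padding="max_length", return_tensors="pt")
    with torch.no_grad():
        outputs = tqa_model(**inputs)

    # Convert cell-selection and aggregation logits into table coordinates and an operator id
    coordinates_batch, agg_indices = tqa_tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
    )
    coordinates = coordinates_batch[0]                    # list of (row, column) tuples for the single query
    cells = [table.iat[coord] for coord in coordinates]   # selected cell values as strings
    aggregation_map = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}  # order used by the WTQ-finetuned head
    aggregator = aggregation_map[agg_indices[0]]

    # Example raw answer in the spirit of the commit's raw_answer string
    raw_answer = ", ".join(cells) if aggregator == "NONE" else f"{aggregator} of {', '.join(cells)}"

Since the models are loaded at module level, every Streamlit rerun repeats the `from_pretrained` calls; wrapping the loading in a function decorated with `st.cache_resource` is the usual way to keep one copy in memory across reruns.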