hertogateis committed on
Commit
4665c84
·
verified ·
1 Parent(s): 7797cc9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -1,14 +1,19 @@
1
- import os
2
  import pandas as pd
3
  import streamlit as st
4
- from tapas import tqa, t5_tokenizer, t5_model
 
 
 
 
 
 
 
 
 
5
 
6
  # Assuming 'df' is the DataFrame you are using and has numeric columns
7
  df_numeric = df.select_dtypes(include='number')
8
 
9
- # Ensure that `column_name` is defined and valid
10
- column_name = None # Make sure this is defined later from TAPAS response
11
-
12
  # User input for the question
13
  question = st.text_input('Type your question')
14
 
@@ -17,26 +22,24 @@ with st.spinner():
17
  if st.button('Answer'):
18
  try:
19
  # Get the raw answer from TAPAS
20
- raw_answer = tqa(table=df, query=question, truncation=True)
 
 
 
21
 
22
  st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
23
  st.success(raw_answer)
24
 
25
  # Extract relevant information from the TAPAS result
26
- answer = raw_answer['answer']
27
- aggregator = raw_answer.get('aggregator', '')
28
- coordinates = raw_answer.get('coordinates', [])
29
- cells = raw_answer.get('cells', [])
30
-
31
- # Extract the column name based on coordinates
32
- if coordinates:
33
- row, col = coordinates[0] # assuming single cell result
34
- column_name = df.columns[col] # Get the column name
35
 
36
  # Construct a base sentence replacing 'SUM' with the query term
37
  base_sentence = f"The {question.lower()} of the selected data is {answer}."
38
  if coordinates and cells:
39
- rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
40
  for coordinate, cell in zip(coordinates, cells)]
41
  rows_description = " and ".join(rows_info)
42
  base_sentence += f" This includes the following data: {rows_description}."
@@ -44,11 +47,9 @@ with st.spinner():
44
  # Generate a fluent response using the T5 model, rephrasing the base sentence
45
  input_text = f"Given the question: '{question}', generate a more human-readable response: {base_sentence}"
46
 
47
- # Tokenize the input and generate a fluent response using T5
48
  inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
49
  summary_ids = t5_model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
50
 
51
- # Decode the generated text
52
  generated_text = t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
53
 
54
  # Display the final generated response
 
 
1
  import pandas as pd
2
  import streamlit as st
3
+ from transformers import TapasForQuestionAnswering, TapasTokenizer, T5ForConditionalGeneration, T5Tokenizer
4
+ import torch
5
+
6
+ # Load TAPAS model and tokenizer
7
+ tqa_model = TapasForQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
8
+ tqa_tokenizer = TapasTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
9
+
10
+ # Load T5 model and tokenizer for rephrasing
11
+ t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
12
+ t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
13
 
14
  # Assuming 'df' is the DataFrame you are using and has numeric columns
15
  df_numeric = df.select_dtypes(include='number')
16
 
 
 
 
17
  # User input for the question
18
  question = st.text_input('Type your question')
19
 
 
22
  if st.button('Answer'):
23
  try:
24
  # Get the raw answer from TAPAS
25
+ inputs = tqa_tokenizer(table=df, query=question, return_tensors="pt")
26
+ with torch.no_grad():
27
+ outputs = tqa_model(**inputs)
28
+ raw_answer = tqa_tokenizer.decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
29
 
30
  st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
31
  st.success(raw_answer)
32
 
33
  # Extract relevant information from the TAPAS result
34
+ answer = raw_answer
35
+ aggregator = "average" # Example aggregator, adjust based on raw_answer if needed
36
+ coordinates = [] # Example, adjust based on raw_answer
37
+ cells = [] # Example, adjust based on raw_answer
 
 
 
 
 
38
 
39
  # Construct a base sentence replacing 'SUM' with the query term
40
  base_sentence = f"The {question.lower()} of the selected data is {answer}."
41
  if coordinates and cells:
42
+ rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
43
  for coordinate, cell in zip(coordinates, cells)]
44
  rows_description = " and ".join(rows_info)
45
  base_sentence += f" This includes the following data: {rows_description}."
 
47
  # Generate a fluent response using the T5 model, rephrasing the base sentence
48
  input_text = f"Given the question: '{question}', generate a more human-readable response: {base_sentence}"
49
 
 
50
  inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
51
  summary_ids = t5_model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
52
 
 
53
  generated_text = t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
54
 
55
  # Display the final generated response