hertogateis committed on
Commit
04ac291
·
verified ·
1 Parent(s): 5179695

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -47
app.py CHANGED
@@ -19,8 +19,8 @@ t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
19
 
20
  # Title and Introduction
21
  st.title("Table Question Answering and Data Analysis App")
22
- st.markdown("""
23
- This app allows you to upload a table (CSV or Excel) and ask questions about the data.
24
  Based on your question, it will provide the corresponding answer using the **TAPAS** model and additional data processing.
25
 
26
  ### Available Features:
@@ -54,10 +54,7 @@ else:
54
  if df is not None:
55
  numeric_columns = df.select_dtypes(include=['object']).columns
56
  for col in numeric_columns:
57
- try:
58
- df[col] = pd.to_numeric(df[col])
59
- except ValueError:
60
- st.warning(f"Column '{col}' contains non-numeric values that could not be converted.")
61
 
62
  st.write("Original Data:")
63
  st.write(df)
@@ -68,10 +65,10 @@ else:
68
  # Display the first 5 rows of the dataframe in an editable grid
69
  grid_response = AgGrid(
70
  df.head(5),
 
71
  editable=True,
72
  height=300,
73
  width='100%',
74
- fit_columns_on_grid_load=True # Correct option for auto-sizing
75
  )
76
 
77
  except Exception as e:
@@ -80,10 +77,9 @@ else:
80
  # User input for the question
81
  question = st.text_input('Type your question')
82
 
83
- # Initialize a flag to check for graph-related queries
84
  is_graph_query = False
85
 
86
- # Check if the question is about generating a graph
87
  if 'graph' in question.lower():
88
  is_graph_query = True
89
 
@@ -91,7 +87,43 @@ else:
91
  with st.spinner():
92
  if st.button('Answer'):
93
  try:
94
- if is_graph_query:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # Handle graph-related questions
96
  if 'between' in question.lower() and 'and' in question.lower():
97
  columns = question.split('between')[-1].split('and')
@@ -110,43 +142,7 @@ else:
110
  st.success(f"Here is the graph of column '{column}'.")
111
  else:
112
  st.warning(f"Column '{column}' not found in the data.")
113
-
114
- # Skip the TAPAS processing if it's a graph query
115
- st.stop() # This ensures the code halts and avoids further processing
116
-
117
- # Process TAPAS-related questions if it's not a graph query
118
- raw_answer = tqa(table=df, query=question, truncation=True)
119
-
120
- st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
121
- st.success(raw_answer)
122
-
123
- answer = raw_answer['answer']
124
- aggregator = raw_answer.get('aggregator', '')
125
- coordinates = raw_answer.get('coordinates', [])
126
- cells = raw_answer.get('cells', [])
127
-
128
- # Fix the formatting error by ensuring numeric values
129
- if 'average' in question.lower() or aggregator == 'AVG':
130
- avg_value = df.mean().mean() # Calculate overall average
131
- base_sentence = f"The average for '{question}' is {avg_value:.2f}."
132
- elif 'sum' in question.lower() or aggregator == 'SUM':
133
- total_sum = df.sum().sum() # Calculate overall sum
134
- base_sentence = f"The sum for '{question}' is {total_sum:.2f}."
135
- elif 'max' in question.lower() or aggregator == 'MAX':
136
- max_value = df.max().max() # Find overall max value
137
- base_sentence = f"The maximum value for '{question}' is {max_value:.2f}."
138
- elif 'min' in question.lower() or aggregator == 'MIN':
139
- min_value = df.min().min() # Find overall min value
140
- base_sentence = f"The minimum value for '{question}' is {min_value:.2f}."
141
- elif 'count' in question.lower() or aggregator == 'COUNT':
142
- count_value = df.count().sum() # Count all values
143
- base_sentence = f"The total count of non-null values for '{question}' is {count_value}."
144
- else:
145
- base_sentence = f"The answer from TAPAS for '{question}' is {answer}."
146
-
147
- # Display the final response
148
- st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Final Generated Response: </p>", unsafe_allow_html=True)
149
- st.success(base_sentence)
150
 
151
  except Exception as e:
152
  st.warning(f"Error processing question or generating answer: {str(e)}")
 
19
 
20
  # Title and Introduction
21
  st.title("Table Question Answering and Data Analysis App")
22
+ st.markdown("""
23
+ This app allows you to upload a table (CSV or Excel) and ask questions about the data.
24
  Based on your question, it will provide the corresponding answer using the **TAPAS** model and additional data processing.
25
 
26
  ### Available Features:
 
54
  if df is not None:
55
  numeric_columns = df.select_dtypes(include=['object']).columns
56
  for col in numeric_columns:
57
+ df[col] = pd.to_numeric(df[col], errors='ignore')
 
 
 
58
 
59
  st.write("Original Data:")
60
  st.write(df)
 
65
  # Display the first 5 rows of the dataframe in an editable grid
66
  grid_response = AgGrid(
67
  df.head(5),
68
+ columns_auto_size_mode='FIT_CONTENTS',
69
  editable=True,
70
  height=300,
71
  width='100%',
 
72
  )
73
 
74
  except Exception as e:
 
77
  # User input for the question
78
  question = st.text_input('Type your question')
79
 
80
+ # Check if the question is about generating a graph
81
  is_graph_query = False
82
 
 
83
  if 'graph' in question.lower():
84
  is_graph_query = True
85
 
 
87
  with st.spinner():
88
  if st.button('Answer'):
89
  try:
90
+ if not is_graph_query:
91
+ # Process TAPAS-related questions if it's not a graph query
92
+ raw_answer = tqa(table=df, query=question, truncation=True)
93
+
94
+ # Display raw answer from TAPAS
95
+ st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
96
+ st.write(raw_answer) # Display the raw result
97
+
98
+ answer = raw_answer['answer']
99
+ aggregator = raw_answer.get('aggregator', '')
100
+ coordinates = raw_answer.get('coordinates', [])
101
+ cells = raw_answer.get('cells', [])
102
+
103
+ # Handle different aggregators
104
+ if 'average' in question.lower() or aggregator == 'AVG':
105
+ avg_value = df.mean().mean() # Calculate overall average
106
+ base_sentence = f"The average for '{question}' is {avg_value:.2f}."
107
+ elif 'sum' in question.lower() or aggregator == 'SUM':
108
+ total_sum = df.sum().sum() # Calculate overall sum
109
+ base_sentence = f"The sum for '{question}' is {total_sum:.2f}."
110
+ elif 'max' in question.lower() or aggregator == 'MAX':
111
+ max_value = df.max().max() # Find overall max value
112
+ base_sentence = f"The maximum value for '{question}' is {max_value:.2f}."
113
+ elif 'min' in question.lower() or aggregator == 'MIN':
114
+ min_value = df.min().min() # Find overall min value
115
+ base_sentence = f"The minimum value for '{question}' is {min_value:.2f}."
116
+ elif 'count' in question.lower() or aggregator == 'COUNT':
117
+ count_value = df.count().sum() # Count all values
118
+ base_sentence = f"The total count of non-null values for '{question}' is {count_value}."
119
+ else:
120
+ base_sentence = f"The answer from TAPAS for '{question}' is {answer}."
121
+
122
+ # Display the final response
123
+ st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Final Generated Response: </p>", unsafe_allow_html=True)
124
+ st.success(base_sentence)
125
+
126
+ else:
127
  # Handle graph-related questions
128
  if 'between' in question.lower() and 'and' in question.lower():
129
  columns = question.split('between')[-1].split('and')
 
142
  st.success(f"Here is the graph of column '{column}'.")
143
  else:
144
  st.warning(f"Column '{column}' not found in the data.")
145
+ return # Skip TAPAS processing for graph-related queries
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  except Exception as e:
148
  st.warning(f"Error processing question or generating answer: {str(e)}")