Spaces:

hertogateis
/

Table_QandA_V3

Sleeping

App Files Community

hertogateis committed on Jan 4

Commit

04ac291

verified ·

1 Parent(s): 5179695

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -47

app.py CHANGED Viewed

@@ -19,8 +19,8 @@ t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
 # Title and Introduction
 st.title("Table Question Answering and Data Analysis App")
-st.markdown("""
-    This app allows you to upload a table (CSV or Excel) and ask questions about the data.
     Based on your question, it will provide the corresponding answer using the **TAPAS** model and additional data processing.
     ### Available Features:
@@ -54,10 +54,7 @@ else:
         if df is not None:
             numeric_columns = df.select_dtypes(include=['object']).columns
             for col in numeric_columns:
-                try:
-                    df[col] = pd.to_numeric(df[col])
-                except ValueError:
-                    st.warning(f"Column '{col}' contains non-numeric values that could not be converted.")
             st.write("Original Data:")
             st.write(df)
@@ -68,10 +65,10 @@ else:
             # Display the first 5 rows of the dataframe in an editable grid
             grid_response = AgGrid(
                 df.head(5),
                 editable=True,
                 height=300,
                 width='100%',
-                fit_columns_on_grid_load=True  # Correct option for auto-sizing
             )
     except Exception as e:
@@ -80,10 +77,9 @@ else:
     # User input for the question
     question = st.text_input('Type your question')
-    # Initialize a flag to check for graph-related queries
     is_graph_query = False
-    # Check if the question is about generating a graph
     if 'graph' in question.lower():
         is_graph_query = True
@@ -91,7 +87,43 @@ else:
     with st.spinner():
         if st.button('Answer'):
             try:
-                if is_graph_query:
                     # Handle graph-related questions
                     if 'between' in question.lower() and 'and' in question.lower():
                         columns = question.split('between')[-1].split('and')
@@ -110,43 +142,7 @@ else:
                             st.success(f"Here is the graph of column '{column}'.")
                         else:
                             st.warning(f"Column '{column}' not found in the data.")
-                    # Skip the TAPAS processing if it's a graph query
-                    st.stop()  # This ensures the code halts and avoids further processing
-                # Process TAPAS-related questions if it's not a graph query
-                raw_answer = tqa(table=df, query=question, truncation=True)
-                st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
-                st.success(raw_answer)
-                answer = raw_answer['answer']
-                aggregator = raw_answer.get('aggregator', '')
-                coordinates = raw_answer.get('coordinates', [])
-                cells = raw_answer.get('cells', [])
-                # Fix the formatting error by ensuring numeric values
-                if 'average' in question.lower() or aggregator == 'AVG':
-                    avg_value = df.mean().mean()  # Calculate overall average
-                    base_sentence = f"The average for '{question}' is {avg_value:.2f}."
-                elif 'sum' in question.lower() or aggregator == 'SUM':
-                    total_sum = df.sum().sum()  # Calculate overall sum
-                    base_sentence = f"The sum for '{question}' is {total_sum:.2f}."
-                elif 'max' in question.lower() or aggregator == 'MAX':
-                    max_value = df.max().max()  # Find overall max value
-                    base_sentence = f"The maximum value for '{question}' is {max_value:.2f}."
-                elif 'min' in question.lower() or aggregator == 'MIN':
-                    min_value = df.min().min()  # Find overall min value
-                    base_sentence = f"The minimum value for '{question}' is {min_value:.2f}."
-                elif 'count' in question.lower() or aggregator == 'COUNT':
-                    count_value = df.count().sum()  # Count all values
-                    base_sentence = f"The total count of non-null values for '{question}' is {count_value}."
-                else:
-                    base_sentence = f"The answer from TAPAS for '{question}' is {answer}."
-                # Display the final response
-                st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Final Generated Response: </p>", unsafe_allow_html=True)
-                st.success(base_sentence)
             except Exception as e:
                 st.warning(f"Error processing question or generating answer: {str(e)}")

 # Title and Introduction
 st.title("Table Question Answering and Data Analysis App")
+st.markdown("""
+    This app allows you to upload a table (CSV or Excel) and ask questions about the data.
     Based on your question, it will provide the corresponding answer using the **TAPAS** model and additional data processing.
     ### Available Features:
         if df is not None:
             numeric_columns = df.select_dtypes(include=['object']).columns
             for col in numeric_columns:
+                df[col] = pd.to_numeric(df[col], errors='ignore')
             st.write("Original Data:")
             st.write(df)
             # Display the first 5 rows of the dataframe in an editable grid
             grid_response = AgGrid(
                 df.head(5),
+                columns_auto_size_mode='FIT_CONTENTS',
                 editable=True,
                 height=300,
                 width='100%',
             )
     except Exception as e:
     # User input for the question
     question = st.text_input('Type your question')
+    # Check if the question is about generating a graph
     is_graph_query = False
     if 'graph' in question.lower():
         is_graph_query = True
     with st.spinner():
         if st.button('Answer'):
             try:
+                if not is_graph_query:
+                    # Process TAPAS-related questions if it's not a graph query
+                    raw_answer = tqa(table=df, query=question, truncation=True)
+                    # Display raw answer from TAPAS
+                    st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
+                    st.write(raw_answer)  # Display the raw result
+                    answer = raw_answer['answer']
+                    aggregator = raw_answer.get('aggregator', '')
+                    coordinates = raw_answer.get('coordinates', [])
+                    cells = raw_answer.get('cells', [])
+                    # Handle different aggregators
+                    if 'average' in question.lower() or aggregator == 'AVG':
+                        avg_value = df.mean().mean()  # Calculate overall average
+                        base_sentence = f"The average for '{question}' is {avg_value:.2f}."
+                    elif 'sum' in question.lower() or aggregator == 'SUM':
+                        total_sum = df.sum().sum()  # Calculate overall sum
+                        base_sentence = f"The sum for '{question}' is {total_sum:.2f}."
+                    elif 'max' in question.lower() or aggregator == 'MAX':
+                        max_value = df.max().max()  # Find overall max value
+                        base_sentence = f"The maximum value for '{question}' is {max_value:.2f}."
+                    elif 'min' in question.lower() or aggregator == 'MIN':
+                        min_value = df.min().min()  # Find overall min value
+                        base_sentence = f"The minimum value for '{question}' is {min_value:.2f}."
+                    elif 'count' in question.lower() or aggregator == 'COUNT':
+                        count_value = df.count().sum()  # Count all values
+                        base_sentence = f"The total count of non-null values for '{question}' is {count_value}."
+                    else:
+                        base_sentence = f"The answer from TAPAS for '{question}' is {answer}."
+                    # Display the final response
+                    st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Final Generated Response: </p>", unsafe_allow_html=True)
+                    st.success(base_sentence)
+                else:
                     # Handle graph-related questions
                     if 'between' in question.lower() and 'and' in question.lower():
                         columns = question.split('between')[-1].split('and')
                             st.success(f"Here is the graph of column '{column}'.")
                         else:
                             st.warning(f"Column '{column}' not found in the data.")
+                    return  # Skip TAPAS processing for graph-related queries
             except Exception as e:
                 st.warning(f"Error processing question or generating answer: {str(e)}")