fschwartzer committed on
Commit
7212b4f
·
verified ·
1 Parent(s): c898242

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -22
app.py CHANGED
@@ -26,17 +26,23 @@ html_content = f"""
26
  # Aplicar o markdown combinado no Streamlit
27
  st.markdown(html_content, unsafe_allow_html=True)
28
 
29
- # Inicialização de variáveis de estado
30
- if 'all_anomalies' not in st.session_state:
31
- st.session_state['all_anomalies'] = pd.DataFrame()
32
- if 'history' not in st.session_state:
33
- st.session_state['history'] = []
34
-
35
- # Carregar os modelos de tradução e TAPEX
36
- pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
37
- en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
38
- tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
39
- tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
 
 
 
 
 
 
40
  tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
41
 
42
  def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
@@ -45,6 +51,7 @@ def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
45
  translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
46
  return translated_text
47
 
 
48
  def response(user_question, table_data):
49
  question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
50
  encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
@@ -53,6 +60,7 @@ def response(user_question, table_data):
53
  response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
54
  return response_pt
55
 
 
56
  def load_data(uploaded_file):
57
  if uploaded_file.name.endswith('.csv'):
58
  df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
@@ -95,6 +103,8 @@ def preprocess_data(df):
95
  df_clean = new_df.copy()
96
  return df_clean
97
 
 
 
98
  def apply_prophet(df_clean):
99
  if df_clean.empty:
100
  st.error("DataFrame está vazio após o pré-processamento.")
@@ -156,6 +166,13 @@ def apply_prophet(df_clean):
156
  # Return the dataframe of all anomalies
157
  return all_anomalies
158
 
 
 
 
 
 
 
 
159
  tab1, tab2 = st.tabs(["Meta Prophet", "Microsoft TAPEX"])
160
 
161
  # Interface para carregar arquivo
@@ -169,31 +186,24 @@ with tab1:
169
  if df_clean.empty:
170
  st.warning("Não há dados válidos para processar.")
171
  else:
172
- # Check if 'all_anomalies' is already in session state to avoid re-running Prophet
173
- if 'all_anomalies' not in st.session_state:
174
  with st.spinner('Aplicando modelo de série temporal...'):
175
  all_anomalies = apply_prophet(df_clean)
176
  st.session_state['all_anomalies'] = all_anomalies
177
 
178
  with tab2:
179
- # Ensure 'all_anomalies' exists in session state before allowing user interaction
180
  if 'all_anomalies' in st.session_state and not st.session_state['all_anomalies'].empty:
181
- # Interface para perguntas do usuário
182
  user_question = st.text_input("Escreva sua questão aqui:", "")
183
  if user_question:
184
  bot_response = response(user_question, st.session_state['all_anomalies'])
185
  st.session_state['history'].append(('👤', user_question))
186
  st.session_state['history'].append(('🤖', bot_response))
187
 
188
- # Mostrar histórico de conversa
189
  for sender, message in st.session_state['history']:
190
- if sender == '👤':
191
- st.markdown(f"**👤 {message}**")
192
- elif sender == '🤖':
193
- st.markdown(f"**🤖 {message}**", unsafe_allow_html=True)
194
 
195
- # Botão para limpar histórico
196
  if st.button("Limpar histórico"):
197
  st.session_state['history'] = []
198
  else:
199
- st.warning("Por favor, processe os dados no Meta Prophet primeiro.")
 
26
  # Aplicar o markdown combinado no Streamlit
27
  st.markdown(html_content, unsafe_allow_html=True)
28
 
29
+ # Cache models to prevent re-loading on every run
30
+ @st.cache_resource
31
+ def load_translation_model(model_name):
32
+ return T5ForConditionalGeneration.from_pretrained(model_name)
33
+
34
+ @st.cache_resource
35
+ def load_tapex_model():
36
+ return BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
37
+
38
+ @st.cache_resource
39
+ def load_tapex_tokenizer():
40
+ return TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
41
+
42
+ pt_en_translator = load_translation_model("unicamp-dl/translation-pt-en-t5")
43
+ en_pt_translator = load_translation_model("unicamp-dl/translation-en-pt-t5")
44
+ tapex_model = load_tapex_model()
45
+ tapex_tokenizer = load_tapex_tokenizer()
46
  tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
47
 
48
  def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
 
51
  translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
52
  return translated_text
53
 
54
+ # Function to translate and interact with TAPEX model
55
  def response(user_question, table_data):
56
  question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
57
  encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
 
60
  response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
61
  return response_pt
62
 
63
+ # Load and preprocess the data
64
  def load_data(uploaded_file):
65
  if uploaded_file.name.endswith('.csv'):
66
  df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
 
103
  df_clean = new_df.copy()
104
  return df_clean
105
 
106
+ # Cache the Prophet computation to avoid recomputing
107
+ @st.cache_data
108
  def apply_prophet(df_clean):
109
  if df_clean.empty:
110
  st.error("DataFrame está vazio após o pré-processamento.")
 
166
  # Return the dataframe of all anomalies
167
  return all_anomalies
168
 
169
+ # Initialize session states
170
+ if 'all_anomalies' not in st.session_state:
171
+ st.session_state['all_anomalies'] = pd.DataFrame()
172
+
173
+ if 'history' not in st.session_state:
174
+ st.session_state['history'] = []
175
+
176
  tab1, tab2 = st.tabs(["Meta Prophet", "Microsoft TAPEX"])
177
 
178
  # Interface para carregar arquivo
 
186
  if df_clean.empty:
187
  st.warning("Não há dados válidos para processar.")
188
  else:
189
+ # Cache the Prophet results
190
+ if st.session_state['all_anomalies'].empty:
191
  with st.spinner('Aplicando modelo de série temporal...'):
192
  all_anomalies = apply_prophet(df_clean)
193
  st.session_state['all_anomalies'] = all_anomalies
194
 
195
  with tab2:
 
196
  if 'all_anomalies' in st.session_state and not st.session_state['all_anomalies'].empty:
 
197
  user_question = st.text_input("Escreva sua questão aqui:", "")
198
  if user_question:
199
  bot_response = response(user_question, st.session_state['all_anomalies'])
200
  st.session_state['history'].append(('👤', user_question))
201
  st.session_state['history'].append(('🤖', bot_response))
202
 
 
203
  for sender, message in st.session_state['history']:
204
+ st.markdown(f"**{sender} {message}**")
 
 
 
205
 
 
206
  if st.button("Limpar histórico"):
207
  st.session_state['history'] = []
208
  else:
209
+ st.warning("Por favor, processe os dados no Meta Prophet primeiro.")