Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -26,17 +26,23 @@ html_content = f"""
|
|
26 |
# Aplicar o markdown combinado no Streamlit
|
27 |
st.markdown(html_content, unsafe_allow_html=True)
|
28 |
|
29 |
-
#
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
|
41 |
|
42 |
def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
|
@@ -45,6 +51,7 @@ def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
|
|
45 |
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
46 |
return translated_text
|
47 |
|
|
|
48 |
def response(user_question, table_data):
|
49 |
question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
|
50 |
encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
|
@@ -53,6 +60,7 @@ def response(user_question, table_data):
|
|
53 |
response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
|
54 |
return response_pt
|
55 |
|
|
|
56 |
def load_data(uploaded_file):
|
57 |
if uploaded_file.name.endswith('.csv'):
|
58 |
df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
|
@@ -95,6 +103,8 @@ def preprocess_data(df):
|
|
95 |
df_clean = new_df.copy()
|
96 |
return df_clean
|
97 |
|
|
|
|
|
98 |
def apply_prophet(df_clean):
|
99 |
if df_clean.empty:
|
100 |
st.error("DataFrame está vazio após o pré-processamento.")
|
@@ -156,6 +166,13 @@ def apply_prophet(df_clean):
|
|
156 |
# Return the dataframe of all anomalies
|
157 |
return all_anomalies
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
tab1, tab2 = st.tabs(["Meta Prophet", "Microsoft TAPEX"])
|
160 |
|
161 |
# Interface para carregar arquivo
|
@@ -169,31 +186,24 @@ with tab1:
|
|
169 |
if df_clean.empty:
|
170 |
st.warning("Não há dados válidos para processar.")
|
171 |
else:
|
172 |
-
#
|
173 |
-
if 'all_anomalies'
|
174 |
with st.spinner('Aplicando modelo de série temporal...'):
|
175 |
all_anomalies = apply_prophet(df_clean)
|
176 |
st.session_state['all_anomalies'] = all_anomalies
|
177 |
|
178 |
with tab2:
|
179 |
-
# Ensure 'all_anomalies' exists in session state before allowing user interaction
|
180 |
if 'all_anomalies' in st.session_state and not st.session_state['all_anomalies'].empty:
|
181 |
-
# Interface para perguntas do usuário
|
182 |
user_question = st.text_input("Escreva sua questão aqui:", "")
|
183 |
if user_question:
|
184 |
bot_response = response(user_question, st.session_state['all_anomalies'])
|
185 |
st.session_state['history'].append(('👤', user_question))
|
186 |
st.session_state['history'].append(('🤖', bot_response))
|
187 |
|
188 |
-
# Mostrar histórico de conversa
|
189 |
for sender, message in st.session_state['history']:
|
190 |
-
|
191 |
-
st.markdown(f"**👤 {message}**")
|
192 |
-
elif sender == '🤖':
|
193 |
-
st.markdown(f"**🤖 {message}**", unsafe_allow_html=True)
|
194 |
|
195 |
-
# Botão para limpar histórico
|
196 |
if st.button("Limpar histórico"):
|
197 |
st.session_state['history'] = []
|
198 |
else:
|
199 |
-
st.warning("Por favor, processe os dados no Meta Prophet primeiro.")
|
|
|
26 |
# Aplicar o markdown combinado no Streamlit
|
27 |
st.markdown(html_content, unsafe_allow_html=True)
|
28 |
|
29 |
# Model loaders are memoized so Streamlit's rerun-on-interaction loop
# does not re-download / re-instantiate the weights every time.
@st.cache_resource
def load_translation_model(model_name):
    """Return a cached T5 translation model for the given HF model name."""
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    return model
33 |
+
|
34 |
@st.cache_resource
def load_tapex_model():
    """Return the cached TAPEX table-QA model (loaded once per process)."""
    tapex = BartForConditionalGeneration.from_pretrained(
        "microsoft/tapex-large-finetuned-wtq"
    )
    return tapex
37 |
+
|
38 |
@st.cache_resource
def load_tapex_tokenizer():
    """Return the cached tokenizer matching the TAPEX model above."""
    tok = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
    return tok
41 |
+
|
42 |
# Instantiate the shared models once at import time through the cached
# loaders, so Streamlit reruns reuse the same objects.
pt_en_translator = load_translation_model("unicamp-dl/translation-pt-en-t5")
en_pt_translator = load_translation_model("unicamp-dl/translation-en-pt-t5")
tapex_model = load_tapex_model()
tapex_tokenizer = load_tapex_tokenizer()


# Cache the T5 tokenizer as well, for consistency with the loaders above —
# the original left this uncached, so it was rebuilt on every rerun.
@st.cache_resource
def load_t5_tokenizer(model_name):
    """Return a cached T5 tokenizer for the given HF model name."""
    return T5Tokenizer.from_pretrained(model_name)


tokenizer = load_t5_tokenizer("unicamp-dl/translation-pt-en-t5")
47 |
|
48 |
def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
|
|
|
51 |
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
52 |
return translated_text
|
53 |
|
54 |
+
# Function to translate and interact with TAPEX model
|
55 |
def response(user_question, table_data):
|
56 |
question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
|
57 |
encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
|
|
|
60 |
response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
|
61 |
return response_pt
|
62 |
|
63 |
+
# Load and preprocess the data
|
64 |
def load_data(uploaded_file):
|
65 |
if uploaded_file.name.endswith('.csv'):
|
66 |
df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
|
|
|
103 |
df_clean = new_df.copy()
|
104 |
return df_clean
|
105 |
|
106 |
+
# Cache the Prophet computation to avoid recomputing
|
107 |
+
@st.cache_data
|
108 |
def apply_prophet(df_clean):
|
109 |
if df_clean.empty:
|
110 |
st.error("DataFrame está vazio após o pré-processamento.")
|
|
|
166 |
# Return the dataframe of all anomalies
|
167 |
return all_anomalies
|
168 |
|
169 |
# Initialize session states
# Prophet anomaly results persist across Streamlit reruns so the TAPEX tab
# can query them without re-running the time-series model.
if 'all_anomalies' not in st.session_state:
    st.session_state['all_anomalies'] = pd.DataFrame()

# Chat history: list of (sender, message) tuples rendered in the TAPEX tab.
if 'history' not in st.session_state:
    st.session_state['history'] = []
175 |
+
|
176 |
tab1, tab2 = st.tabs(["Meta Prophet", "Microsoft TAPEX"])
|
177 |
|
178 |
# Interface para carregar arquivo
|
|
|
186 |
if df_clean.empty:
|
187 |
st.warning("Não há dados válidos para processar.")
|
188 |
else:
|
189 |
+
# Cache the Prophet results
|
190 |
+
if st.session_state['all_anomalies'].empty:
|
191 |
with st.spinner('Aplicando modelo de série temporal...'):
|
192 |
all_anomalies = apply_prophet(df_clean)
|
193 |
st.session_state['all_anomalies'] = all_anomalies
|
194 |
|
195 |
with tab2:
|
|
|
196 |
if 'all_anomalies' in st.session_state and not st.session_state['all_anomalies'].empty:
|
|
|
197 |
user_question = st.text_input("Escreva sua questão aqui:", "")
|
198 |
if user_question:
|
199 |
bot_response = response(user_question, st.session_state['all_anomalies'])
|
200 |
st.session_state['history'].append(('👤', user_question))
|
201 |
st.session_state['history'].append(('🤖', bot_response))
|
202 |
|
|
|
203 |
for sender, message in st.session_state['history']:
|
204 |
+
st.markdown(f"**{sender} {message}**")
|
|
|
|
|
|
|
205 |
|
|
|
206 |
if st.button("Limpar histórico"):
|
207 |
st.session_state['history'] = []
|
208 |
else:
|
209 |
+
st.warning("Por favor, processe os dados no Meta Prophet primeiro.")
|