import datetime

import pandas as pd
import streamlit as st
from transformers import pipeline

# Load the spreadsheet and replace missing values with 0.
df = pd.read_excel('discrepantes.xlsx')
df.fillna(0, inplace=True)

# TAPAS expects every table cell to be a string, so cast the whole frame.
table_data = df.astype(str)
print(table_data.head())
@st.cache_resource
def load_model():
    # Cache the pipeline across Streamlit reruns so the model is downloaded
    # and loaded only once, not on every question.
    model_name = "google/tapas-base-finetuned-wtq"
    return pipeline("table-question-answering", model=model_name)

def response(user_question, table_data):
    a = datetime.datetime.now()

    # TAPAS is an extractive (encoder-only) model: it answers by selecting
    # cells from the table rather than generating free text, so it has no
    # .generate() method. The table-question-answering pipeline handles
    # tokenization, the forward pass, and decoding of the selected cells.
    tqa = load_model()
    answer = tqa(table=table_data, query=user_question)["answer"]

    query_result = {
        "Resposta": answer  # "Resposta" = "Answer"
    }

    b = datetime.datetime.now()
    print(b - a)  # log inference time

    return query_result
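# Example call (the question below is hypothetical; the actual answer depends
# on the contents of discrepantes.xlsx):
#   response("Qual o total da primeira linha?", table_data)
#   # -> {'Resposta': '...'}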
# Streamlit interface: title bar with three colored "traffic light" dots.
st.markdown("""
    <div style='display: flex; align-items: center;'>
        <div style='width: 40px; height: 40px; background-color: green; border-radius: 50%; margin-right: 5px;'></div>
        <div style='width: 40px; height: 40px; background-color: red; border-radius: 50%; margin-right: 5px;'></div>
        <div style='width: 40px; height: 40px; background-color: yellow; border-radius: 50%; margin-right: 5px;'></div>
        <span style='font-size: 40px; font-weight: bold;'>Chatbot do Tesouro RS</span>
    </div>
""", unsafe_allow_html=True)
# Chat history persists across Streamlit reruns in session_state.
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Input box for the user's question ("Escreva sua questão aqui:" =
# "Write your question here:").
user_question = st.text_input("Escreva sua questão aqui:", "")

# Streamlit reruns the whole script on every interaction, so only handle a
# question that has not been answered yet; otherwise each rerun would append
# duplicate entries to the history.
if user_question and st.session_state.get('last_question') != user_question:
    st.session_state['last_question'] = user_question

    # Record the question with a person emoji.
    st.session_state['history'].append(('👤', user_question))

    # Generate the response and record it with a robot emoji.
    bot_response = response(user_question, table_data)
    st.session_state['history'].append(('🤖', bot_response["Resposta"]))

# Clear-history button ("Limpar" = "Clear").
if st.button("Limpar"):
    st.session_state['history'] = []

# Render the chat: user messages on the left, bot replies aligned right.
# Markdown emphasis (**) is not parsed inside raw HTML, so the bot line
# uses <b> tags instead.
for sender, message in st.session_state['history']:
    if sender == '👤':
        st.markdown(f"**👤 {message}**")
    elif sender == '🤖':
        st.markdown(f"<div style='text-align: right'><b>🤖 {message}</b></div>", unsafe_allow_html=True)