import datetime

import pandas as pd
import streamlit as st
from transformers import pipeline

# Load the spreadsheet and replace missing values with 0.
df = pd.read_excel('discrepantes.xlsx')
df.fillna(0, inplace=True)

# TAPAS expects every table cell to be a string, so cast the whole frame.
table_data = df.astype(str)
print(table_data.head())
@st.cache_resource
def load_model():
    # Cache the pipeline across Streamlit reruns so the model is downloaded
    # and loaded only once, not on every question.
    model_name = "google/tapas-base-finetuned-wtq"
    return pipeline("table-question-answering", model=model_name)

def response(user_question, table_data):
    a = datetime.datetime.now()

    # TAPAS is an extractive (encoder-only) model: it answers by selecting
    # cells from the table rather than generating free text, so it has no
    # .generate() method. The table-question-answering pipeline handles
    # tokenization, the forward pass, and decoding of the selected cells.
    tqa = load_model()
    answer = tqa(table=table_data, query=user_question)["answer"]

    query_result = {
        "Resposta": answer  # "Resposta" = "Answer"
    }

    b = datetime.datetime.now()
    print(b - a)  # log inference time

    return query_result
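# Example call (the question below is hypothetical; the actual answer depends
# on the contents of discrepantes.xlsx):
#   response("Qual o total da primeira linha?", table_data)
#   # -> {'Resposta': '...'}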
# Streamlit interface: title bar with three colored "traffic light" dots.
st.markdown("""
    <div style='display: flex; align-items: center;'>
        <div style='width: 40px; height: 40px; background-color: green; border-radius: 50%; margin-right: 5px;'></div>
        <div style='width: 40px; height: 40px; background-color: red; border-radius: 50%; margin-right: 5px;'></div>
        <div style='width: 40px; height: 40px; background-color: yellow; border-radius: 50%; margin-right: 5px;'></div>
        <span style='font-size: 40px; font-weight: bold;'>Chatbot do Tesouro RS</span>
    </div>
""", unsafe_allow_html=True)
# Chat history persists across Streamlit reruns in session_state.
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Input box for the user's question ("Escreva sua questão aqui:" =
# "Write your question here:").
user_question = st.text_input("Escreva sua questão aqui:", "")

# Streamlit reruns the whole script on every interaction, so only handle a
# question that has not been answered yet; otherwise each rerun would append
# duplicate entries to the history.
if user_question and st.session_state.get('last_question') != user_question:
    st.session_state['last_question'] = user_question

    # Record the question with a person emoji.
    st.session_state['history'].append(('👤', user_question))

    # Generate the response and record it with a robot emoji.
    bot_response = response(user_question, table_data)
    st.session_state['history'].append(('🤖', bot_response["Resposta"]))

# Clear-history button ("Limpar" = "Clear").
if st.button("Limpar"):
    st.session_state['history'] = []

# Render the chat: user messages on the left, bot replies aligned right.
# Markdown emphasis (**) is not parsed inside raw HTML, so the bot line
# uses <b> tags instead.
for sender, message in st.session_state['history']:
    if sender == '👤':
        st.markdown(f"**👤 {message}**")
    elif sender == '🤖':
        st.markdown(f"<div style='text-align: right'><b>🤖 {message}</b></div>", unsafe_allow_html=True)