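"""Sarcastic chatbot Space.

Loads a T5 ('t5-base') model fine-tuned with PyTorch Lightning from a local
'best-model.ckpt' checkpoint, serves it through a Streamlit chat UI, and lets
users save and re-download conversations via MongoDB Atlas.
"""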
from transformers import T5Tokenizer, T5ForConditionalGeneration
from torch.optim import AdamW  # transformers' AdamW is deprecated/removed in recent releases
import pandas as pd
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn.utils.rnn import pad_sequence
# from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler
pl.seed_everything(100)
MODEL_NAME='t5-base'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
INPUT_MAX_LEN = 128
OUTPUT_MAX_LEN = 128
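# NOTE: INPUT_MAX_LEN and OUTPUT_MAX_LEN above cap the tokenized length of user
# prompts (encoder input) and generated replies (decoder output), respectively.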
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME, model_max_length=512)
class T5Model(pl.LightningModule):
    """LightningModule wrapping T5ForConditionalGeneration for seq2seq fine-tuning."""

    def __init__(self):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, return_dict=True)

    def forward(self, input_ids, attention_mask, labels=None):
        output = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        return output.loss, output.logits

    def training_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["target"]
        loss, logits = self(input_ids, attention_mask, labels)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["target"]
        loss, logits = self(input_ids, attention_mask, labels)
        self.log("val_loss", loss, prog_bar=True, logger=True)
        return {'val_loss': loss}

    def configure_optimizers(self):
        return AdamW(self.parameters(), lr=0.0001)
# Load the fine-tuned weights and switch to inference mode.
train_model = T5Model.load_from_checkpoint('best-model.ckpt', map_location=DEVICE)
train_model = train_model.to(DEVICE)  # ensure the weights actually live on DEVICE
train_model.freeze()
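# freeze() disables gradients for every parameter and switches the module to
# eval mode, so everything below is inference-only.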
def generate_response(question):
    """Generate a reply for a single user question with beam search."""
    inputs_encoding = tokenizer(
        question,
        add_special_tokens=True,
        max_length=INPUT_MAX_LEN,
        padding='max_length',
        truncation='only_first',
        return_attention_mask=True,
        return_tensors="pt"
    )
    generate_ids = train_model.model.generate(
        input_ids=inputs_encoding["input_ids"].to(DEVICE),
        attention_mask=inputs_encoding["attention_mask"].to(DEVICE),
        max_length=OUTPUT_MAX_LEN,
        num_beams=4,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )
    preds = [
        tokenizer.decode(gen_id,
                         skip_special_tokens=True,
                         clean_up_tokenization_spaces=True)
        for gen_id in generate_ids
    ]
    return "".join(preds)
import uuid
import datetime
import os
import streamlit as st
from streamlit_chat import message
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
password=os.getenv("mongo_pass")
uri = "mongodb+srv://rohank587:"+password+"@rkcluster.e3fpzja.mongodb.net/?retryWrites=true&w=majority"
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
st.title(":red[_Sarcastic_] Chatbot")
if 'generated' not in st.session_state:
    st.session_state['generated'] = []
if 'past' not in st.session_state:
    st.session_state['past'] = []
if 'messages' not in st.session_state:
    st.session_state['messages'] = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]
# container for chat history
response_container = st.container()
# container for text box
container = st.container()
with container:
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_input("You:", key='input', placeholder="Disclaimer: Be careful with punctuation like , ? . ! \"")
        submit_button = st.form_submit_button(label='Send', use_container_width=True)
    col1, col2 = st.columns(2)
    with col1:
        clear_button = st.button("Clear Conversation", key="clear", use_container_width=True)
    with col2:
        save_button = st.button("Save Conversation", key="save", use_container_width=True)
    down_id = st.text_input('Enter ID to download chat', placeholder="Message ID")
    if down_id:
        info = client['rohank']['table1']
        data = info.find_one({'message_id': down_id})
        if data:
            down_button = st.download_button('Download chat', "\n".join(data['message']), file_name="sar_chat.txt")
        else:
            st.warning("No saved chat found for this ID.")
# reset everything
if clear_button:
    st.session_state['generated'] = []
    st.session_state['past'] = []
    st.session_state['messages'] = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]
if save_button and st.session_state['generated'] and st.session_state['past']:
    # Send a ping to confirm a successful connection, then persist the conversation.
    try:
        client.admin.command('ping')
        info = client['rohank']['table1']
        chats = []
        for i in range(len(st.session_state['generated'])):
            chats.append("You: " + st.session_state['past'][i])
            chats.append("Bot: " + st.session_state['generated'][i])
        chat_id = uuid.uuid4()
        saved_at = datetime.datetime.now()
        info.insert_one({"time of saving": saved_at.strftime("%c"), "message_id": str(chat_id), "message": chats})
        st.success("Pinged your deployment. You successfully connected to MongoDB! Saved successfully.")
        st.success("Copy this id " + str(chat_id) + " for downloading the saved chat anytime, anywhere, and paste it down below!")
    except Exception as e:
        st.error("Can't connect to MongoDB. Save failed: " + str(e))
if submit_button and user_input:
    output = generate_response(user_input)
    st.session_state['past'].append(user_input)
    st.session_state['generated'].append(output)
if st.session_state['generated']:
    with response_container:
        for i in range(len(st.session_state['generated'])):
            message(st.session_state["past"][i], is_user=True, key=str(i) + '_user')
            message(st.session_state["generated"][i], key=str(i))