Spaces:
Sleeping
Sleeping
File size: 5,106 Bytes
7191a40 d301bcb 7191a40 12ef87a d8cbae7 7191a40 d8cbae7 12ef87a 7191a40 12ef87a 7191a40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AdamW
import pandas as pd
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn.utils.rnn import pad_sequence
# from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler
# Seed the random number generators through Lightning's helper.
pl.seed_everything(100)

# Hugging Face checkpoint name; used for the tokenizer below and for the
# model created in T5Model.__init__.
MODEL_NAME = 't5-base'

# Use CUDA when torch reports it as available, otherwise the CPU.
DEVICE = torch.device(
    'cuda' if torch.cuda.is_available() else 'cpu'
)

# Sequence-length limits (in tokens), named for the input and output sides.
INPUT_MAX_LEN = 128
OUTPUT_MAX_LEN = 128

# Shared tokenizer instance for the whole module.
tokenizer = T5Tokenizer.from_pretrained(
    MODEL_NAME,
    model_max_length=512,
)
class T5Model(pl.LightningModule):
    """Lightning wrapper around a pretrained T5 conditional-generation model.

    The wrapped Hugging Face model is stored on ``self.model``. Batches passed
    to the training/validation steps are expected to be mappings with the keys
    ``"input_ids"``, ``"attention_mask"`` and ``"target"`` (the label ids).
    """

    def __init__(self):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(
            MODEL_NAME, return_dict=True
        )

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the wrapped model and return ``(loss, logits)``.

        ``labels`` is forwarded unchanged to the wrapped model; the returned
        loss is whatever the model output carries in its ``loss`` field.
        """
        output = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels,
        )
        return output.loss, output.logits

    def _shared_step(self, batch, metric_name):
        """Compute the loss for ``batch`` and log it under ``metric_name``."""
        loss, _ = self(
            batch["input_ids"],
            batch["attention_mask"],
            batch["target"],
        )
        self.log(metric_name, loss, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return ``{'loss': ...}`` for one training batch (logged as ``train_loss``)."""
        return {'loss': self._shared_step(batch, "train_loss")}

    def validation_step(self, batch, batch_idx):
        """Return ``{'val_loss': ...}`` for one validation batch (logged as ``val_loss``)."""
        return {'val_loss': self._shared_step(batch, "val_loss")}

    def configure_optimizers(self):
        """Return the optimizer: AdamW over all parameters with lr=1e-4."""
        # NOTE(review): this is the AdamW imported from `transformers` at the
        # top of the file; confirm the installed version still provides it.
        return AdamW(self.parameters(), lr=0.0001)
# Rebuild the Lightning module from the checkpoint file in the working
# directory, mapping the stored tensors onto DEVICE while loading.
train_model = T5Model.load_from_checkpoint(
    'best-model.ckpt',
    map_location=DEVICE,
)

# The module is only used for generation below, so freeze it.
train_model.freeze()
def generate_response(question):
    """Generate the chatbot's reply to ``question``.

    The question is tokenized (padded/truncated to ``INPUT_MAX_LEN`` tokens),
    passed through beam search on the loaded T5 model, and the generated ids
    are decoded back to text with special tokens removed.

    Args:
        question: The user's message as a string.

    Returns:
        The decoded reply as a single string.
    """
    encoding = tokenizer(
        question,
        add_special_tokens=True,
        max_length=INPUT_MAX_LEN,
        padding='max_length',
        truncation='only_first',
        return_attention_mask=True,
        return_tensors="pt",
    )

    # The tokenizer returns CPU tensors, while the restored model may live on
    # a GPU (depending on the Lightning version and map_location). Move the
    # inputs to wherever the model's parameters are to avoid a device mismatch.
    model = train_model.model
    device = model.device

    generated_ids = model.generate(
        input_ids=encoding["input_ids"].to(device),
        attention_mask=encoding["attention_mask"].to(device),
        # Limit on the generated sequence, so use the output-side constant.
        max_length=OUTPUT_MAX_LEN,
        num_beams=4,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )

    # One sequence is requested, but join defensively over whatever comes back.
    return "".join(
        tokenizer.decode(
            ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        for ids in generated_ids
    )
import streamlit as st
from streamlit_chat import message
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import os

# SECURITY: a connection string with embedded credentials should not live in
# source control. The URI is now read from the MONGODB_URI environment
# variable when it is set. The literal below is kept only as a
# backward-compatible fallback -- rotate that credential and remove it.
uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://rohank587:[email protected]/?retryWrites=true&w=majority",
)
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
st.title(":red[_Sarcastic_] Chatbot")


def _initial_messages():
    """Return a fresh copy of the initial ``messages`` list (system prompt only)."""
    return [{"role": "system", "content": "You are a helpful assistant."}]


# Per-session chat state: 'past' holds the user's inputs, 'generated' holds the
# model's replies at the same indices, 'messages' holds the system prompt.
if 'generated' not in st.session_state:
    st.session_state['generated'] = []
if 'past' not in st.session_state:
    st.session_state['past'] = []
if 'messages' not in st.session_state:
    st.session_state['messages'] = _initial_messages()

# container for chat history
response_container = st.container()
# container for text box
container = st.container()

with container:
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_input(
            "You:",
            key='input',
            placeholder="Disclaimer: Be careful with punctuations like , ? . ! \" and IT WILL CUSS YOU",
        )
        submit_button = st.form_submit_button(label='Send', use_container_width=True)
    # Regular buttons are created outside the form.
    clear_button = st.button("Clear Conversation", key="clear", use_container_width=True)
    save_button = st.button("Save Conversation", key="save", use_container_width=True)

# reset everything
if clear_button:
    st.session_state['generated'] = []
    st.session_state['past'] = []
    st.session_state['messages'] = _initial_messages()

if save_button:
    # Pair each user input with the reply generated for it.
    turns = [
        {"user": asked, "bot": replied}
        for asked, replied in zip(
            st.session_state['past'], st.session_state['generated']
        )
    ]
    if not turns:
        st.write("Nothing to save yet.")
    else:
        try:
            # Send a ping to confirm a successful connection
            client.admin.command('ping')
            st.write("Pinged your deployment. You successfully connected to MongoDB!")
            # Store the conversation itself. The insert stays inside the try
            # so a database failure is reported on the page, not raised.
            info = client['rohank']['table1']
            info.insert_one({"conversation": turns})
        except Exception as e:  # UI boundary: show the error to the user
            st.write(e)

if submit_button and user_input:
    output = generate_response(user_input)
    st.session_state['past'].append(user_input)
    st.session_state['generated'].append(output)

if st.session_state['generated']:
    with response_container:
        history = zip(st.session_state["past"], st.session_state["generated"])
        for i, (asked, replied) in enumerate(history):
            # Widget keys must be unique; derive them from the turn index.
            message(asked, is_user=True, key=str(i) + '_user')
            message(replied, key=str(i))