File size: 4,405 Bytes
8ac4f9b 322c3d1 dc6f426 322c3d1 081c79a 322c3d1 f4044e9 1dbac02 322c3d1 f4044e9 322c3d1 f4044e9 322c3d1 f4044e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import streamlit as st
import uuid
import sys
import requests
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
)
import pickle
# Avatar images shown next to user and assistant messages.
USER_ICON = "images/user-icon.png"
AI_ICON = "images/ai-icon.png"
# Length at which handle_input starts trimming the stored chat history.
MAX_HISTORY_LENGTH = 5

# Give every browser session a stable identifier, generated on first use.
if 'user_id' not in st.session_state:
    st.session_state['user_id'] = str(uuid.uuid4())
user_id = st.session_state['user_id']

# Session-state slots and the factory producing each slot's initial value.
# Factories (rather than shared literals) give every session fresh objects.
_SESSION_DEFAULTS = (
    ('chat_history', list),
    ('chats', lambda: [{'id': 0, 'question': '', 'answer': ''}]),
    ('questions', list),
    ('answers', list),
    ('input', str),
)
for _slot, _make_default in _SESSION_DEFAULTS:
    # Only seed slots that are missing so reruns keep the existing values.
    if _slot not in st.session_state:
        st.session_state[_slot] = _make_default()
# Page-level CSS: block-container padding, a black background behind images,
# and the font size of the header rendered with the `main-header` class.
_PAGE_STYLE = """
<style>
.block-container {
padding-top: 32px;
padding-bottom: 32px;
padding-left: 0;
padding-right: 0;
}
.element-container img {
background-color: #000000;
}
.main-header {
font-size: 24px;
}
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to be emitted.
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
# Load the model and tokenizer from Hugging Face Hub.
#
# Streamlit re-executes this script on every user interaction, so the loaders
# are wrapped in st.cache_resource: the heavyweight objects are built once per
# server process and reused across reruns and sessions instead of being
# reloaded each time. (st.cache_resource requires Streamlit >= 1.18.)
model_name = "tiiuae/falcon-7b-instruct"


@st.cache_resource
def _load_model_and_tokenizer(name):
    """Return ``(model, tokenizer)`` for the Hub checkpoint *name*, cached per name."""
    loaded_model = AutoModelForCausalLM.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


@st.cache_resource
def _load_examples(name):
    """Return the Hub dataset *name* as produced by ``load_dataset``, cached per name."""
    return load_dataset(name)


model, tokenizer = _load_model_and_tokenizer(model_name)
# Load the dataset
dataset = _load_examples("nisaar/Lawyer_GPT_India")
def write_top_bar():
    """Render the header row and report the state of its "Clear Chat" button.

    The row has three columns: the assistant icon, the page title, and the
    button.

    Returns:
        The value returned by ``st.button("Clear Chat")``.
    """
    icon_col, title_col, button_col = st.columns([1, 10, 2])

    with icon_col:
        st.image(AI_ICON, use_column_width='always')

    with title_col:
        title = "Cogwise Intelligent Assistant"
        # Raw HTML so the title picks up the `main-header` CSS class.
        st.write(f"<h3 class='main-header'>{title}</h3>", unsafe_allow_html=True)

    with button_col:
        clear_clicked = st.button("Clear Chat")

    return clear_clicked
# Draw the header; if its "Clear Chat" button was pressed on this run, reset
# the transcript, the pending input text, and the stored history.
clear = write_top_bar()
if clear:
    for _slot in ("questions", "answers"):
        st.session_state[_slot] = []
    st.session_state["input"] = ""
    st.session_state["chat_history"] = []
def handle_input():
    """Answer the text currently in the input box and record the exchange.

    Registered as the ``on_change`` callback of the ``"input"`` text widget.
    It reads ``st.session_state.input``, generates a reply with the
    module-level ``model``/``tokenizer``, appends the question and answer to
    ``st.session_state.questions`` / ``answers``, updates
    ``st.session_state["chat_history"]``, and finally clears the input box.

    Differences from the previous implementation:
      * blank input is ignored instead of producing an empty exchange;
      * a missing closest example (``None``/empty) falls back to the user's
        own text rather than being passed to the tokenizer;
      * the history keeps the most recent ``MAX_HISTORY_LENGTH`` turns and is
        written back to session state (it used to drop the newest turn on a
        throw-away copy, so the stored history stopped updating once full);
      * a question and its answer now share the same ``id``.
    """
    user_text = st.session_state.input
    if not user_text or not user_text.strip():
        # Nothing to answer (e.g. the user cleared the box and pressed Enter).
        return

    # Find the most similar example in the dataset; use the raw question as
    # the prompt when no example is available.
    closest_example = find_closest_example(user_text, dataset)
    prompt = closest_example if closest_example else user_text

    # Generate the response before touching session state, so a failure here
    # cannot leave a question recorded without its answer.
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = model.generate(inputs)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Question and answer of the same turn carry the same id.
    turn_id = len(st.session_state.questions)
    st.session_state.questions.append({
        'question': user_text,
        'id': turn_id
    })
    st.session_state.answers.append({
        'answer': answer,
        'id': turn_id
    })

    # Keep only the newest MAX_HISTORY_LENGTH turns and store the result back.
    history = list(st.session_state["chat_history"])
    history.append((user_text, answer))
    st.session_state["chat_history"] = history[-MAX_HISTORY_LENGTH:]

    # Empty the text box for the next question.
    st.session_state.input = ""
def _iter_example_texts(data):
    """Yield one text string per example found anywhere inside *data*."""
    from collections.abc import Iterable, Mapping

    if isinstance(data, str):
        yield data
    elif isinstance(data, Mapping):
        text_fields = [value for value in data.values() if isinstance(value, str)]
        if text_fields:
            # A mapping that holds strings is treated as a single row; its
            # text fields are joined in the mapping's own order.
            yield " ".join(text_fields)
        else:
            # Otherwise treat it as a container (e.g. split name -> rows).
            for value in data.values():
                yield from _iter_example_texts(value)
    elif isinstance(data, Iterable):
        for item in data:
            yield from _iter_example_texts(item)


def find_closest_example(input, dataset):
    """Return the dataset example whose wording best overlaps *input*.

    Similarity is the Jaccard index between the lower-cased word sets of the
    query and of each example. This is a deliberately simple, dependency-free
    baseline; swap in embeddings or another metric if better recall is needed.
    The whole dataset is scanned on every call.

    Args:
        input: The user's question.
        dataset: Examples to search. Accepted shapes are plain strings,
            mappings with string fields (one row), and arbitrarily nested
            iterables/mappings of those.
            NOTE(review): the row schema of the real dataset is not visible
            here; all string fields of a row are joined — confirm that this
            is the text you want to prompt with.

    Returns:
        The text of the best-matching example, or ``None`` when the query has
        no words or no example shares any word with it. On ties the first
        example encountered wins.
    """
    import re

    def words(text):
        return set(re.findall(r"\w+", text.lower()))

    query_words = words(input or "")
    if not query_words:
        return None

    best_text, best_score = None, 0.0
    for text in _iter_example_texts(dataset):
        example_words = words(text)
        # The union is never empty because query_words is non-empty.
        score = len(query_words & example_words) / len(query_words | example_words)
        if score > best_score:
            best_text, best_score = text, score
    return best_text
def write_user_message(md):
    """Render one user question beside the user icon.

    Args:
        md: Mapping whose ``'question'`` entry holds the text to display.
    """
    icon_col, text_col = st.columns([1, 12])

    with icon_col:
        st.image(USER_ICON, use_column_width='always')

    with text_col:
        st.warning(md['question'])
def render_answer(answer):
    """Render the assistant's reply text beside the assistant icon.

    Args:
        answer: The reply to display, passed unchanged to ``st.info``.
    """
    icon_col, text_col = st.columns([1, 12])

    with icon_col:
        st.image(AI_ICON, use_column_width='always')

    with text_col:
        st.info(answer)
def write_chat_message(md, q):
    """Render an assistant answer inside its own container.

    Args:
        md: Mapping whose ``'answer'`` entry holds the reply text.
        q: The question the answer belongs to; accepted but not used here.
    """
    reply_text = md['answer']
    with st.container():
        render_answer(reply_text)
# Replay the transcript: each recorded question followed by its answer.
with st.container():
    _turns = zip(st.session_state.questions, st.session_state.answers)
    for q, a in _turns:
        write_user_message(q)
        write_chat_message(a, q)

# Separator between the transcript and the input box.
st.markdown('---')

# The text box is bound to st.session_state["input"]; handle_input runs
# whenever its value changes.
input = st.text_input(
    "You are talking to an AI, ask any question.",
    key="input",
    on_change=handle_input,
)
|