File size: 4,405 Bytes
8ac4f9b
322c3d1
 
 
 
dc6f426
322c3d1
 
 
 
 
 
 
 
 
 
 
081c79a
322c3d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4044e9
 
 
 
 
 
 
1dbac02
322c3d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4044e9
 
 
 
 
 
 
322c3d1
 
 
 
 
 
 
 
 
f4044e9
 
 
 
 
 
322c3d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4044e9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import streamlit as st
import uuid
import sys
import requests
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import pickle

# Avatar images drawn next to each message, and the cap on remembered turns.
USER_ICON = "images/user-icon.png"
AI_ICON = "images/ai-icon.png"
MAX_HISTORY_LENGTH = 5

# Reuse this session's identifier if one exists; otherwise mint a random one.
if 'user_id' not in st.session_state:
    st.session_state['user_id'] = str(uuid.uuid4())
user_id = st.session_state['user_id']

# Seed every remaining session key exactly once, so values written by earlier
# runs of the script are never overwritten.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

if 'chats' not in st.session_state:
    st.session_state['chats'] = [{'id': 0, 'question': '', 'answer': ''}]

if 'questions' not in st.session_state:
    st.session_state['questions'] = []

if 'answers' not in st.session_state:
    st.session_state['answers'] = []

if 'input' not in st.session_state:
    st.session_state['input'] = ""

# Page-wide CSS overrides: container padding, a black backdrop behind images,
# and the font size used by the `main-header` class.
st.markdown("""
        <style>
               .block-container {
                    padding-top: 32px;
                    padding-bottom: 32px;
                    padding-left: 0;
                    padding-right: 0;
                }
                .element-container img {
                    background-color: #000000;
                }

                .main-header {
                    font-size: 24px;
                }
        </style>
        """, unsafe_allow_html=True)

# Load the model and tokenizer from Hugging Face Hub.
#
# Streamlit re-executes this script from the top on every user interaction, so
# the expensive loads below are wrapped in st.cache_resource: the first run
# pays the cost and later reruns get the already-built objects back.
model_name = "tiiuae/falcon-7b-instruct"


@st.cache_resource
def _load_model_and_tokenizer(name):
    """Return ``(model, tokenizer)`` for the Hub checkpoint called *name*."""
    return (
        AutoModelForCausalLM.from_pretrained(name),
        AutoTokenizer.from_pretrained(name),
    )


@st.cache_resource
def _load_dataset(name):
    """Return the Hub dataset called *name*, as produced by ``load_dataset``."""
    return load_dataset(name)


model, tokenizer = _load_model_and_tokenizer(model_name)

# Load the dataset
dataset = _load_dataset("nisaar/Lawyer_GPT_India")

def write_top_bar():
    """Draw the header row and report the state of its button.

    The row has three columns: the assistant icon, the page title, and a
    "Clear Chat" button.

    Returns:
        Whatever ``st.button`` returned for the "Clear Chat" button.
    """
    icon_col, title_col, button_col = st.columns([1, 10, 2])

    with icon_col:
        st.image(AI_ICON, use_column_width='always')

    with title_col:
        header = "Cogwise Intelligent Assistant"
        st.write(f"<h3 class='main-header'>{header}</h3>", unsafe_allow_html=True)

    with button_col:
        return st.button("Clear Chat")

# Render the header; when its button reports a click, wipe the conversation
# state and the pending text of the input box.
clear = write_top_bar()

if clear:
    st.session_state["chat_history"] = []
    st.session_state["questions"] = []
    st.session_state["answers"] = []
    st.session_state["input"] = ""

def handle_input():
    """Answer the text in the input box and record the exchange.

    Reads ``st.session_state.input``, builds a prompt from the closest dataset
    example (falling back to the raw text), generates a reply with the
    module-level ``model``/``tokenizer``, and then:

    * appends the question and the answer, under one shared id, to
      ``st.session_state.questions`` / ``st.session_state.answers``;
    * appends ``(question, answer)`` to ``st.session_state["chat_history"]``,
      keeping only the newest ``MAX_HISTORY_LENGTH`` entries;
    * clears the input box.

    Blank input is ignored. Nothing is recorded if generation raises, so the
    question and answer lists always stay the same length.
    """
    user_input = st.session_state.input
    if not user_input or not user_input.strip():
        # Nothing to answer; just make sure the box ends up empty.
        st.session_state.input = ""
        return

    # find_closest_example may return nothing (e.g. no usable match); the
    # tokenizer cannot encode None, so fall back to the user's own text.
    prompt = find_closest_example(user_input, dataset) or user_input

    # Generate response using the model.
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    # Without an explicit budget, generate() uses the default max_length of 20
    # tokens *including* the prompt, which leaves almost no room for a reply.
    outputs = model.generate(inputs, max_new_tokens=256)
    # A causal LM returns prompt + continuation; decode only the continuation
    # so the answer does not echo the prompt back to the user.
    new_tokens = outputs[0][inputs.shape[-1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Record question and answer together, under the same id, only once the
    # answer exists.
    exchange_id = len(st.session_state.questions)
    st.session_state.questions.append({
        'question': user_input,
        'id': exchange_id
    })
    st.session_state.answers.append({
        'answer': answer,
        'id': exchange_id
    })

    # Mutate the stored list in place (rebinding a slice would update a copy
    # the session never sees) and evict the OLDEST turns when over the cap.
    chat_history = st.session_state["chat_history"]
    chat_history.append((user_input, answer))
    overflow = len(chat_history) - MAX_HISTORY_LENGTH
    if overflow > 0:
        del chat_history[:overflow]

    st.session_state.input = ""

def _iter_example_texts(data):
    """Yield one non-blank text string per example found inside *data*.

    Accepts a plain string, a mapping, or any iterable nesting of those.
    A mapping that has string values is treated as a single example and its
    string values are joined with spaces; a mapping without string values
    (e.g. split name -> rows) is searched recursively. Non-iterable leaves
    are ignored.
    """
    from collections.abc import Mapping

    if isinstance(data, str):
        if data.strip():
            yield data
        return

    if isinstance(data, Mapping):
        text_fields = [
            value for value in data.values()
            if isinstance(value, str) and value.strip()
        ]
        if text_fields:
            yield " ".join(text_fields)
        else:
            for value in data.values():
                yield from _iter_example_texts(value)
        return

    try:
        items = iter(data)
    except TypeError:
        return
    for item in items:
        yield from _iter_example_texts(item)


def find_closest_example(input, dataset):
    """Return the dataset example whose text is most similar to *input*.

    Similarity is the case-insensitive ``difflib.SequenceMatcher`` ratio, so
    every example is scanned on each call; this is meant for small datasets.

    NOTE(review): the dataset's column layout is not visible from this file.
    Rows are compared using all of their string fields joined together, and
    that joined text is what gets returned -- confirm this is the desired
    prompt, or narrow it to a specific column.

    Args:
        input: The user's question. ``None`` is treated as an empty string.
        dataset: A string, a mapping, or an iterable nesting of those.
            ``None`` means "no examples".

    Returns:
        The best-matching example text. If *input* is blank, or no example
        shares anything with it, the stripped *input* itself is returned, so
        the result is always a string and never ``None``.
    """
    import difflib

    query = (input or "").strip()
    if not query or dataset is None:
        return query

    # SequenceMatcher caches its analysis of the second sequence, so the fixed
    # query goes there and only the first sequence changes per example.
    matcher = difflib.SequenceMatcher()
    matcher.set_seq2(query.lower())

    best_text, best_score = query, 0.0
    for text in _iter_example_texts(dataset):
        matcher.set_seq1(text.lower())
        score = matcher.ratio()
        if score > best_score:  # strict: the earliest example wins ties
            best_text, best_score = text, score
    return best_text

def write_user_message(md):
    """Render one user turn: the user icon beside the question text.

    Args:
        md: Mapping whose ``'question'`` entry is shown in a warning box.
    """
    avatar_col, text_col = st.columns([1, 12])
    with avatar_col:
        st.image(USER_ICON, use_column_width='always')
    with text_col:
        st.warning(md['question'])

def render_answer(answer):
    """Render one assistant turn: the AI icon beside *answer* in an info box."""
    avatar_col, text_col = st.columns([1, 12])
    with avatar_col:
        st.image(AI_ICON, use_column_width='always')
    with text_col:
        st.info(answer)

def write_chat_message(md, q):
    """Render the ``'answer'`` entry of *md* inside its own container.

    Args:
        md: Mapping holding the answer text under ``'answer'``.
        q: Accepted but not used by this function.
    """
    with st.container():
        render_answer(md['answer'])

# Replay the stored conversation, pairing each question with its answer.
with st.container():
    exchanges = zip(st.session_state.questions, st.session_state.answers)
    for q, a in exchanges:
        write_user_message(q)
        write_chat_message(a, q)

# Divider, then the input box; handle_input runs whenever its value changes.
st.markdown('---')
input = st.text_input(
    "You are talking to an AI, ask any question.",
    key="input",
    on_change=handle_input,
)