sanjeevbora committed on
Commit
ab7a67c
·
verified ·
1 Parent(s): bd9da71

updated app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -64
app.py CHANGED
@@ -1,82 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- import requests
3
- import webbrowser
4
- from http.server import BaseHTTPRequestHandler, HTTPServer
5
- import threading
 
 
 
 
 
 
6
  import spaces
7
- from urllib.parse import urlencode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # OAuth Configuration
10
  TENANT_ID = '2b093ced-2571-463f-bc3e-b4f8bcb427ee'
11
  CLIENT_ID = '2a7c884c-942d-49e2-9e5d-7a29d8a0d3e5'
12
  CLIENT_SECRET = 'EOF8Q~kKHCRgx8tnlLM-H8e93ifetxI6x7sU6bGW'
13
- REDIRECT_URI = 'https://sanjeevbora-chatbot.hf.space/'
14
  AUTH_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/authorize"
15
  TOKEN_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
16
- SCOPE = 'User.Read'
17
- access_token = None
18
-
19
- class RequestHandler(BaseHTTPRequestHandler):
20
- def do_GET(self):
21
- global access_token
22
- if self.path.startswith("/callback"):
23
- # Capture the authorization code
24
- code = self.path.split("code=")[1]
25
- response = requests.post(TOKEN_URL, data={
26
- 'client_id': CLIENT_ID,
27
- 'client_secret': CLIENT_SECRET,
28
- 'grant_type': 'authorization_code',
29
- 'code': code,
30
- 'redirect_uri': REDIRECT_URI
31
- })
32
- token_data = response.json()
33
- access_token = token_data.get('access_token')
34
- self.send_response(200)
35
- self.end_headers()
36
- self.wfile.write(b"Login successful! You can close this window.")
37
- return
38
-
39
- self.send_response(404)
40
- self.end_headers()
41
-
42
- def start_http_server():
43
- server_address = ('', 8080)
44
- httpd = HTTPServer(server_address, RequestHandler)
45
- httpd.serve_forever()
46
-
47
- def login():
48
- params = {
49
  'client_id': CLIENT_ID,
50
- 'response_type': 'code',
51
- 'redirect_uri': REDIRECT_URI,
52
- 'response_mode': 'query',
53
- 'scope': SCOPE,
54
- 'state': 'random_state_string' # Optional: Use for security
55
  }
56
- login_url = f"{AUTH_URL}?{urlencode(params)}"
57
- return login_url
 
 
 
 
 
 
 
58
 
59
- def check_login():
60
- return "You are logged in!" if access_token else "You are not logged in."
 
 
61
 
62
- def handle_login_click():
63
- login()
64
- return check_login()
65
  @spaces.GPU(duration=60)
66
- def gradio_interface():
67
- with gr.Blocks() as demo:
68
- gr.Markdown("### Welcome to the App")
69
- btn_login = gr.Button("Login with Microsoft")
70
- output = gr.Textbox(label="Status")
 
 
 
 
 
 
 
71
 
72
- btn_login.click(handle_login_click, None, output)
 
 
 
 
 
 
 
73
 
74
- return demo
 
 
75
 
76
- if __name__ == "__main__":
77
- # Start the HTTP server in a separate thread
78
- threading.Thread(target=start_http_server, daemon=True).start()
79
 
80
- # Launch Gradio app
81
- demo = gradio_interface()
82
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+
3
# Location of the bootstrap shell script; adjust the path if needed.
script_path = './setup.sh'

# Run the script through bash and keep its exit status for the report below.
exit_code = subprocess.run(['bash', script_path]).returncode

# Report the outcome; a failing script is reported but does not stop the app.
print(
    "Script executed successfully."
    if exit_code == 0
    else f"Script failed with exit code {exit_code}."
)
12
+
13
  import gradio as gr
14
+ from langchain.embeddings import HuggingFaceEmbeddings
15
+ from langchain.vectorstores import Chroma
16
+ from langchain.llms import HuggingFacePipeline
17
+ from langchain.chains import RetrievalQA
18
+ from transformers import AutoConfig, AutoTokenizer, pipeline, AutoModelForCausalLM
19
+ from langchain_community.document_loaders import DirectoryLoader
20
+ from torch import bfloat16
21
+ import torch
22
+ import re
23
+ import transformers
24
  import spaces
25
+ import requests
26
+ from urllib.parse import urlencode, urlparse, parse_qs
27
+
28
# Initialize embeddings and ChromaDB
# Sentence-embedding model used to vectorise the documents.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {"device": device}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Load every PDF matched by "**/*.pdf" under ./example.
loader = DirectoryLoader('./example', glob="**/*.pdf", recursive=True, use_multithreading=True)
docs = loader.load()
# Build the vector store from the loaded documents, persisting to "companies_db".
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="companies_db")
# NOTE(review): a second Chroma handle is opened on "./companies_db" and used for
# retrieval; `vectordb` is not referenced again in the visible code. Presumably both
# paths name the same directory — confirm the reopen is needed and sees the fresh data.
books_db = Chroma(persist_directory="./companies_db", embedding_function=embeddings)
# Retriever handed to the RetrievalQA chain further down.
books_db_client = books_db.as_retriever()
39
+
40
# Initialize the model and tokenizer
# NOTE: `model_name` is rebound here; from this point it names the chat LLM,
# not the embedding model.
model_name = "stabilityai/stablelm-zephyr-3b"

# 4-bit NF4 quantisation with double quantisation and bfloat16 compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# NOTE(review): max_new_tokens is 1024 here but 256 in the pipeline below —
# confirm which value is intended to take effect.
model_config = transformers.AutoConfig.from_pretrained(model_name, max_new_tokens=1024)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map=device,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Text-generation pipeline wrapping the quantised model with sampling enabled.
# NOTE(review): torch_dtype is float16 here while the quantisation config uses
# bfloat16 as compute dtype — confirm the mismatch is intentional.
# return_full_text=True is passed, so the output presumably includes the prompt;
# test_rag later searches the result for the "Helpful Answer:" marker.
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    torch_dtype=torch.float16,
    device_map=device,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    max_new_tokens=256
)

# Expose the pipeline to LangChain as an LLM.
llm = HuggingFacePipeline(pipeline=query_pipeline)

# Retrieval-QA chain combining the LLM with the Chroma retriever.
# Despite its name, `books_db_client_retriever` is the QA chain, not a retriever.
books_db_client_retriever = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=books_db_client,
    verbose=True
)
83
 
84
# OAuth Configuration
import os  # needed only for the environment override of the client secret below

TENANT_ID = '2b093ced-2571-463f-bc3e-b4f8bcb427ee'
CLIENT_ID = '2a7c884c-942d-49e2-9e5d-7a29d8a0d3e5'
# SECURITY: a client secret must not live in source control. Set the
# CLIENT_SECRET environment variable (e.g. as a Space secret) to override the
# literal below. The literal is kept only so existing deployments keep working;
# it has been published and should be rotated and then removed.
CLIENT_SECRET = os.environ.get('CLIENT_SECRET') or 'EOF8Q~kKHCRgx8tnlLM-H8e93ifetxI6x7sU6bGW'
REDIRECT_URI = 'https://sanjeevbora-chatbot.hf.space/'  # Your redirect URI here
AUTH_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/authorize"
TOKEN_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"

# OAuth parameters sent to the authorize endpoint.
# NOTE(review): 'state' is a fixed value and is never checked when the user
# returns, so it gives no CSRF protection — consider a per-session random
# value that is verified on return.
params = {
    'client_id': CLIENT_ID,
    'response_type': 'code',
    'redirect_uri': REDIRECT_URI,
    'response_mode': 'query',
    'scope': 'User.Read',
    'state': '12345'
}

# Construct the login URL
login_url = f"{AUTH_URL}?{urlencode(params)}"
104
+
105
+ # Function to exchange authorization code for access token
106
def exchange_code_for_token(auth_code):
    """Exchange an OAuth authorization code for an access token.

    Args:
        auth_code: The ``code`` value handed back to the redirect URI after
            login. ``None`` or a blank string is rejected without contacting
            the token endpoint.

    Returns:
        The access token string on success, otherwise ``None`` (missing code,
        network failure, non-200 response, or a body that is not JSON or has
        no ``access_token``).
    """
    if isinstance(auth_code, str):
        auth_code = auth_code.strip()
    if not auth_code:
        # Nothing to exchange; avoid a pointless round-trip.
        return None

    data = {
        'grant_type': 'authorization_code',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'code': auth_code,
        'redirect_uri': REDIRECT_URI
    }

    try:
        # Without a timeout a stalled endpoint would hang the UI callback forever.
        response = requests.post(TOKEN_URL, data=data, timeout=10)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json().get('access_token')
    except ValueError:
        # Body was not valid JSON.
        return None
123
 
124
+ # Dummy function to simulate token validation (you will replace this with actual validation)
125
def is_logged_in(token):
    """Return True when *token* is present.

    This is a presence check only; it does not validate the token.
    ``None`` and the empty string both count as "not logged in".
    """
    return bool(token)
128
 
129
+ # Function to retrieve answer using the RAG system
 
 
130
@spaces.GPU(duration=60)
def test_rag(query):
    """Run *query* through the retrieval-QA chain and return only the answer text.

    The chain output is searched for the "Helpful Answer:" marker; everything
    after its first occurrence, whitespace-trimmed, is returned. If the marker
    is absent, "No helpful answer found." is returned instead.
    """
    chain_output = books_db_client_retriever.run(query)

    # DOTALL lets the capture span newlines so multi-line answers are kept whole.
    answer_match = re.search(r"Helpful Answer:(.*)", chain_output, re.DOTALL)
    if answer_match is None:
        return "No helpful answer found."
    return answer_match.group(1).strip()
143
 
144
+ # Define the Gradio interface
145
def chat(query, history=None):
    """Answer *query* and append the exchange to the chat history.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only input
            is ignored so no RAG call is made for it.
        history: Existing list of ``(question, answer)`` pairs, or ``None``.
            It is copied, never mutated in place.

    Returns:
        A ``(history, "")`` tuple: the updated history and an empty string
        that clears the input box.
    """
    history = list(history) if history else []
    if query and query.strip():
        answer = test_rag(query)
        history.append((query, answer))
    return history, ""  # Clear input after submission
152
 
153
with gr.Blocks() as interface:
    gr.Markdown("## RAG Chatbot")
    gr.Markdown("Please log in to continue.")

    # Step 1: Provide a link for the user to log in
    login_link = gr.HTML(f'<a href="{login_url}" target="_blank">Click here to login with Microsoft</a>')

    # Step 2: Ask the user to paste the redirect URL (or the bare code) after login
    auth_code_box = gr.Textbox(label="Copy the link you got after loging in to the website", placeholder="Paste your Website link")

    # Step 3: Button to handle token exchange after user pastes the authorization code
    login_button = gr.Button("Submit Authorization Code")

    def _extract_auth_code(pasted):
        """Return the OAuth ``code`` from pasted text, or ``None`` if there is none.

        Accepts the full redirect URL (the code is read from its query string)
        or, when the text has no scheme, host or query, the bare code itself.
        """
        text = (pasted or "").strip()
        if not text:
            return None
        try:
            parsed_url = urlparse(text)
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal).
            return None

        code_value = parse_qs(parsed_url.query).get('code', [None])[0]
        if code_value:
            return code_value
        # The button invites an "authorization code", so accept a bare code too.
        if not (parsed_url.scheme or parsed_url.netloc or parsed_url.query):
            return text
        return None

    # Handle login button click
    def handle_login(auth_code):
        """Exchange the pasted code for a token and reveal the chat on success.

        Args:
            auth_code: Text from the login textbox: the redirect URL or the
                bare authorization code.

        Returns:
            Three ``gr.update`` objects setting the visibility of the question
            box, the submit button and the chat history, in that order.
        """
        code_value = _extract_auth_code(auth_code)
        # Skip the token request entirely when no code could be extracted.
        token = exchange_code_for_token(code_value) if code_value else None
        logged_in = bool(token)
        return (
            gr.update(visible=logged_in),
            gr.update(visible=logged_in),
            gr.update(visible=logged_in),
        )

    # Components for chat (initially hidden)
    input_box = gr.Textbox(label="Enter your question", placeholder="Type your question here...", visible=False)
    submit_btn = gr.Button("Submit", visible=False)
    chat_history = gr.Chatbot(label="Chat History", visible=False)

    login_button.click(handle_login, inputs=[auth_code_box], outputs=[input_box, submit_btn, chat_history])

    # Chat handling
    submit_btn.click(chat, inputs=[input_box, chat_history], outputs=[chat_history, input_box])

interface.launch()