import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoConfig, AutoTokenizer, pipeline, AutoModelForCausalLM
from langchain_community.document_loaders import DirectoryLoader
import torch
import re
import requests
from urllib.parse import urlencode
import transformers
import spaces
# Initialize embeddings and ChromaDB
model_name = "sentence-transformers/all-mpnet-base-v2"
device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {"device": device}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
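# all-mpnet-base-v2 encodes each text chunk as a 768-dimensional sentence embedding.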
loader = DirectoryLoader('./example', glob="**/*.pdf", recursive=True, use_multithreading=True)
docs = loader.load()
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="companies_db")
books_db = Chroma(persist_directory="./companies_db", embedding_function=embeddings)
books_db_client = books_db.as_retriever()
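# The PDFs under ./example are embedded once at startup; the persisted Chroma
# collection in ./companies_db is then reopened and exposed as a retriever.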
# Initialize the model and tokenizer
model_name = "stabilityai/stablelm-zephyr-3b"
model_config = transformers.AutoConfig.from_pretrained(model_name, max_new_tokens=1024)
model = transformers.AutoModelForCausalLM.from_pretrained(
model_name,
trust_remote_code=True,
config=model_config,
device_map=device,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
query_pipeline = transformers.pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
return_full_text=True,
torch_dtype=torch.float16,
device_map=device,
do_sample=True,
temperature=0.7,
top_p=0.9,
top_k=50,
max_new_tokens=256
)
llm = HuggingFacePipeline(pipeline=query_pipeline)
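# Wrapping the transformers pipeline lets LangChain chains call the local model
# like any other LLM.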
books_db_client_retriever = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=books_db_client,
verbose=True
)
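# chain_type="stuff" concatenates all retrieved chunks into a single prompt that is
# passed to the LLM in one call.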
# OAuth Configuration
# NOTE: in a real deployment these credentials should come from environment
# variables / Space secrets rather than being hardcoded.
TENANT_ID = '2b093ced-2571-463f-bc3e-b4f8bcb427ee'
CLIENT_ID = '2a7c884c-942d-49e2-9e5d-7a29d8a0d3e5'
CLIENT_SECRET = 'EOF8Q~kKHCRgx8tnlLM-H8e93ifetxI6x7sU6bGW'
REDIRECT_URI = 'https://sanjeevbora-chatbot.hf.space/'
AUTH_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/authorize"
TOKEN_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
GRAPH_API_URL = "https://graph.microsoft.com/v1.0/me"  # Microsoft Graph endpoint used by get_user_profile
# Global variable to store the access token
access_token = None
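# NOTE: because this is a module-level global, the token is shared by every visitor to the Space.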
# OAuth Authorization URL with parameters
def get_auth_url():
params = {
'client_id': CLIENT_ID,
'response_type': 'code',
'redirect_uri': REDIRECT_URI,
'response_mode': 'query',
'scope': 'User.Read',
'state': '12345' # Optional state parameter
}
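    # The resulting link has the form:
    # {AUTH_URL}?client_id=...&response_type=code&redirect_uri=...&response_mode=query&scope=User.Read&state=12345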
return f"{AUTH_URL}?{urlencode(params)}"
# Exchange authorization code for an access token
def exchange_code_for_token(auth_code):
data = {
'grant_type': 'authorization_code',
'client_id': CLIENT_ID,
'client_secret': CLIENT_SECRET,
'code': auth_code,
'redirect_uri': REDIRECT_URI
}
response = requests.post(TOKEN_URL, data=data)
token_data = response.json()
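    # On success the JSON also includes token_type and expires_in; on failure there is
    # no 'access_token' key, so .get() returns None.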
return token_data.get('access_token')
# Function to fetch user profile from Microsoft Graph
def get_user_profile(token):
headers = {
'Authorization': f'Bearer {token}'
}
response = requests.get(GRAPH_API_URL, headers=headers)
return response.json()
# Function to check if the user is authenticated
def is_authenticated():
return access_token is not None
# Function to retrieve answer using the RAG system
@spaces.GPU(duration=60)
def test_rag(query):
books_retriever = books_db_client_retriever.run(query)
# Extract the relevant answer using regex
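    # return_full_text=True means the pipeline output contains the prompt as well;
    # LangChain's default QA prompt ends with "Helpful Answer:", so the text after
    # that marker is the model's generated answer.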
corrected_text_match = re.search(r"Helpful Answer:(.*)", books_retriever, re.DOTALL)
if corrected_text_match:
corrected_text_books = corrected_text_match.group(1).strip()
else:
corrected_text_books = "No helpful answer found."
return corrected_text_books
# Gradio app with OAuth integration
def chat_interface():
global access_token
# If the user is not authenticated, redirect to Microsoft login
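    # This check runs when the interface is first built, so the chat widgets only
    # appear if a token was already obtained.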
if not is_authenticated():
auth_url = get_auth_url()
return gr.Markdown(f"Please [log in]({auth_url}) to use the chatbot.")
# Gradio chatbot interface
def chat(query, history=None):
if history is None:
history = []
if query:
# Chatbot logic here
answer = test_rag(query)
history.append((query, answer))
return history, "" # Clear input after submission
    # Use a Column (not a nested Blocks) so the chat UI renders inside the outer app
    with gr.Column() as interface:
gr.Markdown("## RAG Chatbot")
gr.Markdown("Ask a question and get answers based on retrieved documents.")
input_box = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
submit_btn = gr.Button("Submit")
chat_history = gr.Chatbot(label="Chat History")
submit_btn.click(chat, inputs=[input_box, chat_history], outputs=[chat_history, input_box])
return interface
# Function to handle OAuth callback
def handle_auth_callback(auth_code):
global access_token
# Exchange authorization code for access token
access_token = exchange_code_for_token(auth_code)
return "Authentication successful. You can now use the chatbot."
# Gradio app launch
with gr.Blocks() as app:
gr.Markdown("## OAuth2.0 Chatbot")
# Add an input field to manually input the authorization code for testing
auth_code_input = gr.Textbox(label="Enter the OAuth Authorization Code")
# Button to handle authentication and exchange the code for the access token
    auth_button = gr.Button("Authenticate")
    auth_status = gr.Markdown()
    # Callback for authentication: show the result message in the Markdown component
    auth_button.click(fn=handle_auth_callback, inputs=auth_code_input, outputs=auth_status)
# Display the chat interface or authentication prompt
chat_interface()
app.launch()