File size: 6,574 Bytes
ab7a67c
 
 
 
 
 
 
 
 
 
 
 
84133c2
ab7a67c
 
 
 
 
 
 
 
 
 
398cc60
ab7a67c
 
dbc15b1
ab7a67c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df4de66
7880dfb
29a8657
 
 
ab7a67c
29a8657
 
ab7a67c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85864e3
ab7a67c
 
 
85864e3
ab7a67c
 
 
 
 
 
 
 
 
6f1f9a4
ab7a67c
 
 
 
bee02cd
ab7a67c
bd9da71
ab7a67c
 
 
 
 
 
 
 
 
 
 
 
29a8657
ab7a67c
 
 
 
 
 
 
 
29a8657
ab7a67c
 
 
29a8657
ab7a67c
 
84133c2
617e32e
 
ab7a67c
617e32e
 
ab7a67c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617e32e
ab7a67c
 
 
 
 
5b84063
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import subprocess

script_path = './setup.sh'  # Adjust the path if needed

# Run the setup script with bash. subprocess.run is the modern (3.5+) API;
# subprocess.call is the legacy interface. Passing a list (not a shell
# string) avoids shell injection on the path.
result = subprocess.run(['bash', script_path])
exit_code = result.returncode  # kept at module level for backward compatibility

if exit_code == 0:
    print("Script executed successfully.")
else:
    print(f"Script failed with exit code {exit_code}.")

import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoConfig, AutoTokenizer, pipeline, AutoModelForCausalLM
from langchain_community.document_loaders import DirectoryLoader
from torch import bfloat16
import torch
import re
import transformers
import spaces
import requests
from urllib.parse import urlencode, urlparse, parse_qs
from selenium import webdriver

# Initialize embeddings and ChromaDB
model_name = "sentence-transformers/all-mpnet-base-v2"
# Prefer GPU when available; the same device string is reused for the LLM below.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {"device": device}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Load every PDF under ./example (recursive glob) and index it into Chroma.
loader = DirectoryLoader('./example', glob="**/*.pdf", recursive=True, use_multithreading=True)
docs = loader.load()
# NOTE(review): the store is written to "companies_db" and immediately
# re-opened from "./companies_db" — same directory, so this works, but the
# fresh `vectordb` handle is never used afterwards.
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="companies_db")
books_db = Chroma(persist_directory="./companies_db", embedding_function=embeddings)
books_db_client = books_db.as_retriever()

# Initialize the model and tokenizer
model_name = "stabilityai/stablelm-zephyr-3b"

# 4-bit NF4 quantization with nested (double) quantization; matmuls are
# computed in bfloat16 to limit accuracy loss.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# NOTE(review): max_new_tokens is a generation parameter, not a model-config
# field — passing it here is presumably a no-op; the pipeline below sets its
# own max_new_tokens=256. Confirm which limit is intended.
model_config = transformers.AutoConfig.from_pretrained(model_name, max_new_tokens=1024)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map=device,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Text-generation pipeline: nucleus/top-k sampling, capped at 256 new tokens.
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # output includes the prompt; test_rag later regex-extracts the answer from it
    torch_dtype=torch.float16,
    device_map=device,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    max_new_tokens=256
)

# Wrap the HF pipeline so LangChain can drive it as an LLM.
llm = HuggingFacePipeline(pipeline=query_pipeline)

# RetrievalQA with the "stuff" strategy: all retrieved documents are placed
# into a single prompt for the LLM (no map-reduce/refine passes).
books_db_client_retriever = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=books_db_client,
    verbose=True
)

import os

# OAuth Configuration (Microsoft identity platform, authorization-code flow).
# SECURITY: credentials are read from the environment when present; the
# hard-coded fallbacks are kept only for backward compatibility and should be
# rotated and removed — a client secret must never live in source control.
TENANT_ID = os.environ.get('OAUTH_TENANT_ID', '2b093ced-2571-463f-bc3e-b4f8bcb427ee')
CLIENT_ID = os.environ.get('OAUTH_CLIENT_ID', '2a7c884c-942d-49e2-9e5d-7a29d8a0d3e5')
CLIENT_SECRET = os.environ.get('OAUTH_CLIENT_SECRET', 'EOF8Q~kKHCRgx8tnlLM-H8e93ifetxI6x7sU6bGW')
REDIRECT_URI = os.environ.get('OAUTH_REDIRECT_URI', 'https://sanjeevbora-chatbot.hf.space/')  # Your redirect URI here

# v2.0 endpoints for this tenant.
AUTH_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/authorize"
TOKEN_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"

# OAuth parameters for the authorization request.
params = {
    'client_id': CLIENT_ID,
    'response_type': 'code',
    'redirect_uri': REDIRECT_URI,
    'response_mode': 'query',
    'scope': 'User.Read',
    'state': '12345'
}

# Construct the login URL the user visits to start the flow.
login_url = f"{AUTH_URL}?{urlencode(params)}"

# Function to exchange authorization code for access token
def exchange_code_for_token(auth_code):
    """Exchange an OAuth authorization code for an access token.

    Posts *auth_code* to the tenant token endpoint and returns the access
    token string, or ``None`` on any failure: non-200 response, a response
    body without ``access_token``, or a network error/timeout.
    """
    data = {
        'grant_type': 'authorization_code',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'code': auth_code,
        'redirect_uri': REDIRECT_URI
    }

    try:
        # Bound the request so a stalled token endpoint cannot hang the
        # Gradio callback; a raised RequestException previously crashed it.
        response = requests.post(TOKEN_URL, data=data, timeout=10)
    except requests.RequestException:
        return None

    if response.status_code == 200:
        # .get() yields None when the field is missing, matching the error path.
        return response.json().get('access_token')
    return None

# Stub session check — any non-None token counts as logged in.
def is_logged_in(token):
    """Return True when a token value is present (placeholder validation)."""
    if token is None:
        return False
    return True

# Function to retrieve an answer from the RAG chain (GPU slot for up to 60s).
@spaces.GPU(duration=60)
def test_rag(query):
    """Run *query* through the retrieval-QA chain and return the answer text.

    The chain output embeds the answer after a "Helpful Answer:" marker;
    everything after the marker is returned (stripped), or a fallback
    message when the marker is absent.
    """
    raw_output = books_db_client_retriever.run(query)

    # DOTALL so a multi-line answer after the marker is captured in full.
    match = re.search(r"Helpful Answer:(.*)", raw_output, re.DOTALL)
    return match.group(1).strip() if match else "No helpful answer found."

# Gradio chat handler.
def chat(query, history=None):
    """Handle one chat turn: answer *query* and append (query, answer).

    Returns the updated history and an empty string so the input box is
    cleared after submission. An empty/falsy query is a no-op.
    """
    if history is None:
        history = []
    if not query:
        return history, ""
    history.append((query, test_rag(query)))
    return history, ""

# Build the Gradio UI: login step first, chat widgets revealed on success.
with gr.Blocks() as interface:
    gr.Markdown("## RAG Chatbot")
    gr.Markdown("Please log in to continue.")

    # Step 1: Provide a link for the user to log in
    login_link = gr.HTML(f'<a href="{login_url}" target="_blank">Click here to login with Microsoft</a>')

    # Step 2: Ask the user to paste the redirect URL (it carries ?code=...).
    # Label typo fixed ("loging" -> "logging").
    auth_code_box = gr.Textbox(
        label="Copy the link you got after logging in to the website",
        placeholder="Paste your Website link",
        type="password",
    )

    # Step 3: Button to handle token exchange after user pastes the URL
    login_button = gr.Button("Submit Authorization Code")

    def handle_login(auth_code):
        """Extract the ?code= parameter from the pasted redirect URL,
        exchange it for a token, and reveal the chat widgets on success."""
        query_params = parse_qs(urlparse(auth_code).query)
        code_value = query_params.get('code', [None])[0]

        # Skip the network round-trip entirely when no code was supplied;
        # previously a pointless token request was made with code=None.
        token = exchange_code_for_token(code_value) if code_value else None

        visible = token is not None
        return (
            gr.update(visible=visible),
            gr.update(visible=visible),
            gr.update(visible=visible),
        )

    # Components for chat (initially hidden until login succeeds)
    input_box = gr.Textbox(label="Enter your question", placeholder="Type your question here...", visible=False)
    submit_btn = gr.Button("Submit", visible=False)
    chat_history = gr.Chatbot(label="Chat History", visible=False)

    login_button.click(handle_login, inputs=[auth_code_box], outputs=[input_box, submit_btn, chat_history])

    # Chat handling
    submit_btn.click(chat, inputs=[input_box, chat_history], outputs=[chat_history, input_box])

interface.launch()