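"""Gradio app for PDF question answering.

Uploaded PDFs are indexed with LlamaIndex (BAAI/bge-small-en-v1.5 embeddings,
Llama-3.2-3B-Instruct served via the Hugging Face Inference API), and questions
are answered against the vector index persisted in ./db.
"""
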
import os
import shutil

import gradio as gr
from dotenv import load_dotenv
from llama_index.core import (
    ChatPromptTemplate,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceInferenceAPI

# Load environment variables
load_dotenv()
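# HF_TOKEN must be present in the environment (e.g., in a .env file) for the
# Hugging Face Inference API calls configured below to authenticate.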

# Configure the Llama index settings
Settings.llm = HuggingFaceInferenceAPI(
    model_name="nltpt/Llama-3.2-3B-Instruct",
    tokenizer_name="nltpt/Llama-3.2-3B-Instruct",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
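
# Settings is LlamaIndex's global configuration: the LLM and embedding model
# set here are picked up by all index-building and query operations below.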

# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Display helper for an uploaded PDF. Gradio has no built-in inline PDF viewer,
# so this just reports the file name. (Currently not wired into the UI below.)
def display_pdf(file):
    return f"Uploaded PDF: {os.path.basename(file.name)}"

# Data ingestion function: copy uploads into DATA_DIR, then (re)build the index
def data_ingestion(files):
    for uploaded_file in files:
        # Gradio hands over uploads as temp files; .name is the full temp path,
        # so copy by basename into the data directory.
        filepath = os.path.join(DATA_DIR, os.path.basename(uploaded_file.name))
        shutil.copy(uploaded_file.name, filepath)
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    return "PDFs processed successfully!"

# Query handling function
def handle_query(query):
    # Reload the persisted index from disk on every query (simple, not optimized)
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    
    chat_text_qa_msgs = [
        (
            "user",
            """You are a Q&A assistant. Your main goal is to provide answers as accurately as possible, based on the context of the document provided. If the question does not match the context or is outside the scope of the document, advise the user to ask questions that are relevant to the document.
            Context:
            {context_str}
            Question:
            {query_str}
            """
        )
    ]
    
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    
    answer = query_engine.query(query)
    
    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."

# Chatbot functionality: ingest any uploaded PDFs, then answer the question.
# gr.Chatbot expects history as (user_message, assistant_message) pairs;
# None on the user side renders an assistant-only message.
def chatbot(files, user_input, history):
    if files:
        data_ingestion(files)  # Process PDFs (re-ingests on each submit while files are attached)
        history.append((None, "Your PDFs have been processed. You can now ask questions."))
    if user_input:
        response = handle_query(user_input)
        history.append((user_input, response))
    return history, history

# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("# (PDF) Information and Inference ๐Ÿ—ž๏ธ")
    gr.Markdown("Upload PDF files and ask questions about their content!")
    
    with gr.Row():
        with gr.Column(scale=2):
            file_upload = gr.File(label="Upload your PDF files", file_types=[".pdf"], file_count="multiple")
        with gr.Column(scale=8):
            chatbot_interface = gr.Chatbot(label="Q&A Assistant", elem_id="chatbot")
    
    user_input = gr.Textbox(label="Ask a question", placeholder="Type your question here...")
    
    history = gr.State([])  # To hold chat history
    
    submit_button = gr.Button("Submit")
    
    submit_button.click(
        fn=chatbot, 
        inputs=[file_upload, user_input, history], 
        outputs=[chatbot_interface, history]
    )

app.launch()
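# By default launch() serves the app locally at http://127.0.0.1:7860;
# pass share=True to launch() for a temporary public Gradio link.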