Spaces:

CamiloVega
/

Easy_RAG

Sleeping

App Files Community

CamiloVega committed on Dec 13, 2024

Commit

0a80de4

verified ·

1 Parent(s): 985ad05

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -82

app.py CHANGED Viewed

@@ -11,8 +11,9 @@ from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
 from langchain_community.llms import HuggingFacePipeline
 from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from huggingface_hub import login
 # Configure logging
 logging.basicConfig(
@@ -30,11 +31,17 @@ class RAGSystem:
     """Main RAG system class."""
     def __init__(self):
         self.upload_folder = UPLOAD_FOLDER
         if os.path.exists(self.upload_folder):
             shutil.rmtree(self.upload_folder)
         os.makedirs(self.upload_folder, exist_ok=True)
         self.max_files = 5
         self.max_file_size = 10 * 1024 * 1024  # 10 MB
         self.supported_formats = ['.pdf', '.txt', '.docx']
@@ -45,7 +52,7 @@ class RAGSystem:
         self.qa_chain = None
         self.documents = []
-        # Initialize embeddings once
         self.initialize_embeddings()
     def initialize_embeddings(self):
@@ -53,12 +60,107 @@ class RAGSystem:
         try:
             self.embeddings = HuggingFaceEmbeddings(
                 model_name=EMBEDDING_MODEL,
-                model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
             )
         except Exception as e:
             logger.error(f"Error initializing embeddings: {str(e)}")
             raise
     def validate_file(self, file_path: str, file_size: int) -> bool:
         """Validate uploaded file."""
         if file_size > self.max_file_size:
@@ -105,7 +207,6 @@ class RAGSystem:
     def update_vector_store(self, new_documents: List):
         """Update vector store with new documents."""
         try:
-            # Process documents
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=500,
                 chunk_overlap=50,
@@ -113,74 +214,17 @@ class RAGSystem:
             )
             chunks = text_splitter.split_documents(new_documents)
-            # Create or update vector store
             if self.vector_store is None:
                 self.vector_store = FAISS.from_documents(chunks, self.embeddings)
             else:
                 self.vector_store.add_documents(chunks)
         except Exception as e:
             logger.error(f"Error updating vector store: {str(e)}")
             raise
-    def initialize_llm(self):
-        """Initialize the language model and QA chain."""
-        try:
-            # Get Hugging Face token
-            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
-            if not hf_token:
-                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
-            # Login to Hugging Face
-            login(token=hf_token)
-            # Initialize model and tokenizer
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME,
-                torch_dtype=torch.float16,
-                device_map="auto"
-            )
-            # Create pipeline
-            pipe = pipeline(
-                "text-generation",
-                model=model,
-                tokenizer=tokenizer,
-                max_new_tokens=512,
-                temperature=0.1,
-                device_map="auto"
-            )
-            llm = HuggingFacePipeline(pipeline=pipe)
-            # Create QA chain
-            prompt_template = """
-            Context: {context}
-            Based on the context above, please provide a clear and concise answer to the following question.
-            If the information is not in the context, explicitly state so.
-            Question: {question}
-            """
-            PROMPT = PromptTemplate(
-                template=prompt_template,
-                input_variables=["context", "question"]
-            )
-            self.qa_chain = RetrievalQA.from_chain_type(
-                llm=llm,
-                chain_type="stuff",
-                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
-                return_source_documents=True,
-                chain_type_kwargs={"prompt": PROMPT}
-            )
-        except Exception as e:
-            logger.error(f"Error initializing LLM: {str(e)}")
-            raise
     def process_upload(self, files: List[gr.File]) -> str:
         """Process uploaded files and initialize/update the system."""
         if not files:
@@ -191,7 +235,6 @@ class RAGSystem:
             if current_files + len(files) > self.max_files:
                 return f"Maximum number of documents ({self.max_files}) exceeded"
-            # Process each file
             processed_files = []
             new_documents = []
             for file in files:
@@ -199,15 +242,13 @@ class RAGSystem:
                 new_documents.extend(documents)
                 processed_files.append(os.path.basename(file.name))
-            # Update vector store with new documents
             self.update_vector_store(new_documents)
             self.documents.extend(new_documents)
-            # Initialize LLM if not already initialized
             if self.qa_chain is None:
                 self.initialize_llm()
-            return f"Successfully processed and initialized: {', '.join(processed_files)}"
         except Exception as e:
             return f"Error: {str(e)}"
@@ -270,7 +311,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     """)
     with gr.Row():
-        # Sidebar for document upload
         with gr.Column(scale=1):
             with gr.Group():
                 gr.HTML("""
@@ -295,7 +335,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 )
                 gr.HTML("</div>")
-        # Main chat area
         with gr.Column(scale=3):
             chatbot = gr.Chatbot(
                 show_label=False,
@@ -335,17 +374,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             </div>
         </div>
     """)
-    # Add custom CSS
-    demo.css = """
-        .container {
-            border-radius: 0.5rem;
-            margin: 0.5rem;
-        }
-        #file-upload {
-            margin-bottom: 1rem;
-        }
-    """
     # Set up event handlers
     file_output.upload(
@@ -362,5 +390,38 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     clear.click(lambda: None, None, chatbot)
-if __name__ == "__main__":
-    demo.launch()

 from langchain.prompts import PromptTemplate
 from langchain_community.llms import HuggingFacePipeline
 from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from huggingface_hub import login
+import bitsandbytes as bnb
 # Configure logging
 logging.basicConfig(
     """Main RAG system class."""
     def __init__(self):
+        # Initialize device
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {self.device}")
+        # Initialize folders
         self.upload_folder = UPLOAD_FOLDER
         if os.path.exists(self.upload_folder):
             shutil.rmtree(self.upload_folder)
         os.makedirs(self.upload_folder, exist_ok=True)
+        # Set limits
         self.max_files = 5
         self.max_file_size = 10 * 1024 * 1024  # 10 MB
         self.supported_formats = ['.pdf', '.txt', '.docx']
         self.qa_chain = None
         self.documents = []
+        # Initialize embeddings
         self.initialize_embeddings()
     def initialize_embeddings(self):
         try:
             self.embeddings = HuggingFaceEmbeddings(
                 model_name=EMBEDDING_MODEL,
+                model_kwargs={
+                    'device': self.device,
+                    'torch_dtype': torch.float32,
+                }
             )
+            logger.info("Embeddings initialized successfully")
         except Exception as e:
             logger.error(f"Error initializing embeddings: {str(e)}")
             raise
+    def initialize_llm(self):
+        """Initialize the language model and QA chain."""
+        try:
+            # Get Hugging Face token
+            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
+            if not hf_token:
+                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
+            # Login to Hugging Face
+            login(token=hf_token)
+            # Configure model loading based on device
+            if self.device == "cuda":
+                model_config = {
+                    'torch_dtype': torch.float16,
+                    'device_map': "auto",
+                }
+            else:
+                quantization_config = BitsAndBytesConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_compute_dtype=torch.float32,
+                    bnb_4bit_quant_type="nf4",
+                    bnb_4bit_use_double_quant=True,
+                )
+                model_config = {
+                    'quantization_config': quantization_config,
+                    'device_map': "auto",
+                    'torch_dtype': torch.float32,
+                    'low_cpu_mem_usage': True,
+                }
+            # Initialize tokenizer and model
+            tokenizer = AutoTokenizer.from_pretrained(
+                MODEL_NAME,
+                token=hf_token,
+                trust_remote_code=True
+            )
+            model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                token=hf_token,
+                trust_remote_code=True,
+                **model_config
+            )
+            # Create pipeline
+            pipe_config = {
+                "model": model,
+                "tokenizer": tokenizer,
+                "max_new_tokens": 512,
+                "temperature": 0.1,
+                "device_map": "auto",
+                "torch_dtype": torch.float32 if self.device == "cpu" else torch.float16,
+            }
+            if self.device == "cpu":
+                pipe_config["model"] = pipe_config["model"].to('cpu')
+            pipe = pipeline("text-generation", **pipe_config)
+            # Create QA chain
+            llm = HuggingFacePipeline(pipeline=pipe)
+            prompt_template = """
+            Context: {context}
+            Based on the context above, please provide a clear and concise answer to the following question.
+            If the information is not in the context, explicitly state so.
+            Question: {question}
+            """
+            PROMPT = PromptTemplate(
+                template=prompt_template,
+                input_variables=["context", "question"]
+            )
+            self.qa_chain = RetrievalQA.from_chain_type(
+                llm=llm,
+                chain_type="stuff",
+                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": PROMPT}
+            )
+            logger.info("LLM initialized successfully")
+        except Exception as e:
+            logger.error(f"Error initializing LLM: {str(e)}")
+            raise
     def validate_file(self, file_path: str, file_size: int) -> bool:
         """Validate uploaded file."""
         if file_size > self.max_file_size:
     def update_vector_store(self, new_documents: List):
         """Update vector store with new documents."""
         try:
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=500,
                 chunk_overlap=50,
             )
             chunks = text_splitter.split_documents(new_documents)
             if self.vector_store is None:
                 self.vector_store = FAISS.from_documents(chunks, self.embeddings)
             else:
                 self.vector_store.add_documents(chunks)
+            logger.info(f"Vector store updated with {len(chunks)} chunks")
         except Exception as e:
             logger.error(f"Error updating vector store: {str(e)}")
             raise
     def process_upload(self, files: List[gr.File]) -> str:
         """Process uploaded files and initialize/update the system."""
         if not files:
             if current_files + len(files) > self.max_files:
                 return f"Maximum number of documents ({self.max_files}) exceeded"
             processed_files = []
             new_documents = []
             for file in files:
                 new_documents.extend(documents)
                 processed_files.append(os.path.basename(file.name))
             self.update_vector_store(new_documents)
             self.documents.extend(new_documents)
             if self.qa_chain is None:
                 self.initialize_llm()
+            return f"Successfully processed: {', '.join(processed_files)}"
         except Exception as e:
             return f"Error: {str(e)}"
     """)
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Group():
                 gr.HTML("""
                 )
                 gr.HTML("</div>")
         with gr.Column(scale=3):
             chatbot = gr.Chatbot(
                 show_label=False,
             </div>
         </div>
     """)
     # Set up event handlers
     file_output.upload(
     clear.click(lambda: None, None, chatbot)
+    if __name__ == "__main__":
+    # Log system information
+    logger.info("Starting Easy RAG system...")
+    logger.info(f"PyTorch version: {torch.__version__}")
+    logger.info(f"CUDA available: {torch.cuda.is_available()}")
+    if torch.cuda.is_available():
+        logger.info(f"CUDA device: {torch.cuda.get_device_name(0)}")
+    else:
+        logger.info("Running on CPU mode with optimizations")
+    # Check for HUGGINGFACE_TOKEN
+    if not os.environ.get('HUGGINGFACE_TOKEN'):
+        logger.warning("HUGGINGFACE_TOKEN not found in environment variables")
+        logger.warning("Please set it before running the application")
+        print("Please set your HUGGINGFACE_TOKEN environment variable")
+        print("Example: export HUGGINGFACE_TOKEN=your_token_here")
+        exit(1)
+    # Create upload directory if it doesn't exist
+    if not os.path.exists(UPLOAD_FOLDER):
+        os.makedirs(UPLOAD_FOLDER)
+        logger.info(f"Created upload directory: {UPLOAD_FOLDER}")
+    try:
+        # Launch the Gradio interface
+        demo.launch(
+            share=False,          # Set to True if you want to create a public link
+            server_name="0.0.0.0",  # Listen on all network interfaces
+            server_port=7860,     # Default Gradio port
+            show_error=True,
+            enable_queue=True
+        )
+    except Exception as e:
+        logger.error(f"Error launching Gradio interface: {str(e)}")
+        raise