Spaces:

CamiloVega
/

Easy_RAG

Sleeping

App Files Files Community

CamiloVega committed on Dec 13, 2024

Commit

316a253

verified ·

1 Parent(s): f91396d

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -144

app.py CHANGED Viewed

@@ -56,153 +56,102 @@ class RAGSystem:
         self.initialize_embeddings()
     def initialize_embeddings(self):
-        """Initialize embedding model."""
-        try:
-            self.embeddings = HuggingFaceEmbeddings(
-                model_name=EMBEDDING_MODEL,
-                model_kwargs={
-                    'device': self.device,
-                    'torch_dtype': torch.float32,
-                }
-            )
-            logger.info("Embeddings initialized successfully")
-        except Exception as e:
-            logger.error(f"Error initializing embeddings: {str(e)}")
-            raise
-    def initialize_llm(self):
-        """Initialize the language model and QA chain."""
-        try:
-            # Get Hugging Face token
-            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
-            if not hf_token:
-                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
-            # Login to Hugging Face
-            login(token=hf_token)
-            # Configure model loading based on device
-            if self.device == "cuda":
-                model_config = {
-                    'torch_dtype': torch.float16,
-                    'device_map': "auto",
-                }
-            else:
-                quantization_config = BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_compute_dtype=torch.float32,
-                    bnb_4bit_quant_type="nf4",
-                    bnb_4bit_use_double_quant=True,
-                )
-                model_config = {
-                    'quantization_config': quantization_config,
-                    'device_map': "auto",
-                    'torch_dtype': torch.float32,
-                    'low_cpu_mem_usage': True,
-                }
-            # Initialize tokenizer and model
-            tokenizer = AutoTokenizer.from_pretrained(
-                MODEL_NAME,
-                token=hf_token,
-                trust_remote_code=True
-            )
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME,
-                token=hf_token,
-                trust_remote_code=True,
-                **model_config
-            )
-            # Create pipeline
-            pipe_config = {
-                "model": model,
-                "tokenizer": tokenizer,
-                "max_new_tokens": 512,
-                "temperature": 0.1,
-                "device_map": "auto",
-                "torch_dtype": torch.float32 if self.device == "cpu" else torch.float16,
             }
-            if self.device == "cpu":
-                pipe_config["model"] = pipe_config["model"].to('cpu')
-            pipe = pipeline("text-generation", **pipe_config)
-            # Create QA chain
-            llm = HuggingFacePipeline(pipeline=pipe)
-            prompt_template = """
-            Context: {context}
-            Based on the context above, please provide a clear and concise answer to the following question.
-            If the information is not in the context, explicitly state so.
-            Question: {question}
-            """
-            PROMPT = PromptTemplate(
-                template=prompt_template,
-                input_variables=["context", "question"]
-            )
-            self.qa_chain = RetrievalQA.from_chain_type(
-                llm=llm,
-                chain_type="stuff",
-                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
-                return_source_documents=True,
-                chain_type_kwargs={"prompt": PROMPT}
-            )
-            logger.info("LLM initialized successfully")
-        except Exception as e:
-            logger.error(f"Error initializing LLM: {str(e)}")
-            raise
-    def validate_file(self, file_path: str, file_size: int) -> bool:
-        """Validate uploaded file."""
-        if file_size > self.max_file_size:
-            raise ValueError(f"File size exceeds {self.max_file_size // 1024 // 1024}MB limit")
-        ext = os.path.splitext(file_path)[1].lower()
-        if ext not in self.supported_formats:
-            raise ValueError(f"Unsupported format. Supported: {', '.join(self.supported_formats)}")
-        return True
-    def process_file(self, file: gr.File) -> List:
-        """Process a single file and return documents."""
-        try:
-            file_path = file.name
-            file_size = os.path.getsize(file_path)
-            self.validate_file(file_path, file_size)
-            # Copy file to upload directory
-            filename = os.path.basename(file_path)
-            save_path = os.path.join(self.upload_folder, filename)
-            shutil.copy2(file_path, save_path)
-            # Load documents based on file type
-            ext = os.path.splitext(file_path)[1].lower()
-            if ext == '.pdf':
-                loader = PyPDFLoader(save_path)
-            elif ext == '.txt':
-                loader = TextLoader(save_path)
-            else:  # .docx
-                loader = Docx2txtLoader(save_path)
-            documents = loader.load()
-            for doc in documents:
-                doc.metadata.update({
-                    'source': filename,
-                    'type': 'uploaded'
-                })
-            return documents
-        except Exception as e:
-            logger.error(f"Error processing {file_path}: {str(e)}")
-            raise
     def update_vector_store(self, new_documents: List):
         """Update vector store with new documents."""

         self.initialize_embeddings()
     def initialize_embeddings(self):
+        """Initialize the embedding model on the best available device.
+
+        Selects "cuda" when ``torch.cuda.is_available()`` is true and "cpu"
+        otherwise, builds a ``HuggingFaceEmbeddings`` for ``EMBEDDING_MODEL``
+        with ``normalize_embeddings`` enabled, and stores it on
+        ``self.embeddings``.
+
+        Raises:
+            Exception: Any error raised while building the embeddings is
+                logged and then re-raised unchanged.
+        """
+        try:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.embeddings = HuggingFaceEmbeddings(
+                model_name=EMBEDDING_MODEL,
+                model_kwargs={
+                    'device': device
+                },
+                encode_kwargs={
+                    'normalize_embeddings': True
+                }
+            )
+            logger.info(f"Embeddings initialized successfully on {device}")
+        except Exception as e:
+            logger.error(f"Error initializing embeddings: {str(e)}")
+            raise
+    def initialize_llm(self):
+        """Initialize the language model and the retrieval QA chain.
+
+        Reads the Hugging Face token from the ``HUGGINGFACE_TOKEN`` environment
+        variable, logs in, loads the tokenizer and causal LM for ``MODEL_NAME``,
+        wraps them in a text-generation pipeline, and stores a "stuff"
+        ``RetrievalQA`` chain on ``self.qa_chain``.
+
+        ``self.vector_store`` must already be set: its retriever
+        (``k`` = 4) is wired into the chain.
+
+        Raises:
+            ValueError: If ``HUGGINGFACE_TOKEN`` is not set or is empty.
+            Exception: Any failure is logged and then re-raised unchanged.
+        """
+        try:
+            # Get Hugging Face token
+            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
+            if not hf_token:
+                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
+            # Login to Hugging Face
+            login(token=hf_token)
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            # Configure model loading based on device
+            if device == "cuda":
+                # Half precision is only requested when a GPU is present.
+                model_config = {
+                    'torch_dtype': torch.float16,
+                    'device_map': "auto",
+                }
+            else:
+                model_config = {
+                    'device_map': "auto",
+                    'low_cpu_mem_usage': True,
+                }
+            # Initialize tokenizer and model
+            tokenizer = AutoTokenizer.from_pretrained(
+                MODEL_NAME,
+                token=hf_token,
+                trust_remote_code=True
+            )
+            model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                token=hf_token,
+                trust_remote_code=True,
+                **model_config
+            )
+            # Create pipeline
+            pipe_config = {
+                "model": model,
+                "tokenizer": tokenizer,
+                "max_new_tokens": 512,
+                "temperature": 0.1,
+                "device_map": "auto",
+            }
+            pipe = pipeline("text-generation", **pipe_config)
+            llm = HuggingFacePipeline(pipeline=pipe)
+            # Create QA chain
+            # The literal's lines keep their existing leading whitespace so the
+            # prompt text sent to the model is unchanged by the re-indentation.
+            prompt_template = """
+        Context: {context}
+        Based on the context above, please provide a clear and concise answer to the following question.
+        If the information is not in the context, explicitly state so.
+        Question: {question}
+        """
+            PROMPT = PromptTemplate(
+                template=prompt_template,
+                input_variables=["context", "question"]
+            )
+            self.qa_chain = RetrievalQA.from_chain_type(
+                llm=llm,
+                chain_type="stuff",
+                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": PROMPT}
+            )
+            logger.info("LLM initialized successfully")
+        except Exception as e:
+            logger.error(f"Error initializing LLM: {str(e)}")
+            raise
     def update_vector_store(self, new_documents: List):
         """Update vector store with new documents."""