Spaces:

hmrizal
/

CSVBot-OpenSource

Sleeping

App Files Files Community

hmrizal committed on Apr 9

Commit

7fb09f2

verified ·

1 Parent(s): c1c3142

Update change_model, process_file, and create_llm_pipeline; add an explicit button to change the model

Browse files

Files changed (1) hide show

app.py +102 -59

app.py CHANGED Viewed

@@ -113,35 +113,45 @@ def initialize_model_once(model_key):
 def create_llm_pipeline(model_key):
     """Create a new pipeline using the specified model"""
-    tokenizer, model, is_t5 = initialize_model_once(model_key)
-    # Create appropriate pipeline based on model type
-    if is_t5:
-        pipe = pipeline(
-            "text2text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            max_new_tokens=256,
-            temperature=0.3,
-            top_p=0.9,
-            return_full_text=False,
-        )
-    else:
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            max_new_tokens=256,
-            temperature=0.3,
-            top_p=0.9,
-            top_k=30,
-            repetition_penalty=1.2,
-            return_full_text=False,
-        )
-    # Wrap pipeline in HuggingFacePipeline for LangChain compatibility
-    return HuggingFacePipeline(pipeline=pipe)
 def create_conversational_chain(db, file_path, model_key):
     llm = create_llm_pipeline(model_key)
@@ -281,14 +291,16 @@ class ChatBot:
     def process_file(self, file, model_key=None):
         if model_key:
             self.model_key = model_key
         if file is None:
             return "Mohon upload file CSV terlebih dahulu."
         try:
             # Handle file from Gradio
             file_path = file.name if hasattr(file, 'name') else str(file)
             self.csv_file_path = file_path
             # Copy to user directory
             user_file_path = f"{self.user_dir}/uploaded.csv"
@@ -301,22 +313,25 @@ class ChatBot:
                 # Save a copy in user directory
                 df.to_csv(user_file_path, index=False)
                 self.csv_file_path = user_file_path
             except Exception as e:
                 return f"Error membaca CSV: {str(e)}"
             # Load document with reduced chunk size for better memory usage
             try:
-                loader = CSVLoader(file_path=file_path, encoding="utf-8", csv_args={
                     'delimiter': ','})
                 data = loader.load()
                 print(f"Documents loaded: {len(data)}")
             except Exception as e:
                 return f"Error loading documents: {str(e)}"
             # Create vector database with optimized settings
             try:
                 db_path = f"{self.user_dir}/db_faiss"
                 # Use CPU-friendly embeddings with smaller dimensions
                 embeddings = HuggingFaceEmbeddings(
                     model_name='sentence-transformers/all-MiniLM-L6-v2',
@@ -327,13 +342,18 @@ class ChatBot:
                 db.save_local(db_path)
                 print(f"Vector database created at {db_path}")
             except Exception as e:
                 return f"Error creating vector database: {str(e)}"
             # Create custom chain
             try:
                 self.chain = create_conversational_chain(db, self.csv_file_path, self.model_key)
-                print(f"Chain created successfully using model: {self.model_key}")
             except Exception as e:
                 return f"Error creating chain: {str(e)}"
             # Add basic file info to chat history for context
@@ -348,32 +368,54 @@ class ChatBot:
     def change_model(self, model_key):
         """Change the model being used and recreate the chain if necessary"""
-        if model_key == self.model_key:
-            return f"Model {model_key} sudah digunakan."
-        self.model_key = model_key
-        # If we have an active session with a file already loaded, recreate the chain
-        if self.csv_file_path:
-            try:
-                # Load existing database
-                db_path = f"{self.user_dir}/db_faiss"
-                embeddings = HuggingFaceEmbeddings(
-                    model_name='sentence-transformers/all-MiniLM-L6-v2',
-                    model_kwargs={'device': 'cpu'}
-                )
-                # Tambahkan flag allow_dangerous_deserialization=True
-                db = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
-                # Create new chain with the selected model
-                self.chain = create_conversational_chain(db, self.csv_file_path, self.model_key)
-                return f"Model berhasil diubah ke {model_key}."
-            except Exception as e:
-                return f"Error mengubah model: {str(e)}"
-        else:
-            return f"Model diubah ke {model_key}. Silakan upload file CSV untuk memulai."
     def chat(self, message, history):
         if self.chain is None:
@@ -430,6 +472,7 @@ def create_gradio_interface():
                     model_info = gr.Markdown(
                         value=f"**{default_model}**: {MODEL_CONFIG[default_model]['description']}"
                     )
                 # Process button AFTER the accordion
                 process_button = gr.Button("Proses CSV")
@@ -478,7 +521,7 @@ def create_gradio_interface():
             result = chatbot.change_model(model_key)
             return chatbot, chatbot.chat_history + [(None, result)]
-        model_dropdown.change(
             fn=handle_model_change,
             inputs=[model_dropdown, chatbot_state, session_id],
             outputs=[chatbot_state, chatbot_interface]

 def create_llm_pipeline(model_key):
     """Create a new pipeline using the specified model"""
+    # Builds a transformers pipeline for the model selected by ``model_key``
+    # and wraps it in HuggingFacePipeline so LangChain can use it as an LLM.
+    #
+    # Args:
+    #     model_key: key handed to initialize_model_once(), which returns the
+    #         tokenizer, the model and an ``is_t5`` flag.
+    # Returns:
+    #     HuggingFacePipeline wrapping the configured pipeline.
+    # Raises:
+    #     Exception: anything raised while loading the model or building the
+    #         pipeline is printed with its traceback and re-raised unchanged.
+    try:
+        print(f"Creating pipeline for model: {model_key}")
+        tokenizer, model, is_t5 = initialize_model_once(model_key)
+        # Settings shared by both pipeline types. ``do_sample=True`` is needed
+        # for temperature/top_p/top_k to take effect; without it generate()
+        # decodes greedily and the sampling parameters are ignored.
+        common_kwargs = {
+            "model": model,
+            "tokenizer": tokenizer,
+            "max_new_tokens": 256,
+            "do_sample": True,
+            "temperature": 0.3,
+            "top_p": 0.9,
+        }
+        # Create appropriate pipeline based on model type
+        if is_t5:
+            print("Creating T5 pipeline")
+            # ``return_full_text`` is a text-generation-only option. The
+            # text2text-generation pipeline would forward it to generate() as
+            # an unknown model kwarg, so it is deliberately not passed here.
+            pipe = pipeline("text2text-generation", **common_kwargs)
+        else:
+            print("Creating causal LM pipeline")
+            pipe = pipeline(
+                "text-generation",
+                top_k=30,
+                repetition_penalty=1.2,
+                # Return only the completion, not the prompt plus completion.
+                return_full_text=False,
+                **common_kwargs,
+            )
+        print("Pipeline created successfully")
+        # Wrap pipeline in HuggingFacePipeline for LangChain compatibility
+        return HuggingFacePipeline(pipeline=pipe)
+    except Exception as e:
+        import traceback
+        print(f"Error creating pipeline: {str(e)}")
+        print(traceback.format_exc())
+        raise
 def create_conversational_chain(db, file_path, model_key):
     llm = create_llm_pipeline(model_key)
     def process_file(self, file, model_key=None):
         if model_key:
             self.model_key = model_key
         if file is None:
             return "Mohon upload file CSV terlebih dahulu."
         try:
+            print(f"Processing file using model: {self.model_key}")
             # Handle file from Gradio
             file_path = file.name if hasattr(file, 'name') else str(file)
             self.csv_file_path = file_path
+            print(f"CSV file path: {file_path}")
             # Copy to user directory
             user_file_path = f"{self.user_dir}/uploaded.csv"
                 # Save a copy in user directory
                 df.to_csv(user_file_path, index=False)
                 self.csv_file_path = user_file_path
+                print(f"CSV saved to {user_file_path}")
             except Exception as e:
+                print(f"Error reading CSV: {str(e)}")
                 return f"Error membaca CSV: {str(e)}"
             # Load document with reduced chunk size for better memory usage
             try:
+                loader = CSVLoader(file_path=user_file_path, encoding="utf-8", csv_args={
                     'delimiter': ','})
                 data = loader.load()
                 print(f"Documents loaded: {len(data)}")
             except Exception as e:
+                print(f"Error loading documents: {str(e)}")
                 return f"Error loading documents: {str(e)}"
             # Create vector database with optimized settings
             try:
                 db_path = f"{self.user_dir}/db_faiss"
                 # Use CPU-friendly embeddings with smaller dimensions
                 embeddings = HuggingFaceEmbeddings(
                     model_name='sentence-transformers/all-MiniLM-L6-v2',
                 db.save_local(db_path)
                 print(f"Vector database created at {db_path}")
             except Exception as e:
+                print(f"Error creating vector database: {str(e)}")
                 return f"Error creating vector database: {str(e)}"
             # Create custom chain
             try:
+                print(f"Creating conversation chain with model: {self.model_key}")
                 self.chain = create_conversational_chain(db, self.csv_file_path, self.model_key)
+                print("Chain created successfully")
             except Exception as e:
+                import traceback
+                print(f"Error creating chain: {str(e)}")
+                print(traceback.format_exc())
                 return f"Error creating chain: {str(e)}"
             # Add basic file info to chat history for context
     def change_model(self, model_key):
         """Change the model being used and recreate the chain if necessary"""
+        # Args:
+        #     model_key: key of the model to switch to.
+        # Returns:
+        #     A user-facing status message (in Indonesian). Errors are caught
+        #     and reported through the message instead of being raised.
+        #
+        # When a CSV is loaded, self.model_key and self.chain are updated
+        # together and only after the new chain was built, so a failed switch
+        # leaves the previous model and chain in place and can be retried.
+        try:
+            if model_key == self.model_key:
+                return f"Model {model_key} sudah digunakan."
+            print(f"Changing model from {self.model_key} to {model_key}")
+            # No usable CSV yet: there is no chain to rebuild, so just record
+            # the preference.
+            if not (self.csv_file_path and os.path.exists(self.csv_file_path)):
+                self.model_key = model_key
+                print(f"No CSV file loaded yet, just updating model preference to {model_key}")
+                return f"Model diubah ke {model_key}. Silakan upload file CSV untuk memulai."
+            # A file is already loaded: recreate the chain on the existing index
+            try:
+                # Load existing database
+                db_path = f"{self.user_dir}/db_faiss"
+                if not os.path.exists(db_path):
+                    return "Error: Database tidak ditemukan. Silakan upload file CSV kembali."
+                print(f"Loading embeddings from {db_path}")
+                embeddings = HuggingFaceEmbeddings(
+                    model_name='sentence-transformers/all-MiniLM-L6-v2',
+                    model_kwargs={'device': 'cpu'}
+                )
+                # NOTE(security): allow_dangerous_deserialization=True permits
+                # pickle loading. db_path is the directory process_file saves
+                # its own index to; never point it at an untrusted location.
+                db = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+                print("FAISS database loaded successfully")
+                # Create new chain with the selected model
+                print(f"Creating new conversation chain with {model_key}")
+                new_chain = create_conversational_chain(db, self.csv_file_path, model_key)
+                # Commit both pieces of state only after the rebuild succeeded
+                self.chain = new_chain
+                self.model_key = model_key
+                print("Chain created successfully")
+                # Add notification to chat history
+                self.chat_history.append(("System", f"Model berhasil diubah ke {model_key}."))
+                return f"Model berhasil diubah ke {model_key}."
+            except Exception as e:
+                import traceback
+                error_trace = traceback.format_exc()
+                print(f"Detailed error in change_model: {error_trace}")
+                return f"Error mengubah model: {str(e)}"
+        except Exception as e:
+            import traceback
+            error_trace = traceback.format_exc()
+            print(f"Unexpected error in change_model: {error_trace}")
+            return f"Error tidak terduga saat mengubah model: {str(e)}"
     def chat(self, message, history):
         if self.chain is None:
                     model_info = gr.Markdown(
                         value=f"**{default_model}**: {MODEL_CONFIG[default_model]['description']}"
                     )
+                    change_model_button = gr.Button("Terapkan Perubahan Model")
                 # Process button AFTER the accordion
                 process_button = gr.Button("Proses CSV")
             result = chatbot.change_model(model_key)
             return chatbot, chatbot.chat_history + [(None, result)]
+        change_model_button.click(
             fn=handle_model_change,
             inputs=[model_dropdown, chatbot_state, session_id],
             outputs=[chatbot_state, chatbot_interface]