Spaces:

Shriharsh
/

Customer_Support_Bot_with_Document_Training

Sleeping

App Files Files Community

Shriharsh committed on Mar 14

Commit

76d85a4

verified ·

1 Parent(s): 03000c3

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -27

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import gradio as gr
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
 # Set up logging with a dedicated file handler
 logger = logging.getLogger('SupportBot')
@@ -10,24 +11,36 @@ logger.setLevel(logging.INFO)
 # Remove any existing handlers to avoid conflicts
 if logger.handlers:
     logger.handlers.clear()
 # Create a file handler with append mode
-handler = logging.FileHandler('support_bot_log.txt', mode='a')
-handler.setLevel(logging.INFO)
 formatter = logging.Formatter('%(asctime)s - %(message)s')
-handler.setFormatter(formatter)
-logger.addHandler(handler)
 # Load models
 qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
 embedder = SentenceTransformer('all-MiniLM-L6-v2')
-# Helper function to extract text from PDF
 def extract_text_from_pdf(file_path):
     text = ""
     with open(file_path, "rb") as file:
         pdf_reader = PyPDF2.PdfReader(file)
         for page in pdf_reader.pages:
-            text += page.extract_text() + "\n"
     return text
 # Find the most relevant section in the document
@@ -44,7 +57,7 @@ def find_relevant_section(query, sections, section_embeddings):
     SIMILARITY_THRESHOLD = 0.4
     if similarity_score >= SIMILARITY_THRESHOLD:
         logger.info(f"Found relevant section using embeddings (score: {similarity_score})")
-        handler.flush()  # Ensure log is written immediately
         return best_section
     logger.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
@@ -54,11 +67,11 @@ def find_relevant_section(query, sections, section_embeddings):
         common_words = query_words.intersection(section_words)
         if len(common_words) >= 2:
             logger.info(f"Keyword match found with common words: {common_words}")
-            handler.flush()
             return section
     logger.info("No good match found. Returning default response.")
-    handler.flush()
     return "I don’t have enough information to answer that."
 # Process the uploaded file
@@ -66,20 +79,25 @@ def process_file(file, state):
     logger.info("Received file upload request")
     if file is None:
         logger.info("No file uploaded")
-        handler.flush()
         return [("Bot", "Please upload a file.")], state
     file_path = file.name
-    if file_path.lower().endswith(".pdf"):
-        logger.info(f"Processing PDF file: {file_path}")
-        text = extract_text_from_pdf(file_path)
-    elif file_path.lower().endswith(".txt"):
-        logger.info(f"Processing TXT file: {file_path}")
-        with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
     else:
-        logger.error(f"Unsupported file format: {file_path}")
-        handler.flush()
         return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state
     sections = text.split('\n\n')
@@ -91,8 +109,8 @@ def process_file(file, state):
     state['feedback_count'] = 0
     state['mode'] = 'waiting_for_query'
     state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
-    logger.info(f"File processed successfully: {file_path}")
-    handler.flush()
     return state['chat_history'], state
 # Handle user input (queries and feedback)
@@ -100,7 +118,7 @@ def handle_input(user_input, state):
     if state['mode'] == 'waiting_for_upload':
         logger.info("User input received before file upload")
         state['chat_history'].append(("Bot", "Please upload a file first."))
-        handler.flush()
     elif state['mode'] == 'waiting_for_query':
         query = user_input
         logger.info(f"User query: {query}")
@@ -117,7 +135,7 @@ def handle_input(user_input, state):
         state['chat_history'].append(("User", query))
         state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
         logger.info(f"Generated answer: {answer}")
-        handler.flush()
     elif state['mode'] == 'waiting_for_feedback':
         feedback = user_input.lower()
         logger.info(f"User feedback: {feedback}")
@@ -130,7 +148,7 @@ def handle_input(user_input, state):
             else:
                 state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
                 logger.info("Max feedback iterations (2) reached. Ready for next query.")
-            handler.flush()
         else:
             query = state['current_query']
             context = find_relevant_section(query, state['sections'], state['section_embeddings'])
@@ -143,13 +161,13 @@ def handle_input(user_input, state):
             else:
                 state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
                 logger.info(f"Invalid feedback received: {feedback}")
-                handler.flush()
                 return state['chat_history'], state
             state['last_answer'] = adjusted_answer
             state['feedback_count'] += 1
             state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
             logger.info(f"Updated answer: {adjusted_answer}")
-            handler.flush()
     return state['chat_history'], state
 # Initial state
@@ -171,9 +189,10 @@ with gr.Blocks() as demo:
     chat = gr.Chatbot()
     user_input = gr.Textbox(label="Your query or feedback")
     submit_btn = gr.Button("Submit")
-    log_file = gr.File(label="Download Log File", value="support_bot_log.txt")
     file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
     submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
-demo.launch(share=True)

 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
+import os
 # Set up logging with a dedicated file handler
 logger = logging.getLogger('SupportBot')
 # Remove any existing handlers to avoid conflicts
 if logger.handlers:
     logger.handlers.clear()
+# Define log file path in a writable directory (/tmp)
+log_file_path = '/tmp/support_bot_log.txt'
 # Create a file handler with append mode
+file_handler = logging.FileHandler(log_file_path, mode='a')
+file_handler.setLevel(logging.INFO)
 formatter = logging.Formatter('%(asctime)s - %(message)s')
+file_handler.setFormatter(formatter)
+logger.addHandler(file_handler)
+# Add a stream handler to output logs to the console as well
+stream_handler = logging.StreamHandler()
+stream_handler.setLevel(logging.INFO)
+stream_handler.setFormatter(formatter)
+logger.addHandler(stream_handler)
 # Load models
 qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
 embedder = SentenceTransformer('all-MiniLM-L6-v2')
+# Helper function to extract text from a PDF
 def extract_text_from_pdf(file_path):
     text = ""
     with open(file_path, "rb") as file:
         pdf_reader = PyPDF2.PdfReader(file)
         for page in pdf_reader.pages:
+            extracted_text = page.extract_text()
+            if extracted_text:
+                text += extracted_text + "\n"
     return text
 # Find the most relevant section in the document
     SIMILARITY_THRESHOLD = 0.4
     if similarity_score >= SIMILARITY_THRESHOLD:
         logger.info(f"Found relevant section using embeddings (score: {similarity_score})")
+        file_handler.flush()  # Ensure log is written immediately
         return best_section
     logger.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
         common_words = query_words.intersection(section_words)
         if len(common_words) >= 2:
             logger.info(f"Keyword match found with common words: {common_words}")
+            file_handler.flush()
             return section
     logger.info("No good match found. Returning default response.")
+    file_handler.flush()
     return "I don’t have enough information to answer that."
 # Process the uploaded file
     logger.info("Received file upload request")
     if file is None:
         logger.info("No file uploaded")
+        file_handler.flush()
         return [("Bot", "Please upload a file.")], state
+    # Save the uploaded file to a temporary location
     file_path = file.name
+    temp_file_path = os.path.join("/tmp", os.path.basename(file_path))
+    with open(temp_file_path, "wb") as f:
+        f.write(file.read())
+    if temp_file_path.lower().endswith(".pdf"):
+        logger.info(f"Processing PDF file: {temp_file_path}")
+        text = extract_text_from_pdf(temp_file_path)
+    elif temp_file_path.lower().endswith(".txt"):
+        logger.info(f"Processing TXT file: {temp_file_path}")
+        with open(temp_file_path, 'r', encoding='utf-8') as f:
             text = f.read()
     else:
+        logger.error(f"Unsupported file format: {temp_file_path}")
+        file_handler.flush()
         return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state
     sections = text.split('\n\n')
     state['feedback_count'] = 0
     state['mode'] = 'waiting_for_query'
     state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
+    logger.info(f"File processed successfully: {temp_file_path}")
+    file_handler.flush()
     return state['chat_history'], state
 # Handle user input (queries and feedback)
     if state['mode'] == 'waiting_for_upload':
         logger.info("User input received before file upload")
         state['chat_history'].append(("Bot", "Please upload a file first."))
+        file_handler.flush()
     elif state['mode'] == 'waiting_for_query':
         query = user_input
         logger.info(f"User query: {query}")
         state['chat_history'].append(("User", query))
         state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
         logger.info(f"Generated answer: {answer}")
+        file_handler.flush()
     elif state['mode'] == 'waiting_for_feedback':
         feedback = user_input.lower()
         logger.info(f"User feedback: {feedback}")
             else:
                 state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
                 logger.info("Max feedback iterations (2) reached. Ready for next query.")
+            file_handler.flush()
         else:
             query = state['current_query']
             context = find_relevant_section(query, state['sections'], state['section_embeddings'])
             else:
                 state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
                 logger.info(f"Invalid feedback received: {feedback}")
+                file_handler.flush()
                 return state['chat_history'], state
             state['last_answer'] = adjusted_answer
             state['feedback_count'] += 1
             state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
             logger.info(f"Updated answer: {adjusted_answer}")
+            file_handler.flush()
     return state['chat_history'], state
 # Initial state
     chat = gr.Chatbot()
     user_input = gr.Textbox(label="Your query or feedback")
     submit_btn = gr.Button("Submit")
+    # Point the log file download to the writable log file path
+    log_file = gr.File(label="Download Log File", value=log_file_path)
     file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
     submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
+demo.launch(share=True)