Spaces:

Shriharsh
/

Customer_Support_Bot_with_Document_Training

Running

App Files Files Community

Shriharsh committed 17 days ago

Commit

0a81317

verified ·

1 Parent(s): 5133aad

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -91

app.py CHANGED Viewed

@@ -1,9 +1,18 @@
 import gradio as gr
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
-import datetime
-import os
 # Load models
 qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
@@ -19,7 +28,7 @@ def extract_text_from_pdf(file_path):
     return text
 # Find the most relevant section in the document
-def find_relevant_section(query, sections, section_embeddings, log_messages):
     stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
     # Semantic search
@@ -31,40 +40,40 @@ def find_relevant_section(query, sections, section_embeddings, log_messages):
     SIMILARITY_THRESHOLD = 0.4
     if similarity_score >= SIMILARITY_THRESHOLD:
-        log_messages = log_message(f"Found relevant section using embeddings for query: {query}", log_messages)
-        return best_section, log_messages
-    log_messages = log_message(f"Low similarity ({similarity_score}). Falling back to keyword search.", log_messages)
     # Keyword-based fallback search with stopword filtering
-    query_words = {word for word in query.lower().split() if word not in stopwords}  # Corrected line
     for section in sections:
         section_words = {word for word in section.lower().split() if word not in stopwords}
         common_words = query_words.intersection(section_words)
         if len(common_words) >= 2:
-            log_messages = log_message(f"Keyword match found for query: {query} with common words: {common_words}", log_messages)
-            return section, log_messages
-    log_messages = log_message(f"No good keyword match found. Returning default fallback response.", log_messages)
-    return "I don’t have enough information to answer that.", log_messages
 # Process the uploaded file with detailed logging
-def process_file(file, state, log_messages):
     if file is None:
-        log_messages = log_message("No file uploaded.", log_messages)
-        return [("Bot", "Please upload a file.")], state, log_messages
     file_path = file.name
     if file_path.lower().endswith(".pdf"):
-        log_messages = log_message(f"Uploaded PDF file: {file_path}", log_messages)
         text = extract_text_from_pdf(file_path)
     elif file_path.lower().endswith(".txt"):
-        log_messages = log_message(f"Uploaded TXT file: {file_path}", log_messages)
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
     else:
-        log_messages = log_message(f"Unsupported file format: {file_path}", log_messages)
-        return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state, log_messages
     sections = text.split('\n\n')
     section_embeddings = embedder.encode(sections, convert_to_tensor=True)
@@ -75,27 +84,19 @@ def process_file(file, state, log_messages):
     state['feedback_count'] = 0
     state['mode'] = 'waiting_for_query'
     state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
-    log_messages = log_message(f"Processed file: {file_path}", log_messages)
-    return state['chat_history'], state, log_messages
 # Handle user input (queries and feedback)
-def handle_input(user_input, state, log_messages):
     if state['mode'] == 'waiting_for_upload':
         state['chat_history'].append(("Bot", "Please upload a file first."))
-        log_messages = log_message("User attempted to interact without uploading a file.", log_messages)
-        return state['chat_history'], state, log_messages
     elif state['mode'] == 'waiting_for_query':
-        if user_input.lower() == "exit":
-            log_messages = log_message("User entered 'exit'. Ending session.", log_messages)
-            state['mode'] = 'exited'
-            state['chat_history'].append(("User", "exit"))
-            state['chat_history'].append(("Bot", "Session ended. You can download the log file."))
-            return state['chat_history'], state, log_messages
         query = user_input
         state['current_query'] = query
         state['feedback_count'] = 0
-        context, log_messages = find_relevant_section(query, state['sections'], state['section_embeddings'], log_messages)
         if context == "I don’t have enough information to answer that.":
             answer = context
         else:
@@ -105,46 +106,47 @@ def handle_input(user_input, state, log_messages):
         state['mode'] = 'waiting_for_feedback'
         state['chat_history'].append(("User", query))
         state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
-        # Log the query and initial answer here:
-        log_messages = log_message(f"Query: {query}, Answer: {answer}", log_messages)
     elif state['mode'] == 'waiting_for_feedback':
-        if user_input.lower() == "exit":
-            log_messages = log_message("User entered 'exit'. Ending session.", log_messages)
-            state['mode'] = 'exited'
-            state['chat_history'].append(("User", "exit"))
-            state['chat_history'].append(("Bot", "Session ended. You can download the log file."))
-            return state['chat_history'], state, log_messages
         feedback = user_input.lower()
         state['chat_history'].append(("User", feedback))
-        log_messages = log_message(f"Feedback: {feedback}", log_messages)
         if feedback == "good" or state['feedback_count'] >= 2:
             state['mode'] = 'waiting_for_query'
             if feedback == "good":
                 state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
-                log_messages = log_message("Feedback accepted as 'good'. Waiting for next query.", log_messages)
             else:
                 state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
-                log_messages = log_message("Max feedback iterations reached. Waiting for next query.", log_messages)
         else:
             query = state['current_query']
-            context, log_messages = find_relevant_section(query, state['sections'], state['section_embeddings'], log_messages)
             if feedback == "too vague":
                 adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
             elif feedback == "not helpful":
                 adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
             else:
                 state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
-                log_messages = log_message(f"Invalid feedback received: {feedback}", log_messages)
-                return state['chat_history'], state, log_messages
             state['last_answer'] = adjusted_answer
             state['feedback_count'] += 1
             state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
-            log_messages = log_message(f"Adjusted answer: {adjusted_answer}", log_messages)
-    elif state['mode'] == 'exited':
-        state['chat_history'].append(("Bot", "Session is over. Please download the log."))
-        log_messages = log_message("User interacted after exiting.", log_messages)
-    return state['chat_history'], state, log_messages
 # Initial state
 initial_state = {
@@ -158,55 +160,26 @@ initial_state = {
     'last_answer': None
 }
-# Initialize log_messages outside initial_state
-log_messages = []
-# Logging function to store messages in memory
-def log_message(message, log_messages):
-    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    log_entry = f"{timestamp} - {message}"
-    log_messages.append(log_entry)
-    return log_messages
-# Function to save logs to file
-def save_logs_to_file(log_messages):
-    with open("support_bot_log.txt", "w") as log_file:
-        for log_message in log_messages:
-            log_file.write(log_message + "\n")
 # Gradio interface
 with gr.Blocks() as demo:
     state = gr.State(initial_state)
-    file_upload = gr.File(label="Upload PDF or TXT file")
     chat = gr.Chatbot()
     user_input = gr.Textbox(label="Your query or feedback")
     submit_btn = gr.Button("Submit")
-    download_log_btn = gr.Button("Download Log File")  # Changed to Button
-    log_file = gr.File(label="Log File")  # Keep File for serving
     # Process file upload
-    file_upload.upload(process_file, inputs=[file_upload, state, gr.State(log_messages)], outputs=[chat, state, gr.State(log_messages)])
     # Handle user input and clear the textbox
-    submit_btn.click(handle_input, inputs=[user_input, state, gr.State(log_messages)], outputs=[chat, state, gr.State(log_messages)]).then(lambda: "", None, user_input)
-    # Update the log file just before download
-    download_log_btn.click(
-        lambda log_messages: "support_bot_log.txt",
-        inputs=[gr.State(log_messages)],
-        outputs=[log_file]
-    )
-    # Also save logs when user exits
-    user_input.submit(
-        lambda user_input, state, log_messages: (
-            save_logs_to_file(log_messages) if user_input.lower() == "exit" else None,
-            state
-        ),
-        [user_input, state, gr.State(log_messages)],
-        [log_file, state]
-    )
-demo.launch(share=True)

+import os
+import logging
 import gradio as gr
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
+# Set up logging with immediate writing
+logging.basicConfig(
+    filename='support_bot_log.txt',
+    level=logging.INFO,
+    format='%(asctime)s - %(message)s',
+    force=True  # Ensures any existing handlers are replaced and logging starts fresh
+)
+logger = logging.getLogger()
 # Load models
 qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
     return text
 # Find the most relevant section in the document
+def find_relevant_section(query, sections, section_embeddings):
     stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
     # Semantic search
     SIMILARITY_THRESHOLD = 0.4
     if similarity_score >= SIMILARITY_THRESHOLD:
+        logger.info(f"Found relevant section using embeddings for query: {query}")
+        return best_section
+    logger.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
     # Keyword-based fallback search with stopword filtering
+    query_words = {word for word in query.lower().split() if word not in stopwords}
     for section in sections:
         section_words = {word for word in section.lower().split() if word not in stopwords}
         common_words = query_words.intersection(section_words)
         if len(common_words) >= 2:
+            logger.info(f"Keyword match found for query: {query} with common words: {common_words}")
+            return section
+    logger.info(f"No good keyword match found. Returning default fallback response.")
+    return "I don’t have enough information to answer that."
 # Process the uploaded file with detailed logging
+def process_file(file, state):
     if file is None:
+        logger.info("No file uploaded.")
+        return [("Bot", "Please upload a file.")], state
     file_path = file.name
     if file_path.lower().endswith(".pdf"):
+        logger.info(f"Uploaded PDF file: {file_path}")
         text = extract_text_from_pdf(file_path)
     elif file_path.lower().endswith(".txt"):
+        logger.info(f"Uploaded TXT file: {file_path}")
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
     else:
+        logger.error(f"Unsupported file format: {file_path}")
+        return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state
     sections = text.split('\n\n')
     section_embeddings = embedder.encode(sections, convert_to_tensor=True)
     state['feedback_count'] = 0
     state['mode'] = 'waiting_for_query'
     state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
+    logger.info(f"Processed file: {file_path}")
+    return state['chat_history'], state
 # Handle user input (queries and feedback)
+def handle_input(user_input, state):
     if state['mode'] == 'waiting_for_upload':
         state['chat_history'].append(("Bot", "Please upload a file first."))
+        logger.info("User attempted to interact without uploading a file.")
     elif state['mode'] == 'waiting_for_query':
         query = user_input
         state['current_query'] = query
         state['feedback_count'] = 0
+        context = find_relevant_section(query, state['sections'], state['section_embeddings'])
         if context == "I don’t have enough information to answer that.":
             answer = context
         else:
         state['mode'] = 'waiting_for_feedback'
         state['chat_history'].append(("User", query))
         state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
+        logger.info(f"Query: {query}, Answer: {answer}")
     elif state['mode'] == 'waiting_for_feedback':
         feedback = user_input.lower()
         state['chat_history'].append(("User", feedback))
+        logger.info(f"Feedback: {feedback}")
         if feedback == "good" or state['feedback_count'] >= 2:
             state['mode'] = 'waiting_for_query'
             if feedback == "good":
                 state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
+                logger.info("Feedback accepted as 'good'. Waiting for next query.")
             else:
                 state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
+                logger.info("Max feedback iterations reached. Waiting for next query.")
         else:
             query = state['current_query']
+            context = find_relevant_section(query, state['sections'], state['section_embeddings'])
             if feedback == "too vague":
                 adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
             elif feedback == "not helpful":
                 adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
             else:
                 state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
+                logger.info(f"Invalid feedback received: {feedback}")
+                return state['chat_history'], state
             state['last_answer'] = adjusted_answer
             state['feedback_count'] += 1
             state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
+            logger.info(f"Adjusted answer: {adjusted_answer}")
+    return state['chat_history'], state
+# Function to return the up-to-date log file for download
+def get_log_file():
+    """Return the path of the up-to-date log file for the download component.
+
+    The download event is logged first and the handlers are flushed
+    afterwards, so the file handed back already contains that entry.
+
+    Returns:
+        The relative path "support_bot_log.txt".
+    """
+    log_path = "support_bot_log.txt"
+    # Record the download before flushing so the entry is part of the
+    # file that gets served.
+    logger.info("Log file downloaded by user.")
+    for handler in logger.handlers:
+        handler.flush()
+    # Append mode creates the file when it is missing and never truncates
+    # existing content, avoiding the check-then-create race.
+    with open(log_path, "a", encoding="utf-8"):
+        pass
+    return log_path
 # Initial state
 initial_state = {
     'last_answer': None
 }
 # Gradio interface
 with gr.Blocks() as demo:
     state = gr.State(initial_state)
+    with gr.Row():
+        file_upload = gr.File(label="Upload PDF or TXT file")
+        download_btn = gr.Button("Download Log")
+        download_file = gr.File(label="Log File", interactive=False)
     chat = gr.Chatbot()
     user_input = gr.Textbox(label="Your query or feedback")
     submit_btn = gr.Button("Submit")
     # Process file upload
+    file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
     # Handle user input and clear the textbox
+    submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
+    # Set up download log button
+    download_btn.click(fn=get_log_file, inputs=[], outputs=download_file)
+demo.launch(share=True)