import logging
import os
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import PyPDF2
# Write logs to /tmp, which remains writable even in sandboxed deployments
# (e.g. Hugging Face Spaces) where the app directory may be read-only.
log_file_path = "/tmp/support_bot_log.txt"
logging.basicConfig(
    filename=log_file_path,
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
)
class SupportBotAgent:
    """Support bot that answers questions about a single TXT or PDF document.

    The document is split into paragraph sections; a query is matched to the
    most relevant section via sentence embeddings (with a keyword-overlap
    fallback) and answered with an extractive QA model.
    """

    # Sentinel returned when no relevant section can be found.  answer_query
    # compares against this so the QA model is never run with the fallback
    # message itself as context.
    _NO_ANSWER = "I don’t have enough information to answer that."

    def __init__(self, document_path):
        # Pre-trained extractive question-answering model.
        self.qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
        # Embedding model used to locate the most relevant document section.
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
        # Load the document and split it into paragraph sections, dropping
        # blank paragraphs; fall back to the whole text so the section list
        # is never empty (encode([]) would otherwise be fed an empty batch).
        self.document_text = self.load_document(document_path)
        self.sections = [s for s in self.document_text.split('\n\n') if s.strip()] or [self.document_text]
        self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
        logging.info(f"Loaded document: {document_path}")

    def load_document(self, path):
        """Load and extract text from a TXT or PDF file.

        Raises:
            ValueError: if the path is neither .txt nor .pdf.
        """
        if path.lower().endswith(".txt"):
            file_type = "Text File"
            with open(path, 'r', encoding='utf-8') as file:
                text = file.read()
        elif path.lower().endswith(".pdf"):
            file_type = "PDF File"
            text = ""
            with open(path, "rb") as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    # extract_text() can return None (e.g. image-only pages).
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
        else:
            logging.error(f"Unsupported file format: {path}")
            raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
        logging.info(f"Loaded {file_type}: {path}")
        return text

    def find_relevant_section(self, query):
        """Return the document section most relevant to *query*.

        Uses cosine similarity of sentence embeddings first; if the best
        score is below threshold, falls back to a keyword-overlap search.
        Returns the _NO_ANSWER sentinel when nothing matches.
        """
        stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
        query_embedding = self.embedder.encode(query, convert_to_tensor=True)
        similarities = util.cos_sim(query_embedding, self.section_embeddings)[0]
        best_idx = similarities.argmax().item()
        best_section = self.sections[best_idx]
        similarity_score = similarities[best_idx].item()
        SIMILARITY_THRESHOLD = 0.4
        if similarity_score >= SIMILARITY_THRESHOLD:
            logging.info(f"Found relevant section using embeddings for query: {query}")
            return best_section
        logging.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
        query_words = {word for word in query.lower().split() if word not in stopwords}
        for section in self.sections:
            section_words = {word for word in section.lower().split() if word not in stopwords}
            common_words = query_words.intersection(section_words)
            # Require at least two shared non-stopwords to call it a match.
            if len(common_words) >= 2:
                logging.info(f"Keyword match for query: {query} with common words: {common_words}")
                return section
        logging.info("No good keyword match found. Returning default response.")
        return self._NO_ANSWER

    def answer_query(self, query):
        """Answer *query* using the most relevant document section as context."""
        context = self.find_relevant_section(query)
        # Bug fix: the fallback message is a non-empty (truthy) string, so a
        # plain `if not context` check never fired and the QA model was run
        # with the fallback text as its context.  Compare against the sentinel.
        if context == self._NO_ANSWER:
            answer = self._NO_ANSWER
        else:
            result = self.qa_model(question=query, context=context, max_answer_len=50)
            answer = result["answer"]
        logging.info(f"Answer for query '{query}': {answer}")
        return answer

    def adjust_response(self, query, response, feedback):
        """Adjust the previous response based on user feedback.

        "too vague"   -> append an excerpt of the relevant section.
        "not helpful" -> re-run the query asking for more detail.
        anything else -> return the response unchanged.
        """
        if feedback == "too vague":
            context = self.find_relevant_section(query)
            adjusted_response = f"{response}\n\n(More details:\n{context[:500]}...)"
        elif feedback == "not helpful":
            adjusted_response = self.answer_query(query + " Please provide more detailed information with examples.")
        else:
            adjusted_response = response
        logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
        return adjusted_response
# --- Gradio Functions and App Workflow ---
def process_file(file, state):
    """Handle a file upload: persist it under /tmp and build the bot agent.

    Returns (chat_history, state) for the Gradio chat/state outputs.
    """
    logging.info("Received file upload request")
    if file is None:
        logging.info("No file uploaded")
        return [("Bot", "Please upload a TXT or PDF file.")], state
    # Bug fix: Gradio's `file.name` is typically an *absolute* temp path, and
    # os.path.join("/tmp", <absolute path>) discards "/tmp" entirely — the
    # upload was being rewritten in place rather than copied.  Join only the
    # basename so the copy always lands inside /tmp.
    temp_path = os.path.join("/tmp", os.path.basename(file.name))
    with open(temp_path, "wb") as f:
        # Depending on the Gradio version, `file` is a file-like object or a
        # NamedString (a str subclass carrying the content directly).
        content = file.read() if hasattr(file, "read") else file
        if isinstance(content, str):
            content = content.encode("utf-8")
        f.write(content)
    try:
        state["agent"] = SupportBotAgent(temp_path)
    except Exception as e:
        logging.error(f"Failed to initialize agent: {e}")
        return [("Bot", f"Error processing file: {str(e)}")], state
    state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
    state["mode"] = "query"
    state["last_query"] = ""
    state["last_answer"] = ""
    state["feedback_count"] = 0
    return state["chat_history"], state
def process_input(user_input, state):
    """Route chat input as either a new query or feedback on the last answer.

    Typing 'exit' ends the session; once ended, all further input is ignored.
    Returns (chat_history, state) for the Gradio chat/state outputs.
    """
    # Bug fix: ensure chat_history exists even when input arrives before any
    # upload (initial state is just {"mode": "idle"}), which previously raised
    # KeyError on the fall-through return.
    history = state.setdefault("chat_history", [])
    mode = state.get("mode", "query")
    if mode == "ended":
        return history, state
    if user_input.lower() == "exit":
        history.append(("Bot", "Session ended. You may now download the log file."))
        state["mode"] = "ended"
        return history, state
    # Guard: nothing uploaded yet — prompt for a document instead of crashing.
    if "agent" not in state or mode == "idle":
        history.append(("Bot", "Please upload a TXT or PDF file first."))
        return history, state
    if mode == "query":
        state["last_query"] = user_input
        answer = state["agent"].answer_query(user_input)
        state["last_answer"] = answer
        state["feedback_count"] = 0
        history.append(("User", user_input))
        history.append(("Bot", f"Answer: {answer}\nPlease provide feedback (good, too vague, not helpful):"))
        state["mode"] = "feedback"
    elif mode == "feedback":
        feedback = user_input.lower()
        history.append(("User", feedback))
        # After "good", or one adjustment attempt, return to query mode.
        if feedback == "good" or state["feedback_count"] >= 1:
            history.append(("Bot", "Thank you for your feedback. Enter your next query (or type 'exit' to end):"))
            state["mode"] = "query"
        else:
            new_answer = state["agent"].adjust_response(state["last_query"], state["last_answer"], feedback)
            state["last_answer"] = new_answer
            state["feedback_count"] += 1
            history.append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
    return history, state
# --- Gradio UI Setup ---
with gr.Blocks() as demo:
    # Per-session state dict; "mode" tracks the conversation phase
    # (idle -> query -> feedback -> ended).
    state = gr.State({"mode": "idle"})
    gr.Markdown("## Customer Support Bot with Document Training")
    file_upload = gr.File(label="Upload TXT or PDF file")
    # NOTE(review): chat history uses (speaker, text) tuples — the legacy
    # Chatbot format; newer Gradio versions prefer type="messages".  Confirm
    # against the pinned Gradio version before upgrading.
    chat = gr.Chatbot()
    user_input = gr.Textbox(label="Enter your query or feedback")
    submit_btn = gr.Button("Submit")
    # Provide a file component to download the log file written by the
    # logging setup at the top of the module.
    log_file = gr.File(label="Download Log File", file_count="single", interactive=False, value=log_file_path)
    # Wire the upload to agent construction and the button to the chat loop;
    # the trailing .then(...) clears the textbox after each submission.
    file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
    submit_btn.click(process_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
demo.launch(share=True)