Shriharsh committed on
Commit 033375f · verified · 1 Parent(s): 7dbadbc

Update app.py

Files changed (1): app.py +148 -186
app.py CHANGED
@@ -1,203 +1,165 @@
  import logging
- import os
  import gradio as gr
  from transformers import pipeline
  from sentence_transformers import SentenceTransformer, util
  import PyPDF2

- # Set up logging: write logs to a writable directory (/tmp)
- log_file_path = "/tmp/support_bot_log.txt"
- logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
-
- def flush_logs():
-     for handler in logging.getLogger().handlers:
-         handler.flush()
-
- class SupportBotAgent:
-     def __init__(self, document_path):
-         # Load a pre-trained question-answering model
-         self.qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
-         # Set up an embedding model for finding relevant sections
-         self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
-         # Load the document text and split it into sections (by paragraphs)
-         self.document_text = self.load_document(document_path)
-         self.sections = self.document_text.split('\n\n')
-         flush_logs()
-         # Log document length for debugging
-         logging.info(f"Document length: {len(self.document_text)} characters")
-         flush_logs()
-         # Create embeddings for all sections
-         self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
-         logging.info(f"Loaded document: {document_path}")
-         flush_logs()
-
-     def load_document(self, path):
-         """Loads and extracts text from a TXT or PDF file."""
-         if path.lower().endswith(".txt"):
-             file_type = "Text File"
-             with open(path, 'r', encoding='utf-8') as file:
-                 text = file.read()
-         elif path.lower().endswith(".pdf"):
-             file_type = "PDF File"
-             text = ""
-             with open(path, "rb") as file:
-                 pdf_reader = PyPDF2.PdfReader(file)
-                 for page in pdf_reader.pages:
-                     page_text = page.extract_text()
-                     if page_text:
-                         text += page_text + "\n"
-         else:
-             file_type = "Unsupported Format"
-             logging.error(f"Unsupported file format: {path}")
-             flush_logs()
-             raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
-         logging.info(f"Loaded {file_type}: {path}")
-         flush_logs()
-         return text
-
-     def find_relevant_section(self, query):
-         """
-         Uses semantic similarity first, falling back to keyword search if needed.
-         """
-         stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
-         query_embedding = self.embedder.encode(query, convert_to_tensor=True)
-         similarities = util.cos_sim(query_embedding, self.section_embeddings)[0]
-         best_idx = similarities.argmax().item()
-         best_section = self.sections[best_idx]
-         similarity_score = similarities[best_idx].item()
-         SIMILARITY_THRESHOLD = 0.4  # Adjust if needed
-
-         if similarity_score >= SIMILARITY_THRESHOLD:
-             logging.info(f"Found relevant section using embeddings for query: {query} (score: {similarity_score})")
-             flush_logs()
-             return best_section
-
-         logging.info(f"Low similarity ({similarity_score}) for query: {query}. Falling back to keyword search.")
-         flush_logs()
-         query_words = {word for word in query.lower().split() if word not in stopwords}
-         for section in self.sections:
-             section_words = {word for word in section.lower().split() if word not in stopwords}
-             common_words = query_words.intersection(section_words)
-             if len(common_words) >= 2:
-                 logging.info(f"Keyword match for query: {query} with common words: {common_words}")
-                 flush_logs()
-                 return section
-
-         logging.info("No good keyword match found. Returning default response.")
-         flush_logs()
-         return "I don’t have enough information to answer that."
-
-     def answer_query(self, query):
-         context = self.find_relevant_section(query)
-         if not context:
-             answer = "I don’t have enough information to answer that."
-         else:
-             result = self.qa_model(question=query, context=context, max_answer_len=50)
-             answer = result["answer"]
-         logging.info(f"Answer for query '{query}': {answer}")
-         flush_logs()
-         return answer
-
-     def adjust_response(self, query, response, feedback):
-         """Adjusts the response based on feedback."""
-         if feedback == "too vague":
-             context = self.find_relevant_section(query)
-             adjusted_response = f"{response}\n\n(More details:\n{context[:500]}...)"
-         elif feedback == "not helpful":
-             adjusted_response = self.answer_query(query + " Please provide more detailed information with examples.")
-         else:
-             adjusted_response = response
-         logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
-         flush_logs()
-         return adjusted_response
-
- # --- Gradio Functions and App Workflow ---
-
  def process_file(file, state):
-     """Handles file upload and initializes the SupportBotAgent."""
-     logging.info("Received file upload request")
-     flush_logs()
      if file is None:
-         logging.info("No file uploaded")
-         flush_logs()
-         return [("Bot", "Please upload a TXT or PDF file.")], state
-
-     # Save the uploaded file to /tmp. Handle both file objects and NamedString.
-     temp_path = os.path.join("/tmp", file.name)
-     with open(temp_path, "wb") as f:
-         if hasattr(file, "read"):
-             content = file.read()
          else:
-             content = file
-         if isinstance(content, str):
-             content = content.encode("utf-8")
-         f.write(content)
-     logging.info(f"Saved uploaded file to {temp_path} (size: {os.path.getsize(temp_path)} bytes)")
-     flush_logs()
-
-     try:
-         state["agent"] = SupportBotAgent(temp_path)
-     except Exception as e:
-         logging.error(f"Error processing file: {str(e)}")
-         flush_logs()
-         return [("Bot", f"Error processing file: {str(e)}")], state
-
-     state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
-     state["mode"] = "query"
-     state["last_query"] = ""
-     state["last_answer"] = ""
-     state["feedback_count"] = 0
-     return state["chat_history"], state
-
- def process_input(user_input, state):
-     """
-     Processes user input: as a query or feedback.
-     Typing 'exit' stops the session.
-     """
-     if state.get("mode", "query") == "ended":
-         return state["chat_history"], state
-
-     if user_input.lower() == "exit":
-         state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
-         state["mode"] = "ended"
-         flush_logs()
-         return state["chat_history"], state
-
-     if state["mode"] == "query":
-         state["last_query"] = user_input
-         answer = state["agent"].answer_query(user_input)
-         state["last_answer"] = answer
-         state["feedback_count"] = 0
-         state["chat_history"].append(("User", user_input))
-         state["chat_history"].append(("Bot", f"Answer: {answer}\nPlease provide feedback (good, too vague, not helpful):"))
-         state["mode"] = "feedback"
-     elif state["mode"] == "feedback":
          feedback = user_input.lower()
-         state["chat_history"].append(("User", feedback))
-         if feedback == "good" or state["feedback_count"] >= 1:
-             state["chat_history"].append(("Bot", "Thank you for your feedback. Enter your next query (or type 'exit' to end):"))
-             state["mode"] = "query"
          else:
-             new_answer = state["agent"].adjust_response(state["last_query"], state["last_answer"], feedback)
-             state["last_answer"] = new_answer
-             state["feedback_count"] += 1
-             state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
-     flush_logs()
-     return state["chat_history"], state
-
- # --- Gradio UI Setup ---
-
  with gr.Blocks() as demo:
-     state = gr.State({"mode": "idle"})
-     gr.Markdown("## Customer Support Bot with Document Training")
-     file_upload = gr.File(label="Upload TXT or PDF file")
      chat = gr.Chatbot()
-     user_input = gr.Textbox(label="Enter your query or feedback")
      submit_btn = gr.Button("Submit")
-     # Provide a file component to download the log file
-     log_file = gr.File(label="Download Log File", file_count="single", interactive=False, value=log_file_path)

      file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
-     submit_btn.click(process_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)

- demo.launch(share=True)
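For reference, the removed implementation above bundled loading, retrieval, and feedback handling behind one class. A minimal sketch of driving that class directly, outside Gradio, might have looked like the lines below; the file name "manual.txt" and the sample query are illustrative assumptions, not anything from the commit.

# Hypothetical driver for the removed SupportBotAgent class (sketch only;
# "manual.txt" and the query are illustrative assumptions)
agent = SupportBotAgent("manual.txt")
answer = agent.answer_query("How do I reset my password?")
# "too vague" appends document context; "not helpful" re-asks with a more detailed prompt
refined = agent.adjust_response("How do I reset my password?", answer, "too vague")
print(refined)

The updated version of app.py, as committed, follows.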
  import logging
  import gradio as gr
  from transformers import pipeline
  from sentence_transformers import SentenceTransformer, util
  import PyPDF2

+ # Set up logging with immediate writing
+ logging.basicConfig(
+     filename='support_bot_log.txt',
+     level=logging.INFO,
+     format='%(asctime)s - %(message)s',
+     force=True  # Ensures any existing handlers are replaced and logging starts fresh
+ )
+ logger = logging.getLogger()
+
+ # Load models
+ qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
+ embedder = SentenceTransformer('all-MiniLM-L6-v2')
+
+ # Helper function to extract text from PDF
+ def extract_text_from_pdf(file_path):
+     text = ""
+     with open(file_path, "rb") as file:
+         pdf_reader = PyPDF2.PdfReader(file)
+         for page in pdf_reader.pages:
+             page_text = page.extract_text()
+             if page_text:  # extract_text() can return None for empty pages
+                 text += page_text + "\n"
+     return text
+
+ # Find the most relevant section in the document
+ def find_relevant_section(query, sections, section_embeddings):
+     stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
+
+     # Semantic search
+     query_embedding = embedder.encode(query, convert_to_tensor=True)
+     similarities = util.cos_sim(query_embedding, section_embeddings)[0]
+     best_idx = similarities.argmax().item()
+     best_section = sections[best_idx]
+     similarity_score = similarities[best_idx].item()
+
+     SIMILARITY_THRESHOLD = 0.4
+     if similarity_score >= SIMILARITY_THRESHOLD:
+         logger.info(f"Found relevant section using embeddings for query: {query}")
+         return best_section
+
+     logger.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
+
+     # Keyword-based fallback search with stopword filtering
+     query_words = {word for word in query.lower().split() if word not in stopwords}
+     for section in sections:
+         section_words = {word for word in section.lower().split() if word not in stopwords}
+         common_words = query_words.intersection(section_words)
+         if len(common_words) >= 2:
+             logger.info(f"Keyword match found for query: {query} with common words: {common_words}")
+             return section
+
+     logger.info("No good keyword match found. Returning default fallback response.")
+     return "I don’t have enough information to answer that."
+
+ # Process the uploaded file with detailed logging
  def process_file(file, state):
      if file is None:
+         logger.info("No file uploaded.")
+         return [("Bot", "Please upload a file.")], state
+
+     file_path = file.name
+     if file_path.lower().endswith(".pdf"):
+         logger.info(f"Uploaded PDF file: {file_path}")
+         text = extract_text_from_pdf(file_path)
+     elif file_path.lower().endswith(".txt"):
+         logger.info(f"Uploaded TXT file: {file_path}")
+         with open(file_path, 'r', encoding='utf-8') as f:
+             text = f.read()
+     else:
+         logger.error(f"Unsupported file format: {file_path}")
+         return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state
+
+     sections = text.split('\n\n')
+     section_embeddings = embedder.encode(sections, convert_to_tensor=True)
+     state['document_text'] = text
+     state['sections'] = sections
+     state['section_embeddings'] = section_embeddings
+     state['current_query'] = None
+     state['feedback_count'] = 0
+     state['mode'] = 'waiting_for_query'
+     state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
+     logger.info(f"Processed file: {file_path}")
+     return state['chat_history'], state
+
+ # Handle user input (queries and feedback)
+ def handle_input(user_input, state):
+     if state['mode'] == 'waiting_for_upload':
+         state['chat_history'].append(("Bot", "Please upload a file first."))
+         logger.info("User attempted to interact without uploading a file.")
+     elif state['mode'] == 'waiting_for_query':
+         query = user_input
+         state['current_query'] = query
+         state['feedback_count'] = 0
+         context = find_relevant_section(query, state['sections'], state['section_embeddings'])
+         if context == "I don’t have enough information to answer that.":
+             answer = context
          else:
+             result = qa_model(question=query, context=context)
+             answer = result["answer"]
+         state['last_answer'] = answer
+         state['mode'] = 'waiting_for_feedback'
+         state['chat_history'].append(("User", query))
+         state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
+         logger.info(f"Query: {query}, Answer: {answer}")
+     elif state['mode'] == 'waiting_for_feedback':
          feedback = user_input.lower()
+         state['chat_history'].append(("User", feedback))
+         logger.info(f"Feedback: {feedback}")
+         if feedback == "good" or state['feedback_count'] >= 2:
+             state['mode'] = 'waiting_for_query'
+             if feedback == "good":
+                 state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
+                 logger.info("Feedback accepted as 'good'. Waiting for next query.")
+             else:
+                 state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
+                 logger.info("Max feedback iterations reached. Waiting for next query.")
          else:
+             query = state['current_query']
+             context = find_relevant_section(query, state['sections'], state['section_embeddings'])
+             if feedback == "too vague":
+                 adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
+             elif feedback == "not helpful":
+                 adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
+             else:
+                 state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
+                 logger.info(f"Invalid feedback received: {feedback}")
+                 return state['chat_history'], state
+             state['last_answer'] = adjusted_answer
+             state['feedback_count'] += 1
+             state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
+             logger.info(f"Adjusted answer: {adjusted_answer}")
+     return state['chat_history'], state
+
+ # Initial state
+ initial_state = {
+     'document_text': None,
+     'sections': None,
+     'section_embeddings': None,
+     'current_query': None,
+     'feedback_count': 0,
+     'mode': 'waiting_for_upload',
+     'chat_history': [("Bot", "Please upload a PDF or TXT file to start.")],
+     'last_answer': None
+ }
+
+ # Gradio interface
  with gr.Blocks() as demo:
+     state = gr.State(initial_state)
+     file_upload = gr.File(label="Upload PDF or TXT file")
      chat = gr.Chatbot()
+     user_input = gr.Textbox(label="Your query or feedback")
      submit_btn = gr.Button("Submit")
+     log_file = gr.File(label="Download Log File", value="support_bot_log.txt")  # Added for log download

+     # Process file upload
      file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])

+     # Handle user input and clear the textbox
+     submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
+
+ demo.launch(share=True)
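Because the refactor exposes retrieval as a module-level function rather than a method, the semantic-search-then-QA sequence is easy to sanity-check without launching the Gradio UI. Below is a minimal standalone sketch of that sequence under stated assumptions: "faq.txt" is a placeholder document path, the sample query is invented, and the 0.4 cutoff mirrors SIMILARITY_THRESHOLD in find_relevant_section.

# Standalone sketch of the retrieval + extractive-QA flow from app.py
# ("faq.txt" and the query are placeholder assumptions)
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Split the document into paragraph-level sections, as process_file does
with open("faq.txt", 'r', encoding='utf-8') as f:
    sections = f.read().split('\n\n')
section_embeddings = embedder.encode(sections, convert_to_tensor=True)

query = "How do I reset my password?"
query_embedding = embedder.encode(query, convert_to_tensor=True)
similarities = util.cos_sim(query_embedding, section_embeddings)[0]
best_idx = similarities.argmax().item()

if similarities[best_idx].item() >= 0.4:  # same threshold as find_relevant_section
    print(qa_model(question=query, context=sections[best_idx])["answer"])
else:
    print("Low similarity; the app would fall back to keyword overlap here.")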