Shriharsh committed on
Commit e367093 · verified · 1 Parent(s): c428223

Update app.py

Files changed (1): app.py (+147, -187)
app.py CHANGED
@@ -1,204 +1,164 @@
 import logging
 import gradio as gr
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
-import os
 
-# Set up logging with a dedicated file handler
-logger = logging.getLogger('SupportBot')
-logger.setLevel(logging.INFO)
-if logger.handlers:
-    logger.handlers.clear()
-
-# Define log file path in a writable directory (/tmp)
-log_file_path = '/tmp/support_bot_log.txt'
-
-# Create a file handler with append mode
-file_handler = logging.FileHandler(log_file_path, mode='a')
-file_handler.setLevel(logging.INFO)
-formatter = logging.Formatter('%(asctime)s - %(message)s')
-file_handler.setFormatter(formatter)
-logger.addHandler(file_handler)
-
-# Add a stream handler to output logs to the console as well
-stream_handler = logging.StreamHandler()
-stream_handler.setLevel(logging.INFO)
-stream_handler.setFormatter(formatter)
-logger.addHandler(stream_handler)
-
-# Load models
-qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
-embedder = SentenceTransformer('all-MiniLM-L6-v2')
-
-# Helper function to extract text from a PDF
-def extract_text_from_pdf(file_path):
-    text = ""
-    with open(file_path, "rb") as file:
-        pdf_reader = PyPDF2.PdfReader(file)
-        for page in pdf_reader.pages:
-            extracted_text = page.extract_text()
-            if extracted_text:
-                text += extracted_text + "\n"
-    return text
-
-# Find the most relevant section in the document
-def find_relevant_section(query, sections, section_embeddings):
-    stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
-
-    logger.info(f"Searching for relevant section for query: {query}")
-    query_embedding = embedder.encode(query, convert_to_tensor=True)
-    similarities = util.cos_sim(query_embedding, section_embeddings)[0]
-    best_idx = similarities.argmax().item()
-    best_section = sections[best_idx]
-    similarity_score = similarities[best_idx].item()
-
-    SIMILARITY_THRESHOLD = 0.4
-    if similarity_score >= SIMILARITY_THRESHOLD:
-        logger.info(f"Found relevant section using embeddings (score: {similarity_score})")
-        file_handler.flush()  # Ensure log is written immediately
-        return best_section
-
-    logger.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
-    query_words = {word for word in query.lower().split() if word not in stopwords}
-    for section in sections:
-        section_words = {word for word in section.lower().split() if word not in stopwords}
-        common_words = query_words.intersection(section_words)
-        if len(common_words) >= 2:
-            logger.info(f"Keyword match found with common words: {common_words}")
-            file_handler.flush()
-            return section
-
-    logger.info("No good match found. Returning default response.")
-    file_handler.flush()
-    return "I don’t have enough information to answer that."
-
-# Process the uploaded file
-def process_file(file, state):
-    logger.info("Received file upload request")
-    if file is None:
-        logger.info("No file uploaded")
-        file_handler.flush()
-        return [("Bot", "Please upload a file.")], state
-
-    # Save the uploaded file to a temporary location
-    file_path = file.name
-    temp_file_path = os.path.join("/tmp", os.path.basename(file_path))
-    with open(temp_file_path, "wb") as f:
-        # Check if the file has a 'read' method; if not, assume it's already the content.
-        if hasattr(file, "read"):
-            content = file.read()
         else:
-            content = file
-        if isinstance(content, str):
-            content = content.encode("utf-8")
-        f.write(content)
-
-    if temp_file_path.lower().endswith(".pdf"):
-        logger.info(f"Processing PDF file: {temp_file_path}")
-        text = extract_text_from_pdf(temp_file_path)
-    elif temp_file_path.lower().endswith(".txt"):
-        logger.info(f"Processing TXT file: {temp_file_path}")
-        with open(temp_file_path, 'r', encoding='utf-8') as f:
-            text = f.read()
-    else:
-        logger.error(f"Unsupported file format: {temp_file_path}")
-        file_handler.flush()
-        return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state
-
-    sections = text.split('\n\n')
-    section_embeddings = embedder.encode(sections, convert_to_tensor=True)
-    state['document_text'] = text
-    state['sections'] = sections
-    state['section_embeddings'] = section_embeddings
-    state['current_query'] = None
-    state['feedback_count'] = 0
-    state['mode'] = 'waiting_for_query'
-    state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
-    logger.info(f"File processed successfully: {temp_file_path}")
-    file_handler.flush()
-    return state['chat_history'], state
-
-# Handle user input (queries and feedback)
-def handle_input(user_input, state):
-    if state['mode'] == 'waiting_for_upload':
-        logger.info("User input received before file upload")
-        state['chat_history'].append(("Bot", "Please upload a file first."))
-        file_handler.flush()
-    elif state['mode'] == 'waiting_for_query':
-        query = user_input
-        logger.info(f"User query: {query}")
-        state['current_query'] = query
-        state['feedback_count'] = 0
-        context = find_relevant_section(query, state['sections'], state['section_embeddings'])
-        if context == "I don’t have enough information to answer that.":
-            answer = context
         else:
-            result = qa_model(question=query, context=context)
             answer = result["answer"]
-        state['last_answer'] = answer
-        state['mode'] = 'waiting_for_feedback'
-        state['chat_history'].append(("User", query))
-        state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
-        logger.info(f"Generated answer: {answer}")
-        file_handler.flush()
-    elif state['mode'] == 'waiting_for_feedback':
         feedback = user_input.lower()
-        logger.info(f"User feedback: {feedback}")
-        state['chat_history'].append(("User", feedback))
-        if feedback == "good" or state['feedback_count'] >= 2:
-            state['mode'] = 'waiting_for_query'
-            if feedback == "good":
-                state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
-                logger.info("Feedback 'good' received. Ready for next query.")
-            else:
-                state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
-                logger.info("Max feedback iterations (2) reached. Ready for next query.")
-            file_handler.flush()
         else:
-            query = state['current_query']
-            context = find_relevant_section(query, state['sections'], state['section_embeddings'])
-            if feedback == "too vague":
-                adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
-                logger.info("Feedback 'too vague'. Providing context.")
-            elif feedback == "not helpful":
-                adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
-                logger.info("Feedback 'not helpful'. Re-searching with modified query.")
-            else:
-                state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
-                logger.info(f"Invalid feedback received: {feedback}")
-                file_handler.flush()
-                return state['chat_history'], state
-            state['last_answer'] = adjusted_answer
-            state['feedback_count'] += 1
-            state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
-            logger.info(f"Updated answer: {adjusted_answer}")
-            file_handler.flush()
-    return state['chat_history'], state
-
-# Initial state
-initial_state = {
-    'document_text': None,
-    'sections': None,
-    'section_embeddings': None,
-    'current_query': None,
-    'feedback_count': 0,
-    'mode': 'waiting_for_upload',
-    'chat_history': [("Bot", "Please upload a PDF or TXT file to start.")],
-    'last_answer': None
-}
-
-# Gradio interface
 with gr.Blocks() as demo:
-    state = gr.State(initial_state)
-    file_upload = gr.File(label="Upload PDF or TXT file")
     chat = gr.Chatbot()
-    user_input = gr.Textbox(label="Your query or feedback")
     submit_btn = gr.Button("Submit")
-    # Point the log file download to the writable log file path
-    log_file = gr.File(label="Download Log File", value=log_file_path)
 
     file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
-    submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
 
-demo.launch(share=True)
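Both the file removed above and its replacement below (the + side of the diff) share the same retrieval strategy: embed the query and every document section, pick the section with the highest cosine similarity, and fall back to keyword overlap whenever the best score is under the 0.4 threshold. A minimal standalone sketch of that strategy follows; the sections list and query are invented for illustration, while the model name and the util.cos_sim call match the code in this commit:

from sentence_transformers import SentenceTransformer, util

embedder = SentenceTransformer('all-MiniLM-L6-v2')
sections = [  # invented sample data
    "Refunds are issued within 5 business days of approval.",
    "To reset your password, use the 'Forgot password' link.",
]
query = "How long do refunds take?"

section_embeddings = embedder.encode(sections, convert_to_tensor=True)
query_embedding = embedder.encode(query, convert_to_tensor=True)
similarities = util.cos_sim(query_embedding, section_embeddings)[0]
best_idx = similarities.argmax().item()

if similarities[best_idx].item() >= 0.4:  # SIMILARITY_THRESHOLD in app.py
    print(sections[best_idx])             # semantic match wins
else:
    # Keyword-overlap fallback, mirroring find_relevant_section
    # (stopword filtering omitted here for brevity).
    query_words = set(query.lower().split())
    hits = [s for s in sections if len(query_words & set(s.lower().split())) >= 2]
    print(hits[0] if hits else "I don't have enough information to answer that.")

The fallback accepts any section sharing at least two non-stopword tokens with the query, so the threshold mostly decides how often short queries drop into keyword matching.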
 
 import logging
+import os
 import gradio as gr
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
 
+# Set up logging: we write logs to /tmp so that it's writable on Spaces.
+log_file_path = "/tmp/support_bot_log.txt"
+logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
+
+class SupportBotAgent:
+    def __init__(self, document_path):
+        # Load a pre-trained question-answering model
+        self.qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
+        # Set up an embedding model for finding relevant sections
+        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
+        # Load the document text and split it into sections (by paragraphs)
+        self.document_text = self.load_document(document_path)
+        self.sections = self.document_text.split('\n\n')
+        self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
+        logging.info(f"Loaded document: {document_path}")
+
+    def load_document(self, path):
+        """Loads and extracts text from a TXT or PDF file."""
+        if path.lower().endswith(".txt"):
+            file_type = "Text File"
+            with open(path, 'r', encoding='utf-8') as file:
+                text = file.read()
+        elif path.lower().endswith(".pdf"):
+            file_type = "PDF File"
+            text = ""
+            with open(path, "rb") as file:
+                pdf_reader = PyPDF2.PdfReader(file)
+                for page in pdf_reader.pages:
+                    page_text = page.extract_text()
+                    if page_text:
+                        text += page_text + "\n"
         else:
+            file_type = "Unsupported Format"
+            logging.error(f"Unsupported file format: {path}")
+            raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
+        logging.info(f"Loaded {file_type}: {path}")
+        return text
+
+    def find_relevant_section(self, query):
+        """
+        First uses semantic similarity. If similarity is too low, falls back to a keyword search.
+        """
+        stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
+        query_embedding = self.embedder.encode(query, convert_to_tensor=True)
+        similarities = util.cos_sim(query_embedding, self.section_embeddings)[0]
+        best_idx = similarities.argmax().item()
+        best_section = self.sections[best_idx]
+        similarity_score = similarities[best_idx].item()
+        SIMILARITY_THRESHOLD = 0.4
+
+        if similarity_score >= SIMILARITY_THRESHOLD:
+            logging.info(f"Found relevant section using embeddings for query: {query}")
+            return best_section
+
+        logging.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
+        query_words = {word for word in query.lower().split() if word not in stopwords}
+        for section in self.sections:
+            section_words = {word for word in section.lower().split() if word not in stopwords}
+            common_words = query_words.intersection(section_words)
+            if len(common_words) >= 2:
+                logging.info(f"Keyword match found for query: {query} with common words: {common_words}")
+                return section
+
+        logging.info("No good keyword match found. Returning default fallback response.")
+        return "I don’t have enough information to answer that."
+
+    def answer_query(self, query):
+        context = self.find_relevant_section(query)
+        if not context:
+            answer = "I don’t have enough information to answer that."
         else:
+            result = self.qa_model(question=query, context=context, max_answer_len=50)
             answer = result["answer"]
+        logging.info(f"Answer for query '{query}': {answer}")
+        return answer
+
+    def adjust_response(self, query, response, feedback):
+        """Modify the response based on user feedback."""
+        if feedback == "too vague":
+            context = self.find_relevant_section(query)
+            adjusted_response = f"{response}\n\n(More details:\n{context[:500]}...)"
+        elif feedback == "not helpful":
+            adjusted_response = self.answer_query(query + " Please provide more detailed information with examples.")
+        else:
+            adjusted_response = response
+        logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
+        return adjusted_response
+
+# --- Gradio Functions and App Workflow ---
+
+def process_file(file, state):
+    """Handles the file upload and initializes the SupportBotAgent."""
+    if file is None:
+        logging.info("No file uploaded")
+        return [("Bot", "Please upload a TXT or PDF file.")], state
+    # Save the uploaded file to /tmp
+    temp_path = os.path.join("/tmp", file.name)
+    with open(temp_path, "wb") as f:
+        f.write(file.read())
+    try:
+        state["agent"] = SupportBotAgent(temp_path)
+    except Exception as e:
+        return [("Bot", f"Error processing file: {str(e)}")], state
+    state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
+    state["mode"] = "query"
+    state["last_query"] = ""
+    state["last_answer"] = ""
+    state["feedback_count"] = 0
+    return state["chat_history"], state
+
+def process_input(user_input, state):
+    """
+    Processes user input as either a query or feedback based on the current mode.
+    Typing 'exit' stops the session.
+    """
+    if state.get("mode", "query") == "ended":
+        return state["chat_history"], state
+    if user_input.lower() == "exit":
+        state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
+        state["mode"] = "ended"
+        return state["chat_history"], state
+    if state["mode"] == "query":
+        state["last_query"] = user_input
+        answer = state["agent"].answer_query(user_input)
+        state["last_answer"] = answer
+        state["feedback_count"] = 0
+        state["chat_history"].append(("User", user_input))
+        state["chat_history"].append(("Bot", f"Answer: {answer}\nPlease provide feedback (good, too vague, not helpful):"))
+        state["mode"] = "feedback"
+    elif state["mode"] == "feedback":
         feedback = user_input.lower()
+        state["chat_history"].append(("User", feedback))
+        if feedback == "good" or state["feedback_count"] >= 1:
+            state["chat_history"].append(("Bot", "Thank you for your feedback. Enter your next query (or type 'exit' to end):"))
+            state["mode"] = "query"
         else:
+            new_answer = state["agent"].adjust_response(state["last_query"], state["last_answer"], feedback)
+            state["last_answer"] = new_answer
+            state["feedback_count"] += 1
+            state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
+    return state["chat_history"], state
+
+# --- Gradio UI Setup ---
+
 with gr.Blocks() as demo:
+    state = gr.State({"mode": "idle"})
+    gr.Markdown("## Customer Support Bot with Document Training")
+    file_upload = gr.File(label="Upload TXT or PDF file")
     chat = gr.Chatbot()
+    user_input = gr.Textbox(label="Enter your query or feedback")
     submit_btn = gr.Button("Submit")
+    log_file = gr.File(label="Download Log File", file_count="single", interactive=False, value=log_file_path)
 
     file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
+    submit_btn.click(process_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
 
+demo.launch(share=True)
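For a quick smoke test of the new SupportBotAgent outside the Gradio UI: note that app.py calls demo.launch(share=True) at import time, so the class only imports cleanly if that call is wrapped in an if __name__ == "__main__": guard. Assuming such a guard, a hypothetical check might look like this (the document path and query are placeholders, not part of the commit):

# smoke_test.py -- hypothetical; assumes demo.launch() in app.py is guarded
# by `if __name__ == "__main__":` so that importing app has no side effects.
from app import SupportBotAgent

agent = SupportBotAgent("/tmp/faq.txt")   # placeholder document path
first = agent.answer_query("How do I reset my password?")
print(first)

# Exercise the 'too vague' feedback branch the way the Gradio loop would:
print(agent.adjust_response("How do I reset my password?", first, "too vague"))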