Shriharsh committed on
Commit
0a81317
·
verified ·
1 Parent(s): 5133aad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -91
app.py CHANGED
@@ -1,9 +1,18 @@
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  from sentence_transformers import SentenceTransformer, util
4
  import PyPDF2
5
- import datetime
6
- import os
 
 
 
 
 
 
 
7
 
8
  # Load models
9
  qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
@@ -19,7 +28,7 @@ def extract_text_from_pdf(file_path):
19
  return text
20
 
21
  # Find the most relevant section in the document
22
- def find_relevant_section(query, sections, section_embeddings, log_messages):
23
  stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
24
 
25
  # Semantic search
@@ -31,40 +40,40 @@ def find_relevant_section(query, sections, section_embeddings, log_messages):
31
 
32
  SIMILARITY_THRESHOLD = 0.4
33
  if similarity_score >= SIMILARITY_THRESHOLD:
34
- log_messages = log_message(f"Found relevant section using embeddings for query: {query}", log_messages)
35
- return best_section, log_messages
36
 
37
- log_messages = log_message(f"Low similarity ({similarity_score}). Falling back to keyword search.", log_messages)
38
 
39
  # Keyword-based fallback search with stopword filtering
40
- query_words = {word for word in query.lower().split() if word not in stopwords} # Corrected line
41
  for section in sections:
42
  section_words = {word for word in section.lower().split() if word not in stopwords}
43
  common_words = query_words.intersection(section_words)
44
  if len(common_words) >= 2:
45
- log_messages = log_message(f"Keyword match found for query: {query} with common words: {common_words}", log_messages)
46
- return section, log_messages
47
 
48
- log_messages = log_message(f"No good keyword match found. Returning default fallback response.", log_messages)
49
- return "I don’t have enough information to answer that.", log_messages
50
 
51
  # Process the uploaded file with detailed logging
52
- def process_file(file, state, log_messages):
53
  if file is None:
54
- log_messages = log_message("No file uploaded.", log_messages)
55
- return [("Bot", "Please upload a file.")], state, log_messages
56
 
57
  file_path = file.name
58
  if file_path.lower().endswith(".pdf"):
59
- log_messages = log_message(f"Uploaded PDF file: {file_path}", log_messages)
60
  text = extract_text_from_pdf(file_path)
61
  elif file_path.lower().endswith(".txt"):
62
- log_messages = log_message(f"Uploaded TXT file: {file_path}", log_messages)
63
  with open(file_path, 'r', encoding='utf-8') as f:
64
  text = f.read()
65
  else:
66
- log_messages = log_message(f"Unsupported file format: {file_path}", log_messages)
67
- return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state, log_messages
68
 
69
  sections = text.split('\n\n')
70
  section_embeddings = embedder.encode(sections, convert_to_tensor=True)
@@ -75,27 +84,19 @@ def process_file(file, state, log_messages):
75
  state['feedback_count'] = 0
76
  state['mode'] = 'waiting_for_query'
77
  state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
78
- log_messages = log_message(f"Processed file: {file_path}", log_messages)
79
- return state['chat_history'], state, log_messages
80
 
81
  # Handle user input (queries and feedback)
82
- def handle_input(user_input, state, log_messages):
83
  if state['mode'] == 'waiting_for_upload':
84
  state['chat_history'].append(("Bot", "Please upload a file first."))
85
- log_messages = log_message("User attempted to interact without uploading a file.", log_messages)
86
- return state['chat_history'], state, log_messages
87
  elif state['mode'] == 'waiting_for_query':
88
- if user_input.lower() == "exit":
89
- log_messages = log_message("User entered 'exit'. Ending session.", log_messages)
90
- state['mode'] = 'exited'
91
- state['chat_history'].append(("User", "exit"))
92
- state['chat_history'].append(("Bot", "Session ended. You can download the log file."))
93
- return state['chat_history'], state, log_messages
94
-
95
  query = user_input
96
  state['current_query'] = query
97
  state['feedback_count'] = 0
98
- context, log_messages = find_relevant_section(query, state['sections'], state['section_embeddings'], log_messages)
99
  if context == "I don’t have enough information to answer that.":
100
  answer = context
101
  else:
@@ -105,46 +106,47 @@ def handle_input(user_input, state, log_messages):
105
  state['mode'] = 'waiting_for_feedback'
106
  state['chat_history'].append(("User", query))
107
  state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
108
- # Log the query and initial answer here:
109
- log_messages = log_message(f"Query: {query}, Answer: {answer}", log_messages)
110
  elif state['mode'] == 'waiting_for_feedback':
111
- if user_input.lower() == "exit":
112
- log_messages = log_message("User entered 'exit'. Ending session.", log_messages)
113
- state['mode'] = 'exited'
114
- state['chat_history'].append(("User", "exit"))
115
- state['chat_history'].append(("Bot", "Session ended. You can download the log file."))
116
- return state['chat_history'], state, log_messages
117
-
118
  feedback = user_input.lower()
119
  state['chat_history'].append(("User", feedback))
120
- log_messages = log_message(f"Feedback: {feedback}", log_messages)
121
  if feedback == "good" or state['feedback_count'] >= 2:
122
  state['mode'] = 'waiting_for_query'
123
  if feedback == "good":
124
  state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
125
- log_messages = log_message("Feedback accepted as 'good'. Waiting for next query.", log_messages)
126
  else:
127
  state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
128
- log_messages = log_message("Max feedback iterations reached. Waiting for next query.", log_messages)
129
  else:
130
  query = state['current_query']
131
- context, log_messages = find_relevant_section(query, state['sections'], state['section_embeddings'], log_messages)
132
  if feedback == "too vague":
133
  adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
134
  elif feedback == "not helpful":
135
  adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
136
  else:
137
  state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
138
- log_messages = log_message(f"Invalid feedback received: {feedback}", log_messages)
139
- return state['chat_history'], state, log_messages
140
  state['last_answer'] = adjusted_answer
141
  state['feedback_count'] += 1
142
  state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
143
- log_messages = log_message(f"Adjusted answer: {adjusted_answer}", log_messages)
144
- elif state['mode'] == 'exited':
145
- state['chat_history'].append(("Bot", "Session is over. Please download the log."))
146
- log_messages = log_message("User interacted after exiting.", log_messages)
147
- return state['chat_history'], state, log_messages
 
 
 
 
 
 
 
 
 
148
 
149
  # Initial state
150
  initial_state = {
@@ -158,55 +160,26 @@ initial_state = {
158
  'last_answer': None
159
  }
160
 
161
- # Initialize log_messages outside initial_state
162
- log_messages = []
163
-
164
- # Logging function to store messages in memory
165
- def log_message(message, log_messages):
166
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
167
- log_entry = f"{timestamp} - {message}"
168
- log_messages.append(log_entry)
169
- return log_messages
170
-
171
- # Function to save logs to file
172
- def save_logs_to_file(log_messages):
173
- with open("support_bot_log.txt", "w") as log_file:
174
- for log_message in log_messages:
175
- log_file.write(log_message + "\n")
176
-
177
  # Gradio interface
178
  with gr.Blocks() as demo:
179
  state = gr.State(initial_state)
180
- file_upload = gr.File(label="Upload PDF or TXT file")
 
 
 
 
 
181
  chat = gr.Chatbot()
182
  user_input = gr.Textbox(label="Your query or feedback")
183
  submit_btn = gr.Button("Submit")
184
- download_log_btn = gr.Button("Download Log File") # Changed to Button
185
- log_file = gr.File(label="Log File") # Keep File for serving
186
 
187
  # Process file upload
188
- file_upload.upload(process_file, inputs=[file_upload, state, gr.State(log_messages)], outputs=[chat, state, gr.State(log_messages)])
189
 
190
  # Handle user input and clear the textbox
191
- submit_btn.click(handle_input, inputs=[user_input, state, gr.State(log_messages)], outputs=[chat, state, gr.State(log_messages)]).then(lambda: "", None, user_input)
192
-
193
- # Update the log file just before download
194
-
195
-
196
- download_log_btn.click(
197
- lambda log_messages: "support_bot_log.txt",
198
- inputs=[gr.State(log_messages)],
199
- outputs=[log_file]
200
- )
201
 
202
- # Also save logs when user exits
203
- user_input.submit(
204
- lambda user_input, state, log_messages: (
205
- save_logs_to_file(log_messages) if user_input.lower() == "exit" else None,
206
- state
207
- ),
208
- [user_input, state, gr.State(log_messages)],
209
- [log_file, state]
210
- )
211
 
212
- demo.launch(share=True)
 
1
+ import os
2
+ import logging
3
  import gradio as gr
4
  from transformers import pipeline
5
  from sentence_transformers import SentenceTransformer, util
6
  import PyPDF2
7
+
8
# Set up logging: send every record at INFO level or above to
# support_bot_log.txt.
#
# The file handler is built explicitly so the log is always written as UTF-8.
# Passing only ``filename=`` would open the file with the platform's default
# encoding, which can differ between machines and may not be able to represent
# the user queries and answers that get logged.
logging.basicConfig(
    handlers=[logging.FileHandler('support_bot_log.txt', encoding='utf-8')],
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
    force=True,  # Remove and close handlers already on the root logger first
)
# Root logger used by the functions in this module.
logger = logging.getLogger()
16
 
17
  # Load models
18
  qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
 
28
  return text
29
 
30
  # Find the most relevant section in the document
31
+ def find_relevant_section(query, sections, section_embeddings):
32
  stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
33
 
34
  # Semantic search
 
40
 
41
  SIMILARITY_THRESHOLD = 0.4
42
  if similarity_score >= SIMILARITY_THRESHOLD:
43
+ logger.info(f"Found relevant section using embeddings for query: {query}")
44
+ return best_section
45
 
46
+ logger.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
47
 
48
  # Keyword-based fallback search with stopword filtering
49
+ query_words = {word for word in query.lower().split() if word not in stopwords}
50
  for section in sections:
51
  section_words = {word for word in section.lower().split() if word not in stopwords}
52
  common_words = query_words.intersection(section_words)
53
  if len(common_words) >= 2:
54
+ logger.info(f"Keyword match found for query: {query} with common words: {common_words}")
55
+ return section
56
 
57
+ logger.info(f"No good keyword match found. Returning default fallback response.")
58
+ return "I don’t have enough information to answer that."
59
 
60
  # Process the uploaded file with detailed logging
61
+ def process_file(file, state):
62
  if file is None:
63
+ logger.info("No file uploaded.")
64
+ return [("Bot", "Please upload a file.")], state
65
 
66
  file_path = file.name
67
  if file_path.lower().endswith(".pdf"):
68
+ logger.info(f"Uploaded PDF file: {file_path}")
69
  text = extract_text_from_pdf(file_path)
70
  elif file_path.lower().endswith(".txt"):
71
+ logger.info(f"Uploaded TXT file: {file_path}")
72
  with open(file_path, 'r', encoding='utf-8') as f:
73
  text = f.read()
74
  else:
75
+ logger.error(f"Unsupported file format: {file_path}")
76
+ return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state
77
 
78
  sections = text.split('\n\n')
79
  section_embeddings = embedder.encode(sections, convert_to_tensor=True)
 
84
  state['feedback_count'] = 0
85
  state['mode'] = 'waiting_for_query'
86
  state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
87
+ logger.info(f"Processed file: {file_path}")
88
+ return state['chat_history'], state
89
 
90
  # Handle user input (queries and feedback)
91
+ def handle_input(user_input, state):
92
  if state['mode'] == 'waiting_for_upload':
93
  state['chat_history'].append(("Bot", "Please upload a file first."))
94
+ logger.info("User attempted to interact without uploading a file.")
 
95
  elif state['mode'] == 'waiting_for_query':
 
 
 
 
 
 
 
96
  query = user_input
97
  state['current_query'] = query
98
  state['feedback_count'] = 0
99
+ context = find_relevant_section(query, state['sections'], state['section_embeddings'])
100
  if context == "I don’t have enough information to answer that.":
101
  answer = context
102
  else:
 
106
  state['mode'] = 'waiting_for_feedback'
107
  state['chat_history'].append(("User", query))
108
  state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
109
+ logger.info(f"Query: {query}, Answer: {answer}")
 
110
  elif state['mode'] == 'waiting_for_feedback':
 
 
 
 
 
 
 
111
  feedback = user_input.lower()
112
  state['chat_history'].append(("User", feedback))
113
+ logger.info(f"Feedback: {feedback}")
114
  if feedback == "good" or state['feedback_count'] >= 2:
115
  state['mode'] = 'waiting_for_query'
116
  if feedback == "good":
117
  state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
118
+ logger.info("Feedback accepted as 'good'. Waiting for next query.")
119
  else:
120
  state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
121
+ logger.info("Max feedback iterations reached. Waiting for next query.")
122
  else:
123
  query = state['current_query']
124
+ context = find_relevant_section(query, state['sections'], state['section_embeddings'])
125
  if feedback == "too vague":
126
  adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
127
  elif feedback == "not helpful":
128
  adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
129
  else:
130
  state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
131
+ logger.info(f"Invalid feedback received: {feedback}")
132
+ return state['chat_history'], state
133
  state['last_answer'] = adjusted_answer
134
  state['feedback_count'] += 1
135
  state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
136
+ logger.info(f"Adjusted answer: {adjusted_answer}")
137
+ return state['chat_history'], state
138
+
139
+ # Function to return the up-to-date log file for download
140
def get_log_file():
    """Return the path of the log file so it can be offered for download.

    The download itself is logged, every handler on the module logger is
    flushed, and the file is created if it does not exist yet.

    Returns:
        str: The relative path "support_bot_log.txt".
    """
    log_path = "support_bot_log.txt"

    # Record the download before flushing so this entry is included in the
    # file that is handed back.
    logger.info("Log file downloaded by user.")
    for handler in logger.handlers:
        handler.flush()

    # Make sure the file exists. Append mode creates it when missing but never
    # truncates it, so log content that appears between a separate existence
    # check and the open cannot be wiped.
    with open(log_path, "a", encoding="utf-8"):
        pass

    return log_path
150
 
151
  # Initial state
152
  initial_state = {
 
160
  'last_answer': None
161
  }
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  # Gradio interface
164
  with gr.Blocks() as demo:
165
  state = gr.State(initial_state)
166
+
167
+ with gr.Row():
168
+ file_upload = gr.File(label="Upload PDF or TXT file")
169
+ download_btn = gr.Button("Download Log")
170
+ download_file = gr.File(label="Log File", interactive=False)
171
+
172
  chat = gr.Chatbot()
173
  user_input = gr.Textbox(label="Your query or feedback")
174
  submit_btn = gr.Button("Submit")
 
 
175
 
176
  # Process file upload
177
+ file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
178
 
179
  # Handle user input and clear the textbox
180
+ submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)
 
 
 
 
 
 
 
 
 
181
 
182
+ # Set up download log button
183
+ download_btn.click(fn=get_log_file, inputs=[], outputs=download_file)
 
 
 
 
 
 
 
184
 
185
+ demo.launch(share=True)