Shriharsh commited on
Commit
e764d84
·
verified ·
1 Parent(s): 8d300dd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -0
app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import gradio as gr
3
+ from transformers import pipeline
4
+ from sentence_transformers import SentenceTransformer, util
5
+ import PyPDF2
6
+
7
# Set up logging
# All bot activity (file loads, queries, answers, feedback) is appended
# to this file at INFO level.
logging.basicConfig(filename='support_bot_log.txt', level=logging.INFO)

# Load models
# Extractive QA model: selects an answer span from a supplied context.
qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
# Sentence embedder used to rank document sections against the query.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
13
+
14
# Helper function to extract text from PDF
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Pages are separated by newlines. PyPDF2's ``extract_text()`` returns
    ``None`` for pages with no extractable text (e.g. scanned images); the
    original ``text += page.extract_text() + "\\n"`` raised ``TypeError``
    on such pages, so the result is coalesced to an empty string first.
    """
    text = ""
    with open(file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            # `or ""` guards against None from image-only/empty pages.
            text += (page.extract_text() or "") + "\n"
    return text
22
+
23
# Find the most relevant section in the document
def find_relevant_section(query, sections, section_embeddings):
    """Pick the document section most relevant to *query*.

    Strategy: semantic search first (cosine similarity between the query
    embedding and the pre-computed section embeddings); if the best score
    is below SIMILARITY_THRESHOLD, fall back to a keyword-overlap search
    requiring at least two meaningful words in common. Returns a fixed
    fallback sentence when neither strategy finds a match — callers
    compare against that exact string, so it must not change.
    """
    stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}

    # Semantic search
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    similarities = util.cos_sim(query_embedding, section_embeddings)[0]
    best_idx = similarities.argmax().item()
    best_section = sections[best_idx]
    similarity_score = similarities[best_idx].item()

    SIMILARITY_THRESHOLD = 0.4
    if similarity_score >= SIMILARITY_THRESHOLD:
        # Lazy %-args: the message is only built if INFO is enabled.
        logging.info("Found relevant section using embeddings for query: %s", query)
        return best_section

    logging.info("Low similarity (%s). Falling back to keyword search.", similarity_score)

    # Keyword-based fallback search with stopword filtering
    query_words = {word for word in query.lower().split() if word not in stopwords}
    for section in sections:
        section_words = {word for word in section.lower().split() if word not in stopwords}
        common_words = query_words.intersection(section_words)
        if len(common_words) >= 2:
            logging.info("Keyword match found for query: %s with common words: %s", query, common_words)
            return section

    # Original used an f-string with no placeholders here.
    logging.info("No good keyword match found. Returning default fallback response.")
    return "I don’t have enough information to answer that."
52
+
53
# Process the uploaded file
def process_file(file, state):
    """Load an uploaded PDF/TXT file, split it into sections and embed them.

    Resets the conversation state so the bot is ready for new questions.
    Returns the ``(chat_history, state)`` pair the Gradio handlers expect.
    Unsupported formats and empty documents produce a bot message instead
    of proceeding.
    """
    if file is None:
        return [("Bot", "Please upload a file.")], state

    file_path = file.name
    if file_path.lower().endswith(".pdf"):
        text = extract_text_from_pdf(file_path)
    elif file_path.lower().endswith(".txt"):
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    else:
        return [("Bot", "Unsupported file format. Please upload a PDF or TXT file.")], state

    # Split on blank lines and drop empty/whitespace-only sections; the
    # original kept them, wasting embedding work and letting blank
    # sections compete in the similarity search.
    sections = [s for s in text.split('\n\n') if s.strip()]
    if not sections:
        # Guard: encoding an empty list and searching it is meaningless.
        return [("Bot", "The uploaded file contains no readable text. Please upload another file.")], state

    section_embeddings = embedder.encode(sections, convert_to_tensor=True)
    state['document_text'] = text
    state['sections'] = sections
    state['section_embeddings'] = section_embeddings
    state['current_query'] = None
    state['feedback_count'] = 0
    state['mode'] = 'waiting_for_query'
    state['chat_history'] = [("Bot", "File processed. You can now ask questions.")]
    logging.info("Processed file: %s", file_path)
    return state['chat_history'], state
78
+
79
# Handle user input (queries and feedback)
def handle_input(user_input, state):
    """Route *user_input* according to the current conversation mode.

    Modes:
      - 'waiting_for_upload': no document yet; prompt the user to upload.
      - 'waiting_for_query': treat the input as a question, answer it via
        retrieval + extractive QA, then switch to feedback collection.
      - 'waiting_for_feedback': interpret the input as feedback
        ("good", "too vague", "not helpful") and refine the answer,
        allowing at most two refinement rounds per query.

    Returns the ``(chat_history, state)`` pair the Gradio click handler
    expects.
    """
    if state['mode'] == 'waiting_for_upload':
        state['chat_history'].append(("Bot", "Please upload a file first."))
    elif state['mode'] == 'waiting_for_query':
        query = user_input
        state['current_query'] = query
        state['feedback_count'] = 0
        # Retrieve the best-matching section, then run extractive QA on it.
        context = find_relevant_section(query, state['sections'], state['section_embeddings'])
        if context == "I don’t have enough information to answer that.":
            # Sentinel string returned by find_relevant_section when no
            # section matched — shown to the user verbatim.
            answer = context
        else:
            result = qa_model(question=query, context=context)
            answer = result["answer"]
        state['last_answer'] = answer
        state['mode'] = 'waiting_for_feedback'
        state['chat_history'].append(("User", query))
        state['chat_history'].append(("Bot", f"Answer: {answer}\nPlease provide feedback: good, too vague, not helpful."))
        logging.info(f"Query: {query}, Answer: {answer}")
    elif state['mode'] == 'waiting_for_feedback':
        feedback = user_input.lower()
        state['chat_history'].append(("User", feedback))
        logging.info(f"Feedback: {feedback}")
        # Stop refining on "good" or once two refinement rounds are used
        # (feedback_count reaches 2).
        if feedback == "good" or state['feedback_count'] >= 2:
            state['mode'] = 'waiting_for_query'
            if feedback == "good":
                state['chat_history'].append(("Bot", "Thank you for your feedback. You can ask another question."))
            else:
                state['chat_history'].append(("Bot", "Maximum feedback iterations reached. You can ask another question."))
        else:
            # Re-retrieve the context for the original query and refine.
            query = state['current_query']
            context = find_relevant_section(query, state['sections'], state['section_embeddings'])
            if feedback == "too vague":
                # Append a snippet of the source section for extra detail.
                adjusted_answer = f"{state['last_answer']}\n\n(More details:\n{context[:500]}...)"
            elif feedback == "not helpful":
                # Re-run QA with an augmented question to coax more detail.
                adjusted_answer = qa_model(question=query + " Please provide more detailed information with examples.", context=context)['answer']
            else:
                # Unrecognized feedback: re-prompt without consuming one of
                # the refinement rounds (early return skips the increment).
                state['chat_history'].append(("Bot", "Please provide valid feedback: good, too vague, not helpful."))
                return state['chat_history'], state
            state['last_answer'] = adjusted_answer
            state['feedback_count'] += 1
            state['chat_history'].append(("Bot", f"Updated answer: {adjusted_answer}\nPlease provide feedback: good, too vague, not helpful."))
            logging.info(f"Adjusted answer: {adjusted_answer}")
    return state['chat_history'], state
123
+
124
# Initial state
# Schema of the per-session conversation state shared by the handlers.
initial_state = dict(
    document_text=None,       # full text of the uploaded document
    sections=None,            # document split on blank lines
    section_embeddings=None,  # embeddings parallel to `sections`
    current_query=None,       # question currently being refined
    feedback_count=0,         # refinement rounds used for current query
    mode='waiting_for_upload',  # state-machine mode (see handle_input)
    chat_history=[("Bot", "Please upload a PDF or TXT file to start.")],
    last_answer=None,         # most recent answer shown to the user
)
135
+
136
# Gradio interface
with gr.Blocks() as demo:
    # Per-session conversation state, seeded from the module-level dict.
    state = gr.State(initial_state)
    file_upload = gr.File(label="Upload PDF or TXT file")
    chat = gr.Chatbot()
    user_input = gr.Textbox(label="Your query or feedback")
    submit_btn = gr.Button("Submit")

    # Process file upload
    file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])

    # Handle user input and clear the textbox
    # `.then(lambda: "", None, user_input)` resets the textbox after each
    # submission so the user does not have to clear it manually.
    submit_btn.click(handle_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)

demo.launch()