Neurolingua committed on
Commit
6156a6a
·
verified ·
1 Parent(s): a8f0234

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -167
app.py CHANGED
@@ -10,15 +10,11 @@ from langchain.vectorstores.chroma import Chroma
10
  from langchain.prompts import ChatPromptTemplate
11
  from langchain_community.llms.ollama import Ollama
12
  from get_embedding_function import get_embedding_function
13
- from langchain.document_loaders.pdf import PyPDFDirectoryLoader
14
- from langchain_text_splitters import RecursiveCharacterTextSplitter
15
- from langchain.schema.document import Document
16
  import tempfile
17
 
18
- # Create a temporary directory for Chroma if running in Hugging Face Spaces
19
-
20
-
21
-
22
  app = Flask(__name__)
23
  UPLOAD_FOLDER = '/code/uploads'
24
  CHROMA_PATH = tempfile.mkdtemp() # Use the same folder for Chroma
@@ -54,21 +50,6 @@ Answer the question based only on the following context:
54
  Answer the question based on the above context: {question}
55
  """
56
 
57
- from bs4 import BeautifulSoup
58
- import requests
59
- from requests.auth import HTTPBasicAuth
60
- from PIL import Image
61
- from io import BytesIO
62
- import pandas as pd
63
- from urllib.parse import urlparse
64
- import os
65
- from pypdf import PdfReader
66
- from ai71 import AI71
67
- import uuid
68
-
69
- from inference_sdk import InferenceHTTPClient
70
- import base64
71
-
72
  AI71_API_KEY = os.environ.get('AI71_API_KEY')
73
 
74
  def generate_response(query, chat_history):
@@ -85,23 +66,6 @@ def generate_response(query, chat_history):
85
  response += chunk.choices[0].delta.content
86
  return response.replace("###", '').replace('\nUser:', '')
87
 
88
- def predict_pest(filepath):
89
- CLIENT = InferenceHTTPClient(
90
- api_url="https://detect.roboflow.com",
91
- api_key="oF1aC4b1FBCDtK8CoKx7"
92
- )
93
- result = CLIENT.infer(filepath, model_id="pest-detection-ueoco/1")
94
- return result['predictions'][0]
95
-
96
-
97
- def predict_disease(filepath):
98
- CLIENT = InferenceHTTPClient(
99
- api_url="https://classify.roboflow.com",
100
- api_key="oF1aC4b1FBCDtK8CoKx7"
101
- )
102
- result = CLIENT.infer(filepath, model_id="plant-disease-detection-iefbi/1")
103
- return result['predicted_classes'][0]
104
-
105
  def convert_img(url, account_sid, auth_token):
106
  try:
107
  response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
@@ -137,40 +101,6 @@ def get_weather(city):
137
  temperature = soup.find('div', attrs={'class': 'BNeawe iBp4i AP7Wnd'}).text
138
  return temperature
139
 
140
- from zenrows import ZenRowsClient
141
- Zenrow_api = os.environ.get('Zenrow_api')
142
- zenrows_client = ZenRowsClient(Zenrow_api)
143
-
144
- def get_rates():
145
- url = "https://www.kisandeals.com/mandiprices/ALL/TAMIL-NADU/ALL"
146
- response = zenrows_client.get(url)
147
-
148
- if response.status_code == 200:
149
- soup = BeautifulSoup(response.content, 'html.parser')
150
- rows = soup.select('table tbody tr')
151
- data = {}
152
- for row in rows:
153
- columns = row.find_all('td')
154
- if len(columns) >= 2:
155
- commodity = columns[0].get_text(strip=True)
156
- price = columns[1].get_text(strip=True)
157
- if '₹' in price:
158
- data[commodity] = price
159
- return str(data) + " These are the prices for 1 kg"
160
-
161
- def get_news():
162
- news = []
163
- url = "https://economictimes.indiatimes.com/news/economy/agriculture?from=mdr"
164
- response = zenrows_client.get(url)
165
-
166
- if response.status_code == 200:
167
- soup = BeautifulSoup(response.content, 'html.parser')
168
- headlines = soup.find_all("div", class_="eachStory")
169
- for story in headlines:
170
- headline = story.find('h3').text.strip()
171
- news.append(headline)
172
- return news
173
-
174
  def download_and_save_as_txt(url, account_sid, auth_token):
175
  try:
176
  response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
@@ -208,15 +138,7 @@ initialize_chroma()
208
  def query_rag(query_text: str):
209
  embedding_function = get_embedding_function()
210
  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
211
- print(query_text)
212
- # Check if the query is related to a PDF
213
- if "from pdf" in query_text.lower() or "in pdf" in query_text.lower():
214
- # Provide some context about handling PDFs
215
- response_text = "I see you're asking about a PDF-related query. Let me check the context from the PDF."
216
- else:
217
- # Regular RAG functionality
218
- response_text = "Your query is not related to PDFs. Please make sure your question is clear."
219
-
220
  results = db.similarity_search_with_score(query_text, k=5)
221
 
222
  if not results:
@@ -241,38 +163,19 @@ def query_rag(query_text: str):
241
  response_text = response.replace("###", '').replace('\nUser:', '')
242
 
243
  return response_text
244
-
245
- def download_file(url, extension):
246
- try:
247
- response = requests.get(url)
248
- response.raise_for_status()
249
- filename = f"{uuid.uuid4()}{extension}"
250
- file_path = os.path.join(UPLOAD_FOLDER, filename)
251
-
252
- with open(file_path, 'wb') as file:
253
- file.write(response.content)
254
-
255
- print(f"File downloaded and saved as {file_path}")
256
- return file_path
257
 
258
- except requests.exceptions.HTTPError as err:
259
- print(f"HTTP error occurred: {err}")
260
- except Exception as err:
261
- print(f"An error occurred: {err}")
262
- return None
263
  def save_pdf_and_update_database(pdf_filepath):
264
  try:
265
- document_loader = PyPDFDirectoryLoader(UPLOAD_FOLDER)
266
  documents = document_loader.load()
267
-
268
  text_splitter = RecursiveCharacterTextSplitter(
269
  chunk_size=800,
270
  chunk_overlap=80,
271
  length_function=len,
272
- is_separator_regex=False,
273
  )
274
  chunks = text_splitter.split_documents(documents)
275
-
276
  add_to_chroma(chunks)
277
  print(f"PDF processed and data updated in Chroma.")
278
  except Exception as e:
@@ -294,6 +197,7 @@ def add_to_chroma(chunks: list[Document]):
294
  print(f"Chunks added to Chroma.")
295
  except Exception as e:
296
  print(f"Error adding chunks to Chroma: {e}")
 
297
  def calculate_chunk_ids(chunks):
298
  last_page_id = None
299
  current_chunk_index = 0
@@ -314,7 +218,6 @@ def calculate_chunk_ids(chunks):
314
 
315
  return chunks
316
 
317
-
318
  @app.route('/whatsapp', methods=['POST'])
319
  def whatsapp_webhook():
320
  incoming_msg = request.values.get('Body', '').lower()
@@ -331,74 +234,30 @@ def whatsapp_webhook():
331
  # Handle image processing (disease/pest detection)
332
  filepath = convert_img(media_url, account_sid, auth_token)
333
  response_text = handle_image(filepath)
334
- else:
335
  # Handle PDF processing
336
  filepath = download_and_save_as_txt(media_url, account_sid, auth_token)
337
- response_text = process_and_query_pdf(filepath)
338
- elif ('weather' in incoming_msg.lower()) or ('climate' in incoming_msg.lower()) or (
339
- 'temperature' in incoming_msg.lower()):
340
- response_text = get_weather(incoming_msg.lower())
341
- elif 'bookkeeping' in incoming_msg:
342
- response_text = "Please provide the details you'd like to record."
343
- elif ('rates' in incoming_msg.lower()) or ('price' in incoming_msg.lower()) or (
344
- 'market' in incoming_msg.lower()) or ('rate' in incoming_msg.lower()) or ('prices' in incoming_msg.lower()):
345
- rates = get_rates()
346
- response_text = generate_response(incoming_msg + ' data is ' + rates, chat_history)
347
- elif ('news' in incoming_msg.lower()) or ('information' in incoming_msg.lower()):
348
- news = get_news()
349
- response_text = generate_response(incoming_msg + ' data is ' + str(news), chat_history)
350
  else:
 
351
  response_text = query_rag(incoming_msg)
352
 
353
- conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
354
- send_message(sender, response_text)
355
- return '', 204
356
-
357
- def handle_image(filepath):
358
- try:
359
- disease = predict_disease(filepath)
360
- except:
361
- disease = None
362
- try:
363
- pest = predict_pest(filepath)
364
- except:
365
- pest = None
366
-
367
- if disease:
368
- response_text = f"Detected disease: {disease}"
369
- disease_info = generate_response(f"Provide brief information about {disease} in plants", chat_history)
370
- response_text += f"\n\nAdditional information: {disease_info}"
371
- elif pest:
372
- response_text = f"Detected pest: {pest}"
373
- pest_info = generate_response(f"Provide brief information about {pest} in agriculture", chat_history)
374
- response_text += f"\n\nAdditional information: {pest_info}"
375
- else:
376
- response_text = "Please upload another image with good quality."
377
-
378
- return response_text
379
-
380
- def process_and_query_pdf(filepath):
381
- # Assuming the PDF processing and embedding are handled here.
382
- add_to_chroma(load_documents())
383
- return query_rag("from pdf") # Replace with a more specific query if needed
384
 
385
-
386
- def send_message(to, body):
387
- try:
388
- message = client.messages.create(
389
- from_=from_whatsapp_number,
390
- body=body,
391
- to=to
392
- )
393
- print(f"Message sent with SID: {message.sid}")
394
- except Exception as e:
395
- print(f"Error sending message: {e}")
396
-
397
- def send_initial_message(to_number):
398
- send_message(
399
- f'whatsapp:{to_number}',
400
- 'Welcome to the Agri AI Chatbot! How can I assist you today? You can send an image with "pest" or "disease" to classify it.'
401
- )
402
  if __name__ == "__main__":
403
  send_initial_message('919080522395')
404
  send_initial_message('916382792828')
 
10
  from langchain.prompts import ChatPromptTemplate
11
  from langchain_community.llms.ollama import Ollama
12
  from get_embedding_function import get_embedding_function
13
+ from langchain.document_loaders.pdf import PyPDFLoader
14
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
15
+ from langchain.schema import Document
16
  import tempfile
17
 
 
 
 
 
18
  app = Flask(__name__)
19
  UPLOAD_FOLDER = '/code/uploads'
20
  CHROMA_PATH = tempfile.mkdtemp() # Use the same folder for Chroma
 
50
  Answer the question based on the above context: {question}
51
  """
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  AI71_API_KEY = os.environ.get('AI71_API_KEY')
54
 
55
  def generate_response(query, chat_history):
 
66
  response += chunk.choices[0].delta.content
67
  return response.replace("###", '').replace('\nUser:', '')
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def convert_img(url, account_sid, auth_token):
70
  try:
71
  response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
 
101
  temperature = soup.find('div', attrs={'class': 'BNeawe iBp4i AP7Wnd'}).text
102
  return temperature
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def download_and_save_as_txt(url, account_sid, auth_token):
105
  try:
106
  response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
 
138
  def query_rag(query_text: str):
139
  embedding_function = get_embedding_function()
140
  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
141
+
 
 
 
 
 
 
 
 
142
  results = db.similarity_search_with_score(query_text, k=5)
143
 
144
  if not results:
 
163
  response_text = response.replace("###", '').replace('\nUser:', '')
164
 
165
  return response_text
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
 
 
 
 
 
167
  def save_pdf_and_update_database(pdf_filepath):
168
  try:
169
+ document_loader = PyPDFLoader(pdf_filepath)
170
  documents = document_loader.load()
171
+
172
  text_splitter = RecursiveCharacterTextSplitter(
173
  chunk_size=800,
174
  chunk_overlap=80,
175
  length_function=len,
 
176
  )
177
  chunks = text_splitter.split_documents(documents)
178
+
179
  add_to_chroma(chunks)
180
  print(f"PDF processed and data updated in Chroma.")
181
  except Exception as e:
 
197
  print(f"Chunks added to Chroma.")
198
  except Exception as e:
199
  print(f"Error adding chunks to Chroma: {e}")
200
+
201
  def calculate_chunk_ids(chunks):
202
  last_page_id = None
203
  current_chunk_index = 0
 
218
 
219
  return chunks
220
 
 
221
  @app.route('/whatsapp', methods=['POST'])
222
  def whatsapp_webhook():
223
  incoming_msg = request.values.get('Body', '').lower()
 
234
  # Handle image processing (disease/pest detection)
235
  filepath = convert_img(media_url, account_sid, auth_token)
236
  response_text = handle_image(filepath)
237
+ elif content_type == 'application/pdf':
238
  # Handle PDF processing
239
  filepath = download_and_save_as_txt(media_url, account_sid, auth_token)
240
+ save_pdf_and_update_database(filepath)
241
+ response_text = "PDF received and processed."
242
+ else:
243
+ response_text = "Unsupported media type. Please send a PDF or image file."
244
+ elif "weather" in incoming_msg:
245
+ city = incoming_msg.replace("weather", "").strip()
246
+ temperature = get_weather(city)
247
+ response_text = f"The current temperature in {city} is {temperature}"
 
 
 
 
 
248
  else:
249
+ # Generate response using the question and chat history
250
  response_text = query_rag(incoming_msg)
251
 
252
+ # Add interaction to memory
253
+ interaction = {'role': 'user', 'content': incoming_msg, 'response': response_text}
254
+ conversation_memory.add_to_memory(interaction)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
+ # Send the response
257
+ resp = MessagingResponse()
258
+ msg = resp.message()
259
+ msg.body(response_text)
260
+ return str(resp)
 
 
 
 
 
 
 
 
 
 
 
 
261
  if __name__ == "__main__":
262
  send_initial_message('919080522395')
263
  send_initial_message('916382792828')