# Spaces status: Runtime error
import os | |
import faiss | |
import torch | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer | |
from flask import Flask, request, jsonify, render_template | |
from flask_cors import CORS | |
from pyngrok import ngrok | |
import requests | |
import cloudinary | |
import cloudinary.uploader | |
import cloudinary.api | |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig | |
from peft import PeftModel, PeftConfig | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
from happytransformer import HappyTextToText, TTSettings | |
import io | |
import logging | |
import geocoder | |
from geopy.distance import geodesic | |
import webrtcvad | |
import collections | |
import time | |
from werkzeug.utils import secure_filename | |
from geopy.geocoders import Nominatim | |
import pickle | |
import numpy as np | |
import tempfile | |
from pathlib import Path | |
# Warn (without failing) when the installed NumPy differs from the version
# this application expects. The comparison is done on the version string so
# that pre-release or local versions such as "2.0.0rc1" cannot raise
# ValueError during integer parsing, and a missing __version__ attribute
# cannot raise AttributeError while building the message.
EXPECTED_NUMPY_VERSION = "1.23.5"
_numpy_version = getattr(np, "__version__", "unknown")
if _numpy_version != EXPECTED_NUMPY_VERSION:
    print(f"Warning: Using numpy version {_numpy_version}. Expected version 1.23.5")

# Configure logging
logging.basicConfig(level=logging.INFO)
# Load credentials from environment variables.
#
# os.getenv() takes the NAME of a variable. The previous revision passed the
# credential values themselves as names, so every setting resolved to None and
# the secrets were committed to source control. Those credentials should be
# treated as leaked and rotated.
# NOTE(review): the variable names below mirror the Python constant names;
# confirm they match the names configured in the deployment's secret store.
API_KEY = os.getenv("API_KEY")
CSE_ID = os.getenv("CSE_ID")
CLOUDINARY_CLOUD_NAME = os.getenv("CLOUDINARY_CLOUD_NAME")
CLOUDINARY_API_KEY = os.getenv("CLOUDINARY_API_KEY")
CLOUDINARY_API_SECRET = os.getenv("CLOUDINARY_API_SECRET")
# Locations of the retrieval artifacts (embedding weights, FAISS index,
# property CSV) and of the LLM adapter directory.
load_dir = "./models/new_rag_model/"
model_path, faiss_index_path, dataset_path = (
    os.path.join(load_dir, artifact)
    for artifact in (
        "model_state_dict.pth",
        "property_faiss.index",
        "property_data.csv",
    )
)
model_dir = "./models/llm_model"

# Pick the compute device once; the loaders and the retriever read `device`.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Point the model caches at /cache.
# NOTE(review): these are assigned after the transformers /
# sentence_transformers imports at the top of the file; libraries that read
# the variables at import time may not pick them up - confirm.
os.environ.update({
    'TRANSFORMERS_CACHE': '/cache',
    'HF_HOME': '/cache',
    'XDG_CACHE_HOME': '/cache',
})
# Load SentenceTransformer model
def load_sentence_transformer():
    """Build the embedding model and overlay locally saved weights if present.

    Instantiates ``jinaai/jina-embeddings-v3`` (with remote code enabled) on
    ``device``, caching downloads under ``/cache``. When ``model_path``
    exists, its state dict is loaded into the model after the tensor dtype
    handling below; otherwise a warning is printed and the model is returned
    with its downloaded weights.

    Returns:
        The SentenceTransformer instance.

    Raises:
        ImportError: if ``einops`` is not installed.
        Exception: any other loading error is printed and re-raised.
    """
    print("Loading SentenceTransformer model...")
    try:
        hf_cache = Path('/cache')
        hf_cache.mkdir(parents=True, exist_ok=True)

        # Fail early with an actionable message when einops is missing.
        try:
            import einops  # noqa: F401
        except ImportError:
            raise ImportError("einops is required. Please install it with 'pip install einops'")

        embedder = SentenceTransformer(
            "jinaai/jina-embeddings-v3",
            trust_remote_code=True,
            cache_folder=str(hf_cache),
        )
        embedder = embedder.to(device)

        if not os.path.exists(model_path):
            print(f"Warning: Model file not found at {model_path}")
            return embedder

        weights = torch.load(model_path, map_location=device)
        # NOTE(review): the hasattr test looks like it is meant to detect
        # quantized tensors; confirm whether it also matches ordinary tensors,
        # which would make the bfloat16 branch unreachable.
        for name in list(weights):
            value = weights[name]
            if hasattr(value, 'dequantize'):
                weights[name] = value.dequantize().to(dtype=torch.float32)
            elif value.dtype == torch.bfloat16:
                weights[name] = value.to(dtype=torch.float32)
        embedder.load_state_dict(weights)
        print("SentenceTransformer model loaded successfully.")
        return embedder
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise
# Load FAISS index
def load_faiss_index():
    """Read the FAISS index stored at ``faiss_index_path``.

    When the installed faiss build exposes ``StandardGpuResources``, the
    index is additionally passed through ``faiss.index_gpu_to_cpu``.

    Returns:
        The loaded index.

    Raises:
        Exception: any read/convert error is printed and re-raised.
    """
    print("Loading FAISS index...")
    try:
        loaded = faiss.read_index(faiss_index_path)
        has_gpu_api = hasattr(faiss, 'StandardGpuResources')
        if has_gpu_api:
            # Ensure the index is on CPU
            loaded = faiss.index_gpu_to_cpu(loaded)
    except Exception as e:
        print(f"Error loading FAISS index: {str(e)}")
        raise
    else:
        print("FAISS index loaded successfully.")
        return loaded
# Load dataset
def load_dataset():
    """Read the property CSV at ``dataset_path`` and return it as a DataFrame."""
    print("Loading dataset...")
    frame = pd.read_csv(dataset_path)
    print("Dataset loaded successfully.")
    return frame
# Custom Retriever Class
class CustomRagRetriever:
    """Nearest-neighbour property lookup over a FAISS index.

    The query is embedded with ``model``, optionally projected with a pickled
    PCA transformer found next to ``model_path``, and searched in
    ``faiss_index``. Hits are resolved to rows of the module-level ``df``.
    """

    def __init__(self, faiss_index, model):
        """Store the index and embedding model and try to load the PCA model.

        Args:
            faiss_index: object exposing ``search(vectors, k)``.
            model: object exposing ``encode(...)`` returning a 2-D array.
        """
        self.index = faiss_index
        self.model = model
        self.pca = None
        # Load PCA if it exists
        pca_path = os.path.join(os.path.dirname(model_path), "pca_model.pkl")
        if os.path.exists(pca_path):
            try:
                # SECURITY: pickle.load can execute arbitrary code. Only load
                # this file if it comes from a trusted build artifact.
                with open(pca_path, 'rb') as f:
                    self.pca = pickle.load(f)
            except ModuleNotFoundError:
                print("Warning: Could not load PCA model due to numpy version mismatch. Continuing without PCA.")
                self.pca = None
            except Exception as e:
                print(f"Warning: Error loading PCA model: {str(e)}. Continuing without PCA.")
                self.pca = None

    def retrieve(self, query, top_k=10):
        """Return up to ``top_k`` properties nearest to ``query``.

        Args:
            query: free-text search string.
            top_k: maximum number of neighbours requested from the index.

        Returns:
            A list of dicts with keys ``property`` (the DataFrame row),
            ``image_url`` (the row's ``property_image`` value) and
            ``distance`` (float).

        Raises:
            Exception: errors from encoding or searching are printed and
                re-raised.
        """
        print(f"Retrieving properties for query: {query}")
        try:
            # Get query embedding with optimizations
            with torch.no_grad():
                query_embedding = self.model.encode(
                    [query],
                    convert_to_numpy=True,
                    device=device,
                    normalize_embeddings=True
                )
            # Convert to FP32
            query_embedding = query_embedding.astype(np.float32)
            # Only apply PCA if it was successfully loaded
            if self.pca is not None:
                try:
                    query_embedding = self.pca.transform(query_embedding)
                except Exception as e:
                    print(f"Warning: Error applying PCA transformation: {str(e)}")
            distances, indices = self.index.search(query_embedding, top_k)

            row_count = len(df)
            retrieved_properties = []
            for idx, dist in zip(indices[0], distances[0]):
                idx = int(idx)
                # FAISS pads the result with -1 when fewer than top_k vectors
                # are available; df.iloc[-1] would silently return the LAST
                # row, so such slots (and ids beyond the dataset) are skipped.
                if idx < 0 or idx >= row_count:
                    if idx >= row_count:
                        print(f"Warning: index id {idx} has no matching dataset row; skipping")
                    continue
                property_data = df.iloc[idx]
                retrieved_properties.append({
                    "property": property_data,
                    "image_url": property_data["property_image"],
                    "distance": float(dist)
                })
            print(f"Retrieved {len(retrieved_properties)} properties")
            return retrieved_properties
        except Exception as e:
            print(f"Error in retrieve: {str(e)}")
            raise
# Initialize components at import time. The dataset is loaded first because
# CustomRagRetriever.retrieve reads the module-level `df`.
df = load_dataset()
model_embedding = load_sentence_transformer()
index = load_faiss_index()
retriever = CustomRagRetriever(faiss_index=index, model=model_embedding)
# Load tokenizer and LLM model
def load_tokenizer_and_model():
    """Load the tokenizer, the 4-bit base model and the PEFT adapter.

    The base checkpoint is ``unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit``;
    the adapter is read from ``model_dir`` and loaded as non-trainable.

    Returns:
        ``(tokenizer, model_llm)``.

    Raises:
        Exception: any loading error is printed and re-raised.
    """
    print("Loading tokenizer...")
    try:
        base_model_name = "unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit"

        tok = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
        print("Tokenizer loaded successfully.")

        print("Loading LLM model...")
        # Options for loading the base model with 4-bit quantization.
        base_kwargs = dict(
            trust_remote_code=True,
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            device_map="auto",
        )
        base_model = AutoModelForCausalLM.from_pretrained(base_model_name, **base_kwargs)

        # Attach the PEFT adapter on top of the base model.
        adapted = PeftModel.from_pretrained(
            base_model,
            model_dir,
            device_map="auto",
            is_trainable=False,
        )
        print("LLM model loaded successfully.")
        return tok, adapted
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise


tokenizer, model_llm = load_tokenizer_and_model()
# Configure Cloudinary
def configure_cloudinary():
    """Apply the module-level Cloudinary credentials to the cloudinary SDK."""
    print("Configuring Cloudinary...")
    credentials = {
        "cloud_name": CLOUDINARY_CLOUD_NAME,
        "api_key": CLOUDINARY_API_KEY,
        "api_secret": CLOUDINARY_API_SECRET,
    }
    cloudinary.config(**credentials)
    print("Cloudinary configured successfully.")


configure_cloudinary()
def _safe_int(value, default=0):
    """Convert ``value`` to int via float; return ``default`` for NaN/inf/non-numeric."""
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return default


def _safe_float(value, default=0.0):
    """Convert ``value`` to float; return ``default`` for NaN or non-numeric input."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    # NaN is the only float that differs from itself.
    return default if number != number else number


# Search real estate properties
def search_real_estate(query, retriever, top_k=10, raw_results=False):
    """Retrieve properties for ``query`` and format them for output.

    Args:
        query: free-text search string.
        retriever: object exposing ``retrieve(query, top_k)`` that returns a
            list of dicts with ``property``, ``image_url`` and ``distance``.
        top_k: number of results requested from the retriever.
        raw_results: when true, return the retriever output untouched.

    Returns:
        The raw retriever output, or a list of flat dicts (one per property).
        Numeric fields that are missing, NaN or not parseable fall back to 0
        instead of raising, so one bad CSV cell cannot fail the whole search.
    """
    print(f"Searching real estate properties for query: {query}")
    search_results = retriever.retrieve(query, top_k)
    if raw_results:
        return search_results

    formatted_results = []
    for result in search_results:
        property_info = result['property']

        def text(key):
            return property_info.get(key, 'N/A')

        def whole(key):
            return _safe_int(property_info.get(key, 0))

        def real(key):
            return _safe_float(property_info.get(key, 0))

        formatted_results.append({
            "Property Name": text('PropertyName'),
            "Address": text('Address'),
            "ZipCode": whole('ZipCode'),
            "LeasableSquareFeet": whole('LeasableSquareFeet'),
            "YearBuilt": whole('YearBuilt'),
            "NumberOfRooms": whole('NumberOfRooms'),
            "ParkingSpaces": whole('ParkingSpaces'),
            "PropertyManager": text('PropertyManager'),
            "MarketValue": real('MarketValue'),
            "TaxAssessmentNumber": text('TaxAssessmentNumber'),
            "Latitude": real('Latitude'),
            "Longitude": real('Longitude'),
            "CreateDate": text('CreateDate'),
            "LastModifiedDate": text('LastModifiedDate'),
            "City": text('City'),
            "State": text('State'),
            "Country": text('Country'),
            "PropertyType": text('PropertyType'),
            "PropertyStatus": text('PropertyStatus'),
            "Description": text('Description'),
            "ViewNumber": whole('ViewNumber'),
            "Contact": whole('Contact'),
            "TotalSquareFeet": whole('TotalSquareFeet'),
            "IsDeleted": bool(property_info.get('IsDeleted', False)),
            "Beds": whole('Beds'),
            "Baths": whole('Baths'),
            "AgentName": text('AgentName'),
            "AgentPhoneNumber": text('AgentPhoneNumber'),
            "AgentEmail": text('AgentEmail'),
            "KeyFeatures": text('KeyFeatures'),
            "NearbyAmenities": text('NearbyAmenities'),
            "Property Image": result['image_url'],
            "Distance": result['distance'],
        })
    print(f"Found {len(formatted_results)} matching properties")
    return formatted_results
# Generate response with optimized parameters
def generate_response(query, max_new_tokens=100, temperature=0.7, top_k=30, top_p=0.8, repetition_penalty=1.05):
    """Generate a sampled completion for ``query`` with the module-level LLM.

    The prompt is ``"User: {query}\\nAssistant:"``. Only the newly generated
    tokens are decoded, so the reply no longer depends on the decoded text
    reproducing the prompt character-for-character.

    Args:
        query: user text placed in the prompt.
        max_new_tokens: generation length cap.
        temperature, top_k, top_p, repetition_penalty: sampling controls
            passed to ``model_llm.generate``.

    Returns:
        ``(response_text, seconds)`` on success, or
        ``("An error occurred while generating the response.", None)`` when
        tokenization or generation fails. Callers must handle a ``None``
        duration.
    """
    print(f"\nGenerating response for query: {query}\n")
    # Print parameter settings
    print("Generation Parameters:")
    print(f"- Max New Tokens: {max_new_tokens}")
    print(f"- Temperature: {temperature}")
    print(f"- Top-K Sampling: {top_k}")
    print(f"- Top-P Sampling: {top_p}")
    print(f"- Repetition Penalty: {repetition_penalty}")
    print(f"- Sampling Enabled: True (do_sample=True)\n")

    input_text = f"User: {query}\nAssistant:"
    start_time = time.time()  # Record start time
    try:
        inputs = tokenizer(input_text, return_tensors="pt").to(device)
        prompt_length = inputs["input_ids"].shape[1]

        # Some tokenizers define no pad token; fall back to EOS so generate()
        # does not receive pad_token_id=None.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        # Keyword arguments only: this also works with PEFT wrappers whose
        # generate() does not accept positional inputs.
        outputs = model_llm.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id
        )
        # Drop the prompt tokens and decode only the continuation.
        new_tokens = outputs[0][prompt_length:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        duration = time.time() - start_time
        print(f"\nGenerated Response:\n{response}\n")
        print(f"Time taken to generate response: {duration:.2f} seconds\n")
        return response, duration
    except Exception as e:
        logging.error(f"Error generating response: {e}")
        return "An error occurred while generating the response.", None
# Combined model response with optimized parameters
def combined_model_response(query, retriever, top_k=5, max_new_tokens=512, temperature=0.5, top_k_sampling=30, repetition_penalty=1.0):
    """Answer ``query`` with the LLM, grounded on retrieved property details.

    Args:
        query: user question.
        retriever: retriever passed through to ``search_real_estate``.
        top_k: number of properties to retrieve.
        max_new_tokens, temperature, top_k_sampling, repetition_penalty:
            generation settings forwarded to ``generate_response`` (the
            sampling settings were previously accepted but ignored).

    Returns:
        ``(response_text, seconds)``. When nothing is retrieved the result is
        ``("No relevant properties found.", 0.0)`` so that callers can always
        unpack two values. ``seconds`` is ``None`` if generation failed.
    """
    print(f"Generating combined model response for query: {query}")
    retrieved_results = search_real_estate(query, retriever, top_k, raw_results=True)
    if not retrieved_results:
        # Callers unpack (response, duration); a bare string would raise.
        return "No relevant properties found.", 0.0

    combined_property_details = []
    for i, result in enumerate(retrieved_results, 1):
        property_info = result['property']
        property_details = (
            f"Property {i}:\n"
            f"Property Name: {property_info['PropertyName']}\n"
            f"Address: {property_info['Address']}, {property_info['City']}, {property_info['State']}, {property_info['ZipCode']}, {property_info['Country']}\n"
            f"Leasable Area: {property_info['LeasableSquareFeet']} sqft\n"
            f"Year Built: {property_info['YearBuilt']}\n"
            f"Beds: {property_info['Beds']} Baths: {property_info['Baths']}\n"
            f"Parking Spaces: {property_info['ParkingSpaces']}\n"
            f"Market Value: {property_info['MarketValue']}\n"
            f"Property Type: {property_info['PropertyType']}\n"
            f"Property Status: {property_info['PropertyStatus']}\n"
            f"Description: {property_info['Description']}\n"
            f"Contact: {property_info['Contact']}\n"
            f"Total Square Feet: {property_info['TotalSquareFeet']} sqft\n"
            f"Agent Name: {property_info['AgentName']}\n"
            f"Agent Phone Number: {property_info['AgentPhoneNumber']}\n"
            f"Agent Email: {property_info['AgentEmail']}\n"
            f"Key Features: {property_info['KeyFeatures']}\n"
            f"Nearby Amenities: {property_info['NearbyAmenities']}\n"
            f"Created Date: {property_info['CreateDate']}\n"
            f"Last Modified Date: {property_info['LastModifiedDate']}\n"
        )
        combined_property_details.append(property_details)

    prompt = f"User Query: {query}\nProperty Details:\n" + "\n".join(combined_property_details) + "\nGenerate a concise response based on the user's query and retrieved property details."
    print(f"User Query: {query}")
    response, duration = generate_response(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_k=top_k_sampling,
        repetition_penalty=repetition_penalty,
    )
    print(f"Combined model response: {response}")
    # generate_response reports a None duration on failure; formatting it
    # with :.2f would raise TypeError.
    if duration is not None:
        print(f"Time taken to generate combined model response: {duration:.2f} seconds\n")
    return response, duration
# VAD Audio Class
class VADAudio:
    """Split raw PCM audio into voiced segments using webrtcvad."""

    def __init__(self, aggressiveness=3):
        """Create the detector and record the audio format used for framing.

        Args:
            aggressiveness: value passed to ``webrtcvad.Vad``.
        """
        self.vad = webrtcvad.Vad(aggressiveness)
        self.sample_rate = 16000
        self.frame_duration_ms = 30

    def frame_generator(self, audio, frame_duration_ms, sample_rate, sample_width=2):
        """Yield consecutive fixed-duration frames from raw PCM bytes.

        Args:
            audio: raw PCM bytes.
            frame_duration_ms: frame length in milliseconds.
            sample_rate: samples per second.
            sample_width: bytes per sample (default 2, i.e. 16-bit PCM).

        Yields:
            ``bytes`` slices of exactly one frame each. A trailing partial
            frame is discarded.
        """
        # `audio` is a byte string, so the frame size must be expressed in
        # bytes: samples-per-frame times bytes-per-sample. Counting samples
        # only produced frames of half the requested duration.
        frame_bytes = int(sample_rate * frame_duration_ms / 1000) * sample_width
        if frame_bytes <= 0:
            return
        offset = 0
        # `<=` so that a final frame ending exactly at the buffer end is kept.
        while offset + frame_bytes <= len(audio):
            yield audio[offset:offset + frame_bytes]
            offset += frame_bytes

    def vad_collector(self, audio, sample_rate, frame_duration_ms, padding_duration_ms=300, aggressiveness=3):
        """Yield one ``bytes`` object per detected voiced segment.

        A sliding window of ``padding_duration_ms`` worth of frames decides
        when speech starts (more than 90% voiced frames) and stops (more than
        90% unvoiced frames). Frames collected between those two points,
        including the window that triggered the start, are joined and yielded
        as a single segment. A segment still open when the audio ends is
        yielded as well.

        Args:
            audio: raw 16-bit mono PCM bytes.
            sample_rate: samples per second.
            frame_duration_ms: frame length in milliseconds.
            padding_duration_ms: length of the decision window.
            aggressiveness: value passed to ``webrtcvad.Vad``.
        """
        vad = webrtcvad.Vad(aggressiveness)
        num_padding_frames = max(1, int(padding_duration_ms / frame_duration_ms))
        ring_buffer = collections.deque(maxlen=num_padding_frames)
        triggered = False
        voiced_frames = []
        for frame in self.frame_generator(audio, frame_duration_ms, sample_rate):
            is_speech = vad.is_speech(frame, sample_rate)
            ring_buffer.append((frame, is_speech))
            if not triggered:
                num_voiced = sum(1 for _, speech in ring_buffer if speech)
                if num_voiced > 0.9 * ring_buffer.maxlen:
                    triggered = True
                    # Keep the lead-in frames that triggered detection.
                    voiced_frames.extend(f for f, _ in ring_buffer)
                    ring_buffer.clear()
            else:
                voiced_frames.append(frame)
                num_unvoiced = sum(1 for _, speech in ring_buffer if not speech)
                if num_unvoiced > 0.9 * ring_buffer.maxlen:
                    triggered = False
                    # The ring buffer holds (frame, flag) tuples and its
                    # frames are already in voiced_frames, so join the
                    # collected frames rather than the buffer itself.
                    yield b''.join(voiced_frames)
                    ring_buffer.clear()
                    voiced_frames = []
        if voiced_frames:
            yield b''.join(voiced_frames)
# Transcribe with VAD
def transcribe_with_vad(audio_file):
    """Transcribe the first recognizable voiced chunk of ``audio_file``.

    The audio is converted to mono, ``VADAudio.sample_rate`` Hz, 16-bit PCM
    (the format webrtcvad works on) and passed through
    ``VADAudio.vad_collector``. Each non-empty chunk is sent to Google Speech
    Recognition until one is understood.

    Args:
        audio_file: path or file object accepted by ``AudioSegment.from_file``.

    Returns:
        The recognized text, or ``""`` when no chunk could be transcribed.
    """
    vad_audio = VADAudio()
    audio = AudioSegment.from_file(audio_file)
    # Force 16-bit samples as well: 8/24/32-bit input would otherwise be
    # framed and interpreted with the wrong sample width.
    audio = (
        audio.set_frame_rate(vad_audio.sample_rate)
        .set_channels(1)
        .set_sample_width(2)
    )
    chunks = vad_audio.vad_collector(
        audio.raw_data, vad_audio.sample_rate, vad_audio.frame_duration_ms
    )

    recognizer = sr.Recognizer()  # one recognizer is enough for all chunks
    for chunk in chunks:
        if not chunk:
            continue
        audio_data = sr.AudioData(chunk, vad_audio.sample_rate, audio.sample_width)
        try:
            text = recognizer.recognize_google(audio_data)
            print(f"Transcription: {text}")
            return text
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand the audio")
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
    return ""
# Flask app; templates are looked up under sample_data/templates.
app = Flask(__name__, template_folder="sample_data/templates")

# In-memory state shared by the request handlers, keyed by session id.
conversation_context = {}

# Configure CORS: one policy applied to every path.
_cors_policy = {
    "origins": ["http://localhost:4200", "https://localhost:4200"],
    "methods": ["GET", "POST", "OPTIONS"],
    "allow_headers": ["Content-Type", "X-Session-ID"],
}
CORS(app, resources={r"/*": _cors_policy})
# Registered as a before_request hook: without the decorator the function is
# never invoked by Flask.
@app.before_request
def handle_preflight():
    """Answer CORS preflight (OPTIONS) requests before routing.

    Returns:
        The default OPTIONS response with the allowed headers/methods added,
        or ``None`` for every other method so normal handling continues.
    """
    if request.method == 'OPTIONS':
        response = app.make_default_options_response()
        response.headers.add('Access-Control-Allow-Headers', 'Content-Type, X-Session-ID')
        response.headers.add('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        return response
    return None
# NOTE(review): the route path is inferred (landing page); confirm it.
# This definition also rebinds the module-level name `index`, which earlier
# held the FAISS index. The retriever keeps its own reference, so lookups
# are unaffected, but `index` must not be used for the FAISS object below
# this point.
@app.route('/')
def index():
    """Render the landing page template ``index.html``."""
    print("Rendering index page")
    return render_template('index.html')
# NOTE(review): route path inferred from the handler name; confirm it against
# the front end.
@app.route('/search', methods=['POST'])
def search():
    """Search properties for the query in the JSON request body.

    Request JSON: ``query`` (required), ``session_id`` (optional),
    ``continue`` (optional bool). With ``continue`` set and cached search
    results available for the session, the cached list is returned;
    otherwise a fresh retrieval is performed and cached.

    Returns:
        JSON list of formatted properties, ``400`` for a missing or invalid
        body/query, ``500`` on unexpected errors.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    query = data.get('query')
    session_id = data.get('session_id')
    continue_conversation = data.get('continue', False)
    if not query:
        return jsonify({"error": "Query parameter is missing"}), 400

    try:
        # The shared context may hold values written by other endpoints
        # (a location dict or a response string). Only a list is treated as
        # cached search results.
        stored = conversation_context.get(session_id)
        cached = stored.get('search_results') if isinstance(stored, dict) else stored
        if continue_conversation and isinstance(cached, list):
            formatted_results = cached
        else:
            formatted_results = []
            for result in retriever.retrieve(query):
                property_info = result['property']

                def text(key):
                    return property_info.get(key, 'N/A')

                def whole(key):
                    # Missing/NaN/non-numeric cells fall back to 0 instead of
                    # failing the whole request.
                    try:
                        return int(float(property_info.get(key, 0)))
                    except (TypeError, ValueError, OverflowError):
                        return 0

                def real(key):
                    try:
                        number = float(property_info.get(key, 0))
                    except (TypeError, ValueError):
                        return 0.0
                    return 0.0 if number != number else number

                formatted_results.append({
                    "Property Name": text('PropertyName'),
                    "Address": text('Address'),
                    "ZipCode": whole('ZipCode'),
                    "LeasableSquareFeet": whole('LeasableSquareFeet'),
                    "YearBuilt": whole('YearBuilt'),
                    "NumberOfRooms": whole('NumberOfRooms'),
                    "ParkingSpaces": whole('ParkingSpaces'),
                    "PropertyManager": text('PropertyManager'),
                    "MarketValue": real('MarketValue'),
                    "TaxAssessmentNumber": text('TaxAssessmentNumber'),
                    "City": text('City'),
                    "State": text('State'),
                    "Country": text('Country'),
                    "PropertyType": text('PropertyType'),
                    "PropertyStatus": text('PropertyStatus'),
                    "Description": text('Description'),
                    "ViewNumber": whole('ViewNumber'),
                    "Contact": whole('Contact'),
                    "TotalSquareFeet": whole('TotalSquareFeet'),
                    "IsDeleted": bool(property_info.get('IsDeleted', False)),
                    "Beds": whole('Beds'),
                    "Baths": whole('Baths'),
                    "AgentName": text('AgentName'),
                    "AgentPhoneNumber": text('AgentPhoneNumber'),
                    "AgentEmail": text('AgentEmail'),
                    "KeyFeatures": text('KeyFeatures'),
                    "NearbyAmenities": text('NearbyAmenities'),
                    "Property Image": result['image_url'],
                    "Distance": float(result['distance']),
                })

            # Requests without a session id are not cached; otherwise all
            # anonymous clients would share one entry.
            if session_id is not None:
                if isinstance(stored, dict):
                    # Keep other per-session data (e.g. the stored location).
                    stored['search_results'] = formatted_results
                else:
                    conversation_context[session_id] = formatted_results

        print(f"Returning {len(formatted_results)} search results")
        return jsonify(formatted_results)
    except Exception as e:
        logging.error(f"Error in search endpoint: {str(e)}")
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
# Grammar-correction model, created on first use and then reused. Loading it
# inside the handler re-initialised the model on every request.
_grammar_corrector = None


def _get_grammar_corrector():
    """Return the shared HappyTextToText grammar model, loading it once."""
    global _grammar_corrector
    if _grammar_corrector is None:
        _grammar_corrector = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
    return _grammar_corrector


# NOTE(review): route path inferred from the handler name; confirm it against
# the front end.
@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file and grammar-correct the text.

    Expects a multipart upload under the field ``audio`` with a wav, mp3, ogg
    or webm extension. The audio is converted to mono 16 kHz WAV, sent to
    Google Speech Recognition, and the result is passed through the grammar
    model.

    Returns:
        JSON ``{"transcription", "original"}``; ``400`` for a missing or
        unsupported file or unintelligible audio; ``500`` for recognition
        service or processing errors.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    audio_file = request.files['audio']

    # Ensure the file has an allowed extension
    allowed_extensions = {'wav', 'mp3', 'ogg', 'webm'}
    filename = audio_file.filename or ''
    extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''
    if extension not in allowed_extensions:
        return jsonify({"error": "Invalid audio file format"}), 400

    try:
        # A private temporary directory per request:
        #  * concurrent uploads no longer overwrite one shared file name;
        #  * the upload and the converted WAV have distinct names, so a
        #    ".wav" upload is no longer overwritten and then removed twice
        #    (which made every WAV request fail);
        #  * cleanup happens on every exit path.
        with tempfile.TemporaryDirectory() as temp_dir:
            upload_path = os.path.join(temp_dir, f"upload.{extension}")
            wav_path = os.path.join(temp_dir, "converted.wav")
            audio_file.save(upload_path)

            # Convert audio to mono 16 kHz WAV for speech recognition
            audio = AudioSegment.from_file(upload_path)
            audio = audio.set_channels(1).set_frame_rate(16000)
            # export() returns the open output file; close it right away.
            audio.export(wav_path, format="wav").close()

            recognizer = sr.Recognizer()
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)

        # Perform speech recognition
        text = recognizer.recognize_google(audio_data)

        # Grammar correction
        settings = TTSettings(do_sample=True, top_k=50, temperature=0.7)
        corrected_text = _get_grammar_corrector().generate_text(f"grammar: {text}", args=settings)
        print(f"Original Transcription: {text}")
        print(f"Corrected Transcription: {corrected_text.text}")
        return jsonify({
            "transcription": corrected_text.text,
            "original": text
        })
    except sr.UnknownValueError:
        return jsonify({"error": "Could not understand audio"}), 400
    except sr.RequestError as e:
        return jsonify({"error": f"Google Speech Recognition error: {str(e)}"}), 500
    except Exception as e:
        logging.error(f"Error processing audio: {str(e)}")
        return jsonify({"error": f"Audio processing error: {str(e)}"}), 500
# NOTE(review): route path inferred from the handler name; confirm it against
# the front end.
@app.route('/generate', methods=['POST'])
def generate():
    """Generate an LLM reply for the query in the JSON request body.

    Request JSON: ``query`` (required), ``session_id`` (optional),
    ``continue`` (optional bool). With ``continue`` set and a previous value
    stored for the session, that value is prepended to the prompt.

    Returns:
        JSON ``{"response", "duration"}`` (``duration`` is ``null`` when
        generation failed) or ``400`` when the query is missing.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    query = data.get('query')
    session_id = data.get('session_id')
    continue_conversation = data.get('continue', False)
    if not query:
        return jsonify({"error": "Query parameter is missing"}), 400

    # The session entry may be a dict written by set_location; in that case
    # the previous reply lives under 'last_response'.
    stored = conversation_context.get(session_id)
    previous_results = stored.get('last_response') if isinstance(stored, dict) else stored

    if continue_conversation and previous_results is not None:
        combined_query = f"Based on previous results:{previous_results}New Query: {query}"
        response, duration = generate_response(combined_query)
    else:
        response, duration = generate_response(query)

    # Remember successful replies only, never under the shared `None` key,
    # and without discarding a stored location.
    if session_id is not None and duration is not None:
        if isinstance(stored, dict):
            stored['last_response'] = response
        else:
            conversation_context[session_id] = response

    print(f"Generated response: {response}")
    # generate_response reports duration=None on failure; ":.2f" would raise.
    if duration is not None:
        print(f"Time taken to generate response: {duration:.2f} seconds\n")
    return jsonify({"response": response, "duration": duration})
# NOTE(review): route path inferred from the handler name; confirm it against
# the front end.
@app.route('/recommend', methods=['POST'])
def recommend():
    """Recommend properties, either by proximity or via the RAG pipeline.

    Request JSON: ``query`` (required), ``session_id`` (optional),
    ``continue`` (optional bool).

    * ``"hi"``  - asks whether nearby properties are wanted.
    * ``"yes"`` - returns the 5 properties nearest to the location stored
      for the session (``400`` when no location is stored).
    * anything else - answered by ``combined_model_response``.

    Returns:
        A JSON response; ``400`` when the query or the location is missing.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    query = data.get('query')
    session_id = data.get('session_id')
    continue_conversation = data.get('continue', False)
    if not query:
        return jsonify({"error": "Query parameter is missing"}), 400

    stored = conversation_context.get(session_id)
    normalized_query = str(query).strip().lower()

    if normalized_query == 'hi':
        return jsonify({"response": "Do you want to know the properties located near you? (yes/no):"})

    if normalized_query == 'yes':
        # The session entry can also be a string or list written by other
        # endpoints; `'location' in entry` on those is a substring/membership
        # test, and indexing a string with 'location' raises TypeError.
        location = stored.get('location') if isinstance(stored, dict) else None
        try:
            my_location = (float(location[0]), float(location[1]))
        except (TypeError, ValueError, IndexError):
            return jsonify({"error": "Location not available. Please try again."}), 400

        # Keep only rows with usable coordinates. to_numeric turns anything
        # unparsable into NaN (including text such as "1.2.3") and accepts
        # negative values; between() is False for NaN.
        latitudes = pd.to_numeric(df['Latitude'], errors='coerce')
        longitudes = pd.to_numeric(df['Longitude'], errors='coerce')
        usable = latitudes.between(-90, 90) & longitudes.between(-180, 180)
        valid_properties = df[usable].copy()
        if valid_properties.empty:
            return jsonify({"response": "No valid properties found near your location."})
        valid_properties['Latitude'] = latitudes[usable]
        valid_properties['Longitude'] = longitudes[usable]

        # Calculate distances for valid properties
        valid_properties['Distance'] = [
            geodesic(my_location, (lat, lon)).miles
            for lat, lon in zip(valid_properties['Latitude'], valid_properties['Longitude'])
        ]

        # Get 5 nearest properties
        nearest_properties = valid_properties.nsmallest(5, 'Distance')[[
            'PropertyName', 'Address', 'City', 'Distance',
            'PropertyType', 'AgentPhoneNumber'
        ]]
        # Missing cells become null in the JSON instead of the invalid NaN.
        nearest_properties = nearest_properties.astype(object).where(nearest_properties.notna(), None)
        nearest_properties_list = nearest_properties.to_dict(orient='records')
        if not nearest_properties_list:
            return jsonify({"response": "No valid properties found near your location."})
        return jsonify({
            "response": "Here are the 5 nearest properties to your location:",
            "properties": nearest_properties_list
        })

    previous_results = stored.get('last_response') if isinstance(stored, dict) else stored
    if continue_conversation and previous_results is not None:
        combined_query = f"Based on previous results:{previous_results}New Query: {query}"
        result = combined_model_response(combined_query, retriever)
    else:
        result = combined_model_response(query, retriever)

    # Tolerate a bare string result (no properties retrieved) as well as the
    # usual (response, duration) pair.
    if isinstance(result, tuple):
        response, duration = result
    else:
        response, duration = result, None

    # Never store under the shared `None` key, and keep a stored location.
    if session_id is not None:
        if isinstance(stored, dict):
            stored['last_response'] = response
        else:
            conversation_context[session_id] = response

    print(f"Recommended response: {response}")
    if duration is not None:
        print(f"Time taken to generate recommended response: {duration:.2f} seconds\n")
    return jsonify({"response": response, "duration": duration})
# NOTE(review): route path inferred from the handler name; confirm it against
# the front end.
@app.route('/set_location', methods=['POST'])
def set_location():
    """Store the caller's coordinates for the session and reverse-geocode them.

    Request JSON: ``latitude``, ``longitude`` (required, numeric),
    ``session_id`` (optional).

    Returns:
        JSON with ``message``, ``city``, ``state`` and ``country`` on success;
        ``400`` when the coordinates are missing, not numeric, out of range,
        or cannot be resolved to an address; ``500`` on geocoding errors.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    latitude = data.get('latitude')
    longitude = data.get('longitude')
    session_id = data.get('session_id')
    if latitude is None or longitude is None:
        return jsonify({"error": "Location parameters are missing"}), 400

    # Validate before geocoding or storing: the stored tuple is later used
    # for distance calculations.
    try:
        latitude = float(latitude)
        longitude = float(longitude)
    except (TypeError, ValueError):
        return jsonify({"error": "Location parameters must be numeric"}), 400
    # NaN fails both range comparisons and is rejected here as well.
    if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
        return jsonify({"error": "Location parameters are out of range"}), 400

    try:
        # Initialize the geolocator
        geolocator = Nominatim(user_agent="hive_prop")
        # Get location details from coordinates
        location = geolocator.reverse(f"{latitude}, {longitude}", language='en')
        if not (location and location.raw.get('address')):
            return jsonify({"error": "Could not determine city from coordinates"}), 400

        address = location.raw['address']
        city = address.get('city') or address.get('town') or address.get('suburb') or address.get('county')
        state = address.get('state')
        country = address.get('country')

        # Store location data in conversation context, keeping any other
        # keys already present in a dict entry for this session.
        entry = conversation_context.get(session_id)
        if not isinstance(entry, dict):
            entry = {}
            conversation_context[session_id] = entry
        entry.update({
            'location': (latitude, longitude),
            'city': city,
            'state': state,
            'country': country
        })
        return jsonify({
            "message": "Location set successfully.",
            "city": city,
            "state": state,
            "country": country
        })
    except Exception as e:
        logging.error(f"Error getting location details: {str(e)}")
        return jsonify({"error": f"Error processing location: {str(e)}"}), 500
if __name__ == '__main__':
    # The ngrok tunnel is no longer started; the app listens directly on all
    # interfaces on port 7860 (standard for Spaces).
    app.run(port=7860, host='0.0.0.0')