import time

from speech_recognition import Recognizer, Microphone, UnknownValueError, RequestError
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv

from product_recommender import ProductRecommender
from objection_handler import load_objections, ObjectionHandler
from env_setup import config

# Load environment variables
load_dotenv()

# Hugging Face API setup
huggingface_api_key = config["huggingface_api_key"]
login(token=huggingface_api_key)

# Sentiment analysis model
model_name = "tabularisai/multilingual-sentiment-analysis"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Speech recognition setup
recognizer = Recognizer()


def preprocess_text(text):
    """Preprocess text for better sentiment analysis."""
    return text.strip().lower()


def analyze_sentiment(text):
    """Analyze sentiment of the text using the Hugging Face model."""
    try:
        if not text.strip():
            return "NEUTRAL", 0.0

        processed_text = preprocess_text(text)
        result = sentiment_analyzer(processed_text)[0]
        print(f"Sentiment Analysis Result: {result}")

        # Map the model's five raw labels onto three coarse sentiments
        sentiment_map = {
            'Very Negative': "NEGATIVE",
            'Negative': "NEGATIVE",
            'Neutral': "NEUTRAL",
            'Positive': "POSITIVE",
            'Very Positive': "POSITIVE"
        }
        sentiment = sentiment_map.get(result['label'], "NEUTRAL")
        return sentiment, result['score']
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return "NEUTRAL", 0.5


def transcribe_with_chunks(objections_dict):
    """Transcribe speech in ~3-second chunks, analyzing sentiment and surfacing
    objection responses and product recommendations for each chunk."""
    print("Note: If microphone access fails, please use alternative input.")
    chunks = []
    current_chunk = []
    chunk_start_time = time.time()
    is_listening = False

    try:
        # List available microphones to help diagnose device issues
        available_mics = Microphone.list_microphone_names()
        print(f"Available microphones: {available_mics}")
    except Exception as e:
        print(f"Could not detect microphones: {e}")

    # Resolve data paths from config, falling back to relative defaults
    objection_file_path = config.get("OBJECTION_DATA_PATH", "objections.csv")
    product_file_path = config.get("PRODUCT_DATA_PATH", "recommendations.csv")

    # Initialize handlers with semantic search capabilities
    objection_handler = ObjectionHandler(objection_file_path)
    product_recommender = ProductRecommender(product_file_path)

    # Load the embedding model once (named to avoid shadowing the global
    # sentiment model, which the original code reused `model` for)
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

    try:
        # Try the first 10 device indices until one opens successfully
        mic = None
        for device_index in range(10):
            try:
                mic = Microphone(device_index=device_index)
                print(f"Using microphone at device index {device_index}")
                break
            except Exception:
                continue

        if mic is None:
            print("No microphone available. Please provide text input.")
            return []

        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            print("Microphone calibrated. Please speak.")

            while True:
                print("Listening for speech...")
                try:
                    audio_data = recognizer.listen(source, timeout=5)
                    text = recognizer.recognize_google(audio_data)

                    # Voice commands toggle transcription on and off
                    if "start listening" in text.lower():
                        is_listening = True
                        print("Listening started. Speak into the microphone.")
                        continue
                    elif "stop listening" in text.lower():
                        is_listening = False
                        print("Listening stopped.")
                        # Flush any partial chunk before going idle
                        if current_chunk:
                            chunk_text = " ".join(current_chunk)
                            sentiment, score = analyze_sentiment(chunk_text)
                            chunks.append((chunk_text, sentiment, score))
                            current_chunk = []
                        continue

                    if is_listening and text.strip():
                        print(f"Transcription: {text}")
                        current_chunk.append(text)

                    # Close out the current chunk roughly every 3 seconds
                    if time.time() - chunk_start_time > 3 and current_chunk:
                        chunk_text = " ".join(current_chunk)

                        # Always process sentiment
                        sentiment, score = analyze_sentiment(chunk_text)
                        chunks.append((chunk_text, sentiment, score))

                        # Embed the chunk once and reuse it for both searches
                        query_embedding = embedding_model.encode([chunk_text])

                        # Objection search: respond only when the nearest match
                        # is close enough (distance below the threshold)
                        distances, indices = objection_handler.index.search(query_embedding, 1)
                        if distances[0][0] < 1.5:  # Threshold for similarity
                            responses = objection_handler.handle_objection(chunk_text)
                            if responses:
                                print("\nSuggested Response:")
                                for response in responses:
                                    print(f"→ {response}")

                        # Product search under the same distance threshold
                        distances, indices = product_recommender.index.search(query_embedding, 1)
                        if distances[0][0] < 1.5:  # Threshold for similarity
                            recommendations = product_recommender.get_recommendations(chunk_text)
                            if recommendations:
                                print("\nRecommendations for this response:")
                                for idx, rec in enumerate(recommendations, 1):
                                    print(f"{idx}. {rec}")

                        print("\n")
                        current_chunk = []
                        chunk_start_time = time.time()

                except UnknownValueError:
                    print("Could not understand the audio.")
                except RequestError as e:
                    print(f"Could not request results from Google Speech Recognition service; {e}")

    except KeyboardInterrupt:
        print("\nExiting...")

    return chunks


if __name__ == "__main__":
    objections_file_path = config.get("OBJECTION_DATA_PATH", "objections.csv")
    objections_dict = load_objections(objections_file_path)
    transcribed_chunks = transcribe_with_chunks(objections_dict)
    print("Final transcriptions and sentiments:", transcribed_chunks)
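
# ---------------------------------------------------------------------------
# Example .env file for local runs. This is a minimal sketch: the exact key
# names are assumptions inferred from the config lookups above (env_setup is
# project-local and not shown here), so adjust them to match your setup.
#
#   huggingface_api_key=hf_xxxxxxxxxxxxxxxxxxxxxxxx
#   OBJECTION_DATA_PATH=objections.csv
#   PRODUCT_DATA_PATH=recommendations.csv
# ---------------------------------------------------------------------------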