import os
import csv
import tempfile
from typing import Optional
from datetime import datetime

import streamlit as st
import gradio as gr
import numpy as np
from transformers import pipeline
from huggingface_hub import Repository, hf_hub_download, upload_file
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# 🌟 Setup dataset repo 🌟
# Created new dataset as awacke1/MindfulStory.csv
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATA_FILENAME = "MindfulStory.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")

# 😅 Oops! Try downloading the dataset (we hope it works!)
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir="data",
        force_filename=DATA_FILENAME,
    )
except Exception:
    print("😬 File not found, we'll act like it's not a problem...")

# 🌍 Repository setup! Let's clone like pros 👨‍💻
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)

# 🧠 AI Memory: Because forgetting is for humans 🤖
# Appends a row to the local CSV and pushes the change back to the dataset repo.
def AIMemory(name: str, message: str):
    if name and message:
        with open(DATA_FILE, "a") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow({"name": name, "message": message, "time": str(datetime.now())})
        commit_url = repo.push_to_hub()
    return {"name": name, "message": message, "time": str(datetime.now())}

# 🗣️ Set up Speech Recognition
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")

# 🎤 Set up TTS Models. Let's find that sweet robotic voice!
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# 🛠️ Use the ModelManager to download each model and its default vocoder (fancy tech magic here)
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer

# 🧙‍♂️ Transcribe function: Turning audio into text with a sprinkle of magic!
def transcribe(audio):
    text = asr(audio)["text"]
    return text

# 📊 Text classifier (because we love labeling things, right?)
classifier = pipeline("text-classification")

# 🎤 Speech to Text: Give me your voice, I'll give you text!
def speech_to_text(speech):
    text = asr(speech)["text"]
    return text

# 😎 Sentiment Analysis (because even robots care about feelings 💔)
def text_to_sentiment(text):
    sentiment = classifier(text)[0]["label"]
    return sentiment

# 📦 Saving it for later: Store this priceless info!
# NOTE: `db` is assumed to be a Firestore client and `select` a lookup helper,
# both initialized elsewhere; neither is wired into the Gradio UI below.
def upsert(text):
    date_time = str(datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({
        u'firefield': 'Recognize Speech',
        u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
        u'last': text,
        u'born': date_time,
    })
    saved = select('TTS-STT', date_time)
    return saved

# 🔍 Retrieve all records: Gotta catch 'em all!
def selectall(text):
    docs = db.collection('Text2SpeechSentimentSave').stream()
    doclist = ''
    for doc in docs:
        r = f'{doc.id} => {doc.to_dict()}'
        doclist += r
    return doclist

# 🗣️ Text to Speech (because speaking is fun, but robots do it better)
def tts(text: str, model_name: str):
    print(text, model_name)
    synthesizer = MODELS.get(model_name, None)
    if synthesizer is None:
        raise NameError("😬 Oops! Model not found.")
    wavs = synthesizer.tts(text)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name

# 🎛️ Gradio UI with Emoji and Fun Comments 🎉
demo = gr.Blocks()
with demo:
    # 🎤 Microphone input to capture your golden voice 🎤
    audio_file = gr.Audio(source="microphone", type="filepath")
    # 📜 Textbox to display transcribed text 📜
    text = gr.Textbox(label="Speech to Text")
    # 🎙️ Radio input to choose the best Text to Speech model 🎙️
    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
    # 🔊 Audio player to play back the robot's voice 🔊
    audio = gr.Audio(label="Output", interactive=False)

    # 🎉 Buttons for all your needs 🎉
    b1 = gr.Button("🎤 Recognize Speech")
    b5 = gr.Button("🔊 Read It Back Aloud")

    # 🖱️ Click buttons to perform actions! 🖱️
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)

demo.launch(share=True)
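# For reference, the same speech-to-text-to-speech round trip can be driven
# without the UI. This is a minimal sketch using only the functions defined
# above; "sample.wav" is a placeholder path for a short recording you supply
# yourself, and the model name is one of the entries in MODEL_NAMES:
#
#   recognized = speech_to_text("sample.wav")
#   wav_path = tts(recognized, "en/ljspeech/tacotron2-DDC")
#   print(recognized, "->", wav_path)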