Spaces:
Build error
Build error
import html
import os
import random
import subprocess
import tempfile
import wave
from itertools import chain
from typing import Optional

import gradio as gr
import librosa
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from stt import Model
from transformers import AutoConfig, AutoTokenizer, AutoModelWithLMHead
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# ---------------------------------------------------------------------------
# Speech-to-text (STT) configuration -- English only
# ---------------------------------------------------------------------------

# Per-session state holder.
# NOTE(review): gr.Variable looks like the legacy name of gr.State in
# Gradio 3.x -- confirm against the pinned gradio version.
state = gr.Variable()

# Hugging Face Hub repository the STT model and scorer are downloaded from.
REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-stt-models"

my_title = "STT-ChatGPT-TTS with Coqui"
# The link previously read "https://https://coqui.ai/", which is not a valid URL.
my_description = "TODO add description and reference: STT base from mbarnig/lb-de-fr-en-pt-coqui-stt-models - 🐸 [Coqui.ai](https://coqui.ai/)."

STT_LANGUAGES = [
    "English",
]

EXAMPLES = [
    ["examples/english.wav", "English", True, "Linda", "every window and roof which could command a view of the horrible performance was occupied"],
]
def stt_record(audio_record_buffer):
    """Transcribe recorded English speech with the Coqui STT model.

    Args:
        audio_record_buffer: Either a path to an audio file, or a
            ``(sample_rate, samples)`` tuple holding the raw samples.

    Returns:
        The transcript returned by the acoustic model's ``stt`` call.
    """
    target_sample_rate = 16000

    # The English model is loaded inside the function on purpose to keep
    # memory usage low on shared CPU/memory hosts; the first call triggers
    # the download, so it will be slow.
    acoustic_model = Model(hf_hub_download(repo_id=REPO_ID, filename="english/model.tflite"))
    scorer_path = hf_hub_download(repo_id=REPO_ID, filename="english/huge-vocabulary.scorer")

    if isinstance(audio_record_buffer, tuple):
        sample_rate, samples = audio_record_buffer
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Bring integer PCM into the [-1, 1] float range that the
            # resampler and the scaling below work with.
            samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
        else:
            samples = samples.astype(np.float32)
        if samples.ndim > 1:
            # assumes a (samples, channels) layout -- TODO confirm against
            # the gradio Audio component; mixed down to mono.
            samples = samples.mean(axis=1)
    else:
        # librosa decodes to mono float samples and resamples while loading.
        samples, sample_rate = librosa.load(audio_record_buffer, sr=target_sample_rate)

    if sample_rate != target_sample_rate:
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=target_sample_rate)

    # Scale to the 16-bit range *before* casting. Casting [-1, 1] floats
    # straight to int16 truncates almost every sample to 0, i.e. silence.
    pcm16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

    # The external scorer is always used for decoding.
    acoustic_model.enableExternalScorer(scorer_path)
    result = acoustic_model.stt(pcm16)

    print("STT:", result)
    return result
| #emotion_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion") | |
| #emotion_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion") | |
def get_emotion(text):
    """Generate a short label for ``text`` and return its second word.

    Reads the module-level ``tokenizer`` and ``model`` at call time.
    NOTE(review): presumably meant to run against the emotion model whose
    loading is commented out above, not the chatbot model -- confirm before
    re-enabling the commented-out callers.

    Args:
        text: Input sentence; ``'</s>'`` is appended before encoding.

    Returns:
        The second whitespace-separated word of the first decoded output.
    """
    prompt_ids = tokenizer.encode(text + '</s>', return_tensors='pt')
    generated = model.generate(input_ids=prompt_ids, max_length=2)
    decoded = [tokenizer.decode(token_ids) for token_ids in generated]
    first_label = decoded[0]
    words = first_label.split()
    return words[1]
# Chatbot model, tokenizer and configuration, loaded once at import time.
config = AutoConfig.from_pretrained('gorkemgoknar/gpt2chatbotenglish')
model = GPT2LMHeadModel.from_pretrained(
    'gorkemgoknar/gpt2chatbotenglish',
    config=config,
)
tokenizer = GPT2Tokenizer.from_pretrained('gorkemgoknar/gpt2chatbotenglish')
tokenizer.model_max_length = 1024

# Dynamic temperature: the base value plus a random offset drawn once, here.
# See experiment https://www.linkedin.com/pulse/ai-goes-job-interview-g%25C3%25B6rkem-g%25C3%25B6knar
# NOTE(review): get_chat_response passes a fixed temperature to generate(),
# so `temperature` below is not read by it.
base_temperature = 1.2
dynamic_temperature_range = 0.15
rand_range = random.uniform(
    -dynamic_temperature_range,
    dynamic_temperature_range,
)
temperature = base_temperature + rand_range

# Order matters: get_chat_response unpacks the first four entries positionally.
SPECIAL_TOKENS = ["<bos>", "<eos>", "<speaker1>", "<speaker2>", "<pad>"]
| #See document for experiment https://www.linkedin.com/pulse/ai-goes-job-interview-g%C3%B6rkem-g%C3%B6knar/ | |
def get_chat_response(name, history=None, input_txt="Hello , what is your name?", max_new_tokens=50):
    """Generate the character's reply to ``input_txt``.

    The prompt is laid out as ``[bos, persona] [history...] [input]`` where
    every utterance is prefixed with a speaker token. Tags alternate
    backwards from the input, which is always tagged ``<speaker1>``.

    Args:
        name: Character name; becomes the persona "My name is <name>".
        history: Earlier conversation, oldest first. Entries may be single
            utterance strings or ``(user_message, bot_response)`` pairs;
            pairs are flattened so each utterance gets its own speaker tag.
        input_txt: The new user message.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded reply with special tokens removed.
    """
    # One entry per utterance. The speaker alternation below assumes this,
    # but callers store one (user, bot) tuple per exchange.
    utterances = []
    for turn in history or []:
        if isinstance(turn, str):
            utterances.append(turn)
        else:
            utterances.extend(turn)

    personality = "My name is " + name
    bos, _eos, speaker1, speaker2 = tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS[:-1])

    persona_ids = [bos] + tokenizer.encode(personality)
    segments = [tokenizer.encode(utterance) for utterance in utterances]
    segments.append(tokenizer.encode(input_txt))

    # Drop the oldest utterances until prompt + reply fit the model context.
    # The "+ 1" accounts for the speaker token added to each segment.
    max_prompt_len = tokenizer.model_max_length - max_new_tokens
    prompt_len = len(persona_ids) + sum(len(segment) + 1 for segment in segments)
    while len(segments) > 1 and prompt_len > max_prompt_len:
        prompt_len -= len(segments.pop(0)) + 1

    # Tag from the end: input -> speaker1, the one before -> speaker2, ...
    tagged = [
        [speaker1 if distance_from_end % 2 == 0 else speaker2] + segment
        for distance_from_end, segment in enumerate(reversed(segments))
    ]
    tagged.reverse()

    sequence = persona_ids + list(chain.from_iterable(tagged))
    sequence_len = len(sequence)

    # max_length counts the prompt as well, so it is expressed relative to
    # the prompt length; a fixed value leaves no room for a reply once the
    # conversation grows.
    chat_history_ids = model.generate(
        torch.tensor(sequence).unsqueeze(0),
        max_length=sequence_len + max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=3,
        do_sample=True,
        top_k=60,
        top_p=0.8,
        temperature=1.3,
    )

    # Only the tokens after the prompt belong to the reply.
    return tokenizer.decode(chat_history_ids[0][sequence_len:], skip_special_tokens=True)
| ## You can use any of the character names listed below | |
| ''' | |
| | Macleod | Moran | Brenda | Ramirez | Peter Parker | Quentin Beck | Andy | |
| | Red | Norton | Willard | Chief | Chef | Kilgore | Kurtz | Westley | Buttercup | |
| | Vizzini | Fezzik | Inigo | Man In Black | Taylor | Zira | Zaius | Cornelius | |
| | Bud | Lindsey | Hippy | Erin | Ed | George | Donna | Trinity | Agent Smith | |
| | Morpheus | Neo | Tank | Meryl | Truman | Marlon | Christof | Stromboli | Bumstead | |
| | Schreber | Walker | Korben | Cornelius | Loc Rhod | Anakin | Obi-Wan | Palpatine | |
| | Padme | Superman | Luthor | Dude | Walter | Donny | Maude | General | Starkiller | |
| | Indiana | Willie | Short Round | John | Sarah | Terminator | Miller | Sarge | Reiben | |
| | Jackson | Upham | Chuckie | Will | Lambeau | Sean | Skylar | Saavik | Spock | |
| | Kirk | Bones | Khan | Kirk | Spock | Sybok | Scotty | Bourne | Pamela | Abbott | |
| | Nicky | Marshall | Korshunov | Troy | Vig | Archie Gates | Doc | Interrogator | |
| | Ellie | Ted | Peter | Drumlin | Joss | Macready | Childs | Nicholas | Conrad | |
| | Feingold | Christine | Adam | Barbara | Delia | Lydia | Cathy | Charles | Otho | |
| | Schaefer | Han | Luke | Leia | Threepio | Vader | Yoda | Lando | Elaine | Striker | |
| | Dr. Rumack | Kramer | David | Saavik | Kirk | Kruge | Holden | Deckard | Rachael | |
| | Batty | Sebastian | Sam | Frodo | Pippin | Gandalf | Kay | Edwards | Laurel | |
| | Edgar | Zed | Jay | Malloy | Plissken | Steve Rogers | Tony Stark | Scott Lang | |
| | Bruce Banner | Bruce | Edward | Two-Face | Batman | Chase | Alfred | Dick | |
| | Riddler | Din Djarin | Greef Karga | Kuiil | Ig-11 | Cara Dune | Peli Motto | |
| | Toro Calican | Ripley | Meredith | Dickie | Marge | Peter | Lambert | Kane | |
| | Dallas | Ripley | Ash | Parker | Threepio | Luke | Leia | Ben | Han | Common Bob | |
| | Common Alice | Jack | Tyler | Marla | Dana | Stantz | Venkman | Spengler | Louis | |
| | Fry | Johns | Riddick | Kirk | Decker | Spock | "Ilia | Indy | Belloq | Marion | |
| | Brother | Allnut | Rose | Qui-Gon | Jar Jar | |
| ''' | |
MODEL_NAME = "tts_models/multilingual/multi-dataset/your_tts"
# File the `tts` command line tool is told to write its output to.
TTS_OUTPUT_PATH = "tts_output.wav"


def _resolve_history(character, history):
    """Return the session history, starting afresh when the character changes."""
    history = history or {"character": character, "message_history": []}
    if history["character"] != character:
        # Switching character discards the previous conversation.
        history = {"character": character, "message_history": []}
    return history


def _render_chat_html(character, message_history):
    """Render the (user, response) pairs as the chat HTML shown in the UI."""
    speaker = html.escape(str(character))
    parts = ["<div class='chatbot'>"]
    for user_msg, resp_msg in message_history:
        # Escape everything: both the user text and the model output are
        # untrusted and would otherwise be interpreted as markup.
        parts.append(f"<div class='user_msg'>You: {html.escape(str(user_msg))}</div>")
        parts.append(f"<div class='resp_msg'>{speaker}: {html.escape(str(resp_msg))}</div>")
    parts.append("</div>")
    return "".join(parts)


def _synthesize_speech(text, speaker_wav):
    """Run the `tts` CLI on ``text`` in the voice of ``speaker_wav``.

    Returns the output file path, or None when there is no reference voice
    or the command fails.
    """
    if not speaker_wav:
        return None
    # An argument list (no shell) keeps quotes or shell metacharacters in
    # the model output from breaking or hijacking the command.
    command = [
        "tts",
        "--text", str(text),
        "--model_name", MODEL_NAME,
        "--speaker_wav", str(speaker_wav),
        "--language_idx", "en",
        "--out_path", TTS_OUTPUT_PATH,
    ]
    completed = subprocess.run(command, check=False)
    if completed.returncode != 0:
        # Do not hand back a stale file left over from an earlier request.
        print("TTS failed with exit code:", completed.returncode)
        return None
    return TTS_OUTPUT_PATH


def greet(character, your_voice, message, history):
    """Answer a typed message and speak the reply in the user's voice.

    Args:
        character: Name of the character to chat with.
        your_voice: Path to the reference voice recording, or None.
        message: The user's text message.
        history: Session state dict with "character" and "message_history"
            keys, or a falsy value on the first call.

    Returns:
        ``(chat_html, history, audio_path)``; ``audio_path`` is None when no
        speech could be produced.
    """
    history = _resolve_history(character, history)

    response = get_chat_response(character, history=history["message_history"], input_txt=message)
    audio_path = _synthesize_speech(response, your_voice)

    history["message_history"].append((message, response))
    return _render_chat_html(character, history["message_history"]), history, audio_path


def greet_stt_to_tts(character, your_voice, history):
    """Transcribe the recording, answer it, and speak the reply.

    Args:
        character: Name of the character to chat with.
        your_voice: Path to the recording; used both as the message (via
            speech-to-text) and as the reference voice.
        history: Session state dict, or a falsy value on the first call.

    Returns:
        ``(chat_html, history, audio_path)``; ``audio_path`` is None when no
        speech could be produced.
    """
    history = _resolve_history(character, history)

    if not your_voice:
        # Nothing was recorded: leave the conversation unchanged.
        return _render_chat_html(character, history["message_history"]), history, None

    # speech -> text
    message = stt_record(your_voice)

    response = get_chat_response(character, history=history["message_history"], input_txt=message)
    print("Response:", response)
    if isinstance(response, tuple):
        # only keep the first element
        response = response[0]
        print("Response only first:", response)

    audio_path = _synthesize_speech(response, your_voice)

    history["message_history"].append((message, response))
    return _render_chat_html(character, history["message_history"]), history, audio_path


def greet_textonly(character, message, history):
    """Answer a typed message without producing audio.

    Args:
        character: Name of the character to chat with.
        message: The user's text message.
        history: Session state dict, or a falsy value on the first call.

    Returns:
        ``(chat_html, history)``.
    """
    history = _resolve_history(character, history)

    response = get_chat_response(character, history=history["message_history"], input_txt=message)

    history["message_history"].append((message, response))
    return _render_chat_html(character, history["message_history"]), history
# The container selector must match the class used in the rendered chat
# markup (<div class='chatbot'>); otherwise the flex layout and the
# align-self rules below never take effect.
css = """
.chatbot {display:flex;flex-direction:column}
.user_msg, .resp_msg {padding:4px;margin-bottom:4px;border-radius:4px;width:80%}
.user_msg {background-color:cornflowerblue;color:white;align-self:start}
.resp_msg {background-color:lightgray;align-self:self-end}
"""

# Some selected characters offered in the demo dropdowns. (An earlier,
# shorter list assigned to the same name was immediately overwritten.)
personality_choices = ["Gandalf", "Riddick", "Macleod", "Morpheus", "Neo", "Spock", "Vader", "Indy", "Ig-11", "Threepio", "Tony Stark", "Batman", "Vizzini"]

title = "Movie Chatbot with Coqui YourTTS"
description = "Chat with your favorite movie characters, making characters voice like you. See Coqui Space for more TTS models https://huggingface.co/spaces/coqui/CoquiTTS"
# The link previously read "https://https://coqui.ai/", which is not a valid URL.
article = "STT base model from mbarnig/lb-de-fr-en-pt-coqui-stt-models - 🐸 [Coqui.ai](https://coqui.ai/)"

# For a movie and character dropdown chat interface see metayazar.com/chatbot

# One example row: character, voice sample, message, initial state.
examples = [['Gandalf', 'dragon.wav', 'Who are you sir?', {}]]

# Module-level default history (per-session history travels via the "state"
# inputs/outputs of the interfaces).
history = {"character": "None", "message_history": []}
# Keyword arguments shared by every tab.
_shared_ui_kwargs = {"css": css, "description": description, "article": article}

# Speech in, speech out: the recording is both the message and the voice.
interface_full = gr.Interface(
    fn=greet_stt_to_tts,
    inputs=[
        gr.Dropdown(personality_choices),
        gr.Audio(source="microphone", type="filepath", label="Record Audio"),
        "state",
    ],
    outputs=["html", "state", gr.Audio(type="filepath")],
    title="Chat with Your Voice",
    live=False,
    **_shared_ui_kwargs,
)

# Typed message, reply spoken in the voice recorded from the microphone.
interface_mic = gr.Interface(
    fn=greet,
    inputs=[
        gr.Dropdown(personality_choices),
        gr.Audio(source="microphone", type="filepath"),
        "text",
        "state",
    ],
    outputs=["html", "state", gr.Audio(type="filepath")],
    title="Chat with Your Voice",
    **_shared_ui_kwargs,
)

# Typed message, text reply only.
interface_text = gr.Interface(
    fn=greet_textonly,
    inputs=[
        gr.Dropdown(personality_choices),
        "text",
        "state",
    ],
    outputs=["html", "state"],
    title="Chat Text Only",
    **_shared_ui_kwargs,
)

# Typed message, reply spoken in the voice of an uploaded audio file.
interface_file = gr.Interface(
    fn=greet,
    inputs=[
        gr.Dropdown(personality_choices),
        gr.Audio(type="filepath"),
        "text",
        "state",
    ],
    outputs=["html", "state", gr.Audio(type="filepath")],
    title="Chat with Uploaded file",
    **_shared_ui_kwargs,
)

# Tab order and tab titles are kept side by side.
_tabs = [interface_mic, interface_full, interface_file, interface_text]
_tab_titles = ["Chat with Mic Record", "Chat Speech -> Speech", "Chat with Audio Upload", "Chat Text only"]
appinterface = gr.TabbedInterface(_tabs, _tab_titles)
appinterface.launch()