Spaces:
Runtime error
Runtime error
File size: 3,180 Bytes
95395b5 3460fd7 cbcbc7e 44e69f7 50e3e8e cbcbc7e 95395b5 cbcbc7e 50e3e8e cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 95395b5 cbcbc7e 50e3e8e cbcbc7e 50e3e8e cbcbc7e 50e3e8e cbcbc7e 50e3e8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from TTS.api import TTS
import os
import subprocess
# Load TTS Model
# Pick GPU when available; Coqui TTS model weights are downloaded on first use.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Single-speaker English Tacotron2 model used to turn the LLM reply into audio.
tts_model = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(device)
# Hugging Face LLM Client (DeepSeek R1 7B)
# NOTE(review): requests go to the HF Inference API; a valid HF token may be
# required in the environment for this model — confirm on deployment.
client = InferenceClient("deepseek-ai/deepseek-r1-7b")
# RVC Model Paths
# Voice-conversion checkpoint + feature index, expected in the working directory.
RVC_MODEL_PATH = "zeldabotw.pth"
RVC_INDEX_PATH = "zeldabotw.index"
# Function to call RVC for voice conversion
def convert_voice(input_wav, output_wav):
    """Convert *input_wav* to the ZeldaBotW voice via the external RVC script.

    Parameters
    ----------
    input_wav : str
        Path to the source (TTS) wav file.
    output_wav : str
        Path where the converted wav should be written.

    Returns
    -------
    str or None
        ``output_wav`` on success, ``None`` if the RVC subprocess failed.

    Raises
    ------
    FileNotFoundError
        If the RVC checkpoint or index file is missing.
    """
    if not os.path.exists(RVC_MODEL_PATH) or not os.path.exists(RVC_INDEX_PATH):
        raise FileNotFoundError("RVC model files not found: Ensure zeldabotw.pth and zeldabotw.index are in the same directory.")
    # Use an argument list with shell=False: the previous f-string + shell=True
    # form was injectable through the wav paths and broke on paths with spaces.
    command = [
        "python", "infer_rvc.py",
        "--input", input_wav,
        "--output", output_wav,
        "--model", RVC_MODEL_PATH,
        "--index", RVC_INDEX_PATH,
        "--pitch_shift", "0",
    ]
    process = subprocess.run(command, capture_output=True, text=True)
    if process.returncode != 0:
        print("RVC conversion failed:", process.stderr)
        return None
    return output_wav
# Chatbot Response + TTS + RVC
def respond(
    message, history, system_message, max_tokens, temperature, top_p
):
    """Stream an LLM reply, then synthesize it to speech and voice-convert it.

    Yields ``(text, tts_path, rvc_path)`` tuples: the audio slots are ``None``
    while text is still streaming; the final yield carries the audio paths.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns; empty strings/None entries are skipped.
    system_message : str
        System prompt prepended to the conversation.
    max_tokens, temperature, top_p
        Sampling parameters forwarded to the chat-completion endpoint.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    response = ""
    # Use a distinct loop variable: the original `for message in ...` clobbered
    # the user-message parameter.
    for chunk in client.chat_completion(
        messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas may carry None/empty content; guard the concatenation
        # (the original crashed with TypeError on a None token).
        if token:
            response += token
            yield response, None, None  # Text first
    # Generate Speech from Text
    tts_audio_path = "tts_output.wav"
    tts_model.tts_to_file(text=response, file_path=tts_audio_path)
    # Convert TTS output to ZeldaBotW voice
    rvc_audio_path = "rvc_output.wav"
    rvc_converted_path = convert_voice(tts_audio_path, rvc_audio_path)
    yield response, tts_audio_path, rvc_converted_path  # Send text, TTS, and RVC output
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## DeepSeek R1 7B Chatbot with ZeldaBotW Voice")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="User Input")
    system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="System Message")
    max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max Tokens")
    temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")
    tts_audio = gr.Audio(type="filepath", label="TTS Output")
    rvc_audio = gr.Audio(type="filepath", label="RVC ZeldaBotW Voice")

    def chat_fn(message, history, system_message, max_tokens, temperature, top_p):
        """Bridge the streaming `respond` generator to the Chatbot component.

        Wraps each streamed text chunk into the [user, assistant] pair list
        that gr.Chatbot expects (the original yielded a bare string).
        """
        history = history or []
        for text, tts_path, rvc_path in respond(
            message, history, system_message, max_tokens, temperature, top_p
        ):
            yield history + [[message, text]], tts_path, rvc_path

    # Pass the controls as inputs: reading `component.value` inside the callback
    # (as the original did) only ever sees the initial values, so the sliders
    # and system-message box had no effect at runtime.
    msg.submit(
        chat_fn,
        inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p],
        outputs=[chatbot, tts_audio, rvc_audio],
    )
demo.launch()
|