# NOTE(review): the original upload contained Hugging Face Spaces page residue
# here ("Runtime error" banners, file-size line, commit hashes, line-number
# gutter). It was replaced with this comment header so the file parses as Python.
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from TTS.api import TTS
import os
import subprocess
# Force CPU-only execution throughout the pipeline (no CUDA assumed on the host).
device = "cpu"
# Load the Tacotron2-DDC English TTS model; gpu=False keeps inference on CPU.
tts_model = TTS("tts_models/en/ljspeech/tacotron2-DDC", gpu=False) # ✅ Ensures CPU-only execution
# Hosted LLM accessed through the Hugging Face Inference API (DeepSeek R1 7B).
# NOTE(review): confirm this repo id exists on the Hub — the official distill
# checkpoints use different naming; a wrong id fails at request time, not here.
client = InferenceClient("deepseek-ai/deepseek-r1-7b")
# Paths to the RVC voice-conversion model weights/index, expected alongside
# this script (checked in convert_voice before each conversion).
RVC_MODEL_PATH = "zeldabotw.pth"
RVC_INDEX_PATH = "zeldabotw.index"
# Function to call RVC for voice conversion (CPU Mode)
def convert_voice(input_wav, output_wav):
    """Convert *input_wav* to the ZeldaBotW voice via the external RVC script.

    Args:
        input_wav: Path of the source (TTS) wav file.
        output_wav: Path where the converted wav should be written.

    Returns:
        *output_wav* on success, or None when the conversion subprocess fails
        (its stderr is printed for diagnosis).

    Raises:
        FileNotFoundError: If the RVC model/index files are not present.
    """
    if not os.path.exists(RVC_MODEL_PATH) or not os.path.exists(RVC_INDEX_PATH):
        raise FileNotFoundError("RVC model files not found! Ensure zeldabotw.pth and zeldabotw.index are in the same directory.")
    # Use an argv list with shell=False: immune to shell injection and to
    # paths containing spaces/metacharacters, unlike the original f-string
    # command line run through the shell.
    command = [
        "python", "infer_rvc.py",
        "--input", input_wav,
        "--output", output_wav,
        "--model", RVC_MODEL_PATH,
        "--index", RVC_INDEX_PATH,
        "--pitch_shift", "0",
        "--device", "cpu",
    ]
    process = subprocess.run(command, capture_output=True, text=True)
    if process.returncode != 0:
        print("RVC conversion failed:", process.stderr)
        return None
    return output_wav
# Chatbot Response + TTS + RVC
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate an LLM reply, synthesize it to speech, and convert the voice.

    Args:
        message: Latest user message.
        history: Prior turns as (user, assistant) pairs; either side may be falsy.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum tokens for the LLM completion.
        temperature: Sampling temperature forwarded to the LLM.
        top_p: Nucleus-sampling parameter forwarded to the LLM.

    Returns:
        Tuple of (response_text, tts_wav_path, rvc_wav_path_or_None).
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Get the LLM response. With stream=False, chat_completion returns a single
    # ChatCompletionOutput object — the original code iterated over it (also
    # shadowing the `message` parameter), which walks the object's fields
    # instead of reading the reply.
    completion = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=False,
        temperature=temperature,
        top_p=top_p,
    )
    response = completion.choices[0].message.content

    # Generate speech from the reply text (CPU mode).
    tts_audio_path = "tts_output.wav"
    tts_model.tts_to_file(text=response, file_path=tts_audio_path)

    # Convert the TTS output to the ZeldaBotW voice; None if conversion failed.
    rvc_audio_path = "rvc_output.wav"
    rvc_converted_path = convert_voice(tts_audio_path, rvc_audio_path)

    return response, tts_audio_path, rvc_converted_path
# Gradio UI
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## DeepSeek R1 7B Chatbot with ZeldaBotW Voice (CPU Mode)")
    chatbot = gr.Chatbot(type="messages")  # history as {"role", "content"} dicts
    msg = gr.Textbox(label="User Input")
    system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="System Message")
    max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max Tokens")
    temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")
    tts_audio = gr.Audio(type="filepath", label="TTS Output")
    rvc_audio = gr.Audio(type="filepath", label="RVC ZeldaBotW Voice")

    def chat_fn(message, history, system_message, max_tokens, temperature, top_p):
        """Run the chat/TTS/RVC pipeline and append the turn to the chat history.

        Components are passed in as event inputs (not read via .value, which
        is only the static initial value and never reflects user edits).
        """
        # respond() consumes history as (user, assistant) pairs; convert from
        # the messages-format list supplied by the Chatbot component.
        pair_history = []
        for m in history or []:
            if m["role"] == "user":
                pair_history.append((m["content"], None))
            elif pair_history:
                pair_history[-1] = (pair_history[-1][0], m["content"])
        response, tts_path, rvc_path = respond(
            message, pair_history, system_message, max_tokens, temperature, top_p
        )
        # A type="messages" Chatbot expects the full updated message list,
        # not a bare reply string.
        new_history = (history or []) + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": response},
        ]
        return new_history, tts_path, rvc_path

    msg.submit(
        chat_fn,
        inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p],
        outputs=[chatbot, tts_audio, rvc_audio],
    )

demo.launch()