Raven-with-Voice-Cloning-1.0

Runtime error

App Files Files Community

Kevin676 committed on Apr 9, 2023

Commit

8c5e652

0 Parent(s):

Duplicate from Kevin676/Raven-with-Voice-Cloning

Browse files

Files changed (6) hide show

.gitattributes +34 -0
20B_tokenizer.json +0 -0
README.md +14 -0
app.py +145 -0
packages.txt +1 -0
requirements.txt +8 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

20B_tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Raven RWKV 7B
+emoji: 🚀
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: 3.23.0
+app_file: app.py
+pinned: false
+license: apache-2.0
+duplicated_from: Kevin676/Raven-with-Voice-Cloning
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import gradio as gr
+import os, gc, torch
+from datetime import datetime
+from huggingface_hub import hf_hub_download
+from pynvml import *
+nvmlInit()  # start NVML so evaluate() can query GPU memory
+gpu_h = nvmlDeviceGetHandleByIndex(0)  # handle for GPU index 0, read by evaluate() when logging VRAM
+ctx_limit = 1024  # evaluate() keeps only the last ctx_limit tokens of the encoded prompt
+import whisper
+model1 = whisper.load_model("small")  # Whisper speech-to-text model that evaluate() uses for transcription
+title1 = "RWKV-4-Raven-7B-v8-Eng-20230408-ctx4096"  # checkpoint name; "<title1>.pth" is the file downloaded below
+os.environ["RWKV_JIT_ON"] = '1'  # NOTE(review): presumably has to be set before rwkv.model is imported below - confirm
+os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
+#from TTS.api import TTS
+#tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
+from rwkv.model import RWKV
+model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename=f"{title1}.pth")  # local path of the checkpoint fetched from the Hub
+model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')  # NOTE(review): strategy looks like int8 weights for the first 8 layers, fp16 for the rest - confirm against the rwkv docs
+from rwkv.utils import PIPELINE, PIPELINE_ARGS
+pipeline = PIPELINE(model, "20B_tokenizer.json")  # wraps the model with the tokenizer file shipped next to app.py
+def generate_prompt(instruction, input=None):
+    if input:
+        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+# Instruction:
+{instruction}
+# Input:
+{input}
+# Response:
+"""
+    else:
+        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+# Instruction:
+{instruction}
+# Response:
+"""
+def evaluate(
+#    instruction,
+    audio,
+#    upload,
+    input=None,
+    token_count=200,
+    temperature=1.0,
+    top_p=0.7,
+    presencePenalty = 0.1,
+    countPenalty = 0.1,
+):
+    res = []
+    # load audio and pad/trim it to fit 30 seconds
+    audio = whisper.load_audio(audio)
+    audio = whisper.pad_or_trim(audio)
+    # make log-Mel spectrogram and move to the same device as the model1
+    mel = whisper.log_mel_spectrogram(audio).to(model1.device)
+    # detect the spoken language
+    _, probs = model1.detect_language(mel)
+    print(f"Detected language: {max(probs, key=probs.get)}")
+    # decode the audio
+    options = whisper.DecodingOptions()
+    result = whisper.decode(model1, mel, options)
+    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
+                     alpha_frequency = countPenalty,
+                     alpha_presence = presencePenalty,
+                     token_ban = [], # ban the generation of some tokens
+                     token_stop = [0]) # stop generation whenever you see any token here
+    instruction = result.text.strip()
+    input = input.strip()
+    ctx = generate_prompt(instruction, input)
+    gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
+    print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
+    all_tokens = []
+    out_last = 0
+    out_str = ''
+    occurrence = {}
+    state = None
+    for i in range(int(token_count)):
+        out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
+        for n in occurrence:
+            out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
+        if token in args.token_stop:
+            break
+        all_tokens += [token]
+        if token not in occurrence:
+            occurrence[token] = 1
+        else:
+            occurrence[token] += 1
+        tmp = pipeline.decode(all_tokens[out_last:])
+        if '\ufffd' not in tmp:
+            out_str += tmp
+            yield out_str.strip()
+            out_last = i + 1
+    gc.collect()
+    torch.cuda.empty_cache()
+    res.append(out_str.strip())
+#    res1 = ' '.join(str(x) for x in res)
+#    tts.tts_to_file(res1, speaker_wav = upload, language="en", file_path="output.wav")
+    return res
+#    return [result.text, res]
+#    yield out_str.strip()
+g = gr.Interface(
+    fn=evaluate,
+    inputs=[
+#        gr.components.Textbox(lines=2, label="Instruction", value="Tell me about ravens."),
+        gr.Audio(source="microphone", label = "请开始对话吧！", type="filepath"),
+#        gr.Audio(source="upload", label = "请上传您喜欢的声音(wav文件)", type="filepath"),
+        gr.components.Textbox(lines=2, label="Input", placeholder="none"),
+        gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
+        gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
+        gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
+        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4),  # presencePenalty
+        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4),  # countPenalty
+    ],
+    outputs=[
+        gr.inputs.Textbox(
+            lines=5,
+            label="Raven Output",
+        ),
+#        gr.Audio(label="Audio with Custom Voice"),
+    ],
+    title="🥳💬💕 - TalktoAI，随时随地，谈天说地！",
+    description="🤖 - 让有人文关怀的AI造福每一个人！AI向善，文明璀璨！TalktoAI - Enable the future！",
+    article = "Powered by the RWKV Language Model"
+)
+g.queue(concurrency_count=1, max_size=10)
+g.launch(show_error=True)

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


+libsndfile1

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch
+ninja
+tokenizers
+rwkv==0.7.3
+pynvml
+huggingface_hub
+gradio>=3.17.1
+git+https://github.com/openai/whisper.git