Spaces:
Runtime error
Runtime error
File size: 5,100 Bytes
8c5e652 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
import os, gc, torch
from datetime import datetime
from huggingface_hub import hf_hub_download
from pynvml import *
# Module-level setup: everything below runs once at import time and loads the
# speech-to-text model, the RWKV language model and its tokenizer pipeline.

# Initialise NVML and keep a handle to GPU index 0; evaluate() uses this
# handle to print VRAM usage for each request.
nvmlInit()
gpu_h = nvmlDeviceGetHandleByIndex(0)
# Maximum number of prompt tokens fed to the RWKV model: evaluate() keeps only
# the last `ctx_limit` tokens of the encoded prompt.
ctx_limit = 1024
import whisper
# Whisper "small" checkpoint, used by evaluate() to transcribe the recording.
model1 = whisper.load_model("small")
# Name of the RWKV checkpoint; also used to build the ".pth" filename
# requested from the Hugging Face Hub below.
title1 = "RWKV-4-Raven-7B-v8-Eng-20230408-ctx4096"
# NOTE(review): these variables are set *before* `rwkv.model` is imported,
# presumably because the package reads them at import time - keep this order.
os.environ["RWKV_JIT_ON"] = '1'
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
#from TTS.api import TTS
#tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
from rwkv.model import RWKV
# Fetch the checkpoint file from the "BlinkDL/rwkv-4-raven" Hub repository;
# hf_hub_download returns the path that is handed to RWKV below.
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename=f"{title1}.pth")
# NOTE(review): the strategy string presumably means "first 8 layers as int8
# quantised fp16 on CUDA, remaining layers plain fp16 on CUDA" - confirm
# against the rwkv package documentation before changing it.
model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
from rwkv.utils import PIPELINE, PIPELINE_ARGS
# Tokenizer/sampling helper built around the model; evaluate() uses its
# encode(), decode() and sample_logits() methods.
pipeline = PIPELINE(model, "20B_tokenizer.json")
def generate_prompt(instruction, input=None):
    """Build the instruction-style prompt that is fed to the language model.

    Args:
        instruction: Text placed under the ``# Instruction:`` heading.
        input: Optional extra context. When truthy it is placed under an
            additional ``# Input:`` heading and the preamble mentions it;
            when falsy (``None`` or ``""``) the section is omitted.

    Returns:
        The prompt string, ending with an empty ``# Response:`` section.
    """
    # Guard clause: no extra context, so use the short two-section template.
    if not input:
        preamble = (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request."
        )
        sections = [preamble, "# Instruction:", f"{instruction}", "# Response:"]
        return "\n".join(sections) + "\n"

    preamble = (
        "Below is an instruction that describes a task, "
        "paired with an input that provides further context. "
        "Write a response that appropriately completes the request."
    )
    sections = [
        preamble,
        "# Instruction:",
        f"{instruction}",
        "# Input:",
        f"{input}",
        "# Response:",
    ]
    return "\n".join(sections) + "\n"
def _transcribe(audio_path):
    """Transcribe an audio file with the module-level Whisper model ``model1``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The decoded text with surrounding whitespace removed.
    """
    # load audio and pad/trim it to fit 30 seconds
    samples = whisper.pad_or_trim(whisper.load_audio(audio_path))
    # make log-Mel spectrogram and move to the same device as the model1
    mel = whisper.log_mel_spectrogram(samples).to(model1.device)
    # detect the spoken language (only logged, not used for decoding)
    _, probs = model1.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")
    # decode the audio
    result = whisper.decode(model1, mel, whisper.DecodingOptions())
    return result.text.strip()


def evaluate(
    audio,
    input=None,
    token_count=200,
    temperature=1.0,
    top_p=0.7,
    presencePenalty = 0.1,
    countPenalty = 0.1,
):
    """Transcribe a spoken instruction and stream the model's answer.

    The recording is transcribed with Whisper, wrapped into a prompt by
    ``generate_prompt`` and fed to the RWKV model, which is sampled one token
    at a time.

    Args:
        audio: Path of the recorded audio file (the UI passes a filepath).
        input: Optional extra context for the prompt; ``None`` is treated
            like an empty string.
        token_count: Maximum number of tokens to generate.
        temperature: Sampling temperature; values below 0.2 are raised to 0.2.
        top_p: Nucleus-sampling threshold passed to the sampler.
        presencePenalty: Flat penalty subtracted from the logit of every
            token that has already been generated.
        countPenalty: Additional penalty per previous occurrence of a token.

    Yields:
        The response generated so far (stripped) each time new, fully
        decodable text is available, and once more after generation ends so
        that consumers always receive the final text, even when no token was
        produced.

    Raises:
        ValueError: If no audio path was supplied.
    """
    if not audio:
        raise ValueError("No audio received: please record a message before submitting.")

    instruction = _transcribe(audio)
    # `input` defaults to None, so normalise it before stripping.
    input = (input or "").strip()
    ctx = generate_prompt(instruction, input)

    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
                         alpha_frequency = countPenalty,
                         alpha_presence = presencePenalty,
                         token_ban = [], # ban the generation of some tokens
                         token_stop = [0]) # stop generation whenever you see any token here

    gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
    print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')

    all_tokens = []
    out_last = 0   # index into all_tokens of the first token not yet emitted
    out_str = ''
    occurrence = {}  # token id -> number of times it has been generated
    state = None
    try:
        # The first forward pass consumes the (truncated) prompt; every later
        # pass consumes only the token sampled in the previous step.
        next_input = pipeline.encode(ctx)[-ctx_limit:]
        for i in range(int(token_count)):
            out, state = model.forward(next_input, state)
            # Repetition penalties on tokens that were already generated.
            for n in occurrence:
                out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
            token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
            if token in args.token_stop:
                break
            all_tokens.append(token)
            occurrence[token] = occurrence.get(token, 0) + 1
            next_input = [token]

            tmp = pipeline.decode(all_tokens[out_last:])
            # Hold back text containing U+FFFD - presumably an incomplete
            # multi-byte character - until later tokens complete it.
            if '\ufffd' not in tmp:
                out_str += tmp
                yield out_str.strip()
                out_last = i + 1
    finally:
        # Release memory even if generation fails or the consumer stops
        # iterating early.
        gc.collect()
        torch.cuda.empty_cache()

    final_text = out_str.strip()
    # A generator's return value is invisible to normal iteration, so the
    # final text must be yielded for the caller to receive it.
    yield final_text
    # Kept only for backward compatibility with callers that read the
    # generator's StopIteration value (e.g. via `yield from`).
    return [final_text]
# Web UI: one microphone recording plus optional text context and sampling
# controls go in, the streamed model response comes out. The order of
# `inputs` must match the positional parameters of evaluate().
g = gr.Interface(
    fn=evaluate,
    inputs=[
        gr.Audio(source="microphone", label = "请开始对话吧!", type="filepath"),  # audio
        gr.components.Textbox(lines=2, label="Input", placeholder="none"),  # input
        gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
        gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
        gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
        gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
    ],
    outputs=[
        # Use the unified components namespace (as the inputs above do)
        # instead of the deprecated `gr.inputs` class for an output widget.
        gr.components.Textbox(
            lines=5,
            label="Raven Output",
        ),
    ],
    title="🥳💬💕 - TalktoAI,随时随地,谈天说地!",
    description="🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
    article = "Powered by the RWKV Language Model"
)
# Queueing is required for generator (streaming) functions; process one
# request at a time and allow at most 10 waiting requests.
g.queue(concurrency_count=1, max_size=10)
g.launch(show_error=True)