import gradio as gr
from functools import lru_cache
import os
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Assumes openai_client is defined for the alternative backend; see the
# commented-out _translate variant below.

device = "cuda"
MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"

print("Start dowload")
def load_model():
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,torch_dtype="bfloat16").to(device)
    print(f"Model loaded in {device}")
    return model
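
# Note: a 7B-parameter model in bfloat16 occupies roughly 14 GB of GPU memory
# (7e9 parameters x 2 bytes), so this load assumes a GPU with at least that
# much free memory.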


model = load_model()
print("Ednd dowload")
# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


@lru_cache(maxsize=100)
def translate(text: str):
    return _translate(text)
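
# Note: lru_cache keys on the exact prompt string, so only byte-identical
# requests are served from cache; any difference in whitespace or wording
# triggers a fresh model call.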

# The GPU is requested only on a cache miss: @lru_cache short-circuits
# translate() before this function ever runs for repeated prompts.
@spaces.GPU
def _translate(text: str):
    # Tokenize on CPU; keeping the ids as a plain list makes the prompt
    # length easy to reuse when bounding generation below.
    input_tokens = (
        tokenizer(text, return_tensors="pt")
        .input_ids[0]
        .cpu()
        .numpy()
        .tolist()
    )
    # Greedy single-sequence generation, allowing up to 2048 new tokens
    # on top of the prompt.
    translated_chunk = model.generate(
        input_ids=torch.tensor([input_tokens]).to(device),
        max_length=len(input_tokens) + 2048,
        num_return_sequences=1,
    )
    full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
    print(full_output)
    return full_output
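
# Note: model.generate() returns the prompt tokens followed by the completion,
# so full_output above still contains the original prompt text. A minimal
# sketch (an assumption about the desired output, not part of the original app)
# for decoding only the newly generated tokens:
#
#     new_tokens = translated_chunk[0][len(input_tokens):]
#     translation = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()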

# An alternative backend via an OpenAI-compatible API (would require `import openai`):
# def _translate(text: str, src_lang: str, tgt_lang: str):
#     prompt = f"Translate the following text from {src_lang} to {tgt_lang}. Directly output the translation result without any explanation:\n\n{text}"
#     key = os.getenv('key')
#     openai_client = openai.OpenAI(base_url="https://ssapi.cppbear.site/v1", api_key=key)
#     response = openai_client.chat.completions.create(
#         model="tbai.xin-dpsk-deepseek-v3",  # e.g. gpt-3.5-turbo or another compatible model
#         messages=[{"role": "user", "content": prompt}],
#         max_tokens=30240,
#         temperature=0.0,
#     )
#     print(response)
#     return response.choices[0].message.content.strip()


description = """
<div style="text-align: center;">
    <img src="https://github.com/user-attachments/assets/c42e675e-497c-4508-8bb9-093ad4d1f216" alt="UNESCO Meta Hugging Face Banner" style="max-width: 800px; width: 100%; margin: 0 auto;">
    <h1 style="color: #0077be; font-size: 3em;">Seed-X, powered by Bytedance</h1>
</div>
Seed-X is a powerful series of open-source multilingual translation language models, comprising an instruction model, a reinforcement-learning model, and a reward model. It pushes the boundaries of translation capability within 7 billion parameters. We developed Seed-X as an accessible, off-the-shelf tool to support the community in advancing translation research and applications.
"""

# Seed-X expects a plain-text instruction followed by a trailing target-language
# tag (here <zh> for Chinese).
examples_inputs = [["<s>Translate the following English sentence into Chinese:\nMay the force be with you <zh>"]]
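
# A hypothetical helper (not used by the app) for composing prompts for other
# language pairs, assuming the same trailing-tag convention as the example above:
#
#     def build_prompt(text: str, src_lang: str, tgt_lang: str, tgt_tag: str) -> str:
#         return f"<s>Translate the following {src_lang} sentence into {tgt_lang}:\n{text} <{tgt_tag}>"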

with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        input_text = gr.Textbox(label="Input Text", lines=6)
    with gr.Row():
        btn = gr.Button("Translate text")
    with gr.Row():
        output = gr.Textbox(label="Output Text", lines=6)
    btn.click(
        translate,
        inputs=[input_text],
        outputs=output,
    )
    examples = gr.Examples(examples=examples_inputs, inputs=[input_text], fn=translate, outputs=output, cache_examples=True)

print("Prepared")
demo.launch()