Spaces:
Sleeping
Sleeping
File size: 4,748 Bytes
38742d7 6634f63 1978c10 53c0aa9 3de2dd6 734f1a7 3de2dd6 61e0458 38742d7 1978c10 3f23d73 1978c10 113c3ed cfee1b3 d447070 61a28dc d447070 dd32415 d447070 a50a704 6634f63 5e1003d a50a704 d447070 51be568 54e9d7b 51be568 31a4eef 51be568 54e9d7b 51be568 ccdba66 51be568 d447070 3307c6a 29ba4e2 e50fa51 38742d7 e73c7fc 6cbc38e e50fa51 e73c7fc 6cbc38e e73c7fc 38742d7 6cbc38e 6634f63 38742d7 74a629f 38742d7 6634f63 1978c10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import gradio as gr
from flores import code_mapping
from functools import lru_cache
import openai # 用于调用外部API
import os
import spaces
import gradio as gr
from sacremoses import MosesPunctNormalizer
from stopes.pipelines.monolingual.utils.sentence_split import get_split_algo
from transformers import AutoTokenizer, AutoModel,AutoModelForCausalLM
from flores import code_mapping
import platform
import torch
import nltk
from functools import lru_cache
# Rebuild the imported language table with its entries ordered by key, so that
# every list derived from it is sorted as well.
code_mapping = {key: code_mapping[key] for key in sorted(code_mapping)}
flores_codes = list(code_mapping)
# Simplified list: the target choices are the very same list as the source choices.
target_languages = flores_codes
# Device that the model weights and the input tensors are moved to.
device = "cuda"
# Hugging Face Hub identifier of the checkpoint used by this app.
MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"
def load_model():
    """Load the ``MODEL_NAME`` causal language model and place it on ``device``.

    The weights are requested in bfloat16. A short confirmation line is
    printed once the model has been moved.

    Returns:
        The loaded model, already moved to ``device``.
    """
    loaded = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="bfloat16")
    loaded = loaded.to(device)
    print(f"Model loaded in {device}")
    return loaded
# Load the model once at import time; _translate() reads this module-level instance.
model = load_model()
# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@lru_cache(maxsize=100)
def translate(text: str, src_lang: str, tgt_lang: str):
    """Validate a translation request and return the translated text.

    Results are memoised per ``(text, src_lang, tgt_lang)`` (up to 100
    entries), so a repeated request is answered from the cache without
    calling ``_translate`` again. Raised errors are not cached.

    Args:
        text: Text to translate; paragraphs are separated by newlines.
        src_lang: Source language selected in the UI.
        tgt_lang: Target language selected in the UI.

    Returns:
        The translated text, or an empty string when ``text`` is empty or
        contains only whitespace.

    Raises:
        gr.Error: If ``src_lang`` or ``tgt_lang`` is empty.
    """
    if not src_lang:
        raise gr.Error("The source language is empty! Please choose it in the dropdown list.")
    if not tgt_lang:
        raise gr.Error("The target language is empty! Please choose it in the dropdown list.")
    # Nothing to translate: answer immediately instead of invoking the GPU worker.
    if not text or not text.strip():
        return ""
    return _translate(text, src_lang, tgt_lang)
# Only assign GPU if cache not used: translate() is the cached entry point, so
# this GPU-decorated worker runs only on a cache miss.
@spaces.GPU
def _translate(text: str, src_lang: str, tgt_lang: str):
    """Translate ``text`` paragraph by paragraph with the loaded model.

    The text is split on newlines. Each non-blank paragraph is appended to an
    instruction prompt naming ``tgt_lang``, and the model's continuation is
    decoded as that paragraph's translation. Blank lines are passed through
    untouched so the paragraph layout of the input is preserved.

    Args:
        text: Text to translate; paragraphs are separated by ``"\\n"``.
        src_lang: Source language. Accepted for interface compatibility; the
            prompt does not mention it.
        tgt_lang: Target language name inserted into the prompt.

    Returns:
        The translated paragraphs joined with ``"\\n"``.
    """
    # The wording is kept exactly as before; only the target language is now
    # taken from the caller instead of being fixed to Chinese.
    instruction = (
        f"Translate to {tgt_lang}. Direct output translation result "
        "without any explaination::\n\n"
    )
    translated_paragraphs = []
    for paragraph in text.split("\n"):
        if not paragraph.strip():
            # Keep blank lines as they are rather than asking the model to
            # translate an empty paragraph.
            translated_paragraphs.append(paragraph)
            continue
        input_ids = tokenizer(instruction + paragraph, return_tensors="pt").input_ids.to(device)
        generated = model.generate(
            input_ids=input_ids,
            max_new_tokens=1000,
            num_return_sequences=1,
        )
        # The generated sequence starts with the prompt tokens; drop them so
        # only the newly generated continuation is decoded.
        continuation = generated[0][input_ids.shape[-1]:]
        # decode() turns one id sequence into one string. batch_decode() on a
        # 1-D tensor would yield one string per token instead.
        translated_paragraphs.append(
            tokenizer.decode(continuation, skip_special_tokens=True).strip()
        )
    return "\n".join(translated_paragraphs)
# def _translate(text: str, src_lang: str, tgt_lang: str):
# prompt = f"Translate the following text from {src_lang} to {tgt_lang}. Direct output translation result without any explaination:\n\n{text}"
# key=os.getenv('key')
# openai_client = openai.OpenAI(base_url="https://ssapi.cppbear.site/v1", api_key=key)
# response = openai_client.chat.completions.create(
# model="tbai.xin-dpsk-deepseek-v3", # e.g. gpt-3.5-turbo or another compatible model
# messages=[{"role": "user", "content": prompt}],
# max_tokens=30240,
# temperature=0.0
# )
# print(response)
# return response.choices[0].message.content.strip()
# Markdown/HTML banner and introduction text, rendered with gr.Markdown at the top of the page.
description = """
<div style="text-align: center;">
<img src="https://github.com/user-attachments/assets/c42e675e-497c-4508-8bb9-093ad4d1f216" alt="UNESCO Meta Hugging Face Banner" style="max-width: 800px; width: 100%; margin: 0 auto;">
<h1 style="color: #0077be; font-size: 3em;">Seed-X, powered by Bytedance</h1>
</div>
We are excited to introduce Seed-X, a powerful series of open-source multilingual translation language models, including an instruction model, a reinforcement learning model, and a reward model. It pushes the boundaries of translation capabilities within 7 billion parameters. We develop Seed-X as an accessible, off-the-shelf tool to support the community in advancing translation research and applications:
"""
# Example rows for gr.Examples; each row is [input text, source language, target language].
# NOTE(review): the language names presumably have to match entries of flores_codes - confirm.
examples_inputs = [["Seed-X is indeed a good translation model ","English","Chinese"],]
# Assemble the demo page: banner, language selectors, input box, button, output box.
with gr.Blocks() as demo:
    gr.Markdown(description)

    # Source and target language selectors share one row.
    with gr.Row():
        src_lang = gr.Dropdown(choices=flores_codes, label="Source Language")
        target_lang = gr.Dropdown(choices=target_languages, label="Target Language")

    with gr.Row():
        input_text = gr.Textbox(lines=6, label="Input Text")

    with gr.Row():
        btn = gr.Button("Translate text")

    with gr.Row():
        output = gr.Textbox(lines=6, label="Output Text")

    # Clicking the button sends (text, source, target) to translate() and
    # writes the result into the output box.
    btn.click(translate, inputs=[input_text, src_lang, target_lang], outputs=output)

    # Clickable examples wired to the same function, with example caching enabled.
    examples = gr.Examples(
        examples=examples_inputs,
        inputs=[input_text, src_lang, target_lang],
        fn=translate,
        outputs=output,
        cache_examples=True,
    )

demo.launch()
|