kevinpro committed on
Commit
a1a08d2
·
verified ·
1 Parent(s): 075e4d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -14
app.py CHANGED
@@ -10,18 +10,11 @@ import torch
10
  import nltk
11
  from functools import lru_cache
12
 
13
-
14
- code_mapping = dict(sorted(code_mapping.items(), key=lambda item: item[0]))
15
- flores_codes = list(code_mapping.keys())
16
- target_languages = flores_codes # 简化列表
17
-
18
  # Assume openai_client is already defined, for example:
19
 
20
  device = "cuda"
21
  MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"
22
 
23
-
24
-
25
  def load_model():
26
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,torch_dtype="bfloat16").to(device)
27
  print(f"Model loaded in {device}")
@@ -30,12 +23,10 @@ def load_model():
30
 
31
  model = load_model()
32
 
33
-
34
  # Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
35
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
36
 
37
 
38
-
39
  @lru_cache(maxsize=100)
40
  def translate(text: str, src_lang: str, tgt_lang: str):
41
  if not src_lang:
@@ -56,7 +47,7 @@ def _translate(text: str, src_lang: str, tgt_lang: str):
56
  )
57
  translated_chunk = model.generate(
58
  input_ids=torch.tensor([input_tokens]).to(device),
59
- max_length=len(input_tokens) + 1000,
60
  num_return_sequences=1,
61
  )
62
  full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
@@ -82,16 +73,13 @@ description = """
82
  <img src="https://github.com/user-attachments/assets/c42e675e-497c-4508-8bb9-093ad4d1f216" alt="UNESCO Meta Hugging Face Banner" style="max-width: 800px; width: 100%; margin: 0 auto;">
83
  <h1 style="color: #0077be; font-size: 3em;">Seed-X, powered by Bytedance</h1>
84
  </div>
85
- We are excited to introduce Seed-X, a powerful series of open-source multilingual translation language models, including an instruction model, a reinforcement learning model, and a reward model. It pushes the boundaries of translation capabilities within 7 billion parameters. We develop Seed-X as an accessible, off-the-shelf tool to support the community in advancing translation research and applications:
86
  """
87
 
88
  examples_inputs = [["Seed-X is indeed a good translation model ","English","Chinese"],]
89
 
90
  with gr.Blocks() as demo:
91
  gr.Markdown(description)
92
- with gr.Row():
93
- src_lang = gr.Dropdown(label="Source Language", choices=flores_codes)
94
- target_lang = gr.Dropdown(label="Target Language", choices=target_languages)
95
  with gr.Row():
96
  input_text = gr.Textbox(label="Input Text", lines=6)
97
  with gr.Row():
 
10
  import nltk
11
  from functools import lru_cache
12
 
 
 
 
 
 
13
  # Assume openai_client is already defined, for example:
14
 
15
  device = "cuda"
16
  MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"
17
 
 
 
18
  def load_model():
19
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,torch_dtype="bfloat16").to(device)
20
  print(f"Model loaded in {device}")
 
23
 
24
  model = load_model()
25
 
 
26
  # Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
27
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
28
 
29
 
 
30
  @lru_cache(maxsize=100)
31
  def translate(text: str, src_lang: str, tgt_lang: str):
32
  if not src_lang:
 
47
  )
48
  translated_chunk = model.generate(
49
  input_ids=torch.tensor([input_tokens]).to(device),
50
+ max_length=len(input_tokens) + 2048,
51
  num_return_sequences=1,
52
  )
53
  full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
 
73
  <img src="https://github.com/user-attachments/assets/c42e675e-497c-4508-8bb9-093ad4d1f216" alt="UNESCO Meta Hugging Face Banner" style="max-width: 800px; width: 100%; margin: 0 auto;">
74
  <h1 style="color: #0077be; font-size: 3em;">Seed-X, powered by Bytedance</h1>
75
  </div>
76
+ Seed-X, a powerful series of open-source multilingual translation language models, including an instruction model, a reinforcement learning model, and a reward model. It pushes the boundaries of translation capabilities within 7 billion parameters. We develop Seed-X as an accessible, off-the-shelf tool to support the community in advancing translation research and applications:
77
  """
78
 
79
  examples_inputs = [["Seed-X is indeed a good translation model ","English","Chinese"],]
80
 
81
  with gr.Blocks() as demo:
82
  gr.Markdown(description)
 
 
 
83
  with gr.Row():
84
  input_text = gr.Textbox(label="Input Text", lines=6)
85
  with gr.Row():