name committed
Commit 6d3e512 · add files
Parent(s): eddec9a

Files changed:
- README.md +7 -4
- app.py +41 -0
- requirements.txt +5 -0
README.md
CHANGED
@@ -1,13 +1,16 @@
 ---
-title:
+title: mt-v1.0
 emoji: 😻
-colorFrom:
+colorFrom: purple
 colorTo: gray
 sdk: gradio
-sdk_version:
+sdk_version: 3.50.2
 app_file: app.py
 pinned: false
-license:
+license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+rebuild
app.py
ADDED
@@ -0,0 +1,41 @@
+import gradio as gr
+from tow.model_byt5.tokenizer import Tokenizer_byt5
+from tow.model_byt5.model import Transformer_byt5
+import json
+import torch
+from huggingface_hub import hf_hub_download
+
+model_weights_path = hf_hub_download(repo_id="df-h/byt5-base-alibi-mt", filename="pytorch_model.bin")
+model_config_path = hf_hub_download(repo_id="df-h/byt5-base-alibi-mt", filename="config.json")
+
+def translate(inputs):
+    with open(model_config_path, 'r') as f:
+        config = json.load(f)
+
+    state_dict = torch.load(model_weights_path, map_location=torch.device('cpu'))
+    model = Transformer_byt5(config=config)
+    model.load_state_dict(state_dict)
+    model = model.eval()
+    tokenizer = Tokenizer_byt5()
+    ids = tokenizer(inputs, max_length=512)
+    len_pad = 512 - len(ids)
+    if len_pad > 0:
+        ids = ids + [0 for x in range(len_pad)]
+    print(ids)
+    inputs = torch.tensor([ids]).to(torch.device('cpu'))
+    outputs = model.generate(inputs, max_length=512)
+    text = tokenizer.ids2text(outputs.tolist()[0])
+    return text
+
+demo = gr.Interface(
+    fn=translate,
+    inputs=[
+        gr.components.Textbox(label="input", value="zh2en:一个描述实际事物的函数,其中的高频信息往往对应着很小的 “振幅”, 否则整个函数会很奇怪是个压扁的 “弹簧” ,不具实际意义。"),
+    ],
+    outputs=["text"],
+    cache_examples=False,
+    title="Translation",
+    description="Support tasks: en2es, en2ja, en2zh, ja2zh, es2zh, es2ja"
+)
+
+demo.launch(debug=True, share=True, server_name="0.0.0.0")
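Note on the handler above: hf_hub_download caches the downloaded files, but translate() still re-reads the config and rebuilds the model and tokenizer on every request. Below is a minimal sketch of an alternative layout, assuming the same tow API and the same df-h/byt5-base-alibi-mt checkpoint, that does the expensive setup once at import time so each Gradio call only tokenizes and generates. It is an illustration, not part of the commit.

import json
import torch
import gradio as gr
from huggingface_hub import hf_hub_download
from tow.model_byt5.tokenizer import Tokenizer_byt5
from tow.model_byt5.model import Transformer_byt5

# Fetch (or reuse the locally cached) weights and config once at startup.
weights_path = hf_hub_download(repo_id="df-h/byt5-base-alibi-mt", filename="pytorch_model.bin")
config_path = hf_hub_download(repo_id="df-h/byt5-base-alibi-mt", filename="config.json")

with open(config_path) as f:
    config = json.load(f)

# Build the model and tokenizer a single time and reuse them for every request.
model = Transformer_byt5(config=config)
model.load_state_dict(torch.load(weights_path, map_location="cpu"))
model.eval()
tokenizer = Tokenizer_byt5()

def translate(text):
    # Byte-level ids, right-padded to the fixed 512-token context used above.
    ids = tokenizer(text, max_length=512)
    ids = ids + [0] * max(0, 512 - len(ids))
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model.generate(torch.tensor([ids]), max_length=512)
    return tokenizer.ids2text(outputs.tolist()[0])

demo = gr.Interface(fn=translate, inputs="text", outputs="text", title="Translation")
demo.launch(server_name="0.0.0.0")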
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+transformers==4.36.2
+sentencepiece==0.1.99
+numpy==1.24.2
+torch==2.0.1
+-e git+https://github.com/repo/tow.git@a21644091cefd744ac1c8a87be4bae1e6a8c1898#egg=tow
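The last requirements line installs the tow package (which provides the Transformer_byt5 and Tokenizer_byt5 imports used in app.py) in editable mode from GitHub, pinned to a specific commit. For a local run outside the Space, the same set should install with a plain pip call, provided git is available for the VCS line:

pip install -r requirements.txt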