Update app.py
app.py CHANGED
@@ -1,14 +1,10 @@
 import gradio as gr
-from flores import code_mapping
 from functools import lru_cache
 import openai  # used to call an external API
 import os
 import spaces
 import gradio as gr
-from sacremoses import MosesPunctNormalizer
-from stopes.pipelines.monolingual.utils.sentence_split import get_split_algo
 from transformers import AutoTokenizer, AutoModel,AutoModelForCausalLM
-from flores import code_mapping
 import platform
 import torch
 import nltk
@@ -51,37 +47,21 @@ def translate(text: str, src_lang: str, tgt_lang: str):
 # Only assign GPU if cache not used
 @spaces.GPU
 def _translate(text: str, src_lang: str, tgt_lang: str):
-    [... old lines 54-68: chunking preamble, not rendered in the page extract ...]
-            num_return_sequences=1,
-        )
-        print(translated_chunk)
-        translated_chunk = tokenizer.batch_decode(
-            translated_chunk[0], skip_special_tokens=True
-        )
-
-        if isinstance(translated_chunk,list):
-            translated_chunk = "".join(translated_chunk)
-        translated_sentences.append(translated_chunk)
-        print("dev: ",translated_chunk)
-        translated_paragraph = " ".join(translated_sentences)
-        translated_paragraphs.append(translated_paragraph)
-
-    return "\n".join(translated_paragraphs)
-
+    input_tokens = (
+        tokenizer(text, return_tensors="pt")
+        .input_ids[0]
+        .cpu()
+        .numpy()
+        .tolist()
+    )
+    translated_chunk = model.generate(
+        input_ids=torch.tensor([input_tokens]).to(device),
+        max_length=len(input_tokens) + 1000,
+        num_return_sequences=1,
+    )
+    full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
+    print(full_output)
+    return full_output
 
 # def _translate(text: str, src_lang: str, tgt_lang: str):
 #     prompt = f"Translate the following text from {src_lang} to {tgt_lang}. Direct output translation result without any explaination:\n\n{text}"
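The commented-out variant at the bottom (lines 66-67 of the new file) prompts an external API instead of running the local model, which is what the `openai` import at the top is for. A hedged sketch of that path with the current openai client; the client setup and model name are assumptions, only the prompt string comes from the file:

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def _translate_via_api(text: str, src_lang: str, tgt_lang: str) -> str:
    # Prompt string taken verbatim from the commented-out code.
    prompt = (
        f"Translate the following text from {src_lang} to {tgt_lang}. "
        f"Direct output translation result without any explaination:\n\n{text}"
    )
    resp = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative; any chat-capable model works
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content.strip()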