kevinpro committed (verified)
Commit 075e4d8 · Parent(s): febd975

Update app.py

Files changed (1): app.py (+15, −35)
app.py CHANGED

@@ -1,14 +1,10 @@
 import gradio as gr
-from flores import code_mapping
 from functools import lru_cache
 import openai  # used to call an external API
 import os
 import spaces
 import gradio as gr
-from sacremoses import MosesPunctNormalizer
-from stopes.pipelines.monolingual.utils.sentence_split import get_split_algo
 from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
-from flores import code_mapping
 import platform
 import torch
 import nltk

@@ -51,37 +47,21 @@ def translate(text: str, src_lang: str, tgt_lang: str):
 # Only assign GPU if cache not used
 @spaces.GPU
 def _translate(text: str, src_lang: str, tgt_lang: str):
-    paragraphs = text.split("\n")
-    translated_paragraphs = []
-
-    for paragraph in paragraphs:
-        translated_sentences = []
-        input_tokens = (
-            tokenizer("Translate to Chinese. Direct output translation result without any explaination::\n\n" + paragraph, return_tensors="pt")
-            .input_ids[0]
-            .cpu()
-            .numpy()
-            .tolist()
-        )
-        translated_chunk = model.generate(
-            input_ids=torch.tensor([input_tokens]).to(device),
-            max_length=len(input_tokens) + 1000,
-            num_return_sequences=1,
-        )
-        print(translated_chunk)
-        translated_chunk = tokenizer.batch_decode(
-            translated_chunk[0], skip_special_tokens=True
-        )
-
-        if isinstance(translated_chunk, list):
-            translated_chunk = "".join(translated_chunk)
-        translated_sentences.append(translated_chunk)
-        print("dev: ", translated_chunk)
-        translated_paragraph = " ".join(translated_sentences)
-        translated_paragraphs.append(translated_paragraph)
-
-    return "\n".join(translated_paragraphs)
-
+    input_tokens = (
+        tokenizer(text, return_tensors="pt")
+        .input_ids[0]
+        .cpu()
+        .numpy()
+        .tolist()
+    )
+    translated_chunk = model.generate(
+        input_ids=torch.tensor([input_tokens]).to(device),
+        max_length=len(input_tokens) + 1000,
+        num_return_sequences=1,
+    )
+    full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
+    print(full_output)
+    return full_output
 
 # def _translate(text: str, src_lang: str, tgt_lang: str):
 #     prompt = f"Translate the following text from {src_lang} to {tgt_lang}. Direct output translation result without any explaination:\n\n{text}"
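One detail worth noting about the rewritten body: the removed code decoded with tokenizer.batch_decode(translated_chunk[0], ...), which iterates over the first sequence and decodes each token id on its own, returning a list of per-token strings (hence the later "".join). The new tokenizer.decode(translated_chunk[0], ...) produces a single string directly. A minimal sketch of the difference, not taken from the commit; the checkpoint name "gpt2" is purely illustrative:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # assumption: any HF checkpoint works
ids = tokenizer("hello world", return_tensors="pt").input_ids  # shape (1, seq_len)

# Old path: batch_decode over ids[0] treats every token id as its own
# one-token sequence, so it returns a list of per-token strings...
pieces = tokenizer.batch_decode(ids[0], skip_special_tokens=True)
print(pieces)  # e.g. ['hello', ' world']
# ...which is why the removed code had to re-join them with "".join(...).

# New path: decode the whole id sequence into a single string.
print(tokenizer.decode(ids[0], skip_special_tokens=True))  # 'hello world'
```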
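The hunk header and the "Only assign GPU if cache not used" comment imply that translate() is a cached wrapper around the GPU-backed _translate(), presumably via the lru_cache imported at the top of the file. The wrapper's body is not part of this diff, so the following is a hypothetical reconstruction:

```python
from functools import lru_cache

def _translate(text: str, src_lang: str, tgt_lang: str) -> str:
    ...  # stand-in for the @spaces.GPU function shown in the diff

@lru_cache(maxsize=None)  # assumption: the actual cache size is not visible
def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    # Cache hits return immediately, so @spaces.GPU on _translate only
    # requests a GPU on a cache miss ("Only assign GPU if cache not used").
    return _translate(text, src_lang, tgt_lang)
```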
 
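The commented-out variant at the bottom of the diff builds a prompt for an external API instead, which would explain the openai import. A sketch of what that path might look like; the client setup and the model name are assumptions, not part of the commit (the prompt string, including its "explaination" typo, is copied verbatim from the diff):

```python
import openai

client = openai.OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def _translate_via_api(text: str, src_lang: str, tgt_lang: str) -> str:
    prompt = (
        f"Translate the following text from {src_lang} to {tgt_lang}. "
        f"Direct output translation result without any explaination:\n\n{text}"
    )
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # assumption: any chat-capable model would do
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content.strip()
```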
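Finally, the diff never shows how translate() is exposed in the UI, but given the import gradio as gr at the top, the wiring plausibly resembles the sketch below; every component and label here is an assumption:

```python
import gradio as gr

def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    return text  # stand-in for the cached translate() from app.py

demo = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Textbox(label="Source language"),
        gr.Textbox(label="Target language"),
    ],
    outputs=gr.Textbox(label="Translation"),
)

if __name__ == "__main__":
    demo.launch()
```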