Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
22ba884
1
Parent(s):
c8ea0d8
Update split_text, >5000 chars OK
Browse files- __pycache__/split_text.cpython-38.pyc +0 -0
- app.py +60 -37
- split_text.py +46 -0
__pycache__/split_text.cpython-38.pyc
ADDED
Binary file (875 Bytes). View file
|
|
app.py
CHANGED
@@ -17,31 +17,32 @@ from gevent import monkey
|
|
17 |
monkey.patch_all()
|
18 |
|
19 |
# """
|
|
|
20 |
# from gevent import monkey
|
21 |
# monkey.patch_all()
|
22 |
|
23 |
-
import nest_asyncio
|
24 |
# nest_asyncio.apply()
|
25 |
|
26 |
import asyncio
|
|
|
27 |
|
28 |
-
from multiprocessing import Process # , freeze_support
|
29 |
import gradio as gr
|
30 |
-
|
31 |
import httpx
|
32 |
-
from logzero import logger
|
33 |
|
34 |
# from deepl_fastapi.run_uvicorn import main
|
35 |
# from deepl_fastapi_pw.run_uvicorn_async import main
|
36 |
from deepl_fastapi_pw.deepl_server_async import main
|
|
|
37 |
|
38 |
# from deepl_scraper_pw.deepl_tr import deepl_tr
|
39 |
-
from
|
40 |
|
41 |
arun = asyncio.get_event_loop().run_until_complete
|
42 |
|
43 |
|
44 |
def deepl(text, from_lang, to_lang):
|
|
|
45 |
try:
|
46 |
text = str(text).strip()
|
47 |
except Exception:
|
@@ -49,39 +50,53 @@ def deepl(text, from_lang, to_lang):
|
|
49 |
if not text:
|
50 |
return "Put something there, man."
|
51 |
|
52 |
-
# _ = """
|
53 |
# "http://127.0.0.1:8000/text/?q=test%20me&to_lang=zh"
|
54 |
-
url = "http://127.0.0.1:8000/text/"
|
55 |
-
url = "http://127.0.0.1:8001/text/"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
"text": text,
|
63 |
-
"from_lang": from_lang,
|
64 |
-
"to_lang": to_lang,
|
65 |
-
},
|
66 |
-
)
|
67 |
-
resp.raise_for_status()
|
68 |
-
except Exception as exc:
|
69 |
-
logger.error(exc)
|
70 |
-
return str(exc)
|
71 |
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
logger.error(exc)
|
83 |
-
res = str(exc)
|
84 |
-
# """
|
85 |
|
86 |
_ = """
|
87 |
try:
|
@@ -92,7 +107,11 @@ def deepl(text, from_lang, to_lang):
|
|
92 |
return str(exc)
|
93 |
# """
|
94 |
|
95 |
-
return res
|
|
|
|
|
|
|
|
|
96 |
|
97 |
|
98 |
if __name__ == "__main__":
|
@@ -102,10 +121,14 @@ if __name__ == "__main__":
|
|
102 |
iface = gr.Interface(
|
103 |
fn=deepl,
|
104 |
inputs=[
|
105 |
-
gr.Textbox(
|
|
|
|
|
|
|
106 |
gr.Textbox(label="from_lang", value="en", lines=1),
|
107 |
gr.Textbox(label="to_lang", value="zh", lines=1),
|
108 |
],
|
109 |
-
outputs="textarea"
|
|
|
110 |
)
|
111 |
iface.launch()
|
|
|
17 |
monkey.patch_all()
|
18 |
|
19 |
# """
|
20 |
+
# pylint: disable=broad-except
|
21 |
# from gevent import monkey
|
22 |
# monkey.patch_all()
|
23 |
|
24 |
+
# import nest_asyncio
|
25 |
# nest_asyncio.apply()
|
26 |
|
27 |
import asyncio
|
28 |
+
from multiprocessing import Process # , freeze_support
|
29 |
|
|
|
30 |
import gradio as gr
|
|
|
31 |
import httpx
|
|
|
32 |
|
33 |
# from deepl_fastapi.run_uvicorn import main
|
34 |
# from deepl_fastapi_pw.run_uvicorn_async import main
|
35 |
from deepl_fastapi_pw.deepl_server_async import main
|
36 |
+
from logzero import logger
|
37 |
|
38 |
# from deepl_scraper_pw.deepl_tr import deepl_tr
|
39 |
+
from split_text import split_text
|
40 |
|
41 |
arun = asyncio.get_event_loop().run_until_complete
|
42 |
|
43 |
|
44 |
def deepl(text, from_lang, to_lang):
|
45 |
+
"""Translate."""
|
46 |
try:
|
47 |
text = str(text).strip()
|
48 |
except Exception:
|
|
|
50 |
if not text:
|
51 |
return "Put something there, man."
|
52 |
|
|
|
53 |
# "http://127.0.0.1:8000/text/?q=test%20me&to_lang=zh"
|
54 |
+
url = "http://127.0.0.1:8000/text/"
|
55 |
+
url = "http://127.0.0.1:8001/text/"
|
56 |
+
|
57 |
+
res_tot = ""
|
58 |
+
text_tot = text[:]
|
59 |
+
slash = "_slash_"
|
60 |
+
text_tot = text_tot.replace("/", slash)
|
61 |
+
|
62 |
+
_ = split_text(text_tot)
|
63 |
+
len_ = len(_)
|
64 |
+
logger.info("Split to %s", len_)
|
65 |
+
for idx, chunk in enumerate(_):
|
66 |
+
logger.info("%s/%s", idx + 1, len_)
|
67 |
+
try:
|
68 |
+
# resp = httpx.get(f"{url}?q={text}&from_lang={from_lang}&to_lang={to_lang}")
|
69 |
+
resp = httpx.post(
|
70 |
+
url,
|
71 |
+
json={
|
72 |
+
"text": chunk,
|
73 |
+
"from_lang": from_lang,
|
74 |
+
"to_lang": to_lang,
|
75 |
+
},
|
76 |
+
timeout=90,
|
77 |
+
)
|
78 |
+
resp.raise_for_status()
|
79 |
+
except Exception as exc:
|
80 |
+
logger.error(exc)
|
81 |
+
return str(exc)
|
82 |
|
83 |
+
try:
|
84 |
+
jdata = resp.json()
|
85 |
+
except Exception as exc:
|
86 |
+
logger.error(exc)
|
87 |
+
return str(exc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
+
try:
|
90 |
+
# res = jdata.get("trtext")
|
91 |
+
res = jdata.get("result")
|
92 |
+
except Exception as exc:
|
93 |
+
logger.error(exc)
|
94 |
+
res = str(exc)
|
95 |
|
96 |
+
if res_tot:
|
97 |
+
res_tot = res_tot + "\n" + res
|
98 |
+
else:
|
99 |
+
res_tot = res
|
|
|
|
|
|
|
100 |
|
101 |
_ = """
|
102 |
try:
|
|
|
107 |
return str(exc)
|
108 |
# """
|
109 |
|
110 |
+
# return res
|
111 |
+
|
112 |
+
res_tot = res_tot.replace(slash, "/")
|
113 |
+
|
114 |
+
return res_tot
|
115 |
|
116 |
|
117 |
if __name__ == "__main__":
|
|
|
121 |
iface = gr.Interface(
|
122 |
fn=deepl,
|
123 |
inputs=[
|
124 |
+
gr.Textbox(
|
125 |
+
placeholder="Paste text here (not limited to 5000 chars)",
|
126 |
+
lines=7,
|
127 |
+
),
|
128 |
gr.Textbox(label="from_lang", value="en", lines=1),
|
129 |
gr.Textbox(label="to_lang", value="zh", lines=1),
|
130 |
],
|
131 |
+
outputs="textarea",
|
132 |
+
allow_flagging="never",
|
133 |
)
|
134 |
iface.launch()
|
split_text.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Split text to limit chars per chunk.
|
2 |
+
|
3 |
+
Converted from splitText.js.
|
4 |
+
"""
|
5 |
+
# pylint: disable=invalid-name, broad-except
|
6 |
+
from typing import Optional
|
7 |
+
|
8 |
+
from logzero import logger
|
9 |
+
|
10 |
+
limit_ = 4900
|
11 |
+
|
12 |
+
|
13 |
+
def split_text(text: str, limit: Optional[int] = None):
    """Split text into newline-terminated chunks of at most ``limit`` chars.

    The text is cut on line boundaries (``str.splitlines``) and consecutive
    lines are packed greedily into the same chunk while they fit. Every line
    placed in a chunk is followed by a single ``"\\n"``, and that newline is
    counted against ``limit``.

    A line that is longer than the limit on its own is cut into fixed-size
    pieces as a last resort, so no chunk exceeds ``limit``. The one exception
    is ``limit == 1``: a piece needs at least one character plus its newline,
    so chunks are then 2 characters long.

    Args:
        text: Text to split. A falsy value is returned unchanged as the
            only element of the list.
        limit: Maximum chunk length. ``None``, a value that cannot be
            converted with ``int()``, or a value below 1 falls back to the
            module-level ``limit_``.

    Returns:
        A list of chunk strings, in the original order.
    """
    if not text:  # handle text=""
        return [text]

    if limit is None:
        limit = limit_
    else:
        try:
            limit = int(limit)
        except Exception as exc:
            logger.error(exc)
            limit = limit_
    if limit < 1:
        # a non-positive limit is meaningless, use the default instead
        limit = limit_

    # Characters of real text a single line may carry; one slot is reserved
    # for the trailing newline (at least 1 so slicing always makes progress).
    room = max(limit - 1, 1)

    chunks = []
    current_chunk = ""
    for paragraph in text.splitlines():
        # Break up a paragraph that could never fit in one chunk.
        # An empty paragraph still contributes its blank line.
        pieces = [
            paragraph[start:start + room]
            for start in range(0, len(paragraph), room)
        ] or [""]
        for piece in pieces:
            line = piece + "\n"
            if current_chunk and len(current_chunk) + len(line) > limit:
                # Save current chunk and start a new one with this line
                chunks.append(current_chunk)
                current_chunk = line
            else:
                # Add line to current chunk
                current_chunk += line
    # Add the last chunk
    chunks.append(current_chunk)

    return chunks
|