ffreemt committed on
Commit
22ba884
·
1 Parent(s): c8ea0d8

Update split_text, >5000 chars OK

Browse files
Files changed (3) hide show
  1. __pycache__/split_text.cpython-38.pyc +0 -0
  2. app.py +60 -37
  3. split_text.py +46 -0
__pycache__/split_text.cpython-38.pyc ADDED
Binary file (875 Bytes). View file
 
app.py CHANGED
@@ -17,31 +17,32 @@ from gevent import monkey
17
  monkey.patch_all()
18
 
19
  # """
 
20
  # from gevent import monkey
21
  # monkey.patch_all()
22
 
23
- import nest_asyncio
24
  # nest_asyncio.apply()
25
 
26
  import asyncio
 
27
 
28
- from multiprocessing import Process # , freeze_support
29
  import gradio as gr
30
-
31
  import httpx
32
- from logzero import logger
33
 
34
  # from deepl_fastapi.run_uvicorn import main
35
  # from deepl_fastapi_pw.run_uvicorn_async import main
36
  from deepl_fastapi_pw.deepl_server_async import main
 
37
 
38
  # from deepl_scraper_pw.deepl_tr import deepl_tr
39
- from deepl_fastapi_pw.deepl_tr import deepl_tr
40
 
41
  arun = asyncio.get_event_loop().run_until_complete
42
 
43
 
44
  def deepl(text, from_lang, to_lang):
 
45
  try:
46
  text = str(text).strip()
47
  except Exception:
@@ -49,39 +50,53 @@ def deepl(text, from_lang, to_lang):
49
  if not text:
50
  return "Put something there, man."
51
 
52
- # _ = """
53
  # "http://127.0.0.1:8000/text/?q=test%20me&to_lang=zh"
54
- url = "http://127.0.0.1:8000/text/" # ?q=test%20me&to_lang=zh"
55
- url = "http://127.0.0.1:8001/text/" # ?q=test%20me&to_lang=zh"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- try:
58
- # resp = httpx.get(f"{url}?q={text}&from_lang={from_lang}&to_lang={to_lang}")
59
- resp = httpx.post(
60
- url,
61
- json={
62
- "text": text,
63
- "from_lang": from_lang,
64
- "to_lang": to_lang,
65
- },
66
- )
67
- resp.raise_for_status()
68
- except Exception as exc:
69
- logger.error(exc)
70
- return str(exc)
71
 
72
- try:
73
- jdata = resp.json()
74
- except Exception as exc:
75
- logger.error(exc)
76
- return str(exc)
 
77
 
78
- try:
79
- # res = jdata.get("trtext")
80
- res = jdata.get("result")
81
- except Exception as exc:
82
- logger.error(exc)
83
- res = str(exc)
84
- # """
85
 
86
  _ = """
87
  try:
@@ -92,7 +107,11 @@ def deepl(text, from_lang, to_lang):
92
  return str(exc)
93
  # """
94
 
95
- return res
 
 
 
 
96
 
97
 
98
  if __name__ == "__main__":
@@ -102,10 +121,14 @@ if __name__ == "__main__":
102
  iface = gr.Interface(
103
  fn=deepl,
104
  inputs=[
105
- gr.Textbox(placeholder="Paste text here (max. 5000 chars)", lines=7,),
 
 
 
106
  gr.Textbox(label="from_lang", value="en", lines=1),
107
  gr.Textbox(label="to_lang", value="zh", lines=1),
108
  ],
109
- outputs="textarea"
 
110
  )
111
  iface.launch()
 
17
  monkey.patch_all()
18
 
19
  # """
20
+ # pylint: disable=broad-except
21
  # from gevent import monkey
22
  # monkey.patch_all()
23
 
24
+ # import nest_asyncio
25
  # nest_asyncio.apply()
26
 
27
  import asyncio
28
+ from multiprocessing import Process # , freeze_support
29
 
 
30
  import gradio as gr
 
31
  import httpx
 
32
 
33
  # from deepl_fastapi.run_uvicorn import main
34
  # from deepl_fastapi_pw.run_uvicorn_async import main
35
  from deepl_fastapi_pw.deepl_server_async import main
36
+ from logzero import logger
37
 
38
  # from deepl_scraper_pw.deepl_tr import deepl_tr
39
+ from split_text import split_text
40
 
41
  arun = asyncio.get_event_loop().run_until_complete
42
 
43
 
44
  def deepl(text, from_lang, to_lang):
45
+ """Translate."""
46
  try:
47
  text = str(text).strip()
48
  except Exception:
 
50
  if not text:
51
  return "Put something there, man."
52
 
 
53
  # "http://127.0.0.1:8000/text/?q=test%20me&to_lang=zh"
54
+ url = "http://127.0.0.1:8000/text/"
55
+ url = "http://127.0.0.1:8001/text/"
56
+
57
+ res_tot = ""
58
+ text_tot = text[:]
59
+ slash = "_slash_"
60
+ text_tot = text_tot.replace("/", slash)
61
+
62
+ _ = split_text(text_tot)
63
+ len_ = len(_)
64
+ logger.info("Split to %s", len_)
65
+ for idx, chunk in enumerate(_):
66
+ logger.info("%s/%s", idx + 1, len_)
67
+ try:
68
+ # resp = httpx.get(f"{url}?q={text}&from_lang={from_lang}&to_lang={to_lang}")
69
+ resp = httpx.post(
70
+ url,
71
+ json={
72
+ "text": chunk,
73
+ "from_lang": from_lang,
74
+ "to_lang": to_lang,
75
+ },
76
+ timeout=90,
77
+ )
78
+ resp.raise_for_status()
79
+ except Exception as exc:
80
+ logger.error(exc)
81
+ return str(exc)
82
 
83
+ try:
84
+ jdata = resp.json()
85
+ except Exception as exc:
86
+ logger.error(exc)
87
+ return str(exc)
 
 
 
 
 
 
 
 
 
88
 
89
+ try:
90
+ # res = jdata.get("trtext")
91
+ res = jdata.get("result")
92
+ except Exception as exc:
93
+ logger.error(exc)
94
+ res = str(exc)
95
 
96
+ if res_tot:
97
+ res_tot = res_tot + "\n" + res
98
+ else:
99
+ res_tot = res
 
 
 
100
 
101
  _ = """
102
  try:
 
107
  return str(exc)
108
  # """
109
 
110
+ # return res
111
+
112
+ res_tot = res_tot.replace(slash, "/")
113
+
114
+ return res_tot
115
 
116
 
117
  if __name__ == "__main__":
 
121
  iface = gr.Interface(
122
  fn=deepl,
123
  inputs=[
124
+ gr.Textbox(
125
+ placeholder="Paste text here (not limited to 5000 chars)",
126
+ lines=7,
127
+ ),
128
  gr.Textbox(label="from_lang", value="en", lines=1),
129
  gr.Textbox(label="to_lang", value="zh", lines=1),
130
  ],
131
+ outputs="textarea",
132
+ allow_flagging="never",
133
  )
134
  iface.launch()
split_text.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Split text to limit chars per chunk.
2
+
3
+ Converted from splitText.js.
4
+ """
5
+ # pylint: disable=invalid-name, broad-except
6
+ from typing import Optional
7
+
8
+ from logzero import logger
9
+
10
+ limit_ = 4900
11
+
12
+
13
def split_text(text: str, limit: Optional[int] = None):
    """Split text into chunks of at most ``limit`` characters each.

    The text is broken into lines with ``str.splitlines`` and consecutive
    lines are packed greedily into chunks. Every line is re-terminated with
    a single ``"\\n"``, and that newline is counted against the limit. A line
    that cannot fit in a chunk on its own is cut into pieces of exactly
    ``limit`` characters; those pieces carry no added newline, so joining
    all chunks with ``""`` gives back the input lines joined by ``"\\n"``
    plus a final ``"\\n"``.

    Args:
        text: The text to split. A falsy value (e.g. ``""``) is returned
            unchanged as the only element of the result.
        limit: Maximum number of characters per chunk. ``None``, a value
            that cannot be converted with ``int()``, or a value below 1
            falls back to the module-level default ``limit_``.

    Returns:
        A list of chunk strings, each no longer than the effective limit.
    """
    if not text:  # handle text=""
        return [text]

    if limit is None:
        limit = limit_
    else:
        try:
            limit = int(limit)
        except (TypeError, ValueError, OverflowError) as exc:
            logger.error(exc)
            limit = limit_
        if limit < 1:
            limit = limit_

    chunks = []
    current_chunk = ""
    for paragraph in text.splitlines():
        # A line that would exceed the limit once its newline is added
        # cannot share a chunk with anything: flush what we have and emit
        # fixed-width slices until the remainder (plus "\n") fits.
        while len(paragraph) >= limit:
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = ""
            chunks.append(paragraph[:limit])
            paragraph = paragraph[limit:]

        line = paragraph + "\n"
        # Count the trailing newline too, so a chunk never exceeds limit.
        if current_chunk and len(current_chunk) + len(line) > limit:
            chunks.append(current_chunk)
            current_chunk = ""
        current_chunk += line

    # The loop always leaves the final line (at least "\n") pending.
    chunks.append(current_chunk)

    return chunks