freemt committed on
Commit
f50da55
·
2 Parent(s): 2575636 1b2837a

Resolve pull merge conflict

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. requirements.txt +2 -1
  3. ubee/__main__.py +117 -76
  4. ubee/uclas.py +7 -2
README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 🏢
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
- app_file: app.py
 
8
  pinned: false
9
  license: mit
10
  ---
 
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 3.0.22
8
+ app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio
2
  install
3
  transformers
4
  sentencepiece
@@ -6,6 +6,7 @@ sklearn
6
  git+https://github.com/ffreemt/align-model-pool
7
  sentence-transformers
8
  logzero
 
9
  icecream
10
  alive-progress
11
  more_itertools
 
1
+ # gradio
2
  install
3
  transformers
4
  sentencepiece
 
6
  git+https://github.com/ffreemt/align-model-pool
7
  sentence-transformers
8
  logzero
9
+ set_loglevel
10
  icecream
11
  alive-progress
12
  more_itertools
ubee/__main__.py CHANGED
@@ -1,4 +1,21 @@
1
- """Gen ubee main."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  # pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except, line-too-long
3
 
4
  import sys
@@ -14,6 +31,8 @@ import pandas as pd
14
  from icecream import ic
15
  from icecream import install as ic_install
16
  from logzero import logger
 
 
17
 
18
  # for embeddable python
19
  # if "." not in sys.path: sys.path.insert(0, ".")
@@ -22,6 +41,8 @@ from ubee import __version__
22
  from ubee.ubee import ubee
23
 
24
  # logzero.loglevel(10)
 
 
25
  ic_install()
26
  ic.configureOutput(
27
  includeContext=True,
@@ -30,17 +51,44 @@ ic.configureOutput(
30
  ic.enable()
31
  # ic.disenable() # to turn off
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- def greet1(name):
35
- """Dummy."""
36
- return "Hello " + name + "!!"
37
 
38
 
 
39
  def ifn(
40
  text1,
41
  text2,
42
- # segment: str
43
- thresh: float
44
  ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
45
  """Take inputs, return outputs.
46
 
@@ -60,6 +108,7 @@ def ifn(
60
  # return _
61
 
62
  res1_, res2_ = ubee(res1, res2, thresh)
 
63
 
64
  out_df = pd.DataFrame(
65
  zip_longest(res1, res2),
@@ -72,11 +121,17 @@ def ifn(
72
  _ = None
73
 
74
  # return out_df, pd.DataFrame(res1_, columns=["text1", "text2", "likelihood"]), _
 
75
  return pd.DataFrame(res1_, columns=["text1", "text2", "likelihood"]).to_html(), _.to_html()
 
 
 
76
 
77
 
78
  def main():
79
  """Create main entry."""
 
 
80
  text_zh = Path("data/test_zh.txt").read_text(encoding="utf8")
81
  text_zh = [elm.strip() for elm in text_zh.splitlines() if elm.strip()][:10]
82
  text_zh = "\n\n".join(text_zh)
@@ -136,73 +191,6 @@ def main():
136
  lines = 15
137
  placeholder = "Type or paste text here"
138
 
139
- inputs = [
140
- gr.inputs.Textbox(
141
- lines=lines, placeholder=placeholder, default=ex2_zh, label="text1"
142
- ),
143
- gr.inputs.Textbox(
144
- lines=lines, placeholder=placeholder, default=ex2_en, label="text2"
145
- ),
146
- gr.inputs.Slider(
147
- minimum=0.0,
148
- maximum=1.0,
149
- step=0.1,
150
- default=0.4,
151
- label="threshold",
152
- ),
153
- ]
154
-
155
- out_df = gr.outputs.Dataframe(
156
- headers=None,
157
- max_rows=lines, # 20
158
- max_cols=None,
159
- overflow_row_behaviour="paginate",
160
- type="auto",
161
- label="To be aligned",
162
- )
163
- aligned = gr.outputs.Dataframe(
164
- headers=None,
165
- max_rows=lines, # 20
166
- max_cols=None,
167
- overflow_row_behaviour="paginate",
168
- type="auto",
169
- label="Aligned",
170
- )
171
- leftover = gr.outputs.Dataframe(
172
- headers=None,
173
- max_rows=lines, # 20
174
- max_cols=None,
175
- overflow_row_behaviour="paginate",
176
- type="auto",
177
- label="Leftover",
178
- )
179
- outputs = [ # tot. 3
180
- out_df,
181
- aligned,
182
- leftover,
183
- ]
184
-
185
- _ = """ # switch to blocks
186
- iface = gr.Interface(
187
- fn=greet,
188
- # fn=ubee,
189
- title=title,
190
- theme=theme,
191
- layout="vertical", # horizontal unaligned
192
- description=description,
193
- article=article,
194
- # inputs="text",
195
- # outputs="text",
196
- inputs=inputs, # text1, text2, segment, thresh
197
- outputs=outputs,
198
- examples=examples,
199
- # enable_queue=True,
200
- )
201
- iface.launch(
202
- enable_queue=True,
203
- share=True,
204
- )
205
- # """
206
  blocks = gr.Blocks()
207
 
208
  with blocks:
@@ -212,27 +200,49 @@ def main():
212
 
213
  Align non-sequential dualtexts.
214
 
 
 
215
  """).strip()
216
  )
217
  with gr.Column():
218
  with gr.Row():
 
219
  text1 = gr.inputs.Textbox(
220
  lines=lines, placeholder=placeholder, default=ex1_zh, label="text1"
221
  ),
222
  text2 = gr.inputs.Textbox(
223
  lines=lines, placeholder=placeholder, default=ex1_en, label="text2"
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  )
225
  with gr.Row():
226
- slider = gr.inputs.Slider(
227
  minimum=0.0,
228
  maximum=1.0,
229
  step=0.1,
230
- default=0.4,
231
  label="threshold",
232
  )
233
  btn = gr.Button("Run")
 
234
  _ = """
235
  out_df = gr.outputs.Dataframe(
 
 
 
 
 
236
  headers=None,
237
  max_rows=lines, # 20
238
  max_cols=None,
@@ -241,6 +251,7 @@ def main():
241
  label="To be aligned",
242
  )
243
  # """
 
244
  with gr.Row():
245
  _ = """
246
  aligned = gr.Dataframe(
@@ -262,21 +273,51 @@ def main():
262
  # """
263
  aligned = gr.HTML(value="output aligned", label="Aligned")
264
  leftover = gr.HTML(value="output leftover", label="Leftover")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  btn.click(
266
  fn=ifn,
267
  inputs=[
268
  text1,
269
  text2,
 
270
  ],
271
  outputs=[
272
- out_df,
273
  aligned,
274
  leftover,
275
  ]
276
  )
277
 
 
 
 
278
 
279
  if __name__ == "__main__":
 
280
  main()
281
 
282
  _ = """
 
1
+ """Gen ubee main.
2
+
3
+ private
4
+ url = 'https://hf.space/embed/mikeee/zero-shot/+/api/predict'
5
+ resp = httpx.post(
6
+ url,
7
+ json={"data": ["love", ",".join(["liebe", "this is test", "hate you"]), False]},
8
+ timeout=httpx.Timeout(None, connect=3),
9
+ )
10
+ resp.json()
11
+ {'data': [{'label': 'liebe',
12
+ 'confidences': [{'label': 'liebe', 'confidence': 0.8688847422599792},
13
+ {'label': 'this is test', 'confidence': 0.12558135390281677},
14
+ {'label': 'hate you', 'confidence': 0.005533925257623196}]}],
15
+ 'duration': 0.265749454498291,
16
+ 'average_duration': 4.639325571060181}
17
+
18
+ """
19
  # pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except, line-too-long
20
 
21
  import sys
 
31
  from icecream import ic
32
  from icecream import install as ic_install
33
  from logzero import logger
34
+ from set_loglevel import set_loglevel
35
+ logzero.loglevel(set_loglevel())
36
 
37
  # for embeddable python
38
  # if "." not in sys.path: sys.path.insert(0, ".")
 
41
  from ubee.ubee import ubee
42
 
43
  # logzero.loglevel(10)
44
+ logger.debug(" debug on ")
45
+
46
  ic_install()
47
  ic.configureOutput(
48
  includeContext=True,
 
51
  ic.enable()
52
  # ic.disenable() # to turn off
53
 
54
+ ic(" ic.enabled ")
55
+
56
+ _ = """
57
+ ic("Testing...")
58
+ import model_pool
59
+ from model_pool import fetch_check_aux
60
+ print("model-pool version", model_pool.__version__)
61
+ print("gradio version", gr.__version__)
62
+
63
+ try:
64
+ fetch_check_aux.fetch_check_aux()
65
+ except Exception as _:
66
+ ic(["fetch_check_aux.fetch_check_aux", _])
67
+
68
+ from model_pool.load_model import load_model
69
+ try:
70
+ clas = load_model("clas-l-user")
71
+ except Exception as _:
72
+ ic(["load_model(\"clas-l-user\")", _])
73
+ # """
74
+
75
+ # _ = clas("love", ["liebe", "hate you", "test"])
76
+ # print(_)
77
+ # raise SystemExit("Exit by intention")
78
+ # {'sequence': 'love', 'labels': ['liebe', 'test', 'hate you'],
79
+ # 'scores': [0.8885253667831421, 0.10581762343645096, 0.005657028406858444]}
80
+ # Runs OK
81
 
82
+ # text1 = ""
83
+ # text2 = ""
84
+ # thresh: float = 0.4
85
 
86
 
87
+ # segment: str
88
  def ifn(
89
  text1,
90
  text2,
91
+ thresh
 
92
  ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
93
  """Take inputs, return outputs.
94
 
 
108
  # return _
109
 
110
  res1_, res2_ = ubee(res1, res2, thresh)
111
+ # res1_, res2_ = res1, res2
112
 
113
  out_df = pd.DataFrame(
114
  zip_longest(res1, res2),
 
121
  _ = None
122
 
123
  # return out_df, pd.DataFrame(res1_, columns=["text1", "text2", "likelihood"]), _
124
+ <<<<<<< HEAD
125
  return pd.DataFrame(res1_, columns=["text1", "text2", "likelihood"]).to_html(), _.to_html()
126
+ =======
127
+ return pd.DataFrame(res1_, columns=["text1", "text2", "likelihood"]), _
128
+ >>>>>>> 1b2837ac7145a0ce5ead7088aaa41165a6615ad4
129
 
130
 
131
  def main():
132
  """Create main entry."""
133
+ # global text1, text2, threash
134
+
135
  text_zh = Path("data/test_zh.txt").read_text(encoding="utf8")
136
  text_zh = [elm.strip() for elm in text_zh.splitlines() if elm.strip()][:10]
137
  text_zh = "\n\n".join(text_zh)
 
191
  lines = 15
192
  placeholder = "Type or paste text here"
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  blocks = gr.Blocks()
195
 
196
  with blocks:
 
200
 
201
  Align non-sequential dualtexts.
202
 
203
+ 可对词、句、段,每个词(或句或段)一行。可对任意语言对(英中、英德、德法、中日……等等)。建议 threshold 门槛值 -- 词: 0.3,句:0.5, 段: 0.7。如果太多 leftover,可适当调小 threshold。 如果太多误对则可以适当调大 threshold。
204
+
205
  """).strip()
206
  )
207
  with gr.Column():
208
  with gr.Row():
209
+ <<<<<<< HEAD
210
  text1 = gr.inputs.Textbox(
211
  lines=lines, placeholder=placeholder, default=ex1_zh, label="text1"
212
  ),
213
  text2 = gr.inputs.Textbox(
214
  lines=lines, placeholder=placeholder, default=ex1_en, label="text2"
215
+ =======
216
+ text1 = gr.Textbox(
217
+ lines=lines,
218
+ placeholder=placeholder,
219
+ value=ex2_zh,
220
+ label="text1"
221
+ )
222
+ text2 = gr.Textbox(
223
+ lines=lines,
224
+ placeholder=placeholder,
225
+ value=ex2_en,
226
+ label="text2"
227
+ >>>>>>> 1b2837ac7145a0ce5ead7088aaa41165a6615ad4
228
  )
229
  with gr.Row():
230
+ thresh = gr.Slider(
231
  minimum=0.0,
232
  maximum=1.0,
233
  step=0.1,
234
+ value=0.4,
235
  label="threshold",
236
  )
237
  btn = gr.Button("Run")
238
+ <<<<<<< HEAD
239
  _ = """
240
  out_df = gr.outputs.Dataframe(
241
+ =======
242
+
243
+ _ = """
244
+ out_df = gr.Dataframe(
245
+ >>>>>>> 1b2837ac7145a0ce5ead7088aaa41165a6615ad4
246
  headers=None,
247
  max_rows=lines, # 20
248
  max_cols=None,
 
251
  label="To be aligned",
252
  )
253
  # """
254
+ <<<<<<< HEAD
255
  with gr.Row():
256
  _ = """
257
  aligned = gr.Dataframe(
 
273
  # """
274
  aligned = gr.HTML(value="output aligned", label="Aligned")
275
  leftover = gr.HTML(value="output leftover", label="Leftover")
276
+ =======
277
+
278
+ # with gr.Row():
279
+ aligned = gr.Dataframe(
280
+ headers=None,
281
+ max_rows=lines, # 20
282
+ max_cols=None,
283
+ overflow_row_behaviour="paginate",
284
+ type="auto",
285
+ label="Aligned",
286
+ )
287
+
288
+ leftover = gr.Dataframe(
289
+ headers=None,
290
+ max_rows=lines, # 20
291
+ max_cols=None,
292
+ overflow_row_behaviour="paginate",
293
+ type="auto",
294
+ label="Leftover",
295
+ )
296
+
297
+ logger.debug("text1: %s", text1)
298
+ logger.debug("text2: %s", text2)
299
+
300
+ >>>>>>> 1b2837ac7145a0ce5ead7088aaa41165a6615ad4
301
  btn.click(
302
  fn=ifn,
303
  inputs=[
304
  text1,
305
  text2,
306
+ thresh,
307
  ],
308
  outputs=[
309
+ # out_df,
310
  aligned,
311
  leftover,
312
  ]
313
  )
314
 
315
+ # blocks.launch()
316
+ blocks.launch(debug=True, enable_queue=True)
317
+
318
 
319
  if __name__ == "__main__":
320
+ logger.info(" Start main()")
321
  main()
322
 
323
  _ = """
ubee/uclas.py CHANGED
@@ -13,9 +13,14 @@ from model_pool.load_model import load_model
13
  from model_pool.model_s import load_model_s
14
  from sklearn.metrics.pairwise import cosine_similarity
15
 
16
- logzero.loglevel(20)
 
 
 
 
 
 
17
 
18
- fetch_check_aux("/home/user")
19
  model_s = load_model_s()
20
  clas = load_model("clas-l-user")
21
 
 
13
  from model_pool.model_s import load_model_s
14
  from sklearn.metrics.pairwise import cosine_similarity
15
 
16
+ # logzero.loglevel(20)
17
+
18
+ # fetch_check_aux("/home/user")
19
+ try:
20
+ fetch_check_aux()
21
+ except Exception as _:
22
+ logger.error(_)
23
 
 
24
  model_s = load_model_s()
25
  clas = load_model("clas-l-user")
26