Spaces:
Build error
Build error
freemt
committed on
Commit
·
d7cdc67
1
Parent(s):
5821b23
Switch to blocks, attempt
Browse files- .stignore +101 -0
- install-sw.sh +23 -0
- install-sw1.sh +25 -0
- okteto.yml +44 -0
- poetry.toml +3 -0
- requirements.txt +8 -9
- ubee/__main__.py +7 -9
- ubee/seg_text.py +3 -4
- ubee/ubee.py +4 -3
- ubee/uclas.py +7 -7
.stignore
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.git
|
| 2 |
+
# Byte-compiled / optimized / DLL files
|
| 3 |
+
__pycache__
|
| 4 |
+
*.py[cod]
|
| 5 |
+
*$py.class
|
| 6 |
+
|
| 7 |
+
# C extensions
|
| 8 |
+
*.so
|
| 9 |
+
|
| 10 |
+
# Distribution / packaging
|
| 11 |
+
.Python
|
| 12 |
+
build
|
| 13 |
+
develop-eggs
|
| 14 |
+
dist
|
| 15 |
+
downloads
|
| 16 |
+
eggs
|
| 17 |
+
.eggs
|
| 18 |
+
lib
|
| 19 |
+
lib64
|
| 20 |
+
parts
|
| 21 |
+
sdist
|
| 22 |
+
var
|
| 23 |
+
wheels
|
| 24 |
+
pip-wheel-metadata
|
| 25 |
+
share/python-wheels
|
| 26 |
+
*.egg-info
|
| 27 |
+
.installed.cfg
|
| 28 |
+
*.egg
|
| 29 |
+
MANIFEST
|
| 30 |
+
|
| 31 |
+
# PyInstaller
|
| 32 |
+
# Usually these files are written by a python script from a template
|
| 33 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 34 |
+
*.manifest
|
| 35 |
+
*.spec
|
| 36 |
+
|
| 37 |
+
# Installer logs
|
| 38 |
+
pip-log.txt
|
| 39 |
+
pip-delete-this-directory.txt
|
| 40 |
+
|
| 41 |
+
# Translations
|
| 42 |
+
*.mo
|
| 43 |
+
*.pot
|
| 44 |
+
|
| 45 |
+
# Django stuff:
|
| 46 |
+
*.log
|
| 47 |
+
local_settings.py
|
| 48 |
+
db.sqlite3
|
| 49 |
+
|
| 50 |
+
# Flask stuff:
|
| 51 |
+
instance
|
| 52 |
+
.webassets-cache
|
| 53 |
+
|
| 54 |
+
# Scrapy stuff:
|
| 55 |
+
.scrapy
|
| 56 |
+
|
| 57 |
+
# Sphinx documentation
|
| 58 |
+
docs/_build
|
| 59 |
+
|
| 60 |
+
# PyBuilder
|
| 61 |
+
target
|
| 62 |
+
|
| 63 |
+
# Jupyter Notebook
|
| 64 |
+
.ipynb_checkpoints
|
| 65 |
+
|
| 66 |
+
# IPython
|
| 67 |
+
profile_default
|
| 68 |
+
ipython_config.py
|
| 69 |
+
|
| 70 |
+
# pyenv
|
| 71 |
+
.python-version
|
| 72 |
+
|
| 73 |
+
# celery beat schedule file
|
| 74 |
+
celerybeat-schedule
|
| 75 |
+
|
| 76 |
+
# SageMath parsed files
|
| 77 |
+
*.sage.py
|
| 78 |
+
|
| 79 |
+
# Environments
|
| 80 |
+
.env
|
| 81 |
+
.venv
|
| 82 |
+
env
|
| 83 |
+
venv
|
| 84 |
+
ENV
|
| 85 |
+
env.bak
|
| 86 |
+
venv.bak
|
| 87 |
+
|
| 88 |
+
# Spyder project settings
|
| 89 |
+
.spyderproject
|
| 90 |
+
.spyproject
|
| 91 |
+
|
| 92 |
+
# Rope project settings
|
| 93 |
+
.ropeproject
|
| 94 |
+
|
| 95 |
+
# mypy
|
| 96 |
+
.mypy_cache
|
| 97 |
+
.dmypy.json
|
| 98 |
+
dmypy.json
|
| 99 |
+
|
| 100 |
+
# Pyre type checker
|
| 101 |
+
.pyre
|
install-sw.sh
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pip install pipx
|
| 2 |
+
# pipx install poetry
|
| 3 |
+
# pipx ensurepath
|
| 4 |
+
# source ~/.bashrc
|
| 5 |
+
|
| 6 |
+
# curl -sSL https://install.python-poetry.org | python3 -
|
| 7 |
+
# -C- continue -S show error -o output
|
| 8 |
+
curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
|
| 9 |
+
python install-poetry.py
|
| 10 |
+
rm install-poetry.py
|
| 11 |
+
echo export PATH=~/.local/bin:$PATH > ~/.bashrc
|
| 12 |
+
source ~/.bashrc
|
| 13 |
+
# ~/.local/bin/poetry install
|
| 14 |
+
|
| 15 |
+
wget -c https://deb.nodesource.com/setup_12.x
|
| 16 |
+
bash setup_12.x
|
| 17 |
+
apt-get install -y nodejs
|
| 18 |
+
npm install -g npm@latest
|
| 19 |
+
npm install -g nodemon
|
| 20 |
+
rm setup_12.x
|
| 21 |
+
|
| 22 |
+
# apt update # already done in apt-get install -y nodejs
|
| 23 |
+
apt install byobu -y > /dev/null 2>&1
|
install-sw1.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pip install pipx
|
| 2 |
+
# pipx install poetry
|
| 3 |
+
# pipx ensurepath
|
| 4 |
+
# source ~/.bashrc
|
| 5 |
+
|
| 6 |
+
# curl -sSL https://install.python-poetry.org | python3 -
|
| 7 |
+
# -C- continue -S show error -o output
|
| 8 |
+
curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
|
| 9 |
+
python install-poetry.py
|
| 10 |
+
rm install-poetry.py
|
| 11 |
+
echo export PATH=~/.local/bin:$PATH > ~/.bashrc
|
| 12 |
+
source ~/.bashrc
|
| 13 |
+
# ~/.local/bin/poetry install
|
| 14 |
+
|
| 15 |
+
wget -c https://deb.nodesource.com/setup_12.x
|
| 16 |
+
bash setup_12.x
|
| 17 |
+
apt-get install -y nodejs
|
| 18 |
+
npm install -g npm@latest
|
| 19 |
+
npm install -g nodemon
|
| 20 |
+
rm setup_12.x
|
| 21 |
+
|
| 22 |
+
# apt update # already done in apt-get install -y nodejs
|
| 23 |
+
apt install byobu -y > /dev/null 2>&1
|
| 24 |
+
byobu-enable
|
| 25 |
+
byobu
|
okteto.yml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: gradio-cmat
|
| 2 |
+
|
| 3 |
+
# The build section defines how to build the images of
|
| 4 |
+
# your development environment
|
| 5 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#build
|
| 6 |
+
# build:
|
| 7 |
+
# my-service:
|
| 8 |
+
# context: .
|
| 9 |
+
|
| 10 |
+
# The deploy section defines how to deploy your development environment
|
| 11 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#deploy
|
| 12 |
+
# deploy:
|
| 13 |
+
# commands:
|
| 14 |
+
# - name: Deploy
|
| 15 |
+
# command: echo 'Replace this line with the proper 'helm'
|
| 16 |
+
|
| 17 |
+
# or 'kubectl' commands to deploy your development environment'
|
| 18 |
+
|
| 19 |
+
# The dependencies section defines other git repositories to be
|
| 20 |
+
# deployed as part of your development environment
|
| 21 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#dependencies
|
| 22 |
+
# dependencies:
|
| 23 |
+
# - https://github.com/okteto/sample
|
| 24 |
+
# The dev section defines how to activate a development container
|
| 25 |
+
# More info: https://www.okteto.com/docs/reference/manifest/#dev
|
| 26 |
+
dev:
|
| 27 |
+
gradio-cmat:
|
| 28 |
+
# image: okteto/dev:latest
|
| 29 |
+
# image: python:3.8.13-bullseye
|
| 30 |
+
# image: simbachain/poetry-3.8
|
| 31 |
+
image: python:3.8
|
| 32 |
+
command: bash
|
| 33 |
+
workdir: /usr/src/app
|
| 34 |
+
sync:
|
| 35 |
+
- .:/usr/src/app
|
| 36 |
+
environment:
|
| 37 |
+
- name=$USER
|
| 38 |
+
forward:
|
| 39 |
+
- 7861:7861
|
| 40 |
+
- 7860:7860
|
| 41 |
+
- 8501:8501
|
| 42 |
+
reverse:
|
| 43 |
+
- 9000:9000
|
| 44 |
+
autocreate: true
|
poetry.toml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[virtualenvs]
|
| 2 |
+
create = true
|
| 3 |
+
in-project = true
|
requirements.txt
CHANGED
|
@@ -3,10 +3,8 @@ install
|
|
| 3 |
transformers
|
| 4 |
sentencepiece
|
| 5 |
sklearn
|
| 6 |
-
git+https://github.com/ffreemt/fast-langid
|
| 7 |
git+https://github.com/ffreemt/align-model-pool
|
| 8 |
sentence-transformers
|
| 9 |
-
sentence_splitter
|
| 10 |
logzero
|
| 11 |
icecream
|
| 12 |
alive-progress
|
|
@@ -14,10 +12,11 @@ more_itertools
|
|
| 14 |
#
|
| 15 |
openpyxl
|
| 16 |
# --- seg_text
|
| 17 |
-
Morfessor
|
| 18 |
-
pyicu
|
| 19 |
-
pycld2
|
| 20 |
-
tqdm
|
| 21 |
-
polyglot
|
| 22 |
-
sentence_splitter
|
| 23 |
-
pyfunctional
|
|
|
|
|
|
| 3 |
transformers
|
| 4 |
sentencepiece
|
| 5 |
sklearn
|
|
|
|
| 6 |
git+https://github.com/ffreemt/align-model-pool
|
| 7 |
sentence-transformers
|
|
|
|
| 8 |
logzero
|
| 9 |
icecream
|
| 10 |
alive-progress
|
|
|
|
| 12 |
#
|
| 13 |
openpyxl
|
| 14 |
# --- seg_text
|
| 15 |
+
# Morfessor
|
| 16 |
+
# pyicu
|
| 17 |
+
# pycld2
|
| 18 |
+
# tqdm
|
| 19 |
+
# polyglot
|
| 20 |
+
# sentence_splitter
|
| 21 |
+
# pyfunctional
|
| 22 |
+
# git+https://github.com/ffreemt/fast-langid
|
ubee/__main__.py
CHANGED
|
@@ -1,20 +1,18 @@
|
|
| 1 |
"""Gen ubee main."""
|
| 2 |
-
# pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except
|
| 3 |
|
| 4 |
-
from typing import Tuple, Optional
|
| 5 |
-
|
| 6 |
-
from pathlib import Path
|
| 7 |
import sys
|
| 8 |
-
from random import shuffle
|
| 9 |
-
|
| 10 |
from itertools import zip_longest
|
|
|
|
|
|
|
| 11 |
from textwrap import dedent
|
|
|
|
| 12 |
|
| 13 |
import gradio as gr
|
| 14 |
-
|
| 15 |
-
import pandas as pd
|
| 16 |
-
from icecream import install as ic_install, ic
|
| 17 |
import logzero
|
|
|
|
|
|
|
|
|
|
| 18 |
from logzero import logger
|
| 19 |
|
| 20 |
# for embeddable python
|
|
|
|
| 1 |
"""Gen ubee main."""
|
| 2 |
+
# pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except, line-too-long
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
import sys
|
|
|
|
|
|
|
| 5 |
from itertools import zip_longest
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from random import shuffle
|
| 8 |
from textwrap import dedent
|
| 9 |
+
from typing import Optional, Tuple
|
| 10 |
|
| 11 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 12 |
import logzero
|
| 13 |
+
import pandas as pd
|
| 14 |
+
from icecream import ic
|
| 15 |
+
from icecream import install as ic_install
|
| 16 |
from logzero import logger
|
| 17 |
|
| 18 |
# for embeddable python
|
ubee/seg_text.py
CHANGED
|
@@ -9,15 +9,14 @@ else use polyglot.text.Text
|
|
| 9 |
"""
|
| 10 |
# pylint: disable=
|
| 11 |
|
|
|
|
| 12 |
from typing import List, Optional, Union
|
| 13 |
|
| 14 |
-
import
|
| 15 |
-
from tqdm.auto import tqdm
|
| 16 |
from polyglot.detect.base import logger as polyglot_logger
|
| 17 |
from polyglot.text import Detector, Text
|
| 18 |
from sentence_splitter import split_text_into_sentences
|
| 19 |
-
|
| 20 |
-
from logzero import logger
|
| 21 |
|
| 22 |
# turn off polyglot.text.Detector warning
|
| 23 |
polyglot_logger.setLevel("ERROR")
|
|
|
|
| 9 |
"""
|
| 10 |
# pylint: disable=
|
| 11 |
|
| 12 |
+
import re
|
| 13 |
from typing import List, Optional, Union
|
| 14 |
|
| 15 |
+
from logzero import logger
|
|
|
|
| 16 |
from polyglot.detect.base import logger as polyglot_logger
|
| 17 |
from polyglot.text import Detector, Text
|
| 18 |
from sentence_splitter import split_text_into_sentences
|
| 19 |
+
from tqdm.auto import tqdm
|
|
|
|
| 20 |
|
| 21 |
# turn off polyglot.text.Detector warning
|
| 22 |
polyglot_logger.setLevel("ERROR")
|
ubee/ubee.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
"""Align via ubee,"""
|
| 2 |
# pylint: disable=
|
| 3 |
-
from typing import Iterable, List, Tuple
|
| 4 |
from itertools import zip_longest
|
|
|
|
| 5 |
|
|
|
|
| 6 |
from logzero import logger
|
|
|
|
| 7 |
from ubee.uclas import uclas
|
| 8 |
-
from icecream import ic
|
| 9 |
|
| 10 |
|
| 11 |
def ubee(
|
|
@@ -17,7 +18,7 @@ def ubee(
|
|
| 17 |
|
| 18 |
Args:
|
| 19 |
sents_zh: list of text, can be any language supported by clas-l-user
|
| 20 |
-
|
| 21 |
Returns:
|
| 22 |
three tuples of aligned blocked
|
| 23 |
leftovers (unaligned)
|
|
|
|
| 1 |
"""Align via ubee,"""
|
| 2 |
# pylint: disable=
|
|
|
|
| 3 |
from itertools import zip_longest
|
| 4 |
+
from typing import Iterable, List, Tuple
|
| 5 |
|
| 6 |
+
from icecream import ic
|
| 7 |
from logzero import logger
|
| 8 |
+
|
| 9 |
from ubee.uclas import uclas
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def ubee(
|
|
|
|
| 18 |
|
| 19 |
Args:
|
| 20 |
sents_zh: list of text, can be any language supported by clas-l-user
|
| 21 |
+
sents_en: ditto
|
| 22 |
Returns:
|
| 23 |
three tuples of aligned blocked
|
| 24 |
leftovers (unaligned)
|
ubee/uclas.py
CHANGED
|
@@ -2,16 +2,16 @@
|
|
| 2 |
# pylint: disable=invalid-name
|
| 3 |
|
| 4 |
from typing import List, Tuple, Union
|
| 5 |
-
import numpy as np
|
| 6 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
| 7 |
-
from joblib import Memory
|
| 8 |
-
|
| 9 |
-
from model_pool import fetch_check_aux # pylint: disable=import-error
|
| 10 |
-
from model_pool.model_s import load_model_s # pylint: disable=import-error
|
| 11 |
-
from model_pool.load_model import load_model # pylint: disable=import-error
|
| 12 |
|
| 13 |
import logzero
|
|
|
|
|
|
|
| 14 |
from logzero import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
logzero.loglevel(20)
|
| 17 |
|
|
|
|
| 2 |
# pylint: disable=invalid-name
|
| 3 |
|
| 4 |
from typing import List, Tuple, Union
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
import logzero
|
| 7 |
+
import numpy as np
|
| 8 |
+
from joblib import Memory
|
| 9 |
from logzero import logger
|
| 10 |
+
# set PYTHONPATH=..\align-model-pool # in win10
|
| 11 |
+
from model_pool.fetch_check_aux import fetch_check_aux
|
| 12 |
+
from model_pool.load_model import load_model
|
| 13 |
+
from model_pool.model_s import load_model_s
|
| 14 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 15 |
|
| 16 |
logzero.loglevel(20)
|
| 17 |
|