Spaces:
Build error
Build error
| """Align via ubee,""" | |
| # pylint: disable= | |
| from itertools import zip_longest | |
| from typing import Iterable, List, Tuple | |
| from icecream import ic | |
| from logzero import logger | |
| from ubee.uclas import uclas | |
def ubee(
    sents_zh: Iterable,
    sents_en: Iterable,
    thresh: float = 0.5,
) -> Tuple[List[Tuple[str, str, float]], List[Tuple[str, str]]]:
    """Align blocks of text from two sources.

    Every item of ``sents_zh`` is handed to ``uclas`` together with the full
    list of ``sents_en`` items as candidate labels. When ``uclas`` returns a
    truthy label the pair is recorded as aligned; otherwise the item is kept
    as a leftover.

    Args:
        sents_zh: iterable of text, can be any language supported by
            clas-l-user.
        sents_en: ditto; materialized into a list and used as the candidate
            labels for every item of ``sents_zh``.
        thresh: forwarded unchanged to ``uclas`` as its ``thresh`` keyword
            (presumably a minimum likelihood for accepting a match --
            confirm against ``uclas``).

    Returns:
        A 2-tuple ``(aligned, leftovers)``:
            aligned: list of ``(seq, label, likelihood)`` triples, with the
                likelihood converted to ``float`` and rounded to 2 decimals.
            leftovers: unaligned items of ``sents_zh`` paired positionally
                (via ``zip_longest``) with the items of ``sents_en`` that
                were never picked as a label; the shorter side is padded
                with ``None``.
    """
    res = []
    labels = [*sents_en]
    lo1 = []  # items of sents_zh for which no label was found
    lo2 = labels[:]  # candidate labels not yet claimed by any item

    for seq in sents_zh:
        ic(seq)
        # Always classify against the full label list, so one label may be
        # selected for more than one item.
        label, likelihood = uclas(seq, labels, thresh=thresh)

        if not label:
            lo1.append(seq)
            continue

        likelihood = round(float(likelihood), 2)
        res.append((seq, label, likelihood))

        try:
            lo2.remove(label)
        except ValueError as exc:
            # The label is no longer in the leftover list (e.g. it was
            # already claimed by an earlier item): best effort, log and
            # carry on instead of aborting the whole alignment.
            logger.error(exc)
            logger.info("seq: %s, lable: %s", seq, label)

    return res, [*zip_longest(lo1, lo2)]