diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..a790e320464ebc778ca07f5bcd826a9c8412ed0e --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +# example of file for storing private and user specific environment variables, like keys or system paths +# rename it to ".env" (excluded from version control by default) +# .env is loaded by train.py automatically +# hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR} + +MY_VAR="/home/user/my/system/path" diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index a6344aac8c09253b3b630fb776ae94478aa0275b..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,35 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cbec8b43a0414bbbf4cc9feae49b9dc091a60c92 --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +### VisualStudioCode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace +**/.vscode + +# JetBrains +.idea/ + +# Data & Models +*.h5 +*.tar +*.tar.gz + +# Lightning-Hydra-Template +configs/local/default.yaml +/data/ +/logs/ +.env + +# Aim logging +.aim + +# Cython complied files +matcha/utils/monotonic_align/core.c + +# Ignoring hifigan checkpoint +generator_v1 +g_02500000 +gradio_cached_examples/ +synth_output/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e695f115eba12d84fe6f465c5d834dfa35c3d2ec --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,59 @@ +default_language_version: + python: python3.10 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + # list of supported hooks: https://pre-commit.com/hooks.html + - id: trailing-whitespace + - id: end-of-file-fixer + # - id: check-docstring-first + - id: check-yaml + - id: debug-statements + - id: detect-private-key + - id: check-toml + - id: check-case-conflict + - id: check-added-large-files + + # python code formatting + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + args: [--line-length, "120"] + + # python import sorting + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + args: ["--profile", "black", "--filter-files"] + + # python upgrading syntax to newer version + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 + hooks: + - id: pyupgrade + args: [--py38-plus] + + # python check (PEP8), programming errors and code complexity + - repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + args: + [ + "--max-line-length", "120", + "--extend-ignore", + "E203,E402,E501,F401,F841,RST2,RST301", + "--exclude", + "logs/*,data/*,matcha/hifigan/*", + ] + additional_dependencies: [flake8-rst-docstrings==0.3.0] + + # pylint + - repo: https://github.com/pycqa/pylint + rev: v3.0.3 + hooks: + - id: pylint diff --git a/.project-root b/.project-root new file mode 100644 index 0000000000000000000000000000000000000000..63eab774b9e36aa1a46cbd31b59cbd373bc5477f --- /dev/null +++ b/.project-root @@ -0,0 +1,2 @@ +# this file is required for inferring the project root directory +# do not delete diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..962864189eab99a66b315b80f5a9976e7a423d4a --- /dev/null +++ b/.pylintrc @@ -0,0 +1,525 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=missing-docstring, + too-many-public-methods, + too-many-lines, + bare-except, + ## for avoiding weird p3.6 CI linter error + ## TODO: see later if we can remove this + assigning-non-slot, + unsupported-assignment-operation, + ## end + line-too-long, + fixme, + wrong-import-order, + ungrouped-imports, + wrong-import-position, + import-error, + invalid-name, + too-many-instance-attributes, + arguments-differ, + arguments-renamed, + no-name-in-module, + no-member, + unsubscriptable-object, + raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + useless-object-inheritance, + too-few-public-methods, + too-many-branches, + too-many-arguments, + too-many-locals, + too-many-statements, + duplicate-code, + not-callable, + import-outside-toplevel, + logging-fstring-interpolation, + logging-not-lazy, + unused-argument, + no-else-return, + chained-comparison, + redefined-outer-name + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[LOGGING] + +# Format style used to check logging format string. `old` means using % +# formatting, while `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package.. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members=numpy.*,torch.* + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=120 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +argument-rgx=[a-z_][a-z0-9_]{0,30}$ + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + x, + ex, + Run, + _ + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +variable-rgx=[a-z_][a-z0-9_]{0,30}$ + + +[STRING] + +# This flag controls whether the implicit-str-concat-in-sequence should +# generate a warning on implicit string concatenation in sequences defined over +# several lines. +check-str-concat-over-line-jumps=no + + +[IMPORTS] + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement. +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=15 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". +overgeneral-exceptions=builtins.BaseException, + builtins.Exception diff --git a/Create_dataset/__init__.py b/Create_dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/Create_dataset/__init__.py @@ -0,0 +1 @@ + diff --git a/Create_dataset/cr_dataset_script.py b/Create_dataset/cr_dataset_script.py new file mode 100644 index 0000000000000000000000000000000000000000..12b5c4de0e1bca21ca4c4553ca8209adbfb1ad25 --- /dev/null +++ b/Create_dataset/cr_dataset_script.py @@ -0,0 +1,97 @@ +import pandas as pd +import numpy as np +from datasets import load_dataset +from datasets import Dataset, DatasetDict +from IPython.display import Audio +import scipy +import librosa +from tqdm import tqdm +import re +import os + + +def load_audio(audio_dict:dict)->None: + target_sr = 22050 + audio_resampled = librosa.resample(np.array(audio_dict['array']), + orig_sr=audio_dict['sampling_rate'], + target_sr=target_sr) + scipy.io.wavfile.write(audio_dict['path'], + rate=target_sr, + data=(audio_resampled* 32767).astype(np.int16)) + +def remove_outer_quotes_regex(sen:str)->str: + return re.sub(r'^["\'](.*)["\']$', r'\1', sen) + +def main()->None: + os.mkdir('kany_dataset') + os.chdir('kany_dataset') + os.mkdir('wavs') + os.chdir('wavs') + + + art = """ + /\_/\ + ( o.o ) + > ^ < + + V O I C E + """ + print(art) + + print('--- LOADING DATASET ---') + dataset_kany = load_dataset("Simonlob/Kany_dataset_mk4") + + # mk TRAIN + print() + print('--- CONVERTIND AND SAVING THE TRAIN DATASET ---') + num_shards=20 + path = [] + text = [] + + with tqdm(total=len(dataset_kany['train']), leave=False) as pbar: + for ind in range(num_shards): + dataset_shard = dataset_kany['train'].shard(num_shards=num_shards, index=ind) + for row in dataset_shard: + load_audio(row['audio']) + path.append(row['audio']['path']) + text.append(row['raw_transcription']) + pbar.update(1) + + + absolute_path = os.path.abspath('../') + os.chdir(absolute_path) + + dir = f'{absolute_path}/wavs/' + df = pd.DataFrame({'path':path, 'text':text}) + df.text = df.text.map(remove_outer_quotes_regex) + df.path = dir + df.path + df.to_csv('kany_filelist_train.txt', sep='|', header=None, index=False) + + # mk TEST + os.chdir(dir) + path = [] + text = [] + print() + print('--- CONVERTIND AND SAVING THE TEST DATASET ---') + with tqdm(total=len(dataset_kany['test']), leave=False) as pbar2: + for row in tqdm(dataset_kany['test']): + load_audio(row['audio']) + path.append(row['audio']['path']) + text.append(row['raw_transcription']) + pbar2.update(1) + + os.chdir(absolute_path) + df = pd.DataFrame({'path':path, 'text':text}) + df.text = df.text.map(remove_outer_quotes_regex) + df.path = dir + df.path + df.to_csv('kany_filelist_test.txt', sep='|', header=None, index=False) + print() + print('--- THE DATASET IS READY ---') + print(f'Dir of data is "{absolute_path}"') + + absolute_path_home = os.path.abspath('../') + os.chdir(absolute_path_home) + + +if __name__ == "__main__": + main() diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..858018e750da7be7b271bb7307e68d159ed67ef6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Shivam Mehta + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..c013140cdfb9de19c4d4e73c73a44e33f33fa871 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,14 @@ +include README.md +include LICENSE.txt +include requirements.*.txt +include *.cff +include requirements.txt +include matcha/VERSION +recursive-include matcha *.json +recursive-include matcha *.html +recursive-include matcha *.png +recursive-include matcha *.md +recursive-include matcha *.py +recursive-include matcha *.pyx +recursive-exclude tests * +prune tests* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4b523dd17b13a19617c9cc9d9dad7f7d8d4c24a0 --- /dev/null +++ b/Makefile @@ -0,0 +1,42 @@ + +help: ## Show help + @grep -E '^[.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +clean: ## Clean autogenerated files + rm -rf dist + find . -type f -name "*.DS_Store" -ls -delete + find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf + find . | grep -E ".pytest_cache" | xargs rm -rf + find . | grep -E ".ipynb_checkpoints" | xargs rm -rf + rm -f .coverage + +clean-logs: ## Clean logs + rm -rf logs/** + +create-package: ## Create wheel and tar gz + rm -rf dist/ + python setup.py bdist_wheel --plat-name=manylinux1_x86_64 + python setup.py sdist + python -m twine upload dist/* --verbose --skip-existing + +format: ## Run pre-commit hooks + pre-commit run -a + +sync: ## Merge changes from main branch to your current branch + git pull + git pull origin main + +test: ## Run not slow tests + pytest -k "not slow" + +test-full: ## Run all tests + pytest + +train-ljspeech: ## Train the model + python matcha/train.py experiment=ljspeech + +train-ljspeech-min: ## Train the model with minimum memory + python matcha/train.py experiment=ljspeech_min_memory + +start_app: ## Start the app + python matcha/app.py diff --git a/README.md b/README.md index bb8522cbfaac275be25c05256c729130618476ee..7fc5d00cc157200c02cd0cb96fdab8f3448e91ab 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,70 @@ ---- -title: Akylai Tts Mini -emoji: 📈 -colorFrom: gray -colorTo: gray -sdk: gradio -sdk_version: 4.26.0 -app_file: app.py -pinned: false -license: apache-2.0 ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +
+ + + +# 🍵 Matcha-TTS: A fast TTS architecture with conditional flow matching + +### [Shivam Mehta](https://www.kth.se/profile/smehta), [Ruibo Tu](https://www.kth.se/profile/ruibo), [Jonas Beskow](https://www.kth.se/profile/beskow), [Éva Székely](https://www.kth.se/profile/szekely), and [Gustav Eje Henter](https://people.kth.se/~ghe/) + +[![python](https://img.shields.io/badge/-Python_3.10-blue?logo=python&logoColor=white)](https://www.python.org/downloads/release/python-3100/) +[![pytorch](https://img.shields.io/badge/PyTorch_2.0+-ee4c2c?logo=pytorch&logoColor=white)](https://pytorch.org/get-started/locally/) +[![lightning](https://img.shields.io/badge/-Lightning_2.0+-792ee5?logo=pytorchlightning&logoColor=white)](https://pytorchlightning.ai/) +[![hydra](https://img.shields.io/badge/Config-Hydra_1.3-89b8cd)](https://hydra.cc/) +[![black](https://img.shields.io/badge/Code%20Style-Black-black.svg?labelColor=gray)](https://black.readthedocs.io/en/stable/) +[![isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/) + +

+ +

+ +
+ + + +# Matcha-TTS for Kyrgyz language + +## Train with Kany Dataset + +The training dataset has 7016 samples and 13 hours of speech. All settings for training have already been made. + +## Process by Terminal + +* **Load this repo and connect to HF** + +``` +git clone https://github.com/simonlobgromov/Matcha-TTS +cd Matcha-TTS +pip install -e . +``` +!!!The environment will be restarted!!! + +Install this: + +``` +apt-get install espeak-ng +``` +Connect to HF + +``` +git config --global credential.helper store +huggingface-cli login +``` + +* **Load the Data** + +``` +create-dataset + +# If you see a cat, then everything is fine! +``` + +* **Train** + +``` +python matcha/train.py experiment=akylai +``` + +* **Checkpoints** + +Checkpoints will be saved in `./Matcha-TTS/logs/train/akylai/runs/_