SadTalker

Build error

App Files Files Community

liang

vinthony committed on Apr 14, 2023

Commit

a31c0b9

0 Parent(s):

Duplicate from vinthony/SadTalker

Browse files

Co-authored-by: ShadowC <[email protected]>

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +52 -0
.gitignore +159 -0
Dockerfile +59 -0
LICENSE +21 -0
README.md +15 -0
app.py +160 -0
checkpoints/BFM_Fitting/01_MorphableModel.mat +1 -0
checkpoints/BFM_Fitting/BFM09_model_info.mat +1 -0
checkpoints/BFM_Fitting/BFM_exp_idx.mat +1 -0
checkpoints/BFM_Fitting/BFM_front_idx.mat +1 -0
checkpoints/BFM_Fitting/Exp_Pca.bin +1 -0
checkpoints/BFM_Fitting/facemodel_info.mat +1 -0
checkpoints/BFM_Fitting/select_vertex_id.mat +1 -0
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +1 -0
checkpoints/BFM_Fitting/std_exp.txt +1 -0
checkpoints/auido2exp_00300-model.pth +1 -0
checkpoints/auido2pose_00140-model.pth +1 -0
checkpoints/epoch_20.pth +1 -0
checkpoints/facevid2vid_00189-model.pth.tar +1 -0
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +1 -0
checkpoints/hub/checkpoints/s3fd-619a316812.pth +1 -0
checkpoints/mapping_00229-model.pth.tar +1 -0
checkpoints/shape_predictor_68_face_landmarks.dat +1 -0
checkpoints/wav2lip.pth +1 -0
config/auido2exp.yaml +58 -0
config/auido2pose.yaml +49 -0
config/facerender.yaml +45 -0
docs/sadtalker_logo.png +0 -0
examples/driven_audio/RD_Radio31_000.wav +0 -0
examples/driven_audio/RD_Radio34_002.wav +0 -0
examples/driven_audio/RD_Radio36_000.wav +0 -0
examples/driven_audio/RD_Radio40_000.wav +0 -0
examples/driven_audio/bus_chinese.wav +0 -0
examples/driven_audio/chinese_news.wav +3 -0
examples/driven_audio/chinese_poem1.wav +0 -0
examples/driven_audio/chinese_poem2.wav +0 -0
examples/driven_audio/deyu.wav +3 -0
examples/driven_audio/eluosi.wav +3 -0
examples/driven_audio/fayu.wav +3 -0
examples/driven_audio/imagine.wav +3 -0
examples/driven_audio/itosinger1.wav +0 -0
examples/driven_audio/japanese.wav +3 -0
examples/source_image/art_0.png +0 -0
examples/source_image/art_1.png +0 -0
examples/source_image/art_10.png +0 -0
examples/source_image/art_11.png +0 -0
examples/source_image/art_12.png +0 -0
examples/source_image/art_13.png +0 -0
examples/source_image/art_14.png +0 -0
examples/source_image/art_15.png +0 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,52 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoints/BFM_Fitting/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
+checkpoints/BFM_Fitting/BFM09_model_info.mat filter=lfs diff=lfs merge=lfs -text
+checkpoints/facevid2vid_00189-model.pth.tar filter=lfs diff=lfs merge=lfs -text
+checkpoints/mapping_00229-model.pth.tar filter=lfs diff=lfs merge=lfs -text
+checkpoints/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
+examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
+examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
+examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
+examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
+examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
+examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
+examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
+examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
+examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
+examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
+examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
+examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
+examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,159 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+results/
+checkpoints/
+gradio_cached_examples/
+gfpgan/
+start.sh

Dockerfile ADDED Viewed

	@@ -0,0 +1,59 @@

+# Base image: NVIDIA CUDA 11.7.1 with cuDNN 8 (devel variant) on Ubuntu 22.04.
+FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+# Keep apt/dpkg from prompting for input during the image build.
+ENV DEBIAN_FRONTEND=noninteractive
+# System packages: VCS/archive/download tools, ffmpeg + x264, and the headers
+# needed to compile Python from source (used by the pyenv install further down).
+# The apt lists are removed at the end of the same layer.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    zip \
+    unzip \
+    git-lfs \
+    wget \
+    curl \
+    # ffmpeg \
+    ffmpeg \
+    x264 \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Everything below runs as an unprivileged user with UID 1000.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:${PATH}
+WORKDIR ${HOME}/app
+# Install pyenv for this user and put its shims/bin directories on PATH.
+RUN curl https://pyenv.run | bash
+ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+# Build the pinned Python version, make it the global default, and upgrade
+# the packaging tools.
+ENV PYTHON_VERSION=3.10.9
+RUN pyenv install ${PYTHON_VERSION} && \
+    pyenv global ${PYTHON_VERSION} && \
+    pyenv rehash && \
+    pip install --no-cache-dir -U pip setuptools wheel
+# Install the pinned torch/torchvision pair first, then the project requirements.
+# requirements.txt is copied on its own before the rest of the source tree.
+RUN pip install --no-cache-dir -U torch==1.12.1 torchvision==0.13.1
+COPY --chown=1000 requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
+# Copy the application source into the working directory, owned by UID 1000.
+COPY --chown=1000 . ${HOME}/app
+# Lists the working directory in the build log.
+# NOTE(review): looks like a leftover debugging aid -- confirm whether it is still wanted.
+RUN ls -a
+# Runtime environment. SYSTEM=spaces is the value app.py compares against when
+# deciding whether to cache the Gradio examples.
+ENV PYTHONPATH=${HOME}/app \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    SYSTEM=spaces
+# Start the Gradio application.
+CMD ["python", "app.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Tencent AI Lab
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md ADDED Viewed

	@@ -0,0 +1,15 @@

+---
+title: SadTalker
+emoji: 😭
+colorFrom: purple
+colorTo: green
+sdk: gradio
+sdk_version: 3.23.0
+app_file: app.py
+pinned: false
+license: mit
+duplicated_from: vinthony/SadTalker
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,160 @@

+import os, sys
+import tempfile
+import gradio as gr
+from src.gradio_demo import SadTalker
+from src.utils.text2speech import TTSTalker
+from huggingface_hub import snapshot_download
+def get_source_image(image):
+        """Return ``image`` unchanged (identity pass-through)."""
+        return image
+def download_model():
+    """Download a snapshot of the ``vinthony/SadTalker`` Hub repo into ``./checkpoints``."""
+    REPO_ID = 'vinthony/SadTalker'
+    # NOTE(review): local_dir_use_symlinks=True presumably fills ./checkpoints with
+    # links into the Hugging Face cache rather than real files -- confirm against the
+    # huggingface_hub version in use before committing or copying that directory.
+    snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
+def sadtalker_demo():
+    download_model()
+    sad_talker = SadTalker(lazy_load=True)
+    tts_talker = TTSTalker()
+    with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
+        gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
+                    <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
+                    <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a>  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
+                     <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
+        gr.Markdown("""
+        <b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> \
+        <br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> \
+        """)
+        with gr.Row().style(equal_height=False):
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="sadtalker_source_image"):
+                    with gr.TabItem('Upload image'):
+                        with gr.Row():
+                            source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
+                with gr.Tabs(elem_id="sadtalker_driven_audio"):
+                    with gr.TabItem('Upload or Generating from TTS'):
+                        with gr.Column(variant='panel'):
+                            driven_audio = gr.Audio(label="Input audio(.wav/.mp3)", source="upload", type="filepath")
+                        with gr.Column(variant='panel'):
+                            input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
+                            tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
+                            tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="sadtalker_checkbox"):
+                    with gr.TabItem('Settings'):
+                        with gr.Column(variant='panel'):
+                            preprocess_type = gr.Radio(['crop','resize','full'], value='crop', label='preprocess', info="How to handle input image?")
+                            is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works with preprocess `full`)")
+                            enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
+                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
+                with gr.Tabs(elem_id="sadtalker_genearted"):
+                        gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
+        with gr.Row():
+            examples = [
+                [
+                    'examples/source_image/full_body_1.png',
+                    'examples/driven_audio/bus_chinese.wav',
+                    'crop',
+                    True,
+                    False
+                ],
+                [
+                    'examples/source_image/full_body_2.png',
+                    'examples/driven_audio/japanese.wav',
+                    'crop',
+                    False,
+                    False
+                ],
+                [
+                    'examples/source_image/full3.png',
+                    'examples/driven_audio/deyu.wav',
+                    'crop',
+                    False,
+                    True
+                ],
+                [
+                    'examples/source_image/full4.jpeg',
+                    'examples/driven_audio/eluosi.wav',
+                    'full',
+                    False,
+                    True
+                ],
+                [
+                    'examples/source_image/full4.jpeg',
+                    'examples/driven_audio/imagine.wav',
+                    'full',
+                    True,
+                    True
+                ],
+                [
+                    'examples/source_image/full_body_1.png',
+                    'examples/driven_audio/bus_chinese.wav',
+                    'full',
+                    True,
+                    False
+                ],
+                [
+                    'examples/source_image/art_13.png',
+                    'examples/driven_audio/fayu.wav',
+                    'resize',
+                    True,
+                    False
+                ],
+                [
+                    'examples/source_image/art_5.png',
+                    'examples/driven_audio/chinese_news.wav',
+                    'resize',
+                    False,
+                    False
+                ],
+                [
+                    'examples/source_image/art_5.png',
+                    'examples/driven_audio/RD_Radio31_000.wav',
+                    'resize',
+                    True,
+                    True
+                ],
+            ]
+            gr.Examples(examples=examples,
+                        inputs=[
+                            source_image,
+                            driven_audio,
+                            preprocess_type,
+                            is_still_mode,
+                            enhancer],
+                        outputs=[gen_video],
+                        fn=sad_talker.test,
+                        cache_examples=os.getenv('SYSTEM') == 'spaces') #
+        submit.click(
+                    fn=sad_talker.test,
+                    inputs=[source_image,
+                            driven_audio,
+                            preprocess_type,
+                            is_still_mode,
+                            enhancer],
+                    outputs=[gen_video]
+                    )
+    return sadtalker_interface
+if __name__ == "__main__":
+    # Build the interface, enable the request queue (max_size=10), then launch it.
+    demo = sadtalker_demo()
+    demo.queue(max_size=10)
+    demo.launch(debug=True)

checkpoints/BFM_Fitting/01_MorphableModel.mat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2

checkpoints/BFM_Fitting/BFM09_model_info.mat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b

checkpoints/BFM_Fitting/BFM_exp_idx.mat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/1146e4e9c3bef303a497383aa7974c014fe945c7

checkpoints/BFM_Fitting/BFM_front_idx.mat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b9d7b0953dd1dc5b1e28144610485409ac321f9b

checkpoints/BFM_Fitting/Exp_Pca.bin ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726

checkpoints/BFM_Fitting/facemodel_info.mat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/3e516ec7297fa3248098f49ecea10579f4831c0a

checkpoints/BFM_Fitting/select_vertex_id.mat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/5b8b220093d93b133acc94ffed159f31a74854cd

checkpoints/BFM_Fitting/similarity_Lm3D_all.mat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/a0e23588302bc71fc899eef53ff06df5f4df4c1d

checkpoints/BFM_Fitting/std_exp.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/767b8de4ea1ca78b6f22b98ff2dee4fa345500bb

checkpoints/auido2exp_00300-model.pth ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4

checkpoints/auido2pose_00140-model.pth ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce

checkpoints/epoch_20.pth ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b

checkpoints/facevid2vid_00189-model.pth.tar ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d

checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46

checkpoints/hub/checkpoints/s3fd-619a316812.pth ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543

checkpoints/mapping_00229-model.pth.tar ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1

checkpoints/shape_predictor_68_face_landmarks.dat ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f

checkpoints/wav2lip.pth ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37

config/auido2exp.yaml ADDED Viewed

	@@ -0,0 +1,58 @@

+DATASET:
+  TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/train.txt
+  EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/val.txt
+  TRAIN_BATCH_SIZE: 32
+  EVAL_BATCH_SIZE: 32
+  EXP: True
+  EXP_DIM: 64
+  FRAME_LEN: 32
+  COEFF_LEN: 73
+  NUM_CLASSES: 46
+  AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
+  COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav2lip_3dmm
+  LMDB_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
+  DEBUG: True
+  NUM_REPEATS: 2
+  T: 40
+MODEL:
+  FRAMEWORK: V2
+  AUDIOENCODER:
+    LEAKY_RELU: True
+    NORM: 'IN'
+  DISCRIMINATOR:
+    LEAKY_RELU: False
+    INPUT_CHANNELS: 6
+  CVAE:
+    AUDIO_EMB_IN_SIZE: 512
+    AUDIO_EMB_OUT_SIZE: 128
+    SEQ_LEN: 32
+    LATENT_SIZE: 256
+    ENCODER_LAYER_SIZES: [192, 1024]
+    DECODER_LAYER_SIZES: [1024, 192]
+TRAIN:
+  MAX_EPOCH: 300
+  GENERATOR:
+    LR: 2.0e-5
+  DISCRIMINATOR:
+    LR: 1.0e-5
+  LOSS:
+    W_FEAT: 0
+    W_COEFF_EXP: 2
+    W_LM: 1.0e-2
+    W_LM_MOUTH: 0
+    W_REG: 0
+    W_SYNC: 0
+    W_COLOR: 0
+    W_EXPRESSION: 0
+    W_LIPREADING: 0.01
+    W_LIPREADING_VV: 0
+    W_EYE_BLINK: 4
+TAG:
+  NAME:  small_dataset

config/auido2pose.yaml ADDED Viewed

	@@ -0,0 +1,49 @@

+DATASET:
+  TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/train_33.txt
+  EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/val.txt
+  TRAIN_BATCH_SIZE: 64
+  EVAL_BATCH_SIZE: 1
+  EXP: True
+  EXP_DIM: 64
+  FRAME_LEN: 32
+  COEFF_LEN: 73
+  NUM_CLASSES: 46
+  AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
+  COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
+  DEBUG: True
+MODEL:
+  AUDIOENCODER:
+    LEAKY_RELU: True
+    NORM: 'IN'
+  DISCRIMINATOR:
+    LEAKY_RELU: False
+    INPUT_CHANNELS: 6
+  CVAE:
+    AUDIO_EMB_IN_SIZE: 512
+    AUDIO_EMB_OUT_SIZE: 6
+    SEQ_LEN: 32
+    LATENT_SIZE: 64
+    ENCODER_LAYER_SIZES: [192, 128]
+    DECODER_LAYER_SIZES: [128, 192]
+TRAIN:
+  MAX_EPOCH: 150
+  GENERATOR:
+    LR: 1.0e-4
+  DISCRIMINATOR:
+    LR: 1.0e-4
+  LOSS:
+    LAMBDA_REG: 1
+    LAMBDA_LANDMARKS: 0
+    LAMBDA_VERTICES: 0
+    LAMBDA_GAN_MOTION: 0.7
+    LAMBDA_GAN_COEFF: 0
+    LAMBDA_KL: 1
+TAG:
+  NAME: cvae_UNET_useAudio_usewav2lipAudioEncoder

config/facerender.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_params:
+  common_params:
+    num_kp: 15
+    image_channel: 3
+    feature_channel: 32
+    estimate_jacobian: False   # True
+  kp_detector_params:
+     temperature: 0.1
+     block_expansion: 32
+     max_features: 1024
+     scale_factor: 0.25         # 0.25
+     num_blocks: 5
+     reshape_channel: 16384  # 16384 = 1024 * 16
+     reshape_depth: 16
+  he_estimator_params:
+     block_expansion: 64
+     max_features: 2048
+     num_bins: 66
+  generator_params:
+    block_expansion: 64
+    max_features: 512
+    num_down_blocks: 2
+    reshape_channel: 32
+    reshape_depth: 16         # 512 = 32 * 16
+    num_resblocks: 6
+    estimate_occlusion_map: True
+    dense_motion_params:
+      block_expansion: 32
+      max_features: 1024
+      num_blocks: 5
+      reshape_depth: 16
+      compress: 4
+  discriminator_params:
+    scales: [1]
+    block_expansion: 32
+    max_features: 512
+    num_blocks: 4
+    sn: True
+  mapping_params:
+      coeff_nc: 70
+      descriptor_nc: 1024
+      layer: 3
+      num_kp: 15
+      num_bins: 66