Spaces:
Build error
Build error
Duplicate from vinthony/SadTalker
Browse files
Co-authored-by: ShadowC <[email protected]>
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +52 -0
- .gitignore +155 -0
- Dockerfile +59 -0
- LICENSE +21 -0
- README.md +15 -0
- app.py +111 -0
- checkpoints/BFM_Fitting.zip +3 -0
- checkpoints/BFM_Fitting/01_MorphableModel.mat +3 -0
- checkpoints/BFM_Fitting/BFM09_model_info.mat +3 -0
- checkpoints/BFM_Fitting/BFM_exp_idx.mat +0 -0
- checkpoints/BFM_Fitting/BFM_front_idx.mat +0 -0
- checkpoints/BFM_Fitting/Exp_Pca.bin +3 -0
- checkpoints/BFM_Fitting/facemodel_info.mat +0 -0
- checkpoints/BFM_Fitting/select_vertex_id.mat +0 -0
- checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +0 -0
- checkpoints/BFM_Fitting/std_exp.txt +1 -0
- checkpoints/auido2exp_00300-model.pth +3 -0
- checkpoints/auido2pose_00140-model.pth +3 -0
- checkpoints/epoch_20.pth +3 -0
- checkpoints/facevid2vid_00189-model.pth.tar +3 -0
- checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +3 -0
- checkpoints/hub/checkpoints/s3fd-619a316812.pth +3 -0
- checkpoints/mapping_00229-model.pth.tar +3 -0
- checkpoints/shape_predictor_68_face_landmarks.dat +3 -0
- checkpoints/wav2lip.pth +3 -0
- config/auido2exp.yaml +58 -0
- config/auido2pose.yaml +49 -0
- config/facerender.yaml +45 -0
- examples/driven_audio/RD_Radio31_000.wav +0 -0
- examples/driven_audio/RD_Radio34_002.wav +0 -0
- examples/driven_audio/RD_Radio36_000.wav +0 -0
- examples/driven_audio/RD_Radio40_000.wav +0 -0
- examples/driven_audio/chinese_news.wav +3 -0
- examples/driven_audio/chinese_poem1.wav +0 -0
- examples/driven_audio/chinese_poem2.wav +0 -0
- examples/driven_audio/deyu.wav +3 -0
- examples/driven_audio/eluosi.wav +3 -0
- examples/driven_audio/fayu.wav +3 -0
- examples/driven_audio/imagine.wav +3 -0
- examples/driven_audio/itosinger1.wav +0 -0
- examples/driven_audio/japanese.wav +3 -0
- examples/source_image/art_0.png +0 -0
- examples/source_image/art_1.png +0 -0
- examples/source_image/art_10.png +0 -0
- examples/source_image/art_11.png +0 -0
- examples/source_image/art_12.png +0 -0
- examples/source_image/art_13.png +0 -0
- examples/source_image/art_14.png +0 -0
- examples/source_image/art_15.png +0 -0
- examples/source_image/art_16.png +3 -0
.gitattributes
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
checkpoints/BFM_Fitting/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
|
36 |
+
checkpoints/BFM_Fitting/BFM09_model_info.mat filter=lfs diff=lfs merge=lfs -text
|
37 |
+
checkpoints/facevid2vid_00189-model.pth.tar filter=lfs diff=lfs merge=lfs -text
|
38 |
+
checkpoints/mapping_00229-model.pth.tar filter=lfs diff=lfs merge=lfs -text
|
39 |
+
checkpoints/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
|
40 |
+
examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
|
41 |
+
examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
|
42 |
+
examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
|
43 |
+
examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
|
44 |
+
examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
|
45 |
+
examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
|
46 |
+
examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
|
47 |
+
examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
|
48 |
+
examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
|
49 |
+
examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
|
50 |
+
examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
|
52 |
+
examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
results/
|
Dockerfile
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# CUDA 11.7 + cuDNN 8 development image on Ubuntu 22.04.
FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04

# Keep apt from prompting during the image build.
ENV DEBIAN_FRONTEND=noninteractive

# System tools, media codecs, and the headers pyenv needs to compile CPython.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
    git \
    zip \
    unzip \
    git-lfs \
    wget \
    curl \
    # ffmpeg
    ffmpeg \
    x264 \
    # python build dependencies
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Run everything below as an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:${PATH}
WORKDIR ${HOME}/app

# Install pyenv. -f makes curl fail on HTTP errors instead of piping an error
# page into bash, -sS silences progress output but still prints errors, and
# -L follows redirects.
RUN curl -fsSL https://pyenv.run | bash
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
ENV PYTHON_VERSION=3.10.9
RUN pyenv install ${PYTHON_VERSION} && \
    pyenv global ${PYTHON_VERSION} && \
    pyenv rehash && \
    pip install --no-cache-dir -U pip setuptools wheel

# Install the pinned torch/torchvision pair first, then the project requirements.
# requirements.txt is copied on its own so this layer is only rebuilt when the
# requirements change, not on every source edit.
RUN pip install --no-cache-dir -U torch==1.12.1 torchvision==0.13.1
COPY --chown=1000 requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -U -r /tmp/requirements.txt

# Application sources, owned by the unprivileged user.
COPY --chown=1000 . ${HOME}/app

# Runtime configuration. app.py reads SYSTEM to decide whether to cache examples.
ENV PYTHONPATH=${HOME}/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces
CMD ["python", "app.py"]
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Tencent AI Lab
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: SadTalker
|
3 |
+
emoji: 😭
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: green
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.23.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
duplicated_from: vinthony/SadTalker
|
12 |
+
---
|
13 |
+
|
14 |
+
|
15 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, sys
|
2 |
+
import tempfile
|
3 |
+
import gradio as gr
|
4 |
+
from modules.text2speech import text2speech
|
5 |
+
from modules.sadtalker_test import SadTalker
|
6 |
+
|
7 |
+
def get_driven_audio(audio):
    """Return the driving audio, synthesizing it with ``text2speech`` when needed.

    Args:
        audio: Either the path of an existing file, or a value that is handed
            to ``text2speech`` as its first argument (presumably the text to
            speak -- confirm against ``modules.text2speech``).

    Returns:
        ``audio`` unchanged when it names an existing file. Otherwise a 2-tuple
        holding the ``text2speech`` result twice.

        NOTE(review): the two branches return different shapes (a single value
        vs. a pair). This is preserved for backward compatibility; confirm what
        callers expect before unifying it.
    """
    if os.path.isfile(audio):
        return audio

    # Only the temp file's *name* is needed. Close the handle immediately so no
    # file descriptor is leaked and the path can be reopened for writing;
    # delete=False keeps the (empty) file on disk for text2speech to fill.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
        save_path = tmp_wav.name

    gen_audio = text2speech(audio, save_path)
    return gen_audio, gen_audio
|
17 |
+
|
18 |
+
def get_source_image(image):
    """Identity pass-through: hand the given source image back unchanged."""
    source = image
    return source
|
20 |
+
|
21 |
+
def sadtalker_demo(result_dir='./tmp/'):
    """Assemble the SadTalker Gradio interface and return it without launching.

    Args:
        result_dir: Used as the value of the hidden ``gr.Textbox`` components
            that are passed as the final input to ``sad_talker.test``.

    Returns:
        The ``gr.Blocks`` object holding the whole UI.
    """
    # NOTE(review): the container nesting below is reconstructed from statement
    # order and blank-line grouping -- confirm it against the intended layout.
    sad_talker = SadTalker()

    header_html = (
        "<div align='center'> <h3> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </h3> "
        "<a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> "
        "<a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> "
        "<a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </a> </div>"
    )

    # One row per example: image, audio, still mode, resize mode, enhance mode.
    example_rows = [
        ['examples/source_image/art_10.png', 'examples/driven_audio/deyu.wav', True, False, False],
        ['examples/source_image/art_1.png', 'examples/driven_audio/fayu.wav', True, True, False],
        ['examples/source_image/art_9.png', 'examples/driven_audio/itosinger1.wav', True, False, True],
    ]

    # Examples are cached only when the SYSTEM environment variable is 'spaces'.
    cache_on_spaces = os.getenv('SYSTEM') == 'spaces'

    with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
        gr.Markdown(header_html)

        with gr.Row():
            # Left panel: the two uploads.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_source_image"):
                    with gr.TabItem('Upload image'):
                        with gr.Row():
                            source_image = gr.Image(
                                label="Source image",
                                source="upload",
                                type="filepath",
                            ).style(height=256)

                with gr.Tabs(elem_id="sadtalker_driven_audio"):
                    with gr.TabItem('Upload audio(wav/mp3 only currently)'):
                        with gr.Column(variant='panel'):
                            driven_audio = gr.Audio(
                                label="Input audio",
                                source="upload",
                                type="filepath",
                            )

            # Right panel: option checkboxes, the trigger button and the outputs.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_checkbox"):
                    with gr.TabItem('Settings'):
                        with gr.Column(variant='panel'):
                            is_still_mode = gr.Checkbox(
                                label="Still Mode (fewer head motion)"
                            ).style(container=True)
                            is_resize_mode = gr.Checkbox(
                                label="Resize Mode (⚠️ Resize mode need manually crop the image firstly, can handle larger image crop)"
                            ).style(container=True)
                            is_enhance_mode = gr.Checkbox(
                                label="Enhance Mode (better face quality )"
                            ).style(container=True)
                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')

                with gr.Tabs(elem_id="sadtalker_genearted"):
                    gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
                    gen_text = gr.Textbox(visible=False)

        # Components shared by the examples widget and the Generate button.
        user_inputs = [
            source_image,
            driven_audio,
            is_still_mode,
            is_resize_mode,
            is_enhance_mode,
        ]
        result_outputs = [gen_video, gen_text]

        with gr.Row():
            examples_result_dir = gr.Textbox(value=result_dir, visible=False)
            gr.Examples(
                examples=example_rows,
                inputs=user_inputs + [examples_result_dir],
                outputs=result_outputs,
                fn=sad_talker.test,
                cache_examples=cache_on_spaces,
            )

        # The button gets its own hidden textbox carrying result_dir.
        submit_result_dir = gr.Textbox(value=result_dir, visible=False)
        submit.click(
            fn=sad_talker.test,
            inputs=user_inputs + [submit_result_dir],
            outputs=result_outputs,
        )

    return sadtalker_interface
|
103 |
+
|
104 |
+
|
105 |
+
if __name__ == "__main__":
    # Script entry point: build the UI with './results' as the result
    # directory, then start the Gradio server.
    sadtalker_demo(os.path.join('./', 'results')).launch()
|
110 |
+
|
111 |
+
|
checkpoints/BFM_Fitting.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:785f77f3de288568e76666cd419dcf40646d3f74eae6d4fa3b766c933087a9d8
|
3 |
+
size 404051745
|
checkpoints/BFM_Fitting/01_MorphableModel.mat
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
|
3 |
+
size 240875364
|
checkpoints/BFM_Fitting/BFM09_model_info.mat
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
|
3 |
+
size 127170280
|
checkpoints/BFM_Fitting/BFM_exp_idx.mat
ADDED
Binary file (91.9 kB). View file
|
|
checkpoints/BFM_Fitting/BFM_front_idx.mat
ADDED
Binary file (44.9 kB). View file
|
|
checkpoints/BFM_Fitting/Exp_Pca.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
|
3 |
+
size 51086404
|
checkpoints/BFM_Fitting/facemodel_info.mat
ADDED
Binary file (739 kB). View file
|
|
checkpoints/BFM_Fitting/select_vertex_id.mat
ADDED
Binary file (62.3 kB). View file
|
|
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat
ADDED
Binary file (994 Bytes). View file
|
|
checkpoints/BFM_Fitting/std_exp.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19
|
checkpoints/auido2exp_00300-model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
|
3 |
+
size 34278319
|
checkpoints/auido2pose_00140-model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
|
3 |
+
size 95916155
|
checkpoints/epoch_20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
|
3 |
+
size 288860037
|
checkpoints/facevid2vid_00189-model.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
|
3 |
+
size 2112619148
|
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
|
3 |
+
size 96316515
|
checkpoints/hub/checkpoints/s3fd-619a316812.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
|
3 |
+
size 89843225
|
checkpoints/mapping_00229-model.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
|
3 |
+
size 155521183
|
checkpoints/shape_predictor_68_face_landmarks.dat
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
|
3 |
+
size 99693937
|
checkpoints/wav2lip.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
|
3 |
+
size 435807851
|
config/auido2exp.yaml
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
DATASET:
|
2 |
+
TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/train.txt
|
3 |
+
EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/val.txt
|
4 |
+
TRAIN_BATCH_SIZE: 32
|
5 |
+
EVAL_BATCH_SIZE: 32
|
6 |
+
EXP: True
|
7 |
+
EXP_DIM: 64
|
8 |
+
FRAME_LEN: 32
|
9 |
+
COEFF_LEN: 73
|
10 |
+
NUM_CLASSES: 46
|
11 |
+
AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
|
12 |
+
COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav2lip_3dmm
|
13 |
+
LMDB_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
|
14 |
+
DEBUG: True
|
15 |
+
NUM_REPEATS: 2
|
16 |
+
T: 40
|
17 |
+
|
18 |
+
|
19 |
+
MODEL:
|
20 |
+
FRAMEWORK: V2
|
21 |
+
AUDIOENCODER:
|
22 |
+
LEAKY_RELU: True
|
23 |
+
NORM: 'IN'
|
24 |
+
DISCRIMINATOR:
|
25 |
+
LEAKY_RELU: False
|
26 |
+
INPUT_CHANNELS: 6
|
27 |
+
CVAE:
|
28 |
+
AUDIO_EMB_IN_SIZE: 512
|
29 |
+
AUDIO_EMB_OUT_SIZE: 128
|
30 |
+
SEQ_LEN: 32
|
31 |
+
LATENT_SIZE: 256
|
32 |
+
ENCODER_LAYER_SIZES: [192, 1024]
|
33 |
+
DECODER_LAYER_SIZES: [1024, 192]
|
34 |
+
|
35 |
+
|
36 |
+
TRAIN:
|
37 |
+
MAX_EPOCH: 300
|
38 |
+
GENERATOR:
|
39 |
+
LR: 2.0e-5
|
40 |
+
DISCRIMINATOR:
|
41 |
+
LR: 1.0e-5
|
42 |
+
LOSS:
|
43 |
+
W_FEAT: 0
|
44 |
+
W_COEFF_EXP: 2
|
45 |
+
W_LM: 1.0e-2
|
46 |
+
W_LM_MOUTH: 0
|
47 |
+
W_REG: 0
|
48 |
+
W_SYNC: 0
|
49 |
+
W_COLOR: 0
|
50 |
+
W_EXPRESSION: 0
|
51 |
+
W_LIPREADING: 0.01
|
52 |
+
W_LIPREADING_VV: 0
|
53 |
+
W_EYE_BLINK: 4
|
54 |
+
|
55 |
+
TAG:
|
56 |
+
NAME: small_dataset
|
57 |
+
|
58 |
+
|
config/auido2pose.yaml
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
DATASET:
|
2 |
+
TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/train_33.txt
|
3 |
+
EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/val.txt
|
4 |
+
TRAIN_BATCH_SIZE: 64
|
5 |
+
EVAL_BATCH_SIZE: 1
|
6 |
+
EXP: True
|
7 |
+
EXP_DIM: 64
|
8 |
+
FRAME_LEN: 32
|
9 |
+
COEFF_LEN: 73
|
10 |
+
NUM_CLASSES: 46
|
11 |
+
AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
|
12 |
+
COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
|
13 |
+
DEBUG: True
|
14 |
+
|
15 |
+
|
16 |
+
MODEL:
|
17 |
+
AUDIOENCODER:
|
18 |
+
LEAKY_RELU: True
|
19 |
+
NORM: 'IN'
|
20 |
+
DISCRIMINATOR:
|
21 |
+
LEAKY_RELU: False
|
22 |
+
INPUT_CHANNELS: 6
|
23 |
+
CVAE:
|
24 |
+
AUDIO_EMB_IN_SIZE: 512
|
25 |
+
AUDIO_EMB_OUT_SIZE: 6
|
26 |
+
SEQ_LEN: 32
|
27 |
+
LATENT_SIZE: 64
|
28 |
+
ENCODER_LAYER_SIZES: [192, 128]
|
29 |
+
DECODER_LAYER_SIZES: [128, 192]
|
30 |
+
|
31 |
+
|
32 |
+
TRAIN:
|
33 |
+
MAX_EPOCH: 150
|
34 |
+
GENERATOR:
|
35 |
+
LR: 1.0e-4
|
36 |
+
DISCRIMINATOR:
|
37 |
+
LR: 1.0e-4
|
38 |
+
LOSS:
|
39 |
+
LAMBDA_REG: 1
|
40 |
+
LAMBDA_LANDMARKS: 0
|
41 |
+
LAMBDA_VERTICES: 0
|
42 |
+
LAMBDA_GAN_MOTION: 0.7
|
43 |
+
LAMBDA_GAN_COEFF: 0
|
44 |
+
LAMBDA_KL: 1
|
45 |
+
|
46 |
+
TAG:
|
47 |
+
NAME: cvae_UNET_useAudio_usewav2lipAudioEncoder
|
48 |
+
|
49 |
+
|
config/facerender.yaml
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model_params:
|
2 |
+
common_params:
|
3 |
+
num_kp: 15
|
4 |
+
image_channel: 3
|
5 |
+
feature_channel: 32
|
6 |
+
estimate_jacobian: False # True
|
7 |
+
kp_detector_params:
|
8 |
+
temperature: 0.1
|
9 |
+
block_expansion: 32
|
10 |
+
max_features: 1024
|
11 |
+
scale_factor: 0.25 # 0.25
|
12 |
+
num_blocks: 5
|
13 |
+
reshape_channel: 16384 # 16384 = 1024 * 16
|
14 |
+
reshape_depth: 16
|
15 |
+
he_estimator_params:
|
16 |
+
block_expansion: 64
|
17 |
+
max_features: 2048
|
18 |
+
num_bins: 66
|
19 |
+
generator_params:
|
20 |
+
block_expansion: 64
|
21 |
+
max_features: 512
|
22 |
+
num_down_blocks: 2
|
23 |
+
reshape_channel: 32
|
24 |
+
reshape_depth: 16 # 512 = 32 * 16
|
25 |
+
num_resblocks: 6
|
26 |
+
estimate_occlusion_map: True
|
27 |
+
dense_motion_params:
|
28 |
+
block_expansion: 32
|
29 |
+
max_features: 1024
|
30 |
+
num_blocks: 5
|
31 |
+
reshape_depth: 16
|
32 |
+
compress: 4
|
33 |
+
discriminator_params:
|
34 |
+
scales: [1]
|
35 |
+
block_expansion: 32
|
36 |
+
max_features: 512
|
37 |
+
num_blocks: 4
|
38 |
+
sn: True
|
39 |
+
mapping_params:
|
40 |
+
coeff_nc: 70
|
41 |
+
descriptor_nc: 1024
|
42 |
+
layer: 3
|
43 |
+
num_kp: 15
|
44 |
+
num_bins: 66
|
45 |
+
|
examples/driven_audio/RD_Radio31_000.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/RD_Radio34_002.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/RD_Radio36_000.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/RD_Radio40_000.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/chinese_news.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b0f4d313a1ca671bc4831d60bcf0c12225efbffe6c0e93e54fbfe9bcd4021cb
|
3 |
+
size 1536078
|
examples/driven_audio/chinese_poem1.wav
ADDED
Binary file (263 kB). View file
|
|
examples/driven_audio/chinese_poem2.wav
ADDED
Binary file (461 kB). View file
|
|
examples/driven_audio/deyu.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba1839c57770a2ab0b593ce814344bfd4d750da02acc9be9e8cf5b9113a0f88a
|
3 |
+
size 2694784
|
examples/driven_audio/eluosi.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4a3593815dc7b68c256672baa61934c9479efa770af2065fb0886f02713606e
|
3 |
+
size 1786672
|
examples/driven_audio/fayu.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16ebd13626ae4171030b4ea05cceef06078483c352e4b68d469fc2a52bfffceb
|
3 |
+
size 1940428
|
examples/driven_audio/imagine.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2db410217e074d91ae6011e1c5dc0b94f02d05d381c50af8e54253eeacad17d2
|
3 |
+
size 1618510
|
examples/driven_audio/itosinger1.wav
ADDED
Binary file (500 kB). View file
|
|
examples/driven_audio/japanese.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3db5426d0b158799e2be4f609b11f75bfbd4affffe18e9a1c8e6f241fcdedcfc
|
3 |
+
size 2622712
|
examples/source_image/art_0.png
ADDED
![]() |
examples/source_image/art_1.png
ADDED
![]() |
examples/source_image/art_10.png
ADDED
![]() |
examples/source_image/art_11.png
ADDED
![]() |
examples/source_image/art_12.png
ADDED
![]() |
examples/source_image/art_13.png
ADDED
![]() |
examples/source_image/art_14.png
ADDED
![]() |
examples/source_image/art_15.png
ADDED
![]() |
examples/source_image/art_16.png
ADDED
![]() |
Git LFS Details
|