yablokoff vinthony commited on
Commit
f368cb7
·
0 Parent(s):

Duplicate from vinthony/SadTalker

Browse files

Co-authored-by: ShadowC <[email protected]>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +52 -0
  2. .gitignore +155 -0
  3. Dockerfile +59 -0
  4. LICENSE +21 -0
  5. README.md +15 -0
  6. app.py +111 -0
  7. checkpoints/BFM_Fitting.zip +3 -0
  8. checkpoints/BFM_Fitting/01_MorphableModel.mat +3 -0
  9. checkpoints/BFM_Fitting/BFM09_model_info.mat +3 -0
  10. checkpoints/BFM_Fitting/BFM_exp_idx.mat +0 -0
  11. checkpoints/BFM_Fitting/BFM_front_idx.mat +0 -0
  12. checkpoints/BFM_Fitting/Exp_Pca.bin +3 -0
  13. checkpoints/BFM_Fitting/facemodel_info.mat +0 -0
  14. checkpoints/BFM_Fitting/select_vertex_id.mat +0 -0
  15. checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +0 -0
  16. checkpoints/BFM_Fitting/std_exp.txt +1 -0
  17. checkpoints/auido2exp_00300-model.pth +3 -0
  18. checkpoints/auido2pose_00140-model.pth +3 -0
  19. checkpoints/epoch_20.pth +3 -0
  20. checkpoints/facevid2vid_00189-model.pth.tar +3 -0
  21. checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +3 -0
  22. checkpoints/hub/checkpoints/s3fd-619a316812.pth +3 -0
  23. checkpoints/mapping_00229-model.pth.tar +3 -0
  24. checkpoints/shape_predictor_68_face_landmarks.dat +3 -0
  25. checkpoints/wav2lip.pth +3 -0
  26. config/auido2exp.yaml +58 -0
  27. config/auido2pose.yaml +49 -0
  28. config/facerender.yaml +45 -0
  29. examples/driven_audio/RD_Radio31_000.wav +0 -0
  30. examples/driven_audio/RD_Radio34_002.wav +0 -0
  31. examples/driven_audio/RD_Radio36_000.wav +0 -0
  32. examples/driven_audio/RD_Radio40_000.wav +0 -0
  33. examples/driven_audio/chinese_news.wav +3 -0
  34. examples/driven_audio/chinese_poem1.wav +0 -0
  35. examples/driven_audio/chinese_poem2.wav +0 -0
  36. examples/driven_audio/deyu.wav +3 -0
  37. examples/driven_audio/eluosi.wav +3 -0
  38. examples/driven_audio/fayu.wav +3 -0
  39. examples/driven_audio/imagine.wav +3 -0
  40. examples/driven_audio/itosinger1.wav +0 -0
  41. examples/driven_audio/japanese.wav +3 -0
  42. examples/source_image/art_0.png +0 -0
  43. examples/source_image/art_1.png +0 -0
  44. examples/source_image/art_10.png +0 -0
  45. examples/source_image/art_11.png +0 -0
  46. examples/source_image/art_12.png +0 -0
  47. examples/source_image/art_13.png +0 -0
  48. examples/source_image/art_14.png +0 -0
  49. examples/source_image/art_15.png +0 -0
  50. examples/source_image/art_16.png +3 -0
.gitattributes ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ checkpoints/BFM_Fitting/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
36
+ checkpoints/BFM_Fitting/BFM09_model_info.mat filter=lfs diff=lfs merge=lfs -text
37
+ checkpoints/facevid2vid_00189-model.pth.tar filter=lfs diff=lfs merge=lfs -text
38
+ checkpoints/mapping_00229-model.pth.tar filter=lfs diff=lfs merge=lfs -text
39
+ checkpoints/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
40
+ examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
41
+ examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
42
+ examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
43
+ examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
44
+ examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
45
+ examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
46
+ examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
47
+ examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
48
+ examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
49
+ examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
50
+ examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
51
+ examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
52
+ examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ results/
Dockerfile ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
2
+ ENV DEBIAN_FRONTEND=noninteractive
3
+ RUN apt-get update && \
4
+ apt-get upgrade -y && \
5
+ apt-get install -y --no-install-recommends \
6
+ git \
7
+ zip \
8
+ unzip \
9
+ git-lfs \
10
+ wget \
11
+ curl \
12
+ # ffmpeg \
13
+ ffmpeg \
14
+ x264 \
15
+ # python build dependencies \
16
+ build-essential \
17
+ libssl-dev \
18
+ zlib1g-dev \
19
+ libbz2-dev \
20
+ libreadline-dev \
21
+ libsqlite3-dev \
22
+ libncursesw5-dev \
23
+ xz-utils \
24
+ tk-dev \
25
+ libxml2-dev \
26
+ libxmlsec1-dev \
27
+ libffi-dev \
28
+ liblzma-dev && \
29
+ apt-get clean && \
30
+ rm -rf /var/lib/apt/lists/*
31
+
32
+ RUN useradd -m -u 1000 user
33
+ USER user
34
+ ENV HOME=/home/user \
35
+ PATH=/home/user/.local/bin:${PATH}
36
+ WORKDIR ${HOME}/app
37
+
38
+ RUN curl https://pyenv.run | bash
39
+ ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
40
+ ENV PYTHON_VERSION=3.10.9
41
+ RUN pyenv install ${PYTHON_VERSION} && \
42
+ pyenv global ${PYTHON_VERSION} && \
43
+ pyenv rehash && \
44
+ pip install --no-cache-dir -U pip setuptools wheel
45
+
46
+ RUN pip install --no-cache-dir -U torch==1.12.1 torchvision==0.13.1
47
+ COPY --chown=1000 requirements.txt /tmp/requirements.txt
48
+ RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
49
+
50
+ COPY --chown=1000 . ${HOME}/app
51
+ RUN ls -a
52
+ ENV PYTHONPATH=${HOME}/app \
53
+ PYTHONUNBUFFERED=1 \
54
+ GRADIO_ALLOW_FLAGGING=never \
55
+ GRADIO_NUM_PORTS=1 \
56
+ GRADIO_SERVER_NAME=0.0.0.0 \
57
+ GRADIO_THEME=huggingface \
58
+ SYSTEM=spaces
59
+ CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Tencent AI Lab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SadTalker
3
+ emoji: 😭
4
+ colorFrom: purple
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.23.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: vinthony/SadTalker
12
+ ---
13
+
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import tempfile
3
+ import gradio as gr
4
+ from modules.text2speech import text2speech
5
+ from modules.sadtalker_test import SadTalker
6
+
7
+ def get_driven_audio(audio):
8
+ if os.path.isfile(audio):
9
+ return audio
10
+ else:
11
+ save_path = tempfile.NamedTemporaryFile(
12
+ delete=False,
13
+ suffix=("." + "wav"),
14
+ )
15
+ gen_audio = text2speech(audio, save_path.name)
16
+ return gen_audio, gen_audio
17
+
18
+ def get_source_image(image):
19
+ return image
20
+
21
+ def sadtalker_demo(result_dir='./tmp/'):
22
+
23
+ sad_talker = SadTalker()
24
+ with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
25
+ gr.Markdown("<div align='center'> <h3> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </h3> \
26
+ <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
27
+ <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
28
+ <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </a> </div>")
29
+
30
+ with gr.Row():
31
+ with gr.Column(variant='panel'):
32
+ with gr.Tabs(elem_id="sadtalker_source_image"):
33
+ with gr.TabItem('Upload image'):
34
+ with gr.Row():
35
+ source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256)
36
+
37
+ with gr.Tabs(elem_id="sadtalker_driven_audio"):
38
+ with gr.TabItem('Upload audio(wav/mp3 only currently)'):
39
+ with gr.Column(variant='panel'):
40
+ driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
41
+
42
+ with gr.Column(variant='panel'):
43
+ with gr.Tabs(elem_id="sadtalker_checkbox"):
44
+ with gr.TabItem('Settings'):
45
+ with gr.Column(variant='panel'):
46
+ is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True)
47
+ is_resize_mode = gr.Checkbox(label="Resize Mode (⚠️ Resize mode need manually crop the image firstly, can handle larger image crop)").style(container=True)
48
+ is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True)
49
+ submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
50
+
51
+ with gr.Tabs(elem_id="sadtalker_genearted"):
52
+ gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
53
+ gen_text = gr.Textbox(visible=False)
54
+
55
+ with gr.Row():
56
+ examples = [
57
+ [
58
+ 'examples/source_image/art_10.png',
59
+ 'examples/driven_audio/deyu.wav',
60
+ True,
61
+ False,
62
+ False
63
+ ],
64
+ [
65
+ 'examples/source_image/art_1.png',
66
+ 'examples/driven_audio/fayu.wav',
67
+ True,
68
+ True,
69
+ False
70
+ ],
71
+ [
72
+ 'examples/source_image/art_9.png',
73
+ 'examples/driven_audio/itosinger1.wav',
74
+ True,
75
+ False,
76
+ True
77
+ ]
78
+ ]
79
+ gr.Examples(examples=examples,
80
+ inputs=[
81
+ source_image,
82
+ driven_audio,
83
+ is_still_mode,
84
+ is_resize_mode,
85
+ is_enhance_mode,
86
+ gr.Textbox(value=result_dir, visible=False)],
87
+ outputs=[gen_video, gen_text],
88
+ fn=sad_talker.test,
89
+ cache_examples=os.getenv('SYSTEM') == 'spaces')
90
+
91
+ submit.click(
92
+ fn=sad_talker.test,
93
+ inputs=[source_image,
94
+ driven_audio,
95
+ is_still_mode,
96
+ is_resize_mode,
97
+ is_enhance_mode,
98
+ gr.Textbox(value=result_dir, visible=False)],
99
+ outputs=[gen_video, gen_text]
100
+ )
101
+
102
+ return sadtalker_interface
103
+
104
+
105
+ if __name__ == "__main__":
106
+
107
+ sadtalker_result_dir = os.path.join('./', 'results')
108
+ demo = sadtalker_demo(sadtalker_result_dir)
109
+ demo.launch()
110
+
111
+
checkpoints/BFM_Fitting.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:785f77f3de288568e76666cd419dcf40646d3f74eae6d4fa3b766c933087a9d8
3
+ size 404051745
checkpoints/BFM_Fitting/01_MorphableModel.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
3
+ size 240875364
checkpoints/BFM_Fitting/BFM09_model_info.mat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
3
+ size 127170280
checkpoints/BFM_Fitting/BFM_exp_idx.mat ADDED
Binary file (91.9 kB). View file
 
checkpoints/BFM_Fitting/BFM_front_idx.mat ADDED
Binary file (44.9 kB). View file
 
checkpoints/BFM_Fitting/Exp_Pca.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
3
+ size 51086404
checkpoints/BFM_Fitting/facemodel_info.mat ADDED
Binary file (739 kB). View file
 
checkpoints/BFM_Fitting/select_vertex_id.mat ADDED
Binary file (62.3 kB). View file
 
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat ADDED
Binary file (994 Bytes). View file
 
checkpoints/BFM_Fitting/std_exp.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19
checkpoints/auido2exp_00300-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
3
+ size 34278319
checkpoints/auido2pose_00140-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
3
+ size 95916155
checkpoints/epoch_20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
3
+ size 288860037
checkpoints/facevid2vid_00189-model.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
3
+ size 2112619148
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
3
+ size 96316515
checkpoints/hub/checkpoints/s3fd-619a316812.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
3
+ size 89843225
checkpoints/mapping_00229-model.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
3
+ size 155521183
checkpoints/shape_predictor_68_face_landmarks.dat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
3
+ size 99693937
checkpoints/wav2lip.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
3
+ size 435807851
config/auido2exp.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATASET:
2
+ TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/train.txt
3
+ EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/file_list/val.txt
4
+ TRAIN_BATCH_SIZE: 32
5
+ EVAL_BATCH_SIZE: 32
6
+ EXP: True
7
+ EXP_DIM: 64
8
+ FRAME_LEN: 32
9
+ COEFF_LEN: 73
10
+ NUM_CLASSES: 46
11
+ AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
12
+ COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav2lip_3dmm
13
+ LMDB_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
14
+ DEBUG: True
15
+ NUM_REPEATS: 2
16
+ T: 40
17
+
18
+
19
+ MODEL:
20
+ FRAMEWORK: V2
21
+ AUDIOENCODER:
22
+ LEAKY_RELU: True
23
+ NORM: 'IN'
24
+ DISCRIMINATOR:
25
+ LEAKY_RELU: False
26
+ INPUT_CHANNELS: 6
27
+ CVAE:
28
+ AUDIO_EMB_IN_SIZE: 512
29
+ AUDIO_EMB_OUT_SIZE: 128
30
+ SEQ_LEN: 32
31
+ LATENT_SIZE: 256
32
+ ENCODER_LAYER_SIZES: [192, 1024]
33
+ DECODER_LAYER_SIZES: [1024, 192]
34
+
35
+
36
+ TRAIN:
37
+ MAX_EPOCH: 300
38
+ GENERATOR:
39
+ LR: 2.0e-5
40
+ DISCRIMINATOR:
41
+ LR: 1.0e-5
42
+ LOSS:
43
+ W_FEAT: 0
44
+ W_COEFF_EXP: 2
45
+ W_LM: 1.0e-2
46
+ W_LM_MOUTH: 0
47
+ W_REG: 0
48
+ W_SYNC: 0
49
+ W_COLOR: 0
50
+ W_EXPRESSION: 0
51
+ W_LIPREADING: 0.01
52
+ W_LIPREADING_VV: 0
53
+ W_EYE_BLINK: 4
54
+
55
+ TAG:
56
+ NAME: small_dataset
57
+
58
+
config/auido2pose.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATASET:
2
+ TRAIN_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/train_33.txt
3
+ EVAL_FILE_LIST: /apdcephfs_cq2/share_1290939/wenxuazhang/code/audio2pose_unet_noAudio/dataset/val.txt
4
+ TRAIN_BATCH_SIZE: 64
5
+ EVAL_BATCH_SIZE: 1
6
+ EXP: True
7
+ EXP_DIM: 64
8
+ FRAME_LEN: 32
9
+ COEFF_LEN: 73
10
+ NUM_CLASSES: 46
11
+ AUDIO_ROOT_PATH: /apdcephfs_cq2/share_1290939/wenxuazhang/voxceleb1/wav
12
+ COEFF_ROOT_PATH: /apdcephfs_cq2/share_1290939/shadowcun/datasets/VoxCeleb/v1/imdb
13
+ DEBUG: True
14
+
15
+
16
+ MODEL:
17
+ AUDIOENCODER:
18
+ LEAKY_RELU: True
19
+ NORM: 'IN'
20
+ DISCRIMINATOR:
21
+ LEAKY_RELU: False
22
+ INPUT_CHANNELS: 6
23
+ CVAE:
24
+ AUDIO_EMB_IN_SIZE: 512
25
+ AUDIO_EMB_OUT_SIZE: 6
26
+ SEQ_LEN: 32
27
+ LATENT_SIZE: 64
28
+ ENCODER_LAYER_SIZES: [192, 128]
29
+ DECODER_LAYER_SIZES: [128, 192]
30
+
31
+
32
+ TRAIN:
33
+ MAX_EPOCH: 150
34
+ GENERATOR:
35
+ LR: 1.0e-4
36
+ DISCRIMINATOR:
37
+ LR: 1.0e-4
38
+ LOSS:
39
+ LAMBDA_REG: 1
40
+ LAMBDA_LANDMARKS: 0
41
+ LAMBDA_VERTICES: 0
42
+ LAMBDA_GAN_MOTION: 0.7
43
+ LAMBDA_GAN_COEFF: 0
44
+ LAMBDA_KL: 1
45
+
46
+ TAG:
47
+ NAME: cvae_UNET_useAudio_usewav2lipAudioEncoder
48
+
49
+
config/facerender.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_params:
2
+ common_params:
3
+ num_kp: 15
4
+ image_channel: 3
5
+ feature_channel: 32
6
+ estimate_jacobian: False # True
7
+ kp_detector_params:
8
+ temperature: 0.1
9
+ block_expansion: 32
10
+ max_features: 1024
11
+ scale_factor: 0.25 # 0.25
12
+ num_blocks: 5
13
+ reshape_channel: 16384 # 16384 = 1024 * 16
14
+ reshape_depth: 16
15
+ he_estimator_params:
16
+ block_expansion: 64
17
+ max_features: 2048
18
+ num_bins: 66
19
+ generator_params:
20
+ block_expansion: 64
21
+ max_features: 512
22
+ num_down_blocks: 2
23
+ reshape_channel: 32
24
+ reshape_depth: 16 # 512 = 32 * 16
25
+ num_resblocks: 6
26
+ estimate_occlusion_map: True
27
+ dense_motion_params:
28
+ block_expansion: 32
29
+ max_features: 1024
30
+ num_blocks: 5
31
+ reshape_depth: 16
32
+ compress: 4
33
+ discriminator_params:
34
+ scales: [1]
35
+ block_expansion: 32
36
+ max_features: 512
37
+ num_blocks: 4
38
+ sn: True
39
+ mapping_params:
40
+ coeff_nc: 70
41
+ descriptor_nc: 1024
42
+ layer: 3
43
+ num_kp: 15
44
+ num_bins: 66
45
+
examples/driven_audio/RD_Radio31_000.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/RD_Radio34_002.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/RD_Radio36_000.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/RD_Radio40_000.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/chinese_news.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b0f4d313a1ca671bc4831d60bcf0c12225efbffe6c0e93e54fbfe9bcd4021cb
3
+ size 1536078
examples/driven_audio/chinese_poem1.wav ADDED
Binary file (263 kB). View file
 
examples/driven_audio/chinese_poem2.wav ADDED
Binary file (461 kB). View file
 
examples/driven_audio/deyu.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba1839c57770a2ab0b593ce814344bfd4d750da02acc9be9e8cf5b9113a0f88a
3
+ size 2694784
examples/driven_audio/eluosi.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a3593815dc7b68c256672baa61934c9479efa770af2065fb0886f02713606e
3
+ size 1786672
examples/driven_audio/fayu.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16ebd13626ae4171030b4ea05cceef06078483c352e4b68d469fc2a52bfffceb
3
+ size 1940428
examples/driven_audio/imagine.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db410217e074d91ae6011e1c5dc0b94f02d05d381c50af8e54253eeacad17d2
3
+ size 1618510
examples/driven_audio/itosinger1.wav ADDED
Binary file (500 kB). View file
 
examples/driven_audio/japanese.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3db5426d0b158799e2be4f609b11f75bfbd4affffe18e9a1c8e6f241fcdedcfc
3
+ size 2622712
examples/source_image/art_0.png ADDED
examples/source_image/art_1.png ADDED
examples/source_image/art_10.png ADDED
examples/source_image/art_11.png ADDED
examples/source_image/art_12.png ADDED
examples/source_image/art_13.png ADDED
examples/source_image/art_14.png ADDED
examples/source_image/art_15.png ADDED
examples/source_image/art_16.png ADDED

Git LFS Details

  • SHA256: 3f6d350055eea3abe35ee3fe9df80dcd99d8edae66ef4fc20bf06168bf189f25
  • Pointer size: 132 Bytes
  • Size of remote file: 1.48 MB