use latest torch
- pyproject.toml +0 -11
- requirements.txt +38 -7
- sample.py +13 -10
pyproject.toml
CHANGED
@@ -11,15 +11,4 @@ dependencies = [
     "soundfile>=0.12.1",
     "torch>=2.5.1",
     "vocos>=0.1.0",
-]
-
-
-[[tool.uv.index]]
-name = "pytorch-cpu"
-url = "https://download.pytorch.org/whl/cpu"
-explicit = true
-
-[tool.uv.sources]
-torch = [
-    { index = "pytorch-cpu", marker = "platform_system != 'Darwin'"},
 ]
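With the pytorch-cpu index gone, uv resolves "torch>=2.5.1" from PyPI alone, and the default PyPI wheel for x86_64 Linux is the CUDA build. A minimal sanity check of which build actually got installed (a sketch; it assumes nothing from the repo beyond the torch dependency):

import torch

# A CPU-only wheel (what the old pytorch-cpu index served) reports None here;
# the default PyPI wheel for torch 2.5.1 reports "12.4".
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())  # True only if a GPU is also visible at runtime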
requirements.txt
CHANGED
@@ -1,9 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile pyproject.toml -o requirements.txt --python-platform x86_64-unknown-linux-gnu
---index-url https://pypi.org/simple
---extra-index-url https://download.pytorch.org/whl/cu113
-
-
+#    uv pip compile pyproject.toml -o requirements.txt --python-platform x86_64-unknown-linux-gnu
 aiofiles==23.2.1
     # via gradio
 annotated-types==0.7.0
@@ -43,6 +39,7 @@ filelock==3.13.1
     # via
     #   huggingface-hub
     #   torch
+    #   triton
 fsspec==2024.2.0
     # via
     #   gradio-client
@@ -109,6 +106,38 @@ numpy==1.26.3
     #   pandas
     #   scipy
     #   vocos
+nvidia-cublas-cu12==12.4.5.8
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.4.127
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.4.127
+    # via torch
+nvidia-cuda-runtime-cu12==12.4.127
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.2.1.3
+    # via torch
+nvidia-curand-cu12==10.3.5.147
+    # via torch
+nvidia-cusolver-cu12==11.6.1.9
+    # via torch
+nvidia-cusparse-cu12==12.3.1.170
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-nccl-cu12==2.21.5
+    # via torch
+nvidia-nvjitlink-cu12==12.4.127
+    # via
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvtx-cu12==12.4.127
+    # via torch
 orjson==3.10.12
     # via gradio
 packaging==24.1
@@ -175,13 +204,13 @@ sympy==1.13.1
     # via torch
 tomlkit==0.13.2
     # via gradio
-torch
+torch==2.5.1
     # via
     #   diffusion-speech-360h (pyproject.toml)
     #   encodec
     #   torchaudio
     #   vocos
-torchaudio
+torchaudio==2.5.1
     # via
     #   encodec
     #   vocos
@@ -189,6 +218,8 @@ tqdm==4.66.5
     # via
     #   huggingface-hub
     #   nltk
+triton==3.1.0
+    # via torch
 typeguard==4.2.0
     # via inflect
 typer==0.15.1
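The new pins are what recompiling with the command in the header comment produces: on Linux, the PyPI torch 2.5.1 wheel declares the nvidia-*-cu12 CUDA 12.4 runtime wheels and triton as dependencies, so uv now locks them too. A quick check that the pinned libraries are the ones torch actually loads (a sketch; it assumes the CUDA build of torch from this lock file):

import torch
import triton

print(triton.__version__)              # expect "3.1.0", matching the new pin
print(torch.backends.cudnn.version())  # expect 90100, i.e. nvidia-cudnn-cu12==9.1.0.70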
sample.py
CHANGED
@@ -174,11 +174,12 @@ def sample(
             embedding_vocab_size=model_config["embedding_vocab_size"],
             learn_sigma=model_config["learn_sigma"],
             in_channels=data_config["data_dim"],
-        ).to(device)
+        ).to(device).bfloat16()
 
         state_dict = find_model(ckpt_path)
         model.load_state_dict(state_dict)
         model.eval()  # important!
+        model = model.bfloat16()
         model_cache[ckpt_path] = model
     else:
         model = model_cache[ckpt_path]
@@ -206,15 +207,17 @@ def sample(
         attn_mask=attn_mask,
     )
 
-    samples = diffusion.p_sample_loop(
-        model.forward_with_cfg,
-        z.shape,
-        z,
-        clip_denoised=False,
-        model_kwargs=model_kwargs,
-        progress=True,
-        device=device,
-    )
+    with torch.no_grad():
+        with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
+            samples = diffusion.p_sample_loop(
+                model.forward_with_cfg,
+                z.shape,
+                z,
+                clip_denoised=False,
+                model_kwargs=model_kwargs,
+                progress=True,
+                device=device,
+            )
     samples = [s.chunk(2, dim=0)[0] for s in samples]  # Remove null class samples
     return samples
 
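The sampling change converts the DiT weights to bfloat16 once and runs the whole denoising loop under torch.no_grad() and torch.autocast, so no autograd graph is built across the many p_sample_loop steps and the matmuls run in half precision. A minimal sketch of the same pattern (the Linear stand-in and shapes are placeholders, not the repo's DiT; a CUDA device is assumed):

import torch

# Stand-in for the DiT: weights converted to bfloat16 once, like model.bfloat16().
model = torch.nn.Linear(16, 16).to("cuda").bfloat16()
model.eval()

x = torch.randn(4, 16, device="cuda")  # float32 input, like the noise z
with torch.no_grad():  # no autograd bookkeeping across denoising steps
    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
        y = model(x)  # autocast casts the float32 input to bfloat16 to match the weights
print(y.dtype)  # torch.bfloat16

bfloat16 keeps float32's exponent range, so the loop avoids the loss scaling that float16 would need, even across hundreds of diffusion steps.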