ntt123 committed on
Commit
efb6748
·
1 Parent(s): ba4122d

use latest torch

Browse files
Files changed (3) hide show
  1. pyproject.toml +0 -11
  2. requirements.txt +38 -7
  3. sample.py +13 -10
pyproject.toml CHANGED
@@ -11,15 +11,4 @@ dependencies = [
11
  "soundfile>=0.12.1",
12
  "torch>=2.5.1",
13
  "vocos>=0.1.0",
14
- ]
15
-
16
-
17
- [[tool.uv.index]]
18
- name = "pytorch-cpu"
19
- url = "https://download.pytorch.org/whl/cpu"
20
- explicit = true
21
-
22
- [tool.uv.sources]
23
- torch = [
24
- { index = "pytorch-cpu", marker = "platform_system != 'Darwin'"},
25
  ]
 
11
  "soundfile>=0.12.1",
12
  "torch>=2.5.1",
13
  "vocos>=0.1.0",
 
 
 
 
 
 
 
 
 
 
 
14
  ]
requirements.txt CHANGED
@@ -1,9 +1,5 @@
1
  # This file was autogenerated by uv via the following command:
2
- # uv pip compile pyproject.toml -o requirements.txt --python-platform x86_64-unknown-linux-gnu --emit-index-url --extra-index-url https://download.pytorch.org/whl/cu113
3
- --index-url https://pypi.org/simple
4
- --extra-index-url https://download.pytorch.org/whl/cu113
5
-
6
-
7
  aiofiles==23.2.1
8
  # via gradio
9
  annotated-types==0.7.0
@@ -43,6 +39,7 @@ filelock==3.13.1
43
  # via
44
  # huggingface-hub
45
  # torch
 
46
  fsspec==2024.2.0
47
  # via
48
  # gradio-client
@@ -109,6 +106,38 @@ numpy==1.26.3
109
  # pandas
110
  # scipy
111
  # vocos
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  orjson==3.10.12
113
  # via gradio
114
  packaging==24.1
@@ -175,13 +204,13 @@ sympy==1.13.1
175
  # via torch
176
  tomlkit==0.13.2
177
  # via gradio
178
- torch
179
  # via
180
  # diffusion-speech-360h (pyproject.toml)
181
  # encodec
182
  # torchaudio
183
  # vocos
184
- torchaudio
185
  # via
186
  # encodec
187
  # vocos
@@ -189,6 +218,8 @@ tqdm==4.66.5
189
  # via
190
  # huggingface-hub
191
  # nltk
 
 
192
  typeguard==4.2.0
193
  # via inflect
194
  typer==0.15.1
 
1
  # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o requirements.txt --python-platform x86_64-unknown-linux-gnu
 
 
 
 
3
  aiofiles==23.2.1
4
  # via gradio
5
  annotated-types==0.7.0
 
39
  # via
40
  # huggingface-hub
41
  # torch
42
+ # triton
43
  fsspec==2024.2.0
44
  # via
45
  # gradio-client
 
106
  # pandas
107
  # scipy
108
  # vocos
109
+ nvidia-cublas-cu12==12.4.5.8
110
+ # via
111
+ # nvidia-cudnn-cu12
112
+ # nvidia-cusolver-cu12
113
+ # torch
114
+ nvidia-cuda-cupti-cu12==12.4.127
115
+ # via torch
116
+ nvidia-cuda-nvrtc-cu12==12.4.127
117
+ # via torch
118
+ nvidia-cuda-runtime-cu12==12.4.127
119
+ # via torch
120
+ nvidia-cudnn-cu12==9.1.0.70
121
+ # via torch
122
+ nvidia-cufft-cu12==11.2.1.3
123
+ # via torch
124
+ nvidia-curand-cu12==10.3.5.147
125
+ # via torch
126
+ nvidia-cusolver-cu12==11.6.1.9
127
+ # via torch
128
+ nvidia-cusparse-cu12==12.3.1.170
129
+ # via
130
+ # nvidia-cusolver-cu12
131
+ # torch
132
+ nvidia-nccl-cu12==2.21.5
133
+ # via torch
134
+ nvidia-nvjitlink-cu12==12.4.127
135
+ # via
136
+ # nvidia-cusolver-cu12
137
+ # nvidia-cusparse-cu12
138
+ # torch
139
+ nvidia-nvtx-cu12==12.4.127
140
+ # via torch
141
  orjson==3.10.12
142
  # via gradio
143
  packaging==24.1
 
204
  # via torch
205
  tomlkit==0.13.2
206
  # via gradio
207
+ torch==2.5.1
208
  # via
209
  # diffusion-speech-360h (pyproject.toml)
210
  # encodec
211
  # torchaudio
212
  # vocos
213
+ torchaudio==2.5.1
214
  # via
215
  # encodec
216
  # vocos
 
218
  # via
219
  # huggingface-hub
220
  # nltk
221
+ triton==3.1.0
222
+ # via torch
223
  typeguard==4.2.0
224
  # via inflect
225
  typer==0.15.1
sample.py CHANGED
@@ -174,11 +174,12 @@ def sample(
174
  embedding_vocab_size=model_config["embedding_vocab_size"],
175
  learn_sigma=model_config["learn_sigma"],
176
  in_channels=data_config["data_dim"],
177
- ).to(device)
178
 
179
  state_dict = find_model(ckpt_path)
180
  model.load_state_dict(state_dict)
181
  model.eval() # important!
 
182
  model_cache[ckpt_path] = model
183
  else:
184
  model = model_cache[ckpt_path]
@@ -206,15 +207,17 @@ def sample(
206
  attn_mask=attn_mask,
207
  )
208
 
209
- samples = diffusion.p_sample_loop(
210
- model.forward_with_cfg,
211
- z.shape,
212
- z,
213
- clip_denoised=False,
214
- model_kwargs=model_kwargs,
215
- progress=True,
216
- device=device,
217
- )
 
 
218
  samples = [s.chunk(2, dim=0)[0] for s in samples] # Remove null class samples
219
  return samples
220
 
 
174
  embedding_vocab_size=model_config["embedding_vocab_size"],
175
  learn_sigma=model_config["learn_sigma"],
176
  in_channels=data_config["data_dim"],
177
+ ).to(device).bfloat16
178
 
179
  state_dict = find_model(ckpt_path)
180
  model.load_state_dict(state_dict)
181
  model.eval() # important!
182
+ model = model.bfloat16()
183
  model_cache[ckpt_path] = model
184
  else:
185
  model = model_cache[ckpt_path]
 
207
  attn_mask=attn_mask,
208
  )
209
 
210
+ with torch.no_grad():
211
+ with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
212
+ samples = diffusion.p_sample_loop(
213
+ model.forward_with_cfg,
214
+ z.shape,
215
+ z,
216
+ clip_denoised=False,
217
+ model_kwargs=model_kwargs,
218
+ progress=True,
219
+ device=device,
220
+ )
221
  samples = [s.chunk(2, dim=0)[0] for s in samples] # Remove null class samples
222
  return samples
223