Spaces:
Runtime error
Runtime error
Commit
·
dc6a3d5
1
Parent(s):
febf9c9
update
Browse files
- app.py +4 -0
- diffrhythm/infer/infer.py +10 -1
app.py
CHANGED
@@ -9,6 +9,7 @@ from einops import rearrange
|
|
9 |
import argparse
|
10 |
import json
|
11 |
import os
|
|
|
12 |
#import spaces
|
13 |
from tqdm import tqdm
|
14 |
import random
|
@@ -49,6 +50,9 @@ def infer_music(lrc, ref_audio_path, steps, file_type, max_frames=2048):
|
|
49 |
start_time=start_time,
|
50 |
file_type=file_type
|
51 |
)
|
|
|
|
|
|
|
52 |
return generated_song
|
53 |
|
54 |
def R1_infer1(theme, tags_gen, language):
|
|
|
9 |
import argparse
|
10 |
import json
|
11 |
import os
|
12 |
+
import gc
|
13 |
#import spaces
|
14 |
from tqdm import tqdm
|
15 |
import random
|
|
|
50 |
start_time=start_time,
|
51 |
file_type=file_type
|
52 |
)
|
53 |
+
torch.cuda.empty_cache()
|
54 |
+
gc.collect()
|
55 |
+
|
56 |
return generated_song
|
57 |
|
58 |
def R1_infer1(theme, tags_gen, language):
|
diffrhythm/infer/infer.py
CHANGED
@@ -9,6 +9,7 @@ import random
|
|
9 |
import numpy as np
|
10 |
import time
|
11 |
import io
|
|
|
12 |
import pydub
|
13 |
|
14 |
from diffrhythm.infer.infer_utils import (
|
@@ -88,11 +89,19 @@ def inference(cfm_model, vae_model, cond, text, duration, style_prompt, negative
|
|
88 |
sway_sampling_coef=sway_sampling_coef,
|
89 |
start_time=start_time
|
90 |
)
|
|
|
|
|
|
|
91 |
|
92 |
generated = generated.to(torch.float32)
|
93 |
latent = generated.transpose(1, 2) # [b d t]
|
94 |
output = decode_audio(latent, vae_model, chunked=False)
|
95 |
|
|
|
|
|
|
|
|
|
|
|
96 |
# Rearrange audio batch to a single sequence
|
97 |
output = rearrange(output, "b d n -> d (b n)")
|
98 |
output_tensor = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).cpu()
|
@@ -157,4 +166,4 @@ if __name__ == "__main__":
|
|
157 |
|
158 |
output_path = os.path.join(output_dir, "output.wav")
|
159 |
torchaudio.save(output_path, generated_song, sample_rate=44100)
|
160 |
-
|
|
|
9 |
import numpy as np
|
10 |
import time
|
11 |
import io
|
12 |
+
import gc
|
13 |
import pydub
|
14 |
|
15 |
from diffrhythm.infer.infer_utils import (
|
|
|
89 |
sway_sampling_coef=sway_sampling_coef,
|
90 |
start_time=start_time
|
91 |
)
|
92 |
+
torch.cuda.empty_cache()
|
93 |
+
gc.collect()
|
94 |
+
|
95 |
|
96 |
generated = generated.to(torch.float32)
|
97 |
latent = generated.transpose(1, 2) # [b d t]
|
98 |
output = decode_audio(latent, vae_model, chunked=False)
|
99 |
|
100 |
+
del latent, generated
|
101 |
+
torch.cuda.empty_cache()
|
102 |
+
gc.collect()
|
103 |
+
|
104 |
+
|
105 |
# Rearrange audio batch to a single sequence
|
106 |
output = rearrange(output, "b d n -> d (b n)")
|
107 |
output_tensor = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).cpu()
|
|
|
166 |
|
167 |
output_path = os.path.join(output_dir, "output.wav")
|
168 |
torchaudio.save(output_path, generated_song, sample_rate=44100)
|
169 |
+
|