Spaces: Building on L40S
Update app.py

app.py CHANGED
@@ -200,37 +200,89 @@ def install_flash_attn():
         return False


+# Set the shared paths once, as module-level globals
+APP_DIR = os.path.abspath(os.path.dirname(__file__))
+INFERENCE_DIR = os.path.join(APP_DIR, "inference")
+INFER_SCRIPT = os.path.join(INFERENCE_DIR, "infer.py")
+
 def initialize_system():
     optimize_gpu_settings()

-        os.
-        futures.append(executor.submit(
-            snapshot_download,
+    try:
+        # Create the directory structure
+        os.makedirs(INFERENCE_DIR, exist_ok=True)
+        os.makedirs(os.path.join(INFERENCE_DIR, "models"), exist_ok=True)
+        os.makedirs(os.path.join(INFERENCE_DIR, "xcodec_mini_infer"), exist_ok=True)
+
+        from huggingface_hub import snapshot_download, hf_hub_download
+
+        # Download the infer.py script
+        try:
+            infer_script_download = hf_hub_download(
+                repo_id="m-a-p/xcodec_mini_infer",
+                filename="infer.py",
+                local_dir=INFERENCE_DIR,
+                force_download=True
+            )
+            if not os.path.exists(INFER_SCRIPT):
+                shutil.copy2(infer_script_download, INFER_SCRIPT)
+            logging.info(f"infer.py available at: {INFER_SCRIPT}")
+        except Exception as e:
+            logging.error(f"Failed to download infer.py: {e}")
+            raise
+
+        # Download the xcodec_mini_infer model
+        xcodec_path = os.path.join(INFERENCE_DIR, "xcodec_mini_infer")
+        snapshot_download(
             repo_id="m-a-p/xcodec_mini_infer",
-            local_dir=
-        )
+            local_dir=xcodec_path,
+            force_download=True
+        )

+        # Download the YuE models
+        models = [
+            "m-a-p/YuE-s1-7B-anneal-jp-kr-cot",
+            "m-a-p/YuE-s1-7B-anneal-en-cot",
+            "m-a-p/YuE-s1-7B-anneal-zh-cot",
+            "m-a-p/YuE-s2-1B-general"
+        ]
+
+        for model in models:
+            model_name = model.split('/')[-1]
+            model_path = os.path.join(INFERENCE_DIR, "models", model_name)
+            snapshot_download(
+                repo_id=model,
+                local_dir=model_path,
+                force_download=True
+            )
+
+        # Change the working directory
+        os.chdir(INFERENCE_DIR)
         logging.info(f"Working directory changed to: {os.getcwd()}")
+
+        # Verify that the required files exist
+        required_files = [
+            INFER_SCRIPT,
+            os.path.join(xcodec_path, "config.json"),
+            os.path.join(xcodec_path, "vocal_decoder.pth"),
+            os.path.join(xcodec_path, "inst_decoder.pth")
+        ]
+
+        for file_path in required_files:
+            if not os.path.exists(file_path):
+                raise FileNotFoundError(f"Required file not found: {file_path}")
+            else:
+                file_size = os.path.getsize(file_path)
+                logging.info(f"Verified {os.path.basename(file_path)}: {file_size} bytes")
+
+        logging.info("System initialization completed successfully")
+
+    except Exception as e:
+        logging.error(f"Initialization error: {e}")
         raise

+
+
 @lru_cache(maxsize=100)
 def get_cached_file_path(content_hash, prefix):
     return create_temp_file(content_hash, prefix)
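The rewritten initialize_system() mixes two huggingface_hub entry points: hf_hub_download for a single file and snapshot_download for an entire repository. The following is a minimal, self-contained sketch of that pattern, not the app's exact code; BASE_DIR, fetch_assets and the verification list are illustrative names, and it assumes huggingface_hub is installed in the Space.

# Sketch only: single-file vs. whole-repo downloads with huggingface_hub.
# BASE_DIR / fetch_assets are illustrative; the repo id mirrors the diff above.
import logging
import os

from huggingface_hub import hf_hub_download, snapshot_download

logging.basicConfig(level=logging.INFO)

BASE_DIR = os.path.abspath(os.path.dirname(__file__))
INFERENCE_DIR = os.path.join(BASE_DIR, "inference")


def fetch_assets() -> str:
    os.makedirs(INFERENCE_DIR, exist_ok=True)

    # One file out of a repo: the inference entry point.
    script_path = hf_hub_download(
        repo_id="m-a-p/xcodec_mini_infer",
        filename="infer.py",
        local_dir=INFERENCE_DIR,
    )

    # The whole repo: codec config and decoder weights.
    xcodec_path = snapshot_download(
        repo_id="m-a-p/xcodec_mini_infer",
        local_dir=os.path.join(INFERENCE_DIR, "xcodec_mini_infer"),
    )

    # Fail fast if anything the runner needs did not arrive.
    for required in (script_path, os.path.join(xcodec_path, "config.json")):
        if not os.path.exists(required):
            raise FileNotFoundError(required)
        logging.info("Verified %s (%d bytes)", required, os.path.getsize(required))

    return xcodec_path


if __name__ == "__main__":
    fetch_assets()

Note the sketch omits force_download=True; without it huggingface_hub can reuse its cache across restarts, whereas the commit forces a fresh copy of every model on each initialization.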
@@ -272,15 +324,21 @@ def get_audio_duration(file_path):
         logging.error(f"Failed to get audio duration: {e}")
         return None

+
 def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     genre_txt_path = None
     lyrics_txt_path = None

     try:
+        if not os.path.exists(INFER_SCRIPT):
+            raise FileNotFoundError(f"infer.py not found at: {INFER_SCRIPT}")
+
         model_path, config, params = optimize_model_selection(lyrics_txt_content, genre_txt_content)
         logging.info(f"Selected model: {model_path}")
         logging.info(f"Lyrics analysis: {params}")

+
+
         has_chorus = params['sections']['chorus'] > 0
         estimated_duration = params.get('estimated_duration', 90)

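The guard added at the top of infer() turns a missing infer.py into an immediate FileNotFoundError. An alternative, not part of this commit, would be to retry initialization once before surfacing the error. A hypothetical sketch, with ensure_script as an invented helper name:

# Hypothetical fallback, not in the commit: retry setup once before failing.
import logging
import os
from typing import Callable


def ensure_script(script_path: str, reinitialize: Callable[[], None]) -> None:
    """Raise FileNotFoundError only if the script is still missing after one retry."""
    if os.path.exists(script_path):
        return
    logging.warning("Script missing at %s; re-running initialization", script_path)
    reinitialize()
    if not os.path.exists(script_path):
        raise FileNotFoundError(f"infer.py not found at: {script_path}")

In the app this could be called as ensure_script(INFER_SCRIPT, initialize_system) where the guard sits now, trading a hard failure for one extra download attempt.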
@@ -306,20 +364,24 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         os.makedirs(output_dir, exist_ok=True)
         empty_output_folder(output_dir)

+
+
         command = [
+            sys.executable,
+            INFER_SCRIPT,
             "--stage1_model", model_path,
             "--stage2_model", "m-a-p/YuE-s2-1B-general",
-            "--genre_txt", genre_txt_path,
-            "--lyrics_txt", lyrics_txt_path,
+            "--genre_txt", os.path.abspath(genre_txt_path),
+            "--lyrics_txt", os.path.abspath(lyrics_txt_path),
             "--run_n_segments", str(actual_num_segments),
             "--stage2_batch_size", "16",
-            "--output_dir",
+            "--output_dir", os.path.abspath("./output"),
             "--cuda_idx", "0",
             "--max_new_tokens", str(actual_max_tokens),
-            "--disable_offload_model"
+            "--disable_offload_model"
         ]
+
+

         env = os.environ.copy()
         if torch.cuda.is_available():
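This hunk only assembles the argument list and copies the environment; the subprocess call itself falls outside the diff context. A generic sketch of running such a command list is below; the script path and flags are placeholders, not the app's real values.

# Sketch: run a command list like the one assembled above and surface failures.
import os
import subprocess
import sys

command = [
    sys.executable, "inference/infer.py",   # placeholder script path
    "--cuda_idx", "0",                      # placeholder flag
]

env = os.environ.copy()
env.setdefault("CUDA_VISIBLE_DEVICES", "0")  # pin to a single GPU, e.g. the L40S

result = subprocess.run(command, env=env, capture_output=True, text=True)
if result.returncode != 0:
    raise RuntimeError(f"infer.py failed with code {result.returncode}:\n{result.stderr}")
print(result.stdout)

Passing env explicitly keeps the os.environ.copy() pattern from the diff, so any CUDA settings applied earlier in the parent process are inherited by the child.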