modified: app.py
modified: inference/infer.py
- app.py +14 -12
- inference/infer.py +3 -10
app.py
CHANGED

@@ -104,14 +104,18 @@ def get_last_mp3_file(output_dir):
     # Return the most recent .mp3 file
     return mp3_files_with_path[0]
 
+device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
+
+model = AutoModelForCausalLM.from_pretrained(
+    "m-a-p/YuE-s1-7B-anneal-en-cot",
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
+)
+model.to(device)
+model.eval()
+
 @spaces.GPU(duration=200)
 def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=200):
-    # Create temporary files
-    genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
-    lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
-
-    print(f"Genre TXT path: {genre_txt_path}")
-    print(f"Lyrics TXT path: {lyrics_txt_path}")
 
     # Ensure the output folder exists
     output_dir = "./output"

@@ -123,16 +127,16 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=200):
     # Command and arguments with optimized settings
     command = [
         "python", "infer.py",
-        "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
+        "--stage1_model", model,
         # "--stage2_model", "m-a-p/YuE-s2-1B-general",
-        "--genre_txt", f"{genre_txt_path}",
-        "--lyrics_txt", f"{lyrics_txt_path}",
+        "--genre_txt", f"{genre_txt_content}",
+        "--lyrics_txt", f"{lyrics_txt_content}",
         "--run_n_segments", f"{num_segments}",
         # "--stage2_batch_size", "4",
         "--output_dir", f"{output_dir}",
         "--cuda_idx", "0",
         "--max_new_tokens", f"{max_new_tokens}",
-        "--disable_offload_model"
+        # "--disable_offload_model"
     ]
 
     # Set up environment variables for CUDA with optimized settings

@@ -165,8 +169,6 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=200):
         return None
     finally:
         # Clean up temporary files
-        os.remove(genre_txt_path)
-        os.remove(lyrics_txt_path)
         print("Temporary files deleted.")
 
 # Gradio
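Note on the app.py change: `command` is executed via `subprocess`, and every argv entry must be a string (or path-like), so appending the loaded `model` object will raise a TypeError before infer.py even starts. Likewise, if `cuda_idx` is not defined elsewhere in app.py, the new module-level `device = ...` line will raise a NameError. A minimal sketch of a string-only hand-off, assuming infer.py still accepts a checkpoint ID via `--stage1_model` (the `run_stage1` wrapper is hypothetical, not part of this commit):

    import subprocess

    STAGE1_MODEL = "m-a-p/YuE-s1-7B-anneal-en-cot"  # checkpoint ID, passed as a string

    def run_stage1(genre_txt_path, lyrics_txt_path, num_segments=2,
                   max_new_tokens=200, output_dir="./output"):
        # Every argv entry must be a string: pass the model ID, not the object.
        command = [
            "python", "infer.py",
            "--stage1_model", STAGE1_MODEL,
            "--genre_txt", genre_txt_path,
            "--lyrics_txt", lyrics_txt_path,
            "--run_n_segments", str(num_segments),
            "--output_dir", output_dir,
            "--cuda_idx", "0",
            "--max_new_tokens", str(max_new_tokens),
        ]
        subprocess.run(command, check=True)  # raises CalledProcessError on failure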
inference/infer.py
CHANGED

@@ -56,7 +56,7 @@ parser.add_argument('-r', '--rescale', action='store_true', help='Rescale output
 args = parser.parse_args()
 if args.use_audio_prompt and not args.audio_prompt_path:
     raise FileNotFoundError("Please offer audio prompt filepath using '--audio_prompt_path', when you enable 'use_audio_prompt'!")
-stage1_model = args.stage1_model
+model = args.stage1_model
 cuda_idx = args.cuda_idx
 max_new_tokens = args.max_new_tokens
 stage1_output_dir = os.path.join(args.output_dir, f"stage1")

@@ -69,13 +69,6 @@ device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 
 mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
-model = AutoModelForCausalLM.from_pretrained(
-    stage1_model,
-    torch_dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
-)
-model.to(device)
-model.eval()
 
 codectool = CodecManipulator("xcodec", 0, 1)
 model_config = OmegaConf.load(args.basic_model_config)

@@ -115,9 +108,9 @@ stage1_output_set = []
 # genre tags support instrumental,genre,mood,vocal timbr and vocal gender
 # all kinds of tags are needed
 with open(args.genre_txt) as f:
-    genres = f.read().strip()
+    genres = f.strip()
 with open(args.lyrics_txt) as f:
-    lyrics = split_lyrics(f.read())
+    lyrics = split_lyrics(f)
 # intruction
 full_lyrics = "\n".join(lyrics)
 prompt_texts = [f"Generate music from the given lyrics segment by segment.\n[Genre] {genres}\n{full_lyrics}"]
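Note on the infer.py change: app.py now passes the raw genre and lyrics text through `--genre_txt`/`--lyrics_txt` rather than temp-file paths, yet the `with open(...)` blocks remain, so `open()` will treat that text as a filename; `f.strip()` and `split_lyrics(f)` also operate on the file object itself, which has no `.strip()` method. A minimal sketch of consuming the flags as content, assuming they now carry the text itself (not part of this commit):

    # Read the argument strings directly instead of open()ing them as files.
    genres = args.genre_txt.strip()
    lyrics = split_lyrics(args.lyrics_txt)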