KingNish committed
Commit bf07d15 · 1 Parent(s): a57df53

modified: app.py

modified: inference/infer.py

Files changed (2)
  1. app.py +14 -12
  2. inference/infer.py +3 -10
app.py CHANGED
@@ -104,14 +104,18 @@ def get_last_mp3_file(output_dir):
     # Return the most recent .mp3 file
     return mp3_files_with_path[0]
 
+device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
+
+model = AutoModelForCausalLM.from_pretrained(
+    "m-a-p/YuE-s1-7B-anneal-en-cot",
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
+)
+model.to(device)
+model.eval()
+
 @spaces.GPU(duration=200)
 def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=200):
-    # Create temporary files
-    genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
-    lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
-
-    print(f"Genre TXT path: {genre_txt_path}")
-    print(f"Lyrics TXT path: {lyrics_txt_path}")
 
     # Ensure the output folder exists
     output_dir = "./output"
@@ -123,16 +127,16 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=200):
     # Command and arguments with optimized settings
     command = [
         "python", "infer.py",
-        "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
+        "--stage1_model", model,
         # "--stage2_model", "m-a-p/YuE-s2-1B-general",
-        "--genre_txt", f"{genre_txt_path}",
-        "--lyrics_txt", f"{lyrics_txt_path}",
+        "--genre_txt", f"{genre_txt_content}",
+        "--lyrics_txt", f"{lyrics_txt_content}",
         "--run_n_segments", f"{num_segments}",
         # "--stage2_batch_size", "4",
         "--output_dir", f"{output_dir}",
         "--cuda_idx", "0",
         "--max_new_tokens", f"{max_new_tokens}",
-        "--disable_offload_model"
+        # "--disable_offload_model"
     ]
 
     # Set up environment variables for CUDA with optimized settings
@@ -165,8 +169,6 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=200):
         return None
     finally:
         # Clean up temporary files
-        os.remove(genre_txt_path)
-        os.remove(lyrics_txt_path)
         print("Temporary files deleted.")
 
 # Gradio
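For context, the first hunk moves the Stage 1 model load to module scope in app.py. Below is a minimal, hedged sketch of the same load wrapped in a helper with a fallback to the default attention implementation when flash-attn is not installed; the helper name load_stage1_model and the fallback path are illustrative assumptions, not part of this commit.

import torch
from transformers import AutoModelForCausalLM

def load_stage1_model(model_name="m-a-p/YuE-s1-7B-anneal-en-cot", cuda_idx=0):
    # Same settings as the module-level load added in the hunk above.
    device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            attn_implementation="flash_attention_2",  # requires the flash-attn package
        )
    except (ImportError, ValueError):
        # Assumed fallback: load without FlashAttention 2 if it is unavailable.
        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
    model.to(device)
    model.eval()
    return model, device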
inference/infer.py CHANGED
@@ -56,7 +56,7 @@ parser.add_argument('-r', '--rescale', action='store_true', help='Rescale output
 args = parser.parse_args()
 if args.use_audio_prompt and not args.audio_prompt_path:
     raise FileNotFoundError("Please offer audio prompt filepath using '--audio_prompt_path', when you enable 'use_audio_prompt'!")
-stage1_model = args.stage1_model
+model = args.stage1_model
 cuda_idx = args.cuda_idx
 max_new_tokens = args.max_new_tokens
 stage1_output_dir = os.path.join(args.output_dir, f"stage1")
@@ -69,13 +69,6 @@ device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
 mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
-model = AutoModelForCausalLM.from_pretrained(
-    stage1_model,
-    torch_dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
-)
-model.to(device)
-model.eval()
 
 codectool = CodecManipulator("xcodec", 0, 1)
 model_config = OmegaConf.load(args.basic_model_config)
@@ -115,9 +108,9 @@ stage1_output_set = []
 # genre tags support instrumental,genre,mood,vocal timbr and vocal gender
 # all kinds of tags are needed
 with open(args.genre_txt) as f:
-    genres = f.read().strip()
+    genres = f.strip()
 with open(args.lyrics_txt) as f:
-    lyrics = split_lyrics(f.read())
+    lyrics = split_lyrics(f)
 # intruction
 full_lyrics = "\n".join(lyrics)
 prompt_texts = [f"Generate music from the given lyrics segment by segment.\n[Genre] {genres}\n{full_lyrics}"]
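The last hunk touches how the genre and lyrics inputs are read. For reference, here is a self-contained sketch of the prompt-construction step using the file-read pattern from the pre-change lines; split_lyrics_stub is an illustrative stand-in for the split_lyrics helper defined elsewhere in infer.py, and its blank-line splitting behaviour is an assumption.

def split_lyrics_stub(text: str) -> list[str]:
    # Illustrative stand-in for infer.py's split_lyrics; assumed to split on blank lines.
    return [seg.strip() for seg in text.split("\n\n") if seg.strip()]

def build_prompt_texts(genre_txt_path: str, lyrics_txt_path: str) -> list[str]:
    # open() yields a file object, so the text is obtained via f.read() before strip/split.
    with open(genre_txt_path) as f:
        genres = f.read().strip()
    with open(lyrics_txt_path) as f:
        lyrics = split_lyrics_stub(f.read())
    full_lyrics = "\n".join(lyrics)
    return [f"Generate music from the given lyrics segment by segment.\n[Genre] {genres}\n{full_lyrics}"]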