KingNish committed
Commit af14a1a · 1 Parent(s): 472d32d

modified: app.py

Files changed (1)
  1. app.py +35 -50
app.py CHANGED
@@ -4,8 +4,33 @@ import os
 import shutil
 import tempfile
 import spaces
-from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList
 import torch
+import os
+import sys
+sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'xcodec_mini_infer'))
+sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'xcodec_mini_infer', 'descriptaudiocodec'))
+import argparse
+import numpy as np
+import json
+from omegaconf import OmegaConf
+import torchaudio
+from torchaudio.transforms import Resample
+import soundfile as sf
+
+import uuid
+from tqdm import tqdm
+from einops import rearrange
+from codecmanipulator import CodecManipulator
+from mmtokenizer import _MMSentencePieceTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList
+import glob
+import time
+import copy
+from collections import Counter
+from models.soundstream_hubert_new import SoundStream
+from vocoder import build_codec_model, process_audio
+from post_process_audio import replace_low_freq_with_energy_matched
+import re
 
 is_shared_ui = True if "innova-ai/YuE-music-generator-demo" in os.environ['SPACE_ID'] else False
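The unchanged `is_shared_ui` context line indexes `os.environ['SPACE_ID']` directly, which raises `KeyError` when the app runs outside a Hugging Face Space. A minimal defensive variant (a sketch, not part of this commit) would fall back to an empty string:

```python
import os

# os.environ.get degrades to False on local runs instead of raising KeyError.
is_shared_ui = "innova-ai/YuE-music-generator-demo" in os.environ.get("SPACE_ID", "")
```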
 
@@ -116,33 +141,15 @@ model = AutoModelForCausalLM.from_pretrained(
 model.to(device)
 model.eval()
 
-import os
-import sys
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'xcodec_mini_infer'))
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'xcodec_mini_infer', 'descriptaudiocodec'))
-import argparse
-import torch
-import numpy as np
-import json
-from omegaconf import OmegaConf
-import torchaudio
-from torchaudio.transforms import Resample
-import soundfile as sf
+mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
 
-import uuid
-from tqdm import tqdm
-from einops import rearrange
-from codecmanipulator import CodecManipulator
-from mmtokenizer import _MMSentencePieceTokenizer
-from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList
-import glob
-import time
-import copy
-from collections import Counter
-from models.soundstream_hubert_new import SoundStream
-from vocoder import build_codec_model, process_audio
-from post_process_audio import replace_low_freq_with_energy_matched
-import re
+codectool = CodecManipulator("xcodec", 0, 1)
+model_config = OmegaConf.load(basic_model_config)
+codec_model = eval(model_config.generator.name)(**model_config.generator.config).to(device)
+parameter_dict = torch.load(resume_path, map_location='cpu')
+codec_model.load_state_dict(parameter_dict['codec_model'])
+codec_model.to(device)
+codec_model.eval()
 
 def generate_music(
     stage1_model="m-a-p/YuE-s1-7B-anneal-en-cot",
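This hunk hoists the tokenizer and xcodec decoder setup out of `generate_music` and into module scope, so the weights load once at startup instead of on every request. Note that `eval(model_config.generator.name)` executes whatever string the YAML config supplies; a sketch of an explicit registry that avoids `eval` (hypothetical helper, assuming the config only ever names `SoundStream`):

```python
from models.soundstream_hubert_new import SoundStream

# Allow-list of generator classes; unknown config names fail loudly
# instead of being executed as arbitrary Python.
GENERATOR_REGISTRY = {"SoundStream": SoundStream}

def build_generator(model_config, device):
    name = model_config.generator.name
    if name not in GENERATOR_REGISTRY:
        raise ValueError(f"Unknown generator class in config: {name!r}")
    return GENERATOR_REGISTRY[name](**model_config.generator.config).to(device)
```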
@@ -174,22 +181,6 @@ def generate_music(
     stage1_output_dir = os.path.join(output_dir, f"stage1")
     os.makedirs(stage1_output_dir, exist_ok=True)
 
-    # load tokenizer and model
-    device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
-
-    # Now you can use `device` to move your tensors or models to the GPU (if available)
-    print(f"Using device: {device}")
-
-    mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
-
-    codectool = CodecManipulator("xcodec", 0, 1)
-    model_config = OmegaConf.load(basic_model_config)
-    codec_model = eval(model_config.generator.name)(**model_config.generator.config).to(device)
-    parameter_dict = torch.load(resume_path, map_location='cpu')
-    codec_model.load_state_dict(parameter_dict['codec_model'])
-    codec_model.to(device)
-    codec_model.eval()
-
     class BlockTokenRangeProcessor(LogitsProcessor):
         def __init__(self, start_id, end_id):
             self.blocked_token_ids = list(range(start_id, end_id))
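The diff only shows the constructor of `BlockTokenRangeProcessor`; in the standard `transformers` `LogitsProcessor` interface the masking happens in `__call__`. A sketch of the complete processor and how it would plug into `model.generate` (the token-id range below is illustrative, not taken from this commit):

```python
import torch
from transformers import LogitsProcessor, LogitsProcessorList

class BlockTokenRangeProcessor(LogitsProcessor):
    """Bans every token id in [start_id, end_id) during sampling."""

    def __init__(self, start_id, end_id):
        self.blocked_token_ids = list(range(start_id, end_id))

    def __call__(self, input_ids, scores):
        # -inf logits make the blocked ids unsampleable after softmax.
        scores[:, self.blocked_token_ids] = float("-inf")
        return scores

# Illustrative wiring: pass the processor list to generate().
# processors = LogitsProcessorList([BlockTokenRangeProcessor(0, 100)])
# model.generate(..., logits_processor=processors)
```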
@@ -216,13 +207,7 @@ def generate_music(
 
     # Call the function and print the result
     stage1_output_set = []
-    # Tips:
-    # genre tags support instrumental,genre,mood,vocal timbr and vocal gender
-    # # all kinds of tags are needed
-    # with open(genre_txt) as f:
-    #     genres = f.read().strip()
-    # with open(lyrics_txt) as f:
-    #     lyrics = split_lyrics(f.read())
+
     genres = genre_txt.strip()
     lyrics = split_lyrics(lyrics_txt+"\n")
     # intruction
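`split_lyrics` itself is outside this hunk. For reference, a hypothetical splitter consistent with how it is called here, assuming `[verse]`/`[chorus]`-style section headers in the lyrics text:

```python
import re

def split_lyrics(lyrics):
    # Capture each "[section]" header together with its body, up to the
    # next header or the end of the string.
    pattern = re.compile(r"\[(\w+)\](.*?)(?=\[|\Z)", re.DOTALL)
    return [f"[{name}]\n{body.strip()}\n\n" for name, body in pattern.findall(lyrics)]
```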
 