faychu commited on
Commit
e8a0b79
·
verified ·
1 Parent(s): cd5e25c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -6
README.md CHANGED
@@ -35,24 +35,22 @@ Here we provide a code snippet illustrating the process of loading both the
35
 
36
 
37
  ```python
38
- import requests
 
 
39
  from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration
40
- from transformers.pipelines.audio_utils import ffmpeg_read
41
 
42
  model = Qwen2AudioForConditionalGeneration.from_pretrained("Qwen/Qwen2-Audio-7B" ,trust_remote_code=True)
43
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B" ,trust_remote_code=True)
44
 
45
  prompt = "<|audio_bos|><|AUDIO|><|audio_eos|>Generate the caption in English:"
46
  url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Audio/glass-breaking-151256.mp3"
47
- audio = ffmpeg_read(requests.get(url).content, sampling_rate=processor.feature_extractor.sampling_rate)
48
-
49
  inputs = processor(text=prompt, audios=audio, return_tensors="pt")
50
 
51
- # Generate
52
  generated_ids = model.generate(**inputs, max_length=256)
53
  generated_ids = generated_ids[:, inputs.input_ids.size(1):]
54
  response = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
55
- # Glass is breaking.
56
  ```
57
 
58
  ## Citation
 
35
 
36
 
37
  ```python
38
+ from io import BytesIO
39
+ from urllib.request import urlopen
40
+ import librosa
41
  from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration
 
42
 
43
  model = Qwen2AudioForConditionalGeneration.from_pretrained("Qwen/Qwen2-Audio-7B" ,trust_remote_code=True)
44
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B" ,trust_remote_code=True)
45
 
46
  prompt = "<|audio_bos|><|AUDIO|><|audio_eos|>Generate the caption in English:"
47
  url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Audio/glass-breaking-151256.mp3"
48
+ audio, sr = librosa.load(BytesIO(urlopen(url).read()), sr=processor.feature_extractor.sampling_rate)
 
49
  inputs = processor(text=prompt, audios=audio, return_tensors="pt")
50
 
 
51
  generated_ids = model.generate(**inputs, max_length=256)
52
  generated_ids = generated_ids[:, inputs.input_ids.size(1):]
53
  response = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
 
54
  ```
55
 
56
  ## Citation