Add-Vishnu committed on
Commit
e9b4267
·
1 Parent(s): 93ed5b8

Update asr.py

Browse files
Files changed (1) hide show
  1. asr.py +17 -4
asr.py CHANGED
@@ -41,9 +41,16 @@ def transcribe(audio):
41
 
42
  def detect_language(audio):
43
  print(audio)
44
- audio = librosa.load(audio, sr=16_000, mono=True)[0]
 
 
 
 
 
 
 
45
  # print(audio)
46
- inputs_lid = processor_lid(audio, sampling_rate=16_000, return_tensors="pt")
47
  with torch.no_grad():
48
  start_time_lid = time.time()
49
  outputs_lid = model_lid(**inputs_lid).logits
@@ -56,11 +63,17 @@ def detect_language(audio):
56
 
57
 
58
  def transcribe_lang(audio,lang):
59
- audio = librosa.load(audio, sr=16_000, mono=True)[0]
 
 
 
 
 
60
  processor.tokenizer.set_target_lang(lang)
61
  model.load_adapter(lang)
62
  print(lang)
63
- inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
 
64
  with torch.no_grad():
65
  tr_start_time = time.time()
66
  outputs = model(**inputs).logits
 
41
 
42
  def detect_language(audio):
43
  print(audio)
44
+ # audio = librosa.load(audio, sr=16_000, mono=True)[0]
45
+ sr,y = audio
46
+ y = y.astype(np.float32)
47
+ y /= np.max(np.abs(y))
48
+ y_resampled = resample_to_16k(y, sr)
49
+ print("Without using librosa to load:",y_resampled)
50
+ # inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
51
+ inputs = processor(y_resampled, sampling_rate=16_000,return_tensors="pt")
52
  # print(audio)
53
+ # inputs_lid = processor_lid(audio, sampling_rate=16_000, return_tensors="pt")
54
  with torch.no_grad():
55
  start_time_lid = time.time()
56
  outputs_lid = model_lid(**inputs_lid).logits
 
63
 
64
 
65
  def transcribe_lang(audio,lang):
66
+ # audio = librosa.load(audio, sr=16_000, mono=True)[0]
67
+ sr,y = audio
68
+ y = y.astype(np.float32)
69
+ y /= np.max(np.abs(y))
70
+ y_resampled = resample_to_16k(y, sr)
71
+ print("Without using librosa to load:",y_resampled)
72
  processor.tokenizer.set_target_lang(lang)
73
  model.load_adapter(lang)
74
  print(lang)
75
+ # inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
76
+ inputs = processor(y_resampled, sampling_rate=16_000,return_tensors="pt")
77
  with torch.no_grad():
78
  tr_start_time = time.time()
79
  outputs = model(**inputs).logits