Spaces:

andgrt
/

doc-qa-demo-gradio

Runtime error

andgrt committed on Nov 3, 2024

Commit

cfcd1f4

1 Parent(s): 2b8da86

fix: whisper model params

Files changed (1) hide show

app.py CHANGED Viewed

@@ -84,7 +84,17 @@ def transcribe(image, audio):
     y = y.astype(np.float32)
     y /= np.max(np.abs(y))
-    return generate_answer(image, transcriber({"sampling_rate": sr, "raw": y})["text"])
 qa_interface = gr.Interface(

     y = y.astype(np.float32)
     y /= np.max(np.abs(y))
+    input_features = transcriber.feature_extractor(
+        y, sampling_rate=sr, return_tensors="pt"
+    ).input_features
+    transcription = transcriber.model.generate(input_features)
+    transcription_text = transcriber.tokenizer.decode(
+        transcription[0], skip_special_tokens=True
+    )
+    return generate_answer(image, transcription_text)
 qa_interface = gr.Interface(