Spaces:

yasserrmd
/

revai_reverb_asr

Sleeping

yasserrmd committed on Oct 4, 2024

Commit

3d435c7

verified ·

1 Parent(s): 85e4503

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,10 +9,10 @@ files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']
 downloaded_files = [hf_hub_download(repo_id=REPO_ID, filename=f) for f in files]
 model = load_model(downloaded_files[0], downloaded_files[1])
-def process_cat_embs(cat_embs):
-    device = "gpu"
-    cat_embs = torch.tensor([float(c) for c in cat_embs.split(',')]).to(device)
-    return cat_embs
 @spaces.GPU
@@ -30,23 +30,22 @@ def recognition(audio, style=0):
     return text_output
-# Gradio UI Components
-inputs = [
-    gr.Audio(type="filepath", label='Input audio'),
-    gr.Slider(0, 1, value=0, label="Transcription Style", info="Adjust between non-verbatim (0) and verbatim (1) transcription")
-]
-output = gr.Textbox(label="Output Text")
-# UI and Interface
 iface = gr.Interface(
-    fn=recognition,
-    inputs=inputs,
-    outputs=output,
-    title="Reverb ASR Transcription",
-    description="Supports verbatim and non-verbatim transcription styles.",
-    article="<p style='text-align: center'><a href='https://rev.com' target='_blank'>Learn more about Rev</a></p>",
-    theme='huggingface'
-)
-iface.launch(enable_queue=True)

 downloaded_files = [hf_hub_download(repo_id=REPO_ID, filename=f) for f in files]
 model = load_model(downloaded_files[0], downloaded_files[1])
+def process_style_embedding(style):
+    device = torch.device("cpu")
+    embedding = torch.tensor([style, 1 - style], device=device)
+    return embedding
 @spaces.GPU
     return text_output
+audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
+style_slider = gr.Slider(0, 1, value=0, step=0.1, label="Transcription Style",
+                             info="Adjust the transcription style: 0 (casual) to 1 (formal).")
+output_textbox = gr.Textbox(label="Transcription Output")
+description = "This tool transcribes audio using a customizable transcription style ranging from casual to formal. Upload or record an audio file to begin."
 iface = gr.Interface(
+        fn=transcribe_audio,
+        inputs=[audio_input, style_slider],
+        outputs=output_textbox,
+        title="Audio Transcription with Style Control",
+        description=description,
+        theme="default"
+    )
+iface.launch()