Spaces:

hackengine
/

Paraformer-for-Chinese-Podcast

Runtime error

App Files Files Community

Yuekai Zhang committed on Apr 9, 2023

Commit

e8bec2d

1 Parent(s): 74585b5

update app

Browse files

Files changed (5) hide show

Dockerfile +1 -0
Dockerfile.origin +26 -0
README.md +9 -1
app.py +102 -0
requirements-gradio.txt +11 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1 @@


1	+ FROM soar97/torch-paraformer-gradio:22.12

Dockerfile.origin ADDED Viewed

	@@ -0,0 +1,26 @@

# Image for the Gradio demo, built on top of the paraformer runtime image.
FROM soar97/torch-paraformer:22.12
ENV DEBIAN_FRONTEND=noninteractive
# ffmpeg is invoked by app.py to convert audio to WAV. The apt package index
# is removed in the same layer so it does not stay in the final image.
RUN apt-get update \
    && apt-get install -y ffmpeg \
    && rm -rf /var/lib/apt/lists/*
COPY ./requirements-gradio.txt ./
RUN pip install --no-cache-dir --upgrade -r ./requirements-gradio.txt
# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user
USER user
# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
# Set the working directory to the user's home directory
WORKDIR $HOME/app
# Copy the app into the container at $HOME/app, setting the owner to the user
COPY --chown=user app.py $HOME/app/
# Copy /workspace from the base image into the app directory, owned by the user
COPY --chown=user --from=soar97/torch-paraformer:22.12 /workspace/ $HOME/app/
CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -7,4 +7,12 @@ sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 pinned: false
 ---
+Uses the Paraformer-large model to transcribe long audio recordings.
+### Using Docker
+```
+docker build -f Dockerfile.origin -t soar97/torch-paraformer-gradio:22.12 .
+# docker pull soar97/torch-paraformer-gradio:22.12
+docker run -it --name "paraformerX" --net host soar97/torch-paraformer-gradio:22.12
+```

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from funasr_onnx import Fsmn_vad, Paraformer, CT_Transformer
+from transcribe import get_models, transcribe
+import soundfile
+import gradio as gr
+import pytube as pt
+import datetime
+import os
+asr_model, vad_model, punc_model = get_models("./models")
def convert_to_wav(in_filename: str):
    """Convert an audio file to a 16 kHz WAV file with ffmpeg and load it.

    The converted file is written to ``in_filename + ".wav"`` and is left on
    disk; removing it is the caller's responsibility.

    Args:
        in_filename: Path of the audio file to convert.

    Returns:
        The sample data returned by ``soundfile.read`` for the converted
        file (the sample rate returned alongside it is discarded).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Local import so the block does not depend on a new file-level import.
    import subprocess

    out_filename = in_filename + ".wav"
    if '.mp3' in in_filename:
        # MP3 input is additionally forced to mono 16-bit PCM.
        command = [
            "ffmpeg", "-y", "-i", in_filename,
            "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
            out_filename,
        ]
    else:
        command = [
            "ffmpeg", "-hide_banner", "-y", "-i", in_filename,
            "-ar", "16000",
            out_filename,
        ]
    # An argument list (no shell) keeps quotes or other shell metacharacters
    # in the path from breaking or hijacking the command; check=True surfaces
    # a failed conversion here instead of as a confusing read error below.
    subprocess.run(command, check=True)
    speech, _ = soundfile.read(out_filename)
    return speech
def file_transcribe(microphone, file_upload):
    """Transcribe a microphone recording or an uploaded audio file.

    Args:
        microphone: Path of the microphone recording, or ``None``.
        file_upload: Path of the uploaded audio file, or ``None``.

    Returns:
        The transcription, one item per line. When both inputs are given the
        microphone recording is used and a warning is prepended. When neither
        is given an error message is returned instead.
    """
    # Nothing to transcribe: report it and stop early.
    if microphone is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    notice = ""
    if microphone is not None and file_upload is not None:
        notice = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    # The microphone recording takes precedence over the upload.
    source = file_upload if microphone is None else microphone
    samples = convert_to_wav(source)
    segments = transcribe(samples, asr_model, vad_model, punc_model)
    return notice + "\n".join(segments)
+def _return_yt_html_embed(yt_url):
+    video_id = yt_url.split("?v=")[-1]
+    HTML_str = (
+        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
+        " </center>"
+    )
+    return HTML_str
def youtube_transcribe(yt_url):
    """Download the audio of a YouTube video and transcribe it.

    The audio and its converted WAV file live in a per-call temporary
    directory, so concurrent requests do not overwrite each other's files and
    the files are removed even when download, conversion or transcription
    raises.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        A ``(html, text)`` tuple: the iframe embed HTML for the video and the
        transcription with one item per line.
    """
    # Local import so the block does not depend on a new file-level import.
    import tempfile

    yt = pt.YouTube(yt_url)
    html_embed_str = _return_yt_html_embed(yt_url)
    stream = yt.streams.filter(only_audio=True)[0]
    with tempfile.TemporaryDirectory() as workdir:
        filename = "audio.mp3"
        stream.download(output_path=workdir, filename=filename)
        # convert_to_wav writes "<path>.wav" next to the download, i.e. also
        # inside the temporary directory.
        speech = convert_to_wav(os.path.join(workdir, filename))
        # Join inside the ``with`` so the files still exist if transcribe()
        # produces its results lazily.
        text = "\n".join(transcribe(speech, asr_model, vad_model, punc_model))
    return html_embed_str, text
def run():
    """Build the two-tab Gradio demo and serve it on 0.0.0.0:7860.

    One tab transcribes a microphone recording or an uploaded file, the other
    transcribes the audio of a YouTube video given its URL.
    """
    gr.close_all()
    demo = gr.Blocks()

    # Keyword arguments shared by both interfaces.
    common_options = dict(
        layout="horizontal",
        theme="huggingface",
        allow_flagging="never",
    )

    audio_inputs = [
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
    ]
    mf_transcribe = gr.Interface(
        fn=file_transcribe,
        inputs=audio_inputs,
        outputs="text",
        title="ParaformerX: Copilot for Audio",
        description=(
            "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the pretrained paraformer model to transcribe audio files of arbitrary length."
        ),
        **common_options,
    )

    url_inputs = [
        gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
    ]
    yt_transcribe = gr.Interface(
        fn=youtube_transcribe,
        inputs=url_inputs,
        outputs=["html", "text"],
        title="Demo: Transcribe YouTube",
        description=(
            "Transcribe long-form YouTube videos with the click of a button! Demo uses the the pretrained paraformer model to transcribe audio files of arbitrary length."
        ),
        **common_options,
    )

    tab_interfaces = [mf_transcribe, yt_transcribe]
    tab_titles = ["Transcribe Audio", "Transcribe YouTube"]
    with demo:
        gr.TabbedInterface(tab_interfaces, tab_titles)

    demo.launch(server_name="0.0.0.0", server_port=7860, enable_queue=True)


if __name__ == "__main__":
    run()

requirements-gradio.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+WeTextProcessing
+onnxruntime
+soundfile
+librosa
+scipy
+numpy
+typeguard==2.13.3
+kaldi-native-fbank
+PyYAML>=5.1.2
+gradio
+pytube