Spaces:

JulienHalgand
/

hf-gradio-example

Running on Zero

App Files Files Community

JulienHalgand committed on 23 days ago

Commit

e5007d2

1 Parent(s): b538a96

Ça marche sur le CPU

Browse files

Files changed (3) hide show

README.md +2 -0
app.py +67 -5
environment.yml +40 -0

README.md CHANGED Viewed

@@ -10,3 +10,5 @@ pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


10	---
11
12	Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13	+
14	+

app.py CHANGED Viewed

@@ -1,14 +1,18 @@
 import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'amt/src')))
-import os
 import shutil
 import mimetypes
 import gradio as gr
 from model_helper import load_model_checkpoint, transcribe
 from prepare_media import prepare_media
 MODEL_NAME = 'YPTF.MoE+Multi (noPS)' # @param ["YMT3+", "YPTF+Single (noPS)", "YPTF+Multi (PS)", "YPTF.MoE+Multi (noPS)", "YPTF.MoE+Multi (PS)"]
 PRECISION = '16'# if torch.cuda.is_available() else '32'# @param ["32", "bf16-mixed", "16"]
 PROJECT = '2024'
@@ -44,24 +48,82 @@ MODELS = {
     }
 }
 model = load_model_checkpoint(args=MODELS[MODEL_NAME]["args"], device="cpu")
 #model.to("cuda")
 def handle_audio(file_path):
     # Guess extension from MIME
     mime_type, _ = mimetypes.guess_type(file_path)
     ext = mimetypes.guess_extension(mime_type) or os.path.splitext(file_path)[1] or ".bin"
     output_path = f"received_audio{ext}"
     shutil.copy(file_path, output_path)
-    return output_path
 demo = gr.Interface(
     fn=handle_audio,
     inputs=gr.Audio(type="filepath"),
-    outputs=gr.File()
 )
 if __name__ == "__main__":
-    demo.launch()

+import os
 import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'amt/src')))
 import shutil
 import mimetypes
+import subprocess
 import gradio as gr
+import torchaudio
 from model_helper import load_model_checkpoint, transcribe
 from prepare_media import prepare_media
+from typing import Tuple, Dict, Literal
 MODEL_NAME = 'YPTF.MoE+Multi (noPS)' # @param ["YMT3+", "YPTF+Single (noPS)", "YPTF+Multi (PS)", "YPTF.MoE+Multi (noPS)", "YPTF.MoE+Multi (PS)"]
 PRECISION = '16'# if torch.cuda.is_available() else '32'# @param ["32", "bf16-mixed", "16"]
 PROJECT = '2024'
     }
 }
+log_file = 'amt/log.txt'
 model = load_model_checkpoint(args=MODELS[MODEL_NAME]["args"], device="cpu")
 #model.to("cuda")
+def prepare_media(source_path_or_url: os.PathLike,
+                  source_type: Literal['audio_filepath', 'youtube_url'],
+                  delete_video: bool = True,
+                  simulate = False) -> Dict:
+    """prepare media from source path or youtube, and return audio info"""
+    # Get audio_file
+    if source_type == 'audio_filepath':
+        audio_file = source_path_or_url
+    elif source_type == 'youtube_url':
+        if os.path.exists('/download/yt_audio.mp3'):
+            os.remove('/download/yt_audio.mp3')
+        # Download from youtube
+        with open(log_file, 'w') as lf:
+            audio_file = './downloaded/yt_audio'
+            command = ['yt-dlp', '-x', source_path_or_url, '-f', 'bestaudio',
+                '-o', audio_file, '--audio-format', 'mp3', '--restrict-filenames',
+                '--extractor-retries', '10',
+                '--force-overwrites', '--username', 'oauth2', '--password', '', '-v']
+            if simulate:
+                command = command + ['-s']
+            process = subprocess.Popen(command,
+                stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
+            for line in iter(process.stdout.readline, ''):
+                # Filter out unnecessary messages
+                print(line)
+                if "www.google.com/device" in line:
+                    hl_text = line.replace("https://www.google.com/device", "\033[93mhttps://www.google.com/device\x1b[0m").split()
+                    hl_text[-1] = "\x1b[31;1m" + hl_text[-1] + "\x1b[0m"
+                    lf.write(' '.join(hl_text)); lf.flush()
+                elif "Authorization successful" in line or "Video unavailable" in line:
+                    lf.write(line); lf.flush()
+            process.stdout.close()
+            process.wait()
+        audio_file += '.mp3'
+    else:
+        raise ValueError(source_type)
+    # Create info
+    info = torchaudio.info(audio_file)
+    return {
+        "filepath": audio_file,
+        "track_name": os.path.basename(audio_file).split('.')[0],
+        "sample_rate": int(info.sample_rate),
+        "bits_per_sample": int(info.bits_per_sample),
+        "num_channels": int(info.num_channels),
+        "num_frames": int(info.num_frames),
+        "duration": int(info.num_frames / info.sample_rate),
+        "encoding": str.lower(info.encoding),
+        }
 def handle_audio(file_path):
     # Guess extension from MIME
     mime_type, _ = mimetypes.guess_type(file_path)
     ext = mimetypes.guess_extension(mime_type) or os.path.splitext(file_path)[1] or ".bin"
     output_path = f"received_audio{ext}"
     shutil.copy(file_path, output_path)
+    audio_info = prepare_media(output_path, source_type='audio_filepath')
+    midifile_path = transcribe(model, audio_info)
+    return midifile_path
 demo = gr.Interface(
     fn=handle_audio,
     inputs=gr.Audio(type="filepath"),
+    outputs=gr.File(),
 )
 if __name__ == "__main__":
+    demo.launch(
+        server_port=7860
+    )

environment.yml ADDED Viewed

	@@ -0,0 +1,40 @@

+name: gradio
+channels:
+  - conda-forge
+dependencies:
+  - python=3.12
+  - lightning>=2.2.1
+  - deprecated
+  - librosa
+  - einops
+  - transformers==4.45.1
+  - numpy==1.26.4
+  - wandb
+  - annotated-types==0.7.0
+  - anyio==4.9.0
+  - blinker==1.9.0
+  - certifi==2025.4.26
+  - click==8.1.8
+  - fastapi==0.115.12
+  - Flask==3.1.0
+  - h11==0.16.0
+  - idna==3.10
+  - importlib_metadata==8.6.1
+  - itsdangerous==2.2.0
+  - Jinja2==3.1.6
+  - MarkupSafe==2.1.5
+  - pip==25.1.1
+  - pydantic==2.11.4
+  - pydantic_core==2.33.2
+  - python-multipart==0.0.20
+  - setuptools==80.1.0
+  - sniffio==1.3.1
+  - starlette==0.46.2
+  - typing_extensions==4.13.2
+  - typing-inspection==0.4.0
+  - uvicorn==0.34.2
+  - Werkzeug==3.1.3
+  - wheel==0.45.1
+  - zipp==3.21.0
+pip:
+  - --extra-index-url https://download.pytorch.org/whl/cu113