image-to-music-v2

Sleeping

App Files Files Community

fffiloni committed on Feb 1, 2024

Commit

396214f

verified ·

1 Parent(s): 6a3a19b

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -9

app.py CHANGED Viewed

@@ -65,7 +65,28 @@ def get_caption_from_MD(image_in):
     print(result)
     return result
 import re
 import torch
 from transformers import pipeline
@@ -102,8 +123,10 @@ def infer(image_in):
     cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)
     print(f"SUGGESTED Musical prompt: {cleaned_text}")
-    return user_prompt, cleaned_text.lstrip("\n")
 title = "Image to Music V2",
 description = "Get music from a picture"
@@ -133,14 +156,10 @@ with gr.Blocks(css=css) as demo:
                 submit_btn = gr.Button("Make LLM system from my pic !")
             with gr.Column():
                 caption = gr.Textbox(
-                    label = "Image caption (Kosmos2)",
-                    elem_id = "image-caption"
                 )
-                result = gr.Textbox(
-                    label = "Suggested System",
-                    lines = 6,
-                    max_lines = 30,
-                    elem_id = "suggested-system-prompt"
                 )
         with gr.Row():
             gr.Examples(

     print(result)
     return result
+def get_magnet(prompt):
+    """Request a generation from the hosted MAGNeT Space for a text prompt.
+
+    Prints the prompt, calls the ``/predict_full`` endpoint of
+    ``https://fffiloni-magnet.hf.space/`` with fixed generation settings,
+    prints the raw response, and returns the response's second element.
+
+    Args:
+        prompt: Value sent as the 'Input Text' argument of the remote call.
+
+    Returns:
+        ``result[1]`` of the remote response. Presumably the generated
+        audio (e.g. a file path) -- TODO confirm against the Space's API.
+    """
+    # The f-string wraps the prompt with no extra text; it only formats it
+    # as a string before sending.
+    amended_prompt = f"{prompt}"
+    print(amended_prompt)
+    # NOTE(review): `Client` is not imported in the visible hunks --
+    # presumably gradio_client.Client; verify the import exists in app.py.
+    # A new client is created on every call.
+    client = Client("https://fffiloni-magnet.hf.space/")
+    # Arguments are positional; the trailing comments name the remote
+    # component each value is bound to, so their order must not change.
+    result = client.predict(
+        "facebook/magnet-small-10secs",	# Literal['facebook/magnet-small-10secs', 'facebook/magnet-medium-10secs', 'facebook/magnet-small-30secs', 'facebook/magnet-medium-30secs', 'facebook/audio-magnet-small', 'facebook/audio-magnet-medium']  in 'Model' Radio component
+        "",	# str  in 'Model Path (custom models)' Textbox component
+        amended_prompt,	# str  in 'Input Text' Textbox component
+        3,	# float  in 'Temperature' Number component
+        0.9,	# float  in 'Top-p' Number component
+        10,	# float  in 'Max CFG coefficient' Number component
+        1,	# float  in 'Min CFG coefficient' Number component
+        20,	# float  in 'Decoding Steps (stage 1)' Number component
+        10,	# float  in 'Decoding Steps (stage 2)' Number component
+        10,	# float  in 'Decoding Steps (stage 3)' Number component
+        10,	# float  in 'Decoding Steps (stage 4)' Number component
+        "prod-stride1 (new!)",	# Literal['max-nonoverlap', 'prod-stride1 (new!)']  in 'Span Scoring' Radio component
+        api_name="/predict_full"
+    )
+    print(result)
+    # Only the second element of the response is handed back to the caller.
+    # NOTE(review): the meaning of the other elements is not shown here.
+    return result[1]
 import re
 import torch
 from transformers import pipeline
     cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)
     print(f"SUGGESTED Musical prompt: {cleaned_text}")
+    music_o = get_magnet(cleaned_text)
+    return cleaned_text, music_o
 title = "Image to Music V2",
 description = "Get music from a picture"
                 submit_btn = gr.Button("Make LLM system from my pic !")
             with gr.Column():
                 caption = gr.Textbox(
+                    label = "Musical prompt"
                 )
+                result = gr.Audio(
+                    label = "Music"
                 )
         with gr.Row():
             gr.Examples(