image-to-music-v2

Sleeping

App Files Community

fffiloni committed on Jun 5

Commit

64f0bfe

verified ·

1 Parent(s): 0fbecad

MCP ready

Browse files

Files changed (1) hide show

app.py +30 -6

app.py CHANGED Viewed

@@ -251,7 +251,7 @@ Immediately STOP after that. It should be EXACTLY in this format:
 "The song is an instrumental. The song is in medium tempo with a classical guitar playing a lilting melody in accompaniment style. The song is emotional and romantic. The song is a romantic instrumental song. The chord sequence is Gm, F6, Ebm. The time signature is 4/4. This song is in Adagio. The key of this song is G minor."
 """
-@spaces.GPU(enable_queue=True)
 def get_musical_prompt(user_prompt, chosen_model):
     """
@@ -276,13 +276,35 @@ def get_musical_prompt(user_prompt, chosen_model):
     print(f"SUGGESTED Musical prompt: {cleaned_text}")
     return cleaned_text.lstrip("\n")
-def infer(image_in, chosen_model, api_status):
     if image_in == None :
         raise gr.Error("Please provide an image input")
     if chosen_model == [] :
         raise gr.Error("Please pick a model")
     if api_status == "api not ready yet" :
         raise gr.Error("This model is not ready yet, you can pick another one instead :)")
@@ -440,13 +462,15 @@ with gr.Blocks(css=css) as demo:
         fn = check_api,
         inputs = chosen_model,
         outputs = check_status,
-        queue = False
     )
     retry_btn.click(
         fn = retry,
         inputs = [chosen_model, caption],
-        outputs = [result]
     )
     submit_btn.click(
@@ -454,7 +478,7 @@ with gr.Blocks(css=css) as demo:
         inputs = [
             image_in,
             chosen_model,
-            check_status
         ],
         outputs =[
             caption,
@@ -463,4 +487,4 @@ with gr.Blocks(css=css) as demo:
         ]
     )
-demo.queue(max_size=16).launch(show_api=False, show_error=True)

 "The song is an instrumental. The song is in medium tempo with a classical guitar playing a lilting melody in accompaniment style. The song is emotional and romantic. The song is a romantic instrumental song. The chord sequence is Gm, F6, Ebm. The time signature is 4/4. This song is in Adagio. The key of this song is G minor."
 """
+@spaces.GPU()
 def get_musical_prompt(user_prompt, chosen_model):
     """
     print(f"SUGGESTED Musical prompt: {cleaned_text}")
     return cleaned_text.lstrip("\n")
+def infer(image_in, chosen_model):
+    """
+    Generate music from an input image and selected music generation model.
+    This function performs the following steps:
+    1. Checks that an image and a model have been provided.
+    2. Verifies if the selected model's API is currently available.
+    3. Uses an image captioning model (Kosmos-2) to describe the image.
+    4. Generates a musical prompt from the image caption using a language model.
+    5. Sends the musical prompt to the selected music generation model and retrieves the result.
+    Args:
+        image_in: The filepath to an input image. This image is used as inspiration to generate music.
+        chosen_model: The name of the model to use for music generation. Supported values include:
+                      "Mustango", "ACE Step".
+    Returns:
+        - A string containing the musical prompt generated from the image.
+        - A flag to show the retry button in the UI (for user to edit and retry the generation).
+        - The output of the selected model, typically an audio filepath or object depending on model.
+    """
     if image_in == None :
         raise gr.Error("Please provide an image input")
     if chosen_model == [] :
         raise gr.Error("Please pick a model")
+    api_status = check_api(chosen_model)
     if api_status == "api not ready yet" :
         raise gr.Error("This model is not ready yet, you can pick another one instead :)")
         fn = check_api,
         inputs = chosen_model,
         outputs = check_status,
+        queue = False,
+        show_api=False
     )
     retry_btn.click(
         fn = retry,
         inputs = [chosen_model, caption],
+        outputs = [result],
+        show_api=False
     )
     submit_btn.click(
         inputs = [
             image_in,
             chosen_model,
+            #check_status
         ],
         outputs =[
             caption,
         ]
     )
+demo.queue(max_size=16).launch(show_api=True, show_error=True, ssr_mode=False, mcp_server=True)