text-to-speech

Sleeping

@@ -2,14 +2,13 @@ from smolagents import Tool
 from huggingface_hub import InferenceClient
-class TextToImageTool(Tool):
-    description = "This tool creates an image according to a prompt, which is a text description."
-    name = "image_generator"
-    inputs = {"prompt": {"type": "string", "description": "The image generator prompt. Don't hesitate to add details in the prompt to make the image look better, like 'high-res, photorealistic', etc."}}
-    output_type = "image"
-    model_sdxl = "black-forest-labs/FLUX.1-schnell"
-    client = InferenceClient(model_sdxl)
-    def forward(self, prompt):
-        return self.client.text_to_image(prompt)

 from huggingface_hub import InferenceClient
+class TextToSpeechTool(Tool):
+    description = "This tool synthesizes speech from text."
+    name = "speech_generator"
+    inputs = {"text": {"type": "string", "description": "The text input"}}
+    output_type = "audio"
+    client = InferenceClient()
+    def forward(self, text):
+        return self.client.text_to_speech(text)

tool_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "description": "This is a tool that creates an image according to a prompt, which is a text description.",
-  "inputs": "{'prompt': {'type': 'string', 'description': \"The image generator prompt. Don't hesitate to add details in the prompt to make the image look better, like 'high-res, photorealistic', etc.\"}}",
-  "name": "image_generator",
-  "output_type": "image",
-  "tool_class": "tool.TextToImageTool"
 }

 {
+  "description": "This is a tool that synthesizes speech from text.",
+  "inputs": "{'text': {'type': 'string', 'description': 'The text input'}}",
+  "name": "speech_generator",
+  "output_type": "audio",
+  "tool_class": "tool.TextToSpeechTool"
 }