import gradio as gr
import transformers_gradio
import os
import requests

# Hugging Face API token used to authenticate Whisper API calls
api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Hosted Inference API endpoint for Whisper transcription
WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

# Authorization headers for the Whisper API request
headers = {"Authorization": f"Bearer {api_key}"}

# Load the DeepSeek-R1 model through the transformers_gradio registry
# (gr.load expects the registry callable, not the string "transformers_gradio.registry")
demo = gr.load(name="deepseek-ai/DeepSeek-R1", src=transformers_gradio.registry)

# Query the hosted Whisper model to transcribe an audio file (API call)
def transcribe_audio(audio_file):
    with open(audio_file, "rb") as f:
        data = f.read()
    response = requests.post(WHISPER_API_URL, headers=headers, data=data)
    if response.status_code == 200:
        return response.json().get("text", "Transcription not available.")
    else:
        return f"Error: {response.status_code}, {response.text}"
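
# Note: the hosted Inference API can answer with HTTP 503 while the Whisper model
# is cold-starting. A minimal retry sketch (hypothetical helper, assuming a fixed
# back-off is acceptable):
import time

def transcribe_audio_with_retry(audio_file, retries=3, wait_seconds=10):
    result = transcribe_audio(audio_file)
    for _ in range(retries):
        if not result.startswith("Error: 503"):
            return result
        time.sleep(wait_seconds)  # give the model time to finish loading
        result = transcribe_audio(audio_file)
    return result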

# Generate Mermaid.js code from a natural-language prompt using DeepSeek-R1
def generate_mermaid_code(prompt):
    # Instruction prefix steers DeepSeek-R1 toward valid MermaidJS output
    deepseek_prompt = f"Generate a valid MermaidJS diagram code for the following: {prompt}"
    # Call the loaded model `demo` like a function to get its text response
    response = demo(deepseek_prompt)
    return response.strip()
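
# DeepSeek-R1 typically emits <think>...</think> reasoning and often wraps code in
# markdown fences, so the raw response may not render directly. A hypothetical
# post-processing sketch to pull out just the Mermaid code:
import re

def extract_mermaid(raw_response):
    # Drop any <think>...</think> reasoning block
    text = re.sub(r"<think>.*?</think>", "", raw_response, flags=re.DOTALL)
    # Prefer the contents of a ```mermaid ... ``` fence if one is present
    match = re.search(r"```(?:mermaid)?\s*(.*?)```", text, flags=re.DOTALL)
    return (match.group(1) if match else text).strip()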

# Route text, audio, or combined input to the Mermaid.js generator
def process_input(input_type, text_input, audio_input):
    if input_type == "Audio" and audio_input is not None:
        # Transcribe audio with the Whisper API, then generate Mermaid.js code
        transcription = transcribe_audio(audio_input)
        return generate_mermaid_code(transcription)
    elif input_type == "Text" and text_input:
        # Generate Mermaid.js code directly from the text input
        return generate_mermaid_code(text_input)
    elif input_type == "Text and Audio" and text_input and audio_input is not None:
        # Transcribe the audio, then combine it with the text input
        transcription = transcribe_audio(audio_input)
        combined_input = f"{text_input} and {transcription}"
        return generate_mermaid_code(combined_input)
    else:
        return "No valid input provided."
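
# Example (hypothetical): process_input("Text", "User logs in, then opens the dashboard", None)
# should produce something along the lines of:
#   flowchart TD
#       A[User logs in] --> B[Open dashboard]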

# Set up the Gradio interface
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
        gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
        gr.Audio(type="filepath", label="Audio Input"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="Generated Mermaid.js Code"),
    ],
    title="Mermaid.js Generator",
    description="Provide text, audio, or both. Mermaid.js code is generated from the text, the audio transcription, or their combination.",
)

# Launch the Gradio app
iface.launch()
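
# When running outside a Hugging Face Space, launch(share=True) exposes a temporary
# public URL, and server_name="0.0.0.0" makes the app reachable from other machines:
# iface.launch(share=True, server_name="0.0.0.0")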