import os

import gradio as gr
import requests
import transformers_gradio  # PyPI package `transformers-gradio`, providing the model registry

# Hugging Face API key used to authenticate Whisper inference requests
api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Hosted inference endpoint for Whisper
WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

# Headers for the Whisper API request
headers = {"Authorization": f"Bearer {api_key}"}

# Load the DeepSeek-R1 model through the transformers_gradio registry.
# Note: `src` takes the registry callable itself, not a string.
demo = gr.load(name="deepseek-ai/DeepSeek-R1", src=transformers_gradio.registry)

# Query the hosted Whisper model to transcribe an audio file (API call)
def transcribe_audio(audio_file):
    with open(audio_file, "rb") as f:
        data = f.read()
    response = requests.post(WHISPER_API_URL, headers=headers, data=data)
    if response.status_code == 200:
        return response.json().get("text", "Transcription not available.")
    return f"Error: {response.status_code}, {response.text}"

# Generate Mermaid.js code using the DeepSeek-R1 model
def generate_mermaid_code(prompt):
    # Instruction prefix steers DeepSeek-R1 toward valid MermaidJS output
    deepseek_prompt = f"Generate a valid MermaidJS diagram code for the following: {prompt}"
    # The loaded Gradio demo is called like a function to run inference
    response = demo(deepseek_prompt)
    return response.strip()

# Route text, audio, or combined input to the appropriate pipeline
def process_input(input_type, text_input, audio_input):
    if input_type == "Audio" and audio_input is not None:
        # Transcribe the audio with Whisper, then generate Mermaid.js code
        transcription = transcribe_audio(audio_input)
        return generate_mermaid_code(transcription)
    elif input_type == "Text" and text_input:
        # Generate Mermaid.js code directly from the text input
        return generate_mermaid_code(text_input)
    elif input_type == "Text and Audio" and text_input and audio_input is not None:
        # Transcribe the audio and combine it with the text input
        transcription = transcribe_audio(audio_input)
        combined_input = f"{text_input} and {transcription}"
        return generate_mermaid_code(combined_input)
    else:
        return "No valid input provided."

# Set up the Gradio interface
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
        gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
        gr.Audio(type="filepath", label="Audio Input"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="Generated Mermaid.js Code"),
    ],
    title="Mermaid.js Generator",
    description="Provide text, audio, or both. Mermaid.js code will be generated from the text, the audio transcription, or their combination.",
)

# Launch the Gradio app
iface.launch()
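
# --- Usage sketch (illustrative, not part of the app) ---
# Assuming the loaded `demo` is callable as a function (Gradio's
# use-as-a-function pattern for loaded models), the generator can be
# exercised directly without the UI. The prompt and the model output
# below are hypothetical examples, not guaranteed DeepSeek-R1 behavior:
#
#     print(generate_mermaid_code("user signs in, then lands on the dashboard"))
#
# which might print MermaidJS along the lines of:
#
#     flowchart TD
#         A[User signs in] --> B[Dashboard]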