"""Gradio app that turns a typed and/or spoken process description into Mermaid.js code.

Audio is transcribed with a Whisper model and the resulting description is sent
to a text-generation model, both through the Hugging Face Inference API.
"""

import os

import gradio as gr
import requests

# Hugging Face API token, read from the environment (None when the variable is unset).
api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# API URLs
WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
# Alternative model: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"

# Headers for JSON requests (text generation).
HEADERS = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
# Headers for the audio upload: the body is raw audio bytes, so it must not be
# labelled as JSON.
AUDIO_HEADERS = {"Authorization": HEADERS["Authorization"]}

# (connect, read) timeouts in seconds, so a stalled request cannot hang the UI forever.
REQUEST_TIMEOUT = (10, 120)


class _InferenceError(Exception):
    """An Inference API call failed; the message is suitable for showing to the user."""


def _post_to_inference_api(url, headers, **request_kwargs):
    """POST to the Inference API and return the decoded JSON body.

    Args:
        url: Endpoint to call.
        headers: HTTP headers to send.
        **request_kwargs: Passed through to ``requests.post`` (``data=`` or ``json=``).

    Raises:
        _InferenceError: If the token is missing, the request fails, the status
            is not 200, or the body is not valid JSON.
    """
    if not api_key:
        raise _InferenceError("Error: HUGGINGFACEHUB_API_TOKEN is not set.")
    try:
        response = requests.post(
            url, headers=headers, timeout=REQUEST_TIMEOUT, **request_kwargs
        )
    except requests.RequestException as exc:
        raise _InferenceError(f"Error: request failed, {exc}") from exc
    if response.status_code != 200:
        raise _InferenceError(f"Error: {response.status_code}, {response.text}")
    try:
        return response.json()
    except ValueError as exc:
        raise _InferenceError(f"Error: invalid JSON response, {response.text}") from exc


def _transcribe(audio_file):
    """Return the Whisper transcription of ``audio_file`` or raise ``_InferenceError``."""
    with open(audio_file, "rb") as f:
        data = f.read()
    body = _post_to_inference_api(WHISPER_API_URL, AUDIO_HEADERS, data=data)
    if not isinstance(body, dict) or "text" not in body:
        raise _InferenceError("Transcription failed.")
    return body["text"] or ""


# Function to query the Hugging Face Whisper model for audio transcription
def transcribe_audio(audio_file: str) -> str:
    """Transcribe an audio file with the hosted Whisper model.

    Args:
        audio_file: Path of the audio file to upload.

    Returns:
        The transcribed text, or a human-readable message ("Error: ..." or
        "Transcription failed.") when the API call does not succeed.

    Raises:
        OSError: If the audio file cannot be read.
    """
    try:
        return _transcribe(audio_file)
    except _InferenceError as exc:
        return str(exc)


# Function to generate optimized Mermaid.js code
def generate_mermaid_code(prompt: str) -> str:
    """Ask the text-generation model to convert a process description to Mermaid.js.

    Args:
        prompt: The user's process description.

    Returns:
        The generated text with surrounding whitespace removed, or an
        "Error: ..." message when the API call fails or returns an unexpected shape.
    """
    mermaid_prompt = ("Convert the following user process into well-structured, bug-free all possible (flow, class, sequence, graph LR,graph TD etc) Mermaid.js code. "
                      "Ensure correctness and logical flow:\n" + prompt)
    payload = {
        "inputs": mermaid_prompt,
        "parameters": {
            # The text-generation task limits output with max_new_tokens;
            # max_length is not one of its parameters.
            "max_new_tokens": 250,
            "temperature": 0.3,
            "top_p": 0.9,
            # Return only the completion instead of echoing the prompt back.
            "return_full_text": False,
        },
    }
    try:
        body = _post_to_inference_api(API_URL, HEADERS, json=payload)
    except _InferenceError as exc:
        return str(exc)
    try:
        return body[0]["generated_text"].strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        return f"Error: unexpected response format, {body}"


# Function to process text, audio, or both inputs
def process_input(input_type, text_input, audio_input):
    """Combine the selected inputs and generate Mermaid.js code from them.

    Args:
        input_type: One of "Text", "Audio" or "Text and Audio".
        text_input: Typed description; may be empty or None.
        audio_input: Path of a recorded/uploaded audio file; may be None.

    Returns:
        The generated Mermaid.js text, an error message if transcription or
        generation fails, or "No valid input provided." when nothing usable
        was supplied.
    """
    # Honour the selector for both inputs: text is skipped only for "Audio",
    # audio is used only when the selected type includes it.
    use_text = input_type != "Audio"
    use_audio = input_type in ("Audio", "Text and Audio")

    parts = []
    if use_text and text_input and text_input.strip():
        parts.append(text_input.strip())
    if use_audio and audio_input:
        try:
            transcription = _transcribe(audio_input)
        except _InferenceError as exc:
            # Surface the failure instead of sending the error text to the model.
            return str(exc)
        if transcription.strip():
            parts.append(transcription.strip())

    combined_input = " ".join(parts)
    if not combined_input:
        return "No valid input provided."
    return generate_mermaid_code(combined_input)
# Build the Gradio UI: an input-type selector, a text box and an audio widget
# feed process_input, whose result is shown in a single output text box.
_input_type_selector = gr.Radio(
    ["Text", "Audio", "Text and Audio"],
    label="Input Type",
    value="Text",
)
_text_box = gr.Textbox(
    lines=10,
    label="Text Input",
    placeholder="Enter task flow description here...",
)
# type="filepath" makes Gradio pass the audio to the callback as a file path.
_audio_widget = gr.Audio(type="filepath", label="Audio Input")
_mermaid_output = gr.Textbox(lines=20, label="Generated Mermaid.js Code")

iface = gr.Interface(
    fn=process_input,
    inputs=[_input_type_selector, _text_box, _audio_widget],
    outputs=[_mermaid_output],
    title="Mermaid.js Generator",
    description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, ensuring accuracy.",
)

# Start the Gradio app
iface.launch()