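"""Mermaid.js Generator: a Gradio app that turns text and/or audio task-flow
descriptions into Mermaid.js diagram code, using the hosted Whisper API for
transcription and DeepSeek-R1 for generation."""
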
import gradio as gr
import os
import requests
import transformers_gradio  # registry for gr.load below (assumes the transformers-gradio package is installed)

# Hugging Face API token used to authenticate the Whisper API calls
api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Hosted Inference API endpoint for Whisper
WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

# Authorization header for the Whisper API requests
headers = {"Authorization": f"Bearer {api_key}"}
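
# Fail fast if the token is missing. (Assumption: the Space stores the token
# in the HUGGINGFACEHUB_API_TOKEN secret; rename this if your setup differs.)
if not api_key:
    raise RuntimeError("HUGGINGFACEHUB_API_TOKEN is not set; Whisper API calls will fail.")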

# Load the DeepSeek-R1 model through the transformers-gradio registry.
# Note: gr.load expects the registry itself (a callable), not the string
# "transformers_gradio.registry"; the loaded demo can then be called like a function.
demo = gr.load(name="deepseek-ai/DeepSeek-R1", src=transformers_gradio.registry)

# Query the hosted Whisper model to transcribe an audio file (API call)
def transcribe_audio(audio_file):
    with open(audio_file, "rb") as f:
        data = f.read()
    response = requests.post(WHISPER_API_URL, headers=headers, data=data)
    if response.status_code == 200:
        return response.json().get("text", "Transcription not available.")
    else:
        return f"Error: {response.status_code}, {response.text}"

# Generate Mermaid.js code from a prompt using the DeepSeek-R1 model
def generate_mermaid_code(prompt):
    # Instruction prefix steers DeepSeek toward emitting valid MermaidJS code
    deepseek_prompt = f"Generate a valid MermaidJS diagram code for the following: {prompt}"
    # Use the loaded `demo` as a plain function to run generation
    response = demo(deepseek_prompt)
    return response.strip()
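
# Optional post-processing (a sketch, not in the original app): DeepSeek-R1
# typically wraps its chain of thought in <think>...</think> tags and may fence
# the diagram in a ```mermaid block. This hypothetical helper strips both so
# only the diagram text remains.
import re

def extract_mermaid(raw):
    # Drop the reasoning block, if present
    cleaned = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL)
    # Prefer the contents of a ```mermaid fence when the model emits one
    match = re.search(r"```(?:mermaid)?\s*(.*?)```", cleaned, flags=re.DOTALL)
    return (match.group(1) if match else cleaned).strip()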

# Route text, audio, or combined input to the generator
def process_input(input_type, text_input, audio_input):
    if input_type == "Audio" and audio_input is not None:
        # Transcribe audio with Whisper, then generate Mermaid.js code
        transcription = transcribe_audio(audio_input)
        return generate_mermaid_code(transcription)
    elif input_type == "Text" and text_input:
        # Generate Mermaid.js code directly from the text input
        return generate_mermaid_code(text_input)
    elif input_type == "Text and Audio" and text_input and audio_input is not None:
        # Transcribe audio, then combine the text input with the transcription
        transcription = transcribe_audio(audio_input)
        combined_input = f"{text_input} and {transcription}"
        return generate_mermaid_code(combined_input)
    else:
        return "No valid input provided."

# Set up the Gradio interface
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
        gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
        gr.Audio(type="filepath", label="Audio Input"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="Generated Mermaid.js Code"),
    ],
    title="Mermaid.js Generator",
    description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, or their combination.",
)

# Launch the Gradio app
iface.launch()