Spaces:

genaibeauty
/

mermaid_diagrams

Sleeping

App Files Files Community

mermaid_diagrams / app.py

genaibeauty

Update app.py

a4c2d51 verified 10 months ago

raw

history blame

3.08 kB

	import gradio as gr
	from transformers import pipeline
	import os
	import requests

	# Hugging Face API token used to authenticate Whisper inference requests.
	# This is None when HUGGINGFACEHUB_API_TOKEN is not set in the environment.
	api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

	# Inference endpoint that receives the raw audio bytes for transcription.
	WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

	# Headers sent with every Whisper API request. The Authorization header is
	# only included when a token is configured; formatting a missing token would
	# otherwise send the literal, always-invalid value "Bearer None".
	headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

	# Load the DeepSeek model using Gradio's load function from the registry.
	# The resulting `demo` object is invoked by generate_mermaid_code with a
	# single prompt string, and its return value is expected to support .strip().
	# NOTE(review): `src` is given here as the dotted string
	# "transformers_gradio.registry"; Gradio's `gr.load` documentation describes
	# `src` as "models", "spaces", or a registry callable -- confirm that this
	# string form is accepted by the installed Gradio version.
	demo = gr.load(name="deepseek-ai/DeepSeek-R1", src="transformers_gradio.registry")

	# Function to query the Hugging Face Whisper model for audio transcription (API call)
	def transcribe_audio(audio_file, timeout=120):
	    """Transcribe an audio file by posting its bytes to the Whisper endpoint.

	    Args:
	        audio_file: Path of the audio file to upload.
	        timeout: Seconds to wait for the API before giving up. Without a
	            timeout, requests would block indefinitely on a stalled server.

	    Returns:
	        The value of the "text" field of the JSON response on success, or
	        "Transcription not available." when a 200 response carries no such
	        field. Every failure (unreadable file, network error, non-200
	        status, non-JSON body) is reported as a string starting with
	        "Error:" instead of raising, matching the existing error style.
	    """
	    # Read the whole file up front so the handle is closed before the
	    # (potentially slow) network call starts.
	    try:
	        with open(audio_file, "rb") as f:
	            data = f.read()
	    except OSError as exc:
	        return f"Error: could not read audio file: {exc}"

	    try:
	        response = requests.post(
	            WHISPER_API_URL, headers=headers, data=data, timeout=timeout
	        )
	    except requests.RequestException as exc:
	        # Covers connection failures, timeouts and other transport errors.
	        return f"Error: request to Whisper API failed: {exc}"

	    if response.status_code != 200:
	        return f"Error: {response.status_code}, {response.text}"

	    try:
	        payload = response.json()
	    except ValueError:
	        # The JSON decode errors raised by requests derive from ValueError.
	        return "Error: Whisper API returned a response that is not valid JSON."

	    # Only a JSON object can carry a "text" field; anything else (e.g. a
	    # list) is treated the same as a missing field.
	    if isinstance(payload, dict):
	        return payload.get("text", "Transcription not available.")
	    return "Transcription not available."

	# Function to generate Mermaid.js code using DeepSeek-R1 model
	def generate_mermaid_code(prompt):
	    """Ask the loaded model for MermaidJS code describing ``prompt``.

	    The user's description is prefixed with a fixed instruction, passed to
	    the module-level ``demo`` callable, and the reply is returned with
	    leading and trailing whitespace removed.
	    """
	    return demo(
	        f"Generate a valid MermaidJS diagram code for the following: {prompt}"
	    ).strip()

	# Function to process text, audio, or both inputs
	def process_input(input_type, text_input, audio_input):
	    """Turn the UI inputs into Mermaid.js code.

	    Args:
	        input_type: One of "Text", "Audio" or "Text and Audio".
	        text_input: Free-text description; required for the two text modes.
	        audio_input: Path of an audio file, or None; required for the two
	            audio modes.

	    Returns:
	        The generated Mermaid.js code; "No valid input provided." when the
	        selected mode is unknown or one of its required inputs is missing
	        or blank; or the transcription failure message when the audio could
	        not be transcribed.
	    """
	    no_input_message = "No valid input provided."

	    # Treat None and whitespace-only text as "nothing entered" so a blank
	    # prompt is never sent to the model.
	    text = (text_input or "").strip()

	    needs_text = input_type in ("Text", "Text and Audio")
	    needs_audio = input_type in ("Audio", "Text and Audio")

	    if not (needs_text or needs_audio):
	        return no_input_message
	    if needs_text and not text:
	        return no_input_message
	    if needs_audio and audio_input is None:
	        return no_input_message

	    if not needs_audio:
	        # Generate Mermaid.js code directly from the text input.
	        return generate_mermaid_code(text)

	    # Transcribe audio using the Whisper API.
	    transcription = transcribe_audio(audio_input)

	    # transcribe_audio reports failures as plain strings ("Error: ..." or the
	    # "Transcription not available." placeholder). Show those to the user
	    # instead of asking the model to draw a diagram of an error message.
	    if isinstance(transcription, str) and (
	        transcription.startswith("Error:")
	        or transcription == "Transcription not available."
	    ):
	        return transcription

	    if needs_text:
	        # Combine the text input and the transcription into one description.
	        return generate_mermaid_code(f"{text} and {transcription}")
	    return generate_mermaid_code(transcription)

	# Build the Gradio UI. The three input components map, in order, onto the
	# (input_type, text_input, audio_input) parameters of process_input; the
	# audio component hands over its value as a file path.
	input_components = [
	    gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
	    gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
	    gr.Audio(type="filepath", label="Audio Input"),
	]

	# A single text box shows whatever process_input returns.
	output_components = [
	    gr.Textbox(lines=20, label="Generated Mermaid.js Code"),
	]

	iface = gr.Interface(
	    title="Mermaid.js Generator",
	    description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, or their combination.",
	    fn=process_input,
	    inputs=input_components,
	    outputs=output_components,
	)

	# Start serving the interface.
	iface.launch()