import os

import gradio as gr
import requests
import transformers_gradio  # PyPI package `transformers-gradio`, providing the model registry

# Hugging Face API key used to authenticate Whisper inference requests
api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Hosted inference endpoint for Whisper
WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

# Headers for the Whisper API request
headers = {"Authorization": f"Bearer {api_key}"}

# Load the DeepSeek-R1 model through the transformers_gradio registry.
# Note: `src` takes the registry callable itself, not a string.
demo = gr.load(name="deepseek-ai/DeepSeek-R1", src=transformers_gradio.registry)

# Query the hosted Whisper model to transcribe an audio file (API call)
def transcribe_audio(audio_file):
    with open(audio_file, "rb") as f:
        data = f.read()
    response = requests.post(WHISPER_API_URL, headers=headers, data=data)
    if response.status_code == 200:
        return response.json().get("text", "Transcription not available.")
    return f"Error: {response.status_code}, {response.text}"

# Generate Mermaid.js code using the DeepSeek-R1 model
def generate_mermaid_code(prompt):
    # Instruction prefix steers DeepSeek-R1 toward valid MermaidJS output
    deepseek_prompt = f"Generate a valid MermaidJS diagram code for the following: {prompt}"
    # The loaded Gradio demo is called like a function to run inference
    response = demo(deepseek_prompt)
    return response.strip()

# Route text, audio, or combined input to the appropriate pipeline
def process_input(input_type, text_input, audio_input):
    if input_type == "Audio" and audio_input is not None:
        # Transcribe the audio with Whisper, then generate Mermaid.js code
        transcription = transcribe_audio(audio_input)
        return generate_mermaid_code(transcription)
    elif input_type == "Text" and text_input:
        # Generate Mermaid.js code directly from the text input
        return generate_mermaid_code(text_input)
    elif input_type == "Text and Audio" and text_input and audio_input is not None:
        # Transcribe the audio and combine it with the text input
        transcription = transcribe_audio(audio_input)
        combined_input = f"{text_input} and {transcription}"
        return generate_mermaid_code(combined_input)
    else:
        return "No valid input provided."

# Set up the Gradio interface
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
        gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
        gr.Audio(type="filepath", label="Audio Input"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="Generated Mermaid.js Code"),
    ],
    title="Mermaid.js Generator",
    description="Provide text, audio, or both. Mermaid.js code will be generated from the text, the audio transcription, or their combination.",
)

# Launch the Gradio app
iface.launch()
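
# --- Usage sketch (illustrative, not part of the app) ---
# Assuming the loaded `demo` is callable as a function (Gradio's
# use-as-a-function pattern for loaded models), the generator can be
# exercised directly without the UI. The prompt and the model output
# below are hypothetical examples, not guaranteed DeepSeek-R1 behavior:
#
#     print(generate_mermaid_code("user signs in, then lands on the dashboard"))
#
# which might print MermaidJS along the lines of:
#
#     flowchart TD
#         A[User signs in] --> B[Dashboard]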