Spaces:

genaibeauty
/

mermaid_diagrams

Running

App Files Files Community

genaibeauty committed on Jan 29

Commit

80c1f05

verified ·

1 Parent(s): ece27dd

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -18

app.py CHANGED Viewed

@@ -1,51 +1,63 @@
 import gradio as gr
 import os
-from huggingface_hub import InferenceClient
-# Set up the Hugging Face API key (ensure you've set this as an environment variable)
 api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 # Chat-completion client; requests are routed through the "together" provider.
 client = InferenceClient(provider="together", api_key=api_key)
 # Hosted inference endpoint used for Whisper speech-to-text requests.
 WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
 def transcribe_audio(audio_file):
     """Upload an audio file to the hosted Whisper endpoint and return its transcript.

     Args:
         audio_file: Path of the audio file to read and send.

     Returns:
         The "text" field of the JSON reply when the endpoint answers HTTP 200
         (or a placeholder string if that field is absent); otherwise an
         "Error: ..." string holding the status code and response body.
     """
     with open(audio_file, "rb") as audio_stream:
         payload = audio_stream.read()
     auth_headers = {"Authorization": f"Bearer {api_key}"}
     reply = requests.post(WHISPER_API_URL, headers=auth_headers, data=payload)
     # Guard clause: anything other than 200 is reported back as text.
     if reply.status_code != 200:
         return f"Error: {reply.status_code}, {reply.text}"
     return reply.json().get("text", "Transcription not available.")
 def generate_mermaid_code(prompt):
     """Ask the DeepSeek-R1 chat model for MermaidJS code describing ``prompt``.

     Args:
         prompt: Free-form description of the diagram to generate.

     Returns:
         The content of the first completion choice, stripped of surrounding
         whitespace.
     """
     user_message = {
         "role": "user",
         "content": f"Generate a valid MermaidJS diagram code for the following: {prompt}",
     }
     completion = client.chat.completions.create(
         model="deepseek-ai/DeepSeek-R1",
         messages=[user_message],
         max_tokens=500,
     )
     first_choice = completion.choices[0]
     return first_choice.message.content.strip()
 def process_input(input_type, text_input, audio_input):
     """Route the selected input mode to Mermaid code generation.

     Args:
         input_type: One of "Audio", "Text" or "Text and Audio".
         text_input: Text description; ignored in "Audio" mode.
         audio_input: Audio file path, or None when no audio was supplied;
             ignored in "Text" mode.

     Returns:
         The generated Mermaid code, or "No valid input provided." when the
         inputs required by the selected mode are missing.
     """
     has_audio = audio_input is not None
     if input_type == "Audio" and has_audio:
         return generate_mermaid_code(transcribe_audio(audio_input))
     if input_type == "Text" and text_input:
         return generate_mermaid_code(text_input)
     if input_type == "Text and Audio" and text_input and has_audio:
         spoken = transcribe_audio(audio_input)
         # The typed text and the transcript are joined into a single prompt.
         return generate_mermaid_code(f"{text_input} and {spoken}")
     return "No valid input provided."
 iface = gr.Interface(
     fn=process_input,
     inputs=[
@@ -60,4 +72,5 @@ iface = gr.Interface(
     description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, or their combination."
 )
-iface.launch()

 import gradio as gr
+from transformers import pipeline
 import os
+import requests
+# Set up the Hugging Face API key for Whisper
 api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 # Hosted inference endpoint used for Whisper speech-to-text requests.
 WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
 # Only send an Authorization header when a token is configured; formatting an
 # unset token would otherwise produce the literal header "Bearer None".
 headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
 # Load DeepSeek-R1 in-process through the transformers text-generation pipeline.
 # NOTE(review): trust_remote_code=True executes Python code shipped in the model
 # repository; confirm this is intended and consider pinning a revision.
 pipe = pipeline("text-generation", model="deepseek-ai/DeepSeek-R1", trust_remote_code=True)
+# Function to query the Hugging Face Whisper model for audio transcription (API call)
 def transcribe_audio(audio_file, timeout=120):
     """Upload an audio file to the hosted Whisper endpoint and return its transcript.

     Args:
         audio_file: Path of the audio file to read and send.
         timeout: Seconds to wait for the endpoint before giving up. Without a
             timeout ``requests`` may block indefinitely on a stalled connection.

     Returns:
         The "text" field of the JSON reply when the endpoint answers HTTP 200
         (or a placeholder string if that field is absent); otherwise an
         "Error: ..." string describing the HTTP or network failure.
     """
     with open(audio_file, "rb") as f:
         data = f.read()
     try:
         response = requests.post(
             WHISPER_API_URL, headers=headers, data=data, timeout=timeout
         )
     except requests.RequestException as exc:
         # Report timeouts and connection failures in the same "Error: ..." form
         # used for HTTP failures instead of crashing the request handler.
         return f"Error: {exc}"
     if response.status_code == 200:
         return response.json().get("text", "Transcription not available.")
     return f"Error: {response.status_code}, {response.text}"
+# Function to generate Mermaid.js code using DeepSeek-R1 model (local processing)
 def generate_mermaid_code(prompt):
     """Generate MermaidJS code for ``prompt`` with the DeepSeek-R1 pipeline.

     Args:
         prompt: Free-form description of the diagram(s) to generate.

     Returns:
         The model's reply text, stripped of surrounding whitespace.
     """
     # Instruction wrapped around the user's description to steer the model
     # towards MermaidJS output.
     deepseek_prompt = f"Generate all possible valid MermaidJS diagram code for the following: {prompt}"
     response = pipe([{"role": "user", "content": deepseek_prompt}])
     generated = response[0]["generated_text"]
     if isinstance(generated, list):
         # For chat-style (list of messages) input the pipeline returns the
         # whole conversation as a list of message dicts; the model's reply is
         # the last entry. Calling .strip() on that list would raise.
         generated = generated[-1]["content"]
     return generated.strip()
+# Function to process text, audio, or both inputs
 def process_input(input_type, text_input, audio_input):
     """Route the selected input mode to Mermaid code generation.

     Args:
         input_type: One of "Audio", "Text" or "Text and Audio".
         text_input: Text description; ignored in "Audio" mode.
         audio_input: Audio file path, or None when no audio was supplied;
             ignored in "Text" mode.

     Returns:
         The generated Mermaid code, or "No valid input provided." when the
         inputs required by the selected mode are missing.
     """
     has_audio = audio_input is not None
     if input_type == "Audio" and has_audio:
         return generate_mermaid_code(transcribe_audio(audio_input))
     if input_type == "Text" and text_input:
         return generate_mermaid_code(text_input)
     if input_type == "Text and Audio" and text_input and has_audio:
         spoken = transcribe_audio(audio_input)
         # The typed text and the transcript are joined into a single prompt.
         return generate_mermaid_code(f"{text_input} and {spoken}")
     return "No valid input provided."
+# Set up the Gradio interface
 iface = gr.Interface(
     fn=process_input,
     inputs=[
     description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, or their combination."
 )
+# Launch the Gradio app
+iface.launch()