genaibeauty committed
Commit 80c1f05 · verified · 1 parent: ece27dd

Update app.py

Files changed (1): app.py (+31, -18)
app.py CHANGED
@@ -1,51 +1,63 @@
 import gradio as gr
+from transformers import pipeline
 import os
-from huggingface_hub import InferenceClient
+import requests
 
-# Set up the Hugging Face API key (ensure you've set this as an environment variable)
+# Set up the Hugging Face API key for Whisper
 api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
-# Initialize the Hugging Face Inference Client
-client = InferenceClient(
-    provider="together",
-    api_key=api_key
-)
-
-# API URL for Whisper
+# Set up the API URL for Whisper
 WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
 
+# Set up headers for the Whisper API request
+headers = {"Authorization": f"Bearer {api_key}"}
+
+# Load the DeepSeek model using Hugging Face's pipeline (no API call, local model)
+pipe = pipeline("text-generation", model="deepseek-ai/DeepSeek-R1", trust_remote_code=True)
+
+# Function to query the Hugging Face Whisper model for audio transcription (API call)
 def transcribe_audio(audio_file):
     with open(audio_file, "rb") as f:
         data = f.read()
-    headers = {"Authorization": f"Bearer {api_key}"}
     response = requests.post(WHISPER_API_URL, headers=headers, data=data)
     if response.status_code == 200:
         return response.json().get("text", "Transcription not available.")
     else:
         return f"Error: {response.status_code}, {response.text}"
 
+# Function to generate Mermaid.js code using DeepSeek-R1 model (local processing)
 def generate_mermaid_code(prompt):
-    messages = [{"role": "user", "content": f"Generate a valid MermaidJS diagram code for the following: {prompt}"}]
-    completion = client.chat.completions.create(
-        model="deepseek-ai/DeepSeek-R1",
-        messages=messages,
-        max_tokens=500
-    )
-    return completion.choices[0].message.content.strip()
+    # Instruction included in the prompt to guide DeepSeek to generate valid MermaidJS code
+    deepseek_prompt = f"Generate all possible valid MermaidJS diagram code for the following: {prompt}"
+
+    # Using the DeepSeek model pipeline for text generation
+    response = pipe([{"role": "user", "content": deepseek_prompt}])
+    return response[0]["generated_text"].strip()
 
+# Function to process text, audio, or both inputs
 def process_input(input_type, text_input, audio_input):
     if input_type == "Audio" and audio_input is not None:
+        # Transcribe audio using the Whisper API
         transcription = transcribe_audio(audio_input)
+        # Generate Mermaid.js code from transcription using DeepSeek-R1
         return generate_mermaid_code(transcription)
+
     elif input_type == "Text" and text_input:
+        # Generate Mermaid.js code directly from text input using DeepSeek-R1
         return generate_mermaid_code(text_input)
+
     elif input_type == "Text and Audio" and text_input and audio_input is not None:
+        # Transcribe audio using the Whisper API
         transcription = transcribe_audio(audio_input)
+        # Combine text input and transcription
         combined_input = f"{text_input} and {transcription}"
+        # Generate Mermaid.js code using DeepSeek-R1
         return generate_mermaid_code(combined_input)
+
     else:
         return "No valid input provided."
 
+# Set up the Gradio interface
 iface = gr.Interface(
     fn=process_input,
     inputs=[
@@ -60,4 +72,5 @@ iface = gr.Interface(
     description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, or their combination."
 )
 
-iface.launch()
+# Launch the Gradio app
+iface.launch()
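
A note on the new generate_mermaid_code: when a transformers text-generation pipeline is called with a chat-style message list, recent versions of the library return the whole conversation under "generated_text" as a list of {"role", "content"} dicts rather than a plain string, so response[0]["generated_text"].strip() can raise a TypeError. Below is a minimal sketch of the extraction this likely needs, keeping the prompt and pipe call from the diff; the small instruction model named here is only a hypothetical stand-in, since deepseek-ai/DeepSeek-R1 is far too large to load casually via pipeline().

from transformers import pipeline

# Hypothetical stand-in model for illustration; the commit itself loads
# deepseek-ai/DeepSeek-R1 the same way.
pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")

def generate_mermaid_code(prompt):
    deepseek_prompt = f"Generate all possible valid MermaidJS diagram code for the following: {prompt}"
    response = pipe([{"role": "user", "content": deepseek_prompt}], max_new_tokens=500)
    generated = response[0]["generated_text"]
    if isinstance(generated, str):
        # Older pipeline versions return the completion as a plain string.
        return generated.strip()
    # Chat-format input returns the conversation as message dicts;
    # the assistant's reply is the last entry.
    return generated[-1]["content"].strip()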