genaibeauty committed on
Commit
ffd20df
·
verified ·
1 Parent(s): d11c779

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -48
app.py CHANGED
@@ -1,52 +1,77 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
3
 
4
- # Initialize pipelines for text generation and speech recognition
5
- text_generation_pipeline = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
6
- speech_recognition_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")
7
 
8
- # Function to transcribe audio using the Whisper pipeline
 
 
 
 
 
 
 
 
9
  def transcribe_audio(audio_file):
10
- try:
11
- # Use the Whisper pipeline for automatic speech recognition
12
- transcription = speech_recognition_pipeline(audio_file)
13
- return transcription.get("text", "Transcription not available.")
14
- except Exception as e:
15
- return f"Error in transcription: {e}"
16
-
17
- # Function to generate Mermaid.js code using the Mistral pipeline
 
18
  def generate_mermaid_code(prompt):
19
- try:
20
- # Use the text-generation pipeline to generate Mermaid.js code
21
- result = text_generation_pipeline(
22
- prompt,
23
- max_length=256,
24
- temperature=0.7,
25
- num_return_sequences=1
26
- )
27
- return result[0]["generated_text"].strip() if result else "No Mermaid.js code generated."
28
- except Exception as e:
29
- return f"Error in Mermaid.js generation: {e}"
30
-
31
- # Function to process input (text, audio, or both)
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def process_input(input_type, text_input, audio_input):
33
- try:
34
- if input_type == "Audio" and audio_input:
35
- transcription = transcribe_audio(audio_input)
36
- return generate_mermaid_code(transcription)
 
37
 
38
- elif input_type == "Text" and text_input:
39
- return generate_mermaid_code(text_input)
 
40
 
41
- elif input_type == "Text and Audio" and text_input and audio_input:
42
- transcription = transcribe_audio(audio_input)
43
- combined_input = f"{text_input} and {transcription}"
44
- return generate_mermaid_code(combined_input)
 
 
 
45
 
46
- else:
47
- return "Please provide valid input."
48
- except Exception as e:
49
- return f"Error: {e}"
50
 
51
  # Set up the Gradio interface
52
  iface = gr.Interface(
@@ -54,18 +79,14 @@ iface = gr.Interface(
54
  inputs=[
55
  gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
56
  gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
57
- gr.Audio(type="filepath", label="Audio Input"),
58
  ],
59
  outputs=[
60
- gr.Textbox(lines=20, label="Generated Mermaid.js Code"),
61
  ],
62
  title="Mermaid.js Generator",
63
- description=(
64
- "Provide text, audio, or both."
65
- "Mermaid.js code will be generated for text or audio input, or their combination."
66
- ),
67
  )
68
 
69
  # Launch the Gradio app
70
- if __name__ == "__main__":
71
- iface.launch()
 
1
  import gradio as gr
2
+ import requests
3
+ import os
4
 
5
+ # Set up the Hugging Face API key (ensure you've set this as an environment variable)
6
+ api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
7
 
8
+
9
+ # API URLs
10
+ WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
11
+ MISTRAL_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
12
+
13
+ # Set up headers for API requests
14
+ headers = {"Authorization": f"Bearer {api_key}"}
15
+
16
# Function to query the Hugging Face Whisper model for audio transcription
def transcribe_audio(audio_file):
    """Transcribe a local audio file via the hosted Whisper inference API.

    Args:
        audio_file: Filesystem path to an audio file, read as raw bytes.

    Returns:
        The transcribed text on success, or an ``"Error: ..."`` string
        describing the HTTP failure. Network-level exceptions raised by
        ``requests`` (timeout, connection error) still propagate.
    """
    with open(audio_file, "rb") as f:
        data = f.read()
    # A timeout keeps a stalled inference API from hanging the UI forever.
    response = requests.post(WHISPER_API_URL, headers=headers, data=data, timeout=120)
    if response.status_code == 200:
        # Successful responses are a JSON object carrying a "text" field.
        return response.json().get("text", "Transcription not available.")
    else:
        return f"Error: {response.status_code}, {response.text}"
25
+
26
# Function to query the Mistral model to generate Mermaid.js code
def generate_mermaid_code(prompt):
    """Ask the hosted Mistral model for MermaidJS code for *prompt*.

    Args:
        prompt: Free-text description of the diagram to generate.

    Returns:
        The generated Mermaid.js code on success, or an ``"Error: ..."``
        string on an HTTP failure or an unexpectedly shaped response.
        Network-level exceptions raised by ``requests`` still propagate.
    """
    mermaid_prompt = f"Generate a valid MermaidJS diagram code for the following: {prompt}"

    # Prepare the payload (input plus generation parameters) for the
    # text-generation inference endpoint.
    payload = {
        "inputs": mermaid_prompt,
        "parameters": {
            "max_length": 256,
            "temperature": 0.7
        }
    }

    # Send the request to the Mistral API; the timeout keeps a stalled
    # inference API from hanging the UI forever.
    response = requests.post(MISTRAL_API_URL, headers=headers, json=payload, timeout=120)

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    result = response.json()
    # Guard the extraction: a successful call returns a non-empty list of
    # {"generated_text": ...} objects, but the API can also answer HTTP 200
    # with an error dict, which previously raised instead of reporting.
    if isinstance(result, list) and result and "generated_text" in result[0]:
        return result[0]["generated_text"].strip()
    return f"Error: unexpected response format: {result!r}"
52
+
53
# Function to process text, audio, or both inputs
def process_input(input_type, text_input, audio_input):
    """Route the selected input(s) to Mermaid.js code generation.

    Depending on ``input_type``, the audio file is transcribed first and/or
    the typed text is used directly; the resulting prompt is handed to
    ``generate_mermaid_code``. Returns an explanatory string when the
    required inputs for the chosen mode are missing.
    """
    has_text = bool(text_input)
    has_audio = audio_input is not None

    if input_type == "Audio" and has_audio:
        # Speech-to-text first, then diagram generation.
        return generate_mermaid_code(transcribe_audio(audio_input))

    if input_type == "Text" and has_text:
        # Text goes straight to the generator.
        return generate_mermaid_code(text_input)

    if input_type == "Text and Audio" and has_text and has_audio:
        # Merge the typed description with the spoken one before generating.
        combined_input = f"{text_input} and {transcribe_audio(audio_input)}"
        return generate_mermaid_code(combined_input)

    return "No valid input provided."
 
 
75
 
76
  # Set up the Gradio interface
77
  iface = gr.Interface(
 
79
  inputs=[
80
  gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
81
  gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
82
+ gr.Audio(type="filepath", label="Audio Input")
83
  ],
84
  outputs=[
85
+ gr.Textbox(lines=20, label="Generated Mermaid.js Code")
86
  ],
87
  title="Mermaid.js Generator",
88
+ description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, or their combination."
 
 
 
89
  )
90
 
91
  # Launch the Gradio app
92
+ iface.launch()