lodhrangpt committed
Commit bcd1dcf · verified · 1 Parent(s): ca1b859

Update app.py

Files changed (1)
  1. app.py +118 -80
app.py CHANGED
@@ -1,88 +1,126 @@
  import requests
- import json
- from nltk.tokenize import sent_tokenize, word_tokenize
- from nltk.corpus import stopwords
  import nltk
- from flask import Flask, request, jsonify
-
- # Download NLTK data
- nltk.download("punkt")
- nltk.download("stopwords")
-
- # Initialize stop words
- stop_words = set(stopwords.words("english"))
-
- # Initialize Flask app
- app = Flask(__name__)
-
- # Groq API credentials and endpoints
- GROQ_API_KEY = "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"
- TRANSCRIBE_ENDPOINT = "https://api.groq.com/transcribe"  # Replace with actual endpoint
- KEYWORD_EXTRACTION_ENDPOINT = "https://api.groq.com/keywords"  # Replace with actual endpoint
-
- def transcribe_audio(file_path):
-     """Send audio file to Groq's transcription API."""
-     with open(file_path, "rb") as audio_file:
-         response = requests.post(
-             TRANSCRIBE_ENDPOINT,
-             headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
-             files={"file": audio_file}
-         )
-     response.raise_for_status()
-     return response.json()["transcript"]
-
- def extract_keywords(text):
-     """Send text to Groq's keyword extraction API."""
-     response = requests.post(
-         KEYWORD_EXTRACTION_ENDPOINT,
-         headers={"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"},
-         json={"text": text}
-     )
-     response.raise_for_status()
-     return response.json()["keywords"]
-
- def extract_key_sentences(transcript):
-     """Extract sentences containing keywords from the transcript."""
-     sentences = sent_tokenize(transcript)
-     important_sentences = [
-         sentence for sentence in sentences
-         if any(word.lower() not in stop_words for word in word_tokenize(sentence))
-     ]
-     return important_sentences
-
- @app.route("/transcribe", methods=["POST"])
- def transcribe():
-     """API endpoint to transcribe audio and generate notes."""
-     if "file" not in request.files:
-         return jsonify({"error": "No file uploaded"}), 400
-     file = request.files["file"]
-     file_path = "/tmp/audio_file.wav"
-     file.save(file_path)
-
-     # Transcribe the audio
-     transcript = transcribe_audio(file_path)
-
-     # Generate notes
-     notes = generate_notes(transcript)
-
-     return jsonify(notes)

  def generate_notes(transcript):
-     """Generate summarized notes based on keywords and important sentences."""
-     # Extract key sentences
-     key_sentences = extract_key_sentences(transcript)

-     # Extract keywords
-     keywords = extract_keywords(transcript)

-     # Prepare notes
-     notes = {
-         "short_questions": keywords[:5],  # Select top 5 keywords as short questions
-         "long_questions": key_sentences[:3],  # Select first 3 key sentences for long questions
-         "mcq": [{"question": f"What is {kw}?", "answer": "Yes/No"} for kw in keywords[:3]]
-     }

-     return notes

- if __name__ == "__main__":
-     app.run(debug=True)
 
+ import gradio as gr
  import requests
+ from fpdf import FPDF
  import nltk
+ import os
+ import tempfile
+ from nltk.tokenize import sent_tokenize
+ import random
+
+ # Attempt to download punkt tokenizer
+ try:
+     nltk.download("punkt")
+ except:
+     print("NLTK punkt tokenizer download failed. Using custom tokenizer.")
+
+ def custom_sent_tokenize(text):
+     return text.split(". ")
+
+ def transcribe(audio_path):
+     with open(audio_path, "rb") as audio_file:
+         audio_data = audio_file.read()
+
+     groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
+     headers = {
+         "Authorization": "Bearer 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",  # Replace with your actual API key
+     }
+     files = {
+         'file': ('audio.wav', audio_data, 'audio/wav'),
+     }
+     data = {
+         'model': 'whisper-large-v3-turbo',
+         'response_format': 'json',
+         'language': 'en',
+     }
+
+     response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)
+
+     if response.status_code == 200:
+         result = response.json()
+         transcript = result.get("text", "No transcription available.")
+         return generate_notes(transcript)
+     else:
+         error_msg = response.json().get("error", {}).get("message", "Unknown error.")
+         print(f"API Error: {error_msg}")
+         return create_error_pdf(f"API Error: {error_msg}")

  def generate_notes(transcript):
+     try:
+         sentences = sent_tokenize(transcript)
+     except LookupError:
+         sentences = custom_sent_tokenize(transcript)
+
+     long_questions = [f"What is meant by '{sentence}'?" for sentence in sentences[:5]]
+     short_questions = [f"Define '{sentence.split()[0]}'." for sentence in sentences[:5]]
+
+     mcqs = []
+     for sentence in sentences[:5]:
+         mcq = {
+             "question": f"What is '{sentence.split()[0]}'?",
+             "options": [sentence.split()[0]] + random.sample(["Option 1", "Option 2", "Option 3"], 3),
+             "answer": sentence.split()[0]
+         }
+         mcqs.append(mcq)
+
+     pdf_path = create_pdf(transcript, long_questions, short_questions, mcqs)
+     return pdf_path
+
+ def create_pdf(transcript, long_questions, short_questions, mcqs):
+     pdf = FPDF()
+     pdf.add_page()
+
+     pdf.set_font("Arial", "B", 16)
+     pdf.cell(200, 10, "Transcription Notes", ln=True, align="C")
+
+     pdf.set_font("Arial", "", 12)
+     pdf.multi_cell(0, 10, f"Transcription:\n{transcript.encode('latin1', 'replace').decode('latin1')}\n\n")
+
+     pdf.set_font("Arial", "B", 14)
+     pdf.cell(200, 10, "Long Questions", ln=True)
+     pdf.set_font("Arial", "", 12)
+     for question in long_questions:
+         pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
+
+     pdf.set_font("Arial", "B", 14)
+     pdf.cell(200, 10, "Short Questions", ln=True)
+     pdf.set_font("Arial", "", 12)
+     for question in short_questions:
+         pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
+
+     pdf.set_font("Arial", "B", 14)
+     pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
+     pdf.set_font("Arial", "", 12)
+     for mcq in mcqs:
+         pdf.multi_cell(0, 10, f"Q: {mcq['question'].encode('latin1', 'replace').decode('latin1')}")
+         for option in mcq["options"]:
+             pdf.multi_cell(0, 10, f" - {option.encode('latin1', 'replace').decode('latin1')}")
+         pdf.multi_cell(0, 10, f"Answer: {mcq['answer'].encode('latin1', 'replace').decode('latin1')}\n")
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+         pdf.output(temp_pdf.name)
+         pdf_path = temp_pdf.name

+     return pdf_path
+
+ def create_error_pdf(message):
+     pdf = FPDF()
+     pdf.add_page()
+     pdf.set_font("Arial", "B", 16)
+     pdf.cell(200, 10, "Error Report", ln=True, align="C")
+     pdf.set_font("Arial", "", 12)
+     pdf.multi_cell(0, 10, message.encode('latin1', 'replace').decode('latin1'))

+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+         pdf.output(temp_pdf.name)
+         error_pdf_path = temp_pdf.name

+     return error_pdf_path
+
+ iface = gr.Interface(
+     fn=transcribe,
+     inputs=gr.Audio(type="filepath"),
+     outputs=gr.File(label="Download PDF with Notes or Error Report"),
+     title="Voice to Text Converter and Notes Generator",
+ )

+ iface.launch()
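
For quick local testing, a minimal sketch that exercises the same Groq transcription request the new transcribe() function builds, without going through the Gradio UI. The endpoint, model, and form fields come from the diff above; the sample.wav file name and the GROQ_API_KEY environment variable are illustrative assumptions, not part of the commit.

# Sketch only: mirrors the request built inside transcribe() in the diff above.
# "sample.wav" and the GROQ_API_KEY environment variable are assumptions.
import os
import requests

groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
headers = {"Authorization": f"Bearer {os.environ['GROQ_API_KEY']}"}

with open("sample.wav", "rb") as audio_file:
    files = {"file": ("audio.wav", audio_file.read(), "audio/wav")}

data = {
    "model": "whisper-large-v3-turbo",
    "response_format": "json",
    "language": "en",
}

response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)
if response.status_code == 200:
    print(response.json().get("text", "No transcription available."))
else:
    print("API Error:", response.json().get("error", {}).get("message", "Unknown error."))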