Voice-To-Text

Sleeping

App Files Files Community

lodhrangpt committed on Nov 16, 2024

Commit

778fecb

verified ·

1 Parent(s): e584a9f

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -27

app.py CHANGED Viewed

@@ -7,10 +7,7 @@ import tempfile
 from nltk.tokenize import sent_tokenize
 import random
 from groq import Groq
-# Ensure no unexpected indentation here
 api_key = os.environ.get("GROQ_API_KEY")
 # Attempt to download punkt tokenizer
 try:
     nltk.download("punkt")
@@ -26,7 +23,7 @@ def transcribe(audio_path):
     groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
     headers = {
-        "Authorization": f"Bearer {api_key}",  # Fix: api_key is used properly
     }
     files = {
         'file': ('audio.wav', audio_data, 'audio/wav'),
@@ -49,34 +46,91 @@ def transcribe(audio_path):
         return create_error_pdf(f"API Error: {error_msg}")
 def generate_notes(transcript):
-    client = Groq(api_key=api_key)  # Use the api_key here
     chat_completion = client.chat.completions.create(
-        messages=[
-            {
-                "role": "system",
-                "content": "you are expert question generator from content. Generate one long question, possible number of short questions and mcqs. plz also provide the notes"
-            },
-            {
-                "role": "user",
-                "content": transcript,
-            }
-        ],
-        model="llama3-8b-8192",
-        temperature=0.5,
-        max_tokens=1024,
-        top_p=1,
-        stop=None,
-        stream=False,
-    )
-    res = chat_completion.choices[0].message.content
     # Generate and save a structured PDF
-    pdf_path = create_pdf(res, transcript)
     return pdf_path
-def create_pdf(question, transcript):
     pdf = FPDF()
     pdf.add_page()
@@ -95,6 +149,23 @@ def create_pdf(question, transcript):
     pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
         pdf.output(temp_pdf.name)
         pdf_path = temp_pdf.name
@@ -123,4 +194,4 @@ iface = gr.Interface(
     description="This app converts audio to text and generates academic questions including long, short, and multiple-choice questions."
 )
-iface.launch()

 from nltk.tokenize import sent_tokenize
 import random
 from groq import Groq
 api_key = os.environ.get("GROQ_API_KEY")
 # Attempt to download punkt tokenizer
 try:
     nltk.download("punkt")
     groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
     headers = {
+         "Authorization": f"Bearer {api_key}",  # Fix: api_key is used properly
     }
     files = {
         'file': ('audio.wav', audio_data, 'audio/wav'),
         return create_error_pdf(f"API Error: {error_msg}")
 def generate_notes(transcript):
     """Generate questions and notes for a transcript and save them as a PDF.

     Sends ``transcript`` as the user message to the Groq chat-completions
     API, with a system prompt asking for one long question, short
     questions, MCQs and notes. The text of the first returned choice is
     passed to ``create_pdf`` together with the transcript.

     Args:
         transcript: Text to generate questions and notes from.

     Returns:
         Whatever ``create_pdf`` returns for the generated text
         (presumably the path of the written PDF -- confirm against
         ``create_pdf``).
     """
     # SECURITY: authenticate with the module-level ``api_key`` (read from the
     # GROQ_API_KEY environment variable) instead of a secret embedded in the
     # source. Any key that was previously committed here must be treated as
     # leaked and revoked.
     client = Groq(api_key=api_key)
     chat_completion = client.chat.completions.create(
         messages=[
             # System message: fixes the assistant's role for the request.
             {
                 "role": "system",
                 "content": "you are expert question generator from content. Generate one long question,possible number of short questions and mcqs.plz also provide the notes"
             },
             # User message: the transcript to generate questions from.
             {
                 "role": "user",
                 "content": transcript,
             },
         ],
         # The language model which will generate the completion.
         model="llama3-8b-8192",
         # Sampling temperature; lower values give less random completions.
         temperature=0.5,
         # Upper bound on the number of tokens generated for the completion.
         max_tokens=1024,
         # Nucleus-sampling cutoff passed to the API.
         top_p=1,
         # No custom stop sequence.
         stop=None,
         # Request the full completion in one response, not streamed deltas.
         stream=False,
     )
     # Only the first choice's message text is used.
     res = chat_completion.choices[0].message.content
     # Generate and save a structured PDF
     pdf_path = create_pdf(res, transcript)
     return pdf_path
+def create_pdf(question,transcript):
     pdf = FPDF()
     pdf.add_page()
     pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
+    # # Add short questions
+    # pdf.set_font("Arial", "B", 14)
+    # pdf.cell(200, 10, "Short Questions", ln=True)
+    # pdf.set_font("Arial", "", 12)
+    # for question in short_questions:
+    #     pdf.multi_cell(0, 10, f"- {question.encode('latin1', 'replace').decode('latin1')}\n")
+    # # Add MCQs
+    # pdf.set_font("Arial", "B", 14)
+    # pdf.cell(200, 10, "Multiple Choice Questions (MCQs)", ln=True)
+    # pdf.set_font("Arial", "", 12)
+    # for mcq in mcqs:
+    #     pdf.multi_cell(0, 10, f"Q: {mcq['question'].encode('latin1', 'replace').decode('latin1')}")
+    #     for option in mcq["options"]:
+    #         pdf.multi_cell(0, 10, f"   - {option.encode('latin1', 'replace').decode('latin1')}")
+    #     pdf.multi_cell(0, 10, f"Answer: {mcq['answer'].encode('latin1', 'replace').decode('latin1')}\n")
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
         pdf.output(temp_pdf.name)
         pdf_path = temp_pdf.name
     description="This app converts audio to text and generates academic questions including long, short, and multiple-choice questions."
 )
+iface.launch()