Spaces:

Rehman1603
/

YouTube_to_Quiz

Sleeping

App Files Community

Rehman1603 committed on Jun 24, 2024

Commit

7b4f776

verified ·

1 Parent(s): 552d9ba

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -48

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import streamlit as st
 import pytube
 from youtube_transcript_api import YouTubeTranscriptApi as yt
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@@ -6,72 +6,78 @@ import os
 from langchain import PromptTemplate
 from langchain import LLMChain
 from langchain_together import Together
-import re
-import json
-import os
 # Set the API key with double quotes
 os.environ['TOGETHER_API_KEY'] = "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8"
 def Summary_BART(text):
     checkpoint = "sshleifer/distilbart-cnn-12-6"
     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
     model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
-    inputs = tokenizer(text,
-                    max_length=1024,
-                    truncation=True,
-                    return_tensors="pt")
     summary_ids = model.generate(inputs["input_ids"])
-    summary = tokenizer.batch_decode(summary_ids,
-                                  skip_special_tokens=True,
-                                  clean_up_tokenization_spaces=False)
     return summary[0]
-def YtToQuizz(link,difficulty_level):
-  video_id=pytube.extract.video_id(link)
-  transcript=yt.get_transcript(video_id)
-  data=""
-  for text in transcript:
-    data+=text.get('text')
-  summary=Summary_BART(data)
-  print(summary)
-  mcq_template = """
     Give a 10 different multiple-choice question MCQ related to the summary: {summary}
     The difficulty level of the question should be: {difficulty_level}
     Please provide the following in:
     1. Question
     2. Correct answer
     3. Three plausible incorrect answer options
-    4. Proper Mcqs format
     """
-  prompt=PromptTemplate(
-    input_variables=['summary','difficulty_level'],
-    template=mcq_template
-  )
-  llama3 = Together(model = "meta-llama/Llama-3-70b-chat-hf",
-                  max_tokens = 2500
-                  )
-  Generated_mcqs=LLMChain(llm=llama3,prompt=prompt)
-  response = Generated_mcqs.invoke({
-    "summary": summary,
-    "difficulty_level": difficulty_level
-  })
-  st.write("MCQ's are following")
-  st.write(response['text'])
-def main():
-    st.title("YouTube video Subtitle to MCQ's Quizz")
-    url_link=st.text_area("Enter YouTube video link")
-    diffculity_level=st.selectbox("Select diffculity level:",["Eassy","Medium","Hard"])
-    if st.button("Generate MCQS Quizz"):
-      YtToQuizz(url_link,diffculity_level)
 if __name__ == '__main__':
-    main()

+import gradio as gr
 import pytube
 from youtube_transcript_api import YouTubeTranscriptApi as yt
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from langchain import PromptTemplate
 from langchain import LLMChain
 from langchain_together import Together
 # Set the API key with double quotes
 os.environ['TOGETHER_API_KEY'] = "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8"
 def Summary_BART(text):
     checkpoint = "sshleifer/distilbart-cnn-12-6"
     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
     model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
     summary_ids = model.generate(inputs["input_ids"])
+    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
     return summary[0]
+def YtToQuizz(link, difficulty_level):
+    video_id = pytube.extract.video_id(link)
+    transcript = yt.get_transcript(video_id)
+    data = ""
+    for text in transcript:
+        data += text.get('text')
+    summary = Summary_BART(data)
+    mcq_template = """
     Give a 10 different multiple-choice question MCQ related to the summary: {summary}
     The difficulty level of the question should be: {difficulty_level}
     Please provide the following in:
     1. Question
     2. Correct answer
     3. Three plausible incorrect answer options
+    4. Proper MCQ format
     """
+    prompt = PromptTemplate(
+        input_variables=['summary', 'difficulty_level'],
+        template=mcq_template
+    )
+    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
+    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
+    response = Generated_mcqs.invoke({
+        "summary": summary,
+        "difficulty_level": difficulty_level
+    })
+    response_text = response['text']
+    # Extract MCQs, correct answers, and options
+    questions = re.findall(r'Question: (.*?)\n', response_text)
+    correct_answers = re.findall(r'Correct answer: (.*?)\n', response_text)
+    options = re.findall(r'Options: (.*?)\n', response_text)
+    all_options = [option.split(', ') for option in options]
+    return questions, all_options, correct_answers
+def main(link, difficulty_level):
+    questions, options, correct_answers = YtToQuizz(link, difficulty_level)
+    return {
+        "Questions": questions,
+        "Options": options,
+        "Correct Answers": correct_answers
+    }
+iface = gr.Interface(
+    fn=main,
+    inputs=[
+        gr.inputs.Textbox(lines=2, placeholder="Enter YouTube video link"),
+        gr.inputs.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:")
+    ],
+    outputs=[
+        gr.outputs.JSON(label="MCQs Output")
+    ],
+    title="YouTube Video Subtitle to MCQs Quiz",
+    description="Generate MCQs from YouTube video subtitles"
+)
 if __name__ == '__main__':
+    iface.launch()