Spaces:

Rehman1603
/

YouTube_to_Quiz

Sleeping

File size: 5,528 Bytes

7b4f776
23a0568
 
76e82bd
23a0568
 
 
 
06e461d
61e7eb4
552d9ba
 
 
23a0568
 
 
 
7b4f776
23a0568
7b4f776
23a0568
 
76e82bd
 
 
 
 
 
7b4f776
 
76e82bd
 
7b4f776
 
76e82bd
 
 
 
 
 
7b4f776
61e7eb4
 
 
23a0568
 
 
61e7eb4
76e82bd
23a0568
7b4f776
76e82bd
7b4f776
 
 
 
 
76e82bd
7b4f776
76e82bd
 
 
 
 
 
 
 
7b4f776
 
76e82bd
 
7b4f776
61e7eb4
 
76e82bd
 
 
 
 
61e7eb4
76e82bd
 
0b7048f
76e82bd
 
 
 
 
 
7b4f776
76e82bd
61e7eb4
 
76e82bd
 
 
61e7eb4
76e82bd
23a0568
76e82bd
 
61e7eb4
7b4f776
 
 
23c6c56
76e82bd
 
 
 
 
7b4f776
 
76e82bd
 
 
 
 
 
7b4f776
 
 
 
23a0568
 
7b4f776

import gradio as gr
import pytube
from youtube_transcript_api import YouTubeTranscriptApi as yt
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import os
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together
import re

# Provide a fallback Together API key only when the environment has not already
# configured one, so a key injected by the deployment (for example through a
# hosting platform's secret store) is never overwritten by this literal.
# NOTE(review): a credential committed to source control should be treated as
# leaked -- rotate it and supply the replacement via the environment instead.
os.environ.setdefault('TOGETHER_API_KEY', "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8")

# Checkpoint used for summarisation, and a cache of already-loaded
# (tokenizer, model) pairs keyed by checkpoint name.
_SUMMARIZER_CHECKPOINT = "sshleifer/distilbart-cnn-12-6"
_summarizer_cache = {}


def _load_summarizer(checkpoint=_SUMMARIZER_CHECKPOINT):
    """Return the ``(tokenizer, model)`` pair for *checkpoint*, loading it at most once."""
    if checkpoint not in _summarizer_cache:
        _summarizer_cache[checkpoint] = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    return _summarizer_cache[checkpoint]


def Summary_BART(text):
    """Summarise *text* with the ``sshleifer/distilbart-cnn-12-6`` checkpoint.

    The tokenizer and model are loaded on first use and reused by later
    calls instead of being re-instantiated from the checkpoint every time.

    Args:
        text: The text to summarise. Input longer than 1024 tokens is
            truncated by the tokenizer before generation.

    Returns:
        The decoded summary string for the single input sequence.
    """
    tokenizer, model = _load_summarizer()
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    # No generation arguments are passed, so the model's own defaults apply.
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
    return summary[0]

def translate_text(text, target_language):
    """Translate *text* using the ``Helsinki-NLP/opus-mt-en-<target_language>`` model.

    Args:
        text: The text handed to the translation pipeline.
        target_language: Language code appended to the
            ``Helsinki-NLP/opus-mt-en-`` model name prefix.

    Returns:
        The ``translation_text`` field of the first pipeline result.
    """
    model_name = f"Helsinki-NLP/opus-mt-en-{target_language}"
    opus_mt = pipeline("translation", model=model_name)
    results = opus_mt(text, max_length=512)
    first_result = results[0]
    return first_result['translation_text']

def _parse_mcqs(response_text):
    """Extract complete ``(question, correct_answer, distractors)`` triples from LLM output."""
    mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
    complete = []
    for question, correct_answer, incorrect_answers in re.findall(mcq_pattern, response_text, re.DOTALL):
        distractors = [option.strip() for option in incorrect_answers.split(', ')]
        distractors = [option for option in distractors if option]
        # An MCQ with fewer than three distractors cannot fill options B-D,
        # so it is treated as incomplete rather than raising IndexError later.
        if len(distractors) >= 3:
            complete.append((question, correct_answer, distractors))
    return complete


def _format_mcqs(mcqs):
    """Build the numbered question, answer and option lines for parsed MCQs."""
    questions, correct_answers, options = [], [], []
    for number, (question, correct_answer, distractors) in enumerate(mcqs, start=1):
        questions.append(f"Q{number}: {question}")
        correct_answers.append(f"Q{number}: {correct_answer}")
        # The correct answer is always listed as option A.
        options.append(f"Q{number}: A) {correct_answer}, B) {distractors[0]}, C) {distractors[1]}, D) {distractors[2]}")
    return questions, correct_answers, options


def YtToQuizz(link, difficulty_level, language):
    """Generate ten MCQs in English and in *language* from a YouTube video's transcript.

    The transcript is fetched, summarised with ``Summary_BART`` and the summary
    is sent to the Together-hosted Llama 3 model to produce the questions.

    Args:
        link: YouTube video URL; the video id is extracted from it.
        difficulty_level: Difficulty text inserted into the prompt.
        language: Target language code. ``"en"`` means no translation: the
            English quiz is reused for the "translated" outputs instead of
            issuing a second, redundant LLM request.

    Returns:
        A 6-tuple of lists: English questions, correct answers and options,
        followed by the same three lists for the target language. On success
        each list has ten entries. If fewer than ten complete MCQs could be
        parsed for either language, the first list holds three copies of a
        failure message and the other five hold three empty strings each.

    Raises:
        Errors from fetching the transcript, loading the models or calling the
        LLM are not caught here and propagate to the caller.
    """
    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    data = " ".join(entry['text'] for entry in transcript)

    summary = Summary_BART(data)

    is_english = language == "en"
    if is_english:
        translated_summary = summary
    else:
        # Translate the short English summary rather than translating the
        # whole transcript and re-summarising it: the summariser checkpoint is
        # an English model and the translator is capped at max_length=512.
        translated_summary = translate_text(summary, language)

    mcq_template = """
    Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
    The difficulty level of the questions should be: {difficulty_level}
    Please provide the following for each question:
    1. Question
    2. Correct answer
    3. Three plausible incorrect answer options
    4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
    The language of the questions should be: {language}
    """
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level', 'language'],
        template=mcq_template
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)

    response_en = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
        "language": "English"
    })
    mcqs_en = _parse_mcqs(response_en['text'])

    if is_english:
        mcqs_translated = mcqs_en
    else:
        response_translated = Generated_mcqs.invoke({
            "summary": translated_summary,
            "difficulty_level": difficulty_level,
            "language": language
        })
        mcqs_translated = _parse_mcqs(response_translated['text'])

    if len(mcqs_en) < 10 or len(mcqs_translated) < 10:
        return ["Failed to generate 10 complete MCQs. Please try again."] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3

    questions_en, correct_answers_en, options_en = _format_mcqs(mcqs_en[:10])
    questions_translated, correct_answers_translated, options_translated = _format_mcqs(mcqs_translated[:10])

    return questions_en, correct_answers_en, options_en, questions_translated, correct_answers_translated, options_translated

def main(link, difficulty_level, language):
    """Gradio callback: forward the three form inputs to ``YtToQuizz``.

    Returns:
        Whatever ``YtToQuizz`` returns, unchanged.
    """
    quiz_outputs = YtToQuizz(link, difficulty_level, language)
    return quiz_outputs

# Gradio UI: three inputs (video link, difficulty, language) feed `main`,
# whose six return values fill the six output textboxes in order.
iface = gr.Interface(
    fn=main,
    inputs=[
        gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
        gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
        gr.components.Dropdown(
            ["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
            label="Select language:",
        ),
    ],
    outputs=[
        gr.components.Textbox(label=box_label, lines=box_lines)
        for box_label, box_lines in (
            ("MCQs Statements (English)", 20),
            ("Correct Answers (English)", 10),
            ("Options (English)", 30),
            ("MCQs Statements (Translated)", 20),
            ("Correct Answers (Translated)", 10),
            ("Options (Translated)", 30),
        )
    ],
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles",
)

# Start the web app only when this file is executed as a script.
if __name__ == '__main__':
    iface.launch()