import os
import re

import gradio as gr
import pytube
from youtube_transcript_api import YouTubeTranscriptApi as yt
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_together import Together

# Together API key (hard-coded here for simplicity; prefer loading it from an environment variable or secret store)
os.environ['TOGETHER_API_KEY'] = "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8"

def Summary_BART(text):
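    """Summarize the transcript with the sshleifer/distilbart-cnn-12-6 checkpoint.

    Note: the tokenizer and model are reloaded on every call; caching them at
    module level would avoid repeated loading.
    """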
    checkpoint = "sshleifer/distilbart-cnn-12-6"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
    return summary[0]

def YtToQuizz(link, difficulty_level):
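    """Fetch the video transcript, summarize it, and generate a 10-question MCQ quiz.

    Pipeline: pytube extracts the video id, youtube_transcript_api fetches the
    captions, distilBART summarizes them, and Llama-3 (via Together) writes the
    questions, which are then parsed with regular expressions.
    """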
    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    # Join the caption segments with spaces so adjacent words don't run together.
    data = " ".join(segment.get('text', '') for segment in transcript)
    summary = Summary_BART(data)
    
    mcq_template = """
    Generate 10 different multiple-choice questions (MCQs) based on the summary: {summary}
    The difficulty level of the questions should be: {difficulty_level}
    For each question, provide exactly these lines so the output can be parsed:
    Question: <the question>
    Correct answer: <the correct answer>
    Options: <the correct answer plus three plausible incorrect options, comma-separated>
    """
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
    mcq_chain = LLMChain(llm=llama3, prompt=prompt)

    response = mcq_chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level
    })

    response_text = response['text']

    # Parse the response, assuming the "Question:", "Correct answer:", and
    # "Options:" line format requested in the prompt above.
    questions = re.findall(r'Question: (.*?)\n', response_text)
    correct_answers = re.findall(r'Correct answer: (.*?)\n', response_text)
    options = re.findall(r'Options: (.*?)\n', response_text)
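    # Each "Options:" line is expected to be comma-separated; split it into a list.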
    all_options = [option.split(', ') for option in options]

    return questions, all_options, correct_answers

def main(link, difficulty_level):
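    """Gradio callback: run the quiz pipeline and return a JSON-serializable dict."""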
    questions, options, correct_answers = YtToQuizz(link, difficulty_level)
    return {
        "Questions": questions,
        "Options": options,
        "Correct Answers": correct_answers
    }

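# Gradio UI: a textbox for the video link, a dropdown for difficulty, and a JSON output panel.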
iface = gr.Interface(
    fn=main,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter YouTube video link"),
        gr.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:")
    ],
    outputs=[
        gr.JSON(label="MCQs Output")
    ],
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles"
)

if __name__ == '__main__':
    iface.launch()