Spaces:
Sleeping
Sleeping
File size: 5,528 Bytes
7b4f776 23a0568 76e82bd 23a0568 06e461d 61e7eb4 552d9ba 23a0568 7b4f776 23a0568 7b4f776 23a0568 76e82bd 7b4f776 76e82bd 7b4f776 76e82bd 7b4f776 61e7eb4 23a0568 61e7eb4 76e82bd 23a0568 7b4f776 76e82bd 7b4f776 76e82bd 7b4f776 76e82bd 7b4f776 76e82bd 7b4f776 61e7eb4 76e82bd 61e7eb4 76e82bd 0b7048f 76e82bd 7b4f776 76e82bd 61e7eb4 76e82bd 61e7eb4 76e82bd 23a0568 76e82bd 61e7eb4 7b4f776 23c6c56 76e82bd 7b4f776 76e82bd 7b4f776 23a0568 7b4f776 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import functools
import os
import re
import warnings

import gradio as gr
import pytube
from langchain import LLMChain
from langchain import PromptTemplate
from langchain_together import Together
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from youtube_transcript_api import YouTubeTranscriptApi as yt
# The Together API key is a secret and must not live in source control. It is
# read from the TOGETHER_API_KEY environment variable (for example a secret
# configured on the hosting platform); a value supplied by the deployment is
# never overwritten here.
# NOTE(review): a key was previously hard-coded at this point -- treat it as
# leaked and rotate it.
if not os.environ.get("TOGETHER_API_KEY"):
    warnings.warn(
        "TOGETHER_API_KEY is not set; quiz generation will fail until it is "
        "configured in the environment.",
        RuntimeWarning,
    )
@functools.lru_cache(maxsize=1)
def _load_bart_summarizer(checkpoint):
    """Load the tokenizer and seq2seq model for *checkpoint* once and reuse them."""
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return tokenizer, model


def Summary_BART(text):
    """Summarize *text* with the ``sshleifer/distilbart-cnn-12-6`` checkpoint.

    The input is tokenized with truncation at 1024 tokens, so only the
    beginning of a long text contributes to the summary.

    Args:
        text: The text to summarize.

    Returns:
        The decoded summary string (first generated sequence).
    """
    # Loading the weights is by far the most expensive step; the cached loader
    # keeps them in memory instead of re-reading them on every call.
    tokenizer, model = _load_bart_summarizer("sshleifer/distilbart-cnn-12-6")
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return summary[0]
@functools.lru_cache(maxsize=4)
def _load_translator(target_language):
    """Build (once per language) the English -> *target_language* pipeline."""
    return pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")


def translate_text(text, target_language):
    """Translate English *text* into *target_language*.

    Args:
        text: English source text.
        target_language: Language code used as the suffix of the
            ``Helsinki-NLP/opus-mt-en-<code>`` model name.
            NOTE(review): not every code offered in the UI necessarily has a
            model published under exactly this name -- confirm per language.

    Returns:
        The translated text of the first pipeline result.
    """
    translator = _load_translator(target_language)
    # truncation=True keeps over-long inputs within the model's input limit
    # instead of letting the pipeline fail on them; text beyond the limit is
    # dropped, so callers should pass reasonably short text.
    translated_text = translator(text, max_length=512, truncation=True)
    return translated_text[0]['translation_text']
# Number of questions requested from the LLM and required in its answer.
_MCQ_COUNT = 10

# Shown to the user when either response has too few well-formed questions.
_MCQ_FAILURE_MESSAGE = "Failed to generate 10 complete MCQs. Please try again."

# Prompt sent to the LLM. The doubled backslashes keep a literal "\n" in the
# format example shown to the model.
_MCQ_TEMPLATE = """
Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
Please provide the following for each question:
1. Question
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
The language of the questions should be: {language}
"""

# Matches one "Question / Correct answer / Incorrect answers" triple in a response.
_MCQ_PATTERN = re.compile(
    r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)',
    re.DOTALL,
)


def _parse_mcqs(response_text):
    """Return ``(question, correct, [three distractors])`` for each complete MCQ."""
    parsed = []
    for question, correct_answer, incorrect_answers in _MCQ_PATTERN.findall(response_text):
        distractors = [opt.strip() for opt in re.split(r',\s*', incorrect_answers) if opt.strip()]
        # An entry with fewer than three distractors cannot fill options B-D;
        # skip it rather than crash with an IndexError while formatting.
        if len(distractors) >= 3:
            parsed.append((question, correct_answer, distractors[:3]))
    return parsed


def _format_mcqs(mcqs):
    """Turn parsed MCQs into numbered question, answer and option lines."""
    questions, correct_answers, options = [], [], []
    for number, (question, correct_answer, distractors) in enumerate(mcqs, start=1):
        questions.append(f"Q{number}: {question}")
        correct_answers.append(f"Q{number}: {correct_answer}")
        # NOTE(review): the correct answer is always listed as option A;
        # shuffle here if the options are meant to be used as a real quiz.
        options.append(
            f"Q{number}: A) {correct_answer}, B) {distractors[0]}, "
            f"C) {distractors[1]}, D) {distractors[2]}"
        )
    return questions, correct_answers, options


def _request_mcqs(chain, summary, difficulty_level, language):
    """Run the MCQ chain for one summary/language and parse its response."""
    response = chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
        "language": language,
    })
    return _parse_mcqs(response['text'])


def YtToQuizz(link, difficulty_level, language):
    """Generate English and translated MCQs from a YouTube video's transcript.

    The transcript is fetched, joined into one text and summarized; the LLM is
    then asked for ten MCQs about the summary, once in English and once in the
    requested language.

    Args:
        link: URL of the YouTube video.
        difficulty_level: Difficulty label inserted into the prompt.
        language: Target language code; ``"en"`` means no translation.

    Returns:
        A 6-tuple of lists: English questions, correct answers and options,
        followed by the same three lists for the target language. When either
        response contains fewer than ten complete MCQs, the first list holds a
        failure message and the other five hold a single empty string.
    """
    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    data = " ".join(entry['text'] for entry in transcript)
    summary = Summary_BART(data)

    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level', 'language'],
        template=_MCQ_TEMPLATE,
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
    mcq_chain = LLMChain(llm=llama3, prompt=prompt)

    mcqs_en = _request_mcqs(mcq_chain, summary, difficulty_level, "English")
    if language == "en":
        # Nothing to translate: reuse the English quiz instead of paying for a
        # second, redundant LLM call.
        mcqs_translated = mcqs_en
    else:
        # Translate the short English summary rather than summarizing a
        # translated transcript: the DistilBART CNN checkpoint is trained on
        # English news text, and the summary comfortably fits the translator.
        translated_summary = translate_text(summary, language)
        mcqs_translated = _request_mcqs(mcq_chain, translated_summary, difficulty_level, language)

    if len(mcqs_en) < _MCQ_COUNT or len(mcqs_translated) < _MCQ_COUNT:
        return [_MCQ_FAILURE_MESSAGE], [""], [""], [""], [""], [""]

    questions_en, correct_answers_en, options_en = _format_mcqs(mcqs_en[:_MCQ_COUNT])
    questions_translated, correct_answers_translated, options_translated = _format_mcqs(
        mcqs_translated[:_MCQ_COUNT]
    )
    return (
        questions_en,
        correct_answers_en,
        options_en,
        questions_translated,
        correct_answers_translated,
        options_translated,
    )
def main(link, difficulty_level, language):
    """Gradio callback: build the quiz and format it for the six text boxes.

    Args:
        link: URL of the YouTube video.
        difficulty_level: Difficulty label chosen in the UI.
        language: Target language code chosen in the UI.

    Returns:
        A tuple with one string per output text box, in the order returned by
        ``YtToQuizz``.
    """
    results = YtToQuizz(link, difficulty_level, language)
    # The outputs are text boxes; join each list into one entry per line so
    # the user sees readable text instead of a Python list representation.
    return tuple(
        "\n".join(part) if isinstance(part, (list, tuple)) else part
        for part in results
    )
# Gradio front end: a video link, a difficulty level and a target language go
# in; six text panes (questions, answers and options, first in English and
# then in the target language) come out.
iface = gr.Interface(
    fn=main,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter YouTube video link"),
        gr.Dropdown(
            choices=["Easy", "Medium", "Hard"],
            label="Select difficulty level:",
        ),
        gr.Dropdown(
            choices=["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
            label="Select language:",
        ),
    ],
    outputs=[
        gr.Textbox(label=pane_label, lines=pane_lines)
        for pane_label, pane_lines in (
            ("MCQs Statements (English)", 20),
            ("Correct Answers (English)", 10),
            ("Options (English)", 30),
            ("MCQs Statements (Translated)", 20),
            ("Correct Answers (Translated)", 10),
            ("Options (Translated)", 30),
        )
    ],
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
|