# NOTE: Hugging Face Spaces page residue (status: "Sleeping") captured along
# with the source listing below; it is not part of the program.
| import gradio as gr | |
| import pytube | |
| from youtube_transcript_api import YouTubeTranscriptApi as yt | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
| import os | |
| from langchain import PromptTemplate | |
| from langchain import LLMChain | |
| from langchain_together import Together | |
| import re | |
# SECURITY: this Together API key is committed to source control and must be
# treated as leaked — rotate it and supply the real key via the environment.
# setdefault preserves the original fallback behavior while letting a
# deployment override the key without editing the source.
os.environ.setdefault('TOGETHER_API_KEY', "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8")
def Summary_BART(text):
    """Summarize English text with the sshleifer/distilbart-cnn-12-6 model.

    Parameters
    ----------
    text : str
        Source text; the tokenized input is truncated to 1024 tokens.

    Returns
    -------
    str
        The generated summary (first decoded sequence).
    """
    # Loading the tokenizer and seq2seq model is very expensive (hundreds of
    # MB of weights); cache both on the function object so repeated calls —
    # this app calls Summary_BART up to twice per request — reuse one instance.
    if not hasattr(Summary_BART, "_loaded"):
        checkpoint = "sshleifer/distilbart-cnn-12-6"
        Summary_BART._loaded = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    tokenizer, model = Summary_BART._loaded
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(
        summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return summary[0]
def translate_text(text, target_language):
    """Translate English text using the Helsinki-NLP opus-mt-en-<lang> model.

    Parameters
    ----------
    text : str
        English source text.
    target_language : str
        ISO language code appended to the model name (e.g. "fr", "de").

    Returns
    -------
    str
        The translated text (capped at 512 output tokens).
    """
    # Building a translation pipeline downloads and loads a full model, so
    # keep one cached pipeline per target language instead of one per call.
    cache = translate_text.__dict__.setdefault("_pipelines", {})
    if target_language not in cache:
        cache[target_language] = pipeline(
            "translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}"
        )
    translated_text = cache[target_language](text, max_length=512)
    return translated_text[0]['translation_text']
def YtToQuizz(link, difficulty_level, language):
    """Build a 10-question MCQ quiz (English + translated) from a YouTube video.

    Parameters
    ----------
    link : str
        YouTube video URL; the video id is extracted with pytube.
    difficulty_level : str
        "Easy" / "Medium" / "Hard" — interpolated into the LLM prompt.
    language : str
        Target language code; "en" skips the translation pass.

    Returns
    -------
    tuple[str, str, str, str, str, str]
        Newline-joined questions, correct answers, and options — first the
        English set, then the translated set. Strings (not lists) so each
        gr.Textbox output renders cleanly instead of showing a list repr.
    """
    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    data = " ".join(entry['text'] for entry in transcript)
    summary = Summary_BART(data)
    if language != "en":
        # NOTE(review): distilbart-cnn-12-6 is an English summarizer, so
        # summarizing machine-translated text with it is best-effort only.
        translated_summary = Summary_BART(translate_text(data, language))
    else:
        translated_summary = summary
    mcq_template = """
Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
Please provide the following for each question:
1. Question
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
The language of the questions should be: {language}
"""
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level', 'language'],
        template=mcq_template
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
    chain = LLMChain(llm=llama3, prompt=prompt)
    response_text_en = chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
        "language": "English"
    })['text']
    response_text_translated = chain.invoke({
        "summary": translated_summary,
        "difficulty_level": difficulty_level,
        "language": language
    })['text']
    # Parse "Question/Correct answer/Incorrect answers" triples from the
    # free-form LLM output.
    mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
    mcqs_en = re.findall(mcq_pattern, response_text_en, re.DOTALL)
    mcqs_translated = re.findall(mcq_pattern, response_text_translated, re.DOTALL)
    if len(mcqs_en) < 10 or len(mcqs_translated) < 10:
        error = "Failed to generate 10 complete MCQs. Please try again."
        return error, "", "", error, "", ""
    questions_en, answers_en, options_en = _format_mcqs(mcqs_en[:10])
    questions_tr, answers_tr, options_tr = _format_mcqs(mcqs_translated[:10])
    return questions_en, answers_en, options_en, questions_tr, answers_tr, options_tr


def _format_mcqs(mcqs):
    """Format parsed (question, correct, incorrect-csv) tuples into three
    newline-joined display strings: questions, correct answers, options."""
    questions, answers, options = [], [], []
    for idx, (question, correct, incorrect_csv) in enumerate(mcqs, start=1):
        # Pad missing distractors so a malformed LLM line cannot raise
        # IndexError when fewer than three incorrect options were produced.
        incorrect = [opt.strip() for opt in incorrect_csv.split(',') if opt.strip()]
        incorrect += ["N/A"] * (3 - len(incorrect))
        questions.append(f"Q{idx}: {question}")
        answers.append(f"Q{idx}: {correct}")
        options.append(
            f"Q{idx}: A) {correct}, B) {incorrect[0]}, C) {incorrect[1]}, D) {incorrect[2]}"
        )
    return "\n".join(questions), "\n".join(answers), "\n".join(options)
def main(link, difficulty_level, language):
    """Gradio entry point: delegate straight to YtToQuizz."""
    quiz_outputs = YtToQuizz(link, difficulty_level, language)
    return quiz_outputs
# Gradio UI wiring: one URL box plus difficulty/language dropdowns in, six
# read-only text panels out (questions / answers / options, EN + translated).
_quiz_inputs = [
    gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
    gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
    gr.components.Dropdown(
        ["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
        label="Select language:"
    ),
]
_quiz_outputs = [
    gr.components.Textbox(label="MCQs Statements (English)", lines=20),
    gr.components.Textbox(label="Correct Answers (English)", lines=10),
    gr.components.Textbox(label="Options (English)", lines=30),
    gr.components.Textbox(label="MCQs Statements (Translated)", lines=20),
    gr.components.Textbox(label="Correct Answers (Translated)", lines=10),
    gr.components.Textbox(label="Options (Translated)", lines=30),
]
iface = gr.Interface(
    fn=main,
    inputs=_quiz_inputs,
    outputs=_quiz_outputs,
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles",
)

if __name__ == '__main__':
    iface.launch()