# NOTE: Hugging Face Spaces page residue (status: "Sleeping") captured along
# with the source listing below; it is not part of the program.
| import gradio as gr | |
| import pytube | |
| from youtube_transcript_api import YouTubeTranscriptApi as yt | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
| import os | |
| from langchain import PromptTemplate | |
| from langchain import LLMChain | |
| from langchain_together import Together | |
| import re | |
# SECURITY: this Together API key is committed to source control and must be
# treated as leaked — rotate it and supply the real key via the environment.
# setdefault preserves the original fallback behavior while letting a
# deployment override the key without editing the source.
os.environ.setdefault('TOGETHER_API_KEY', "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8")
def Summary_BART(text):
    """Summarize English text with the sshleifer/distilbart-cnn-12-6 model.

    Parameters
    ----------
    text : str
        Source text; the tokenized input is truncated to 1024 tokens.

    Returns
    -------
    str
        The generated summary (first decoded sequence).
    """
    # Loading the tokenizer and seq2seq model is very expensive (hundreds of
    # MB of weights); cache both on the function object so repeated calls —
    # this app calls Summary_BART up to twice per request — reuse one instance.
    if not hasattr(Summary_BART, "_loaded"):
        checkpoint = "sshleifer/distilbart-cnn-12-6"
        Summary_BART._loaded = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    tokenizer, model = Summary_BART._loaded
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(
        summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return summary[0]
def translate_text(text, target_language):
    """Translate English text using the Helsinki-NLP opus-mt-en-<lang> model.

    Parameters
    ----------
    text : str
        English source text.
    target_language : str
        ISO language code appended to the model name (e.g. "fr", "de").

    Returns
    -------
    str
        The translated text (capped at 512 output tokens).
    """
    # Building a translation pipeline downloads and loads a full model, so
    # keep one cached pipeline per target language instead of one per call.
    cache = translate_text.__dict__.setdefault("_pipelines", {})
    if target_language not in cache:
        cache[target_language] = pipeline(
            "translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}"
        )
    translated_text = cache[target_language](text, max_length=512)
    return translated_text[0]['translation_text']
def YtToQuizz(link, difficulty_level, language):
    """Build a 10-question MCQ quiz (English + translated) from a YouTube video.

    Parameters
    ----------
    link : str
        YouTube video URL; the video id is extracted with pytube.
    difficulty_level : str
        "Easy" / "Medium" / "Hard" — interpolated into the LLM prompt.
    language : str
        Target language code; "en" skips the translation pass.

    Returns
    -------
    tuple[str, str, str, str, str, str]
        Newline-joined questions, correct answers, and options — first the
        English set, then the translated set. Strings (not lists) so each
        gr.Textbox output renders cleanly instead of showing a list repr.
    """
    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    data = " ".join(entry['text'] for entry in transcript)
    summary = Summary_BART(data)
    if language != "en":
        # NOTE(review): distilbart-cnn-12-6 is an English summarizer, so
        # summarizing machine-translated text with it is best-effort only.
        translated_summary = Summary_BART(translate_text(data, language))
    else:
        translated_summary = summary
    mcq_template = """
Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
Please provide the following for each question:
1. Question
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
The language of the questions should be: {language}
"""
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level', 'language'],
        template=mcq_template
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
    chain = LLMChain(llm=llama3, prompt=prompt)
    response_text_en = chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
        "language": "English"
    })['text']
    response_text_translated = chain.invoke({
        "summary": translated_summary,
        "difficulty_level": difficulty_level,
        "language": language
    })['text']
    # Parse "Question/Correct answer/Incorrect answers" triples from the
    # free-form LLM output.
    mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
    mcqs_en = re.findall(mcq_pattern, response_text_en, re.DOTALL)
    mcqs_translated = re.findall(mcq_pattern, response_text_translated, re.DOTALL)
    if len(mcqs_en) < 10 or len(mcqs_translated) < 10:
        error = "Failed to generate 10 complete MCQs. Please try again."
        return error, "", "", error, "", ""
    questions_en, answers_en, options_en = _format_mcqs(mcqs_en[:10])
    questions_tr, answers_tr, options_tr = _format_mcqs(mcqs_translated[:10])
    return questions_en, answers_en, options_en, questions_tr, answers_tr, options_tr


def _format_mcqs(mcqs):
    """Format parsed (question, correct, incorrect-csv) tuples into three
    newline-joined display strings: questions, correct answers, options."""
    questions, answers, options = [], [], []
    for idx, (question, correct, incorrect_csv) in enumerate(mcqs, start=1):
        # Pad missing distractors so a malformed LLM line cannot raise
        # IndexError when fewer than three incorrect options were produced.
        incorrect = [opt.strip() for opt in incorrect_csv.split(',') if opt.strip()]
        incorrect += ["N/A"] * (3 - len(incorrect))
        questions.append(f"Q{idx}: {question}")
        answers.append(f"Q{idx}: {correct}")
        options.append(
            f"Q{idx}: A) {correct}, B) {incorrect[0]}, C) {incorrect[1]}, D) {incorrect[2]}"
        )
    return "\n".join(questions), "\n".join(answers), "\n".join(options)
def main(link, difficulty_level, language):
    """Gradio entry point: delegate straight to YtToQuizz."""
    quiz_outputs = YtToQuizz(link, difficulty_level, language)
    return quiz_outputs
# Gradio UI wiring: one URL box plus difficulty/language dropdowns in, six
# read-only text panels out (questions / answers / options, EN + translated).
_quiz_inputs = [
    gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
    gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
    gr.components.Dropdown(
        ["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
        label="Select language:"
    ),
]
_quiz_outputs = [
    gr.components.Textbox(label="MCQs Statements (English)", lines=20),
    gr.components.Textbox(label="Correct Answers (English)", lines=10),
    gr.components.Textbox(label="Options (English)", lines=30),
    gr.components.Textbox(label="MCQs Statements (Translated)", lines=20),
    gr.components.Textbox(label="Correct Answers (Translated)", lines=10),
    gr.components.Textbox(label="Options (Translated)", lines=30),
]
iface = gr.Interface(
    fn=main,
    inputs=_quiz_inputs,
    outputs=_quiz_outputs,
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles",
)

if __name__ == '__main__':
    iface.launch()