Rehman1603 committed
Commit 7b4f776 · verified · 1 Parent(s): 552d9ba

Update app.py

Files changed (1):
1. app.py +54 -48
app.py CHANGED
@@ -1,4 +1,4 @@
-import streamlit as st
+import gradio as gr
 import pytube
 from youtube_transcript_api import YouTubeTranscriptApi as yt
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@@ -6,72 +6,78 @@ import os
 from langchain import PromptTemplate
 from langchain import LLMChain
 from langchain_together import Together
-import re
-import json
-import os
 
 # Set the API key with double quotes
 os.environ['TOGETHER_API_KEY'] = "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8"
 
-
-
-
-
-
 def Summary_BART(text):
     checkpoint = "sshleifer/distilbart-cnn-12-6"
     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
     model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
-    inputs = tokenizer(text,
-                       max_length=1024,
-                       truncation=True,
-                       return_tensors="pt")
+    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
     summary_ids = model.generate(inputs["input_ids"])
-    summary = tokenizer.batch_decode(summary_ids,
-                                     skip_special_tokens=True,
-                                     clean_up_tokenization_spaces=False)
+    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
     return summary[0]
 
-def YtToQuizz(link,difficulty_level):
-    video_id=pytube.extract.video_id(link)
-    transcript=yt.get_transcript(video_id)
-    data=""
-    for text in transcript:
-        data+=text.get('text')
-    summary=Summary_BART(data)
-    print(summary)
-    mcq_template = """
+def YtToQuizz(link, difficulty_level):
+    video_id = pytube.extract.video_id(link)
+    transcript = yt.get_transcript(video_id)
+    data = ""
+    for text in transcript:
+        data += text.get('text')
+    summary = Summary_BART(data)
+
+    mcq_template = """
     Give a 10 different multiple-choice question MCQ related to the summary: {summary}
     The difficulty level of the question should be: {difficulty_level}
-
     Please provide the following in:
     1. Question
    2. Correct answer
     3. Three plausible incorrect answer options
-    4. Proper Mcqs format
+    4. Proper MCQ format
     """
-    prompt=PromptTemplate(
-        input_variables=['summary','difficulty_level'],
-        template=mcq_template
-    )
-    llama3 = Together(model = "meta-llama/Llama-3-70b-chat-hf",
-                      max_tokens = 2500
-    )
-    Generated_mcqs=LLMChain(llm=llama3,prompt=prompt)
+    prompt = PromptTemplate(
+        input_variables=['summary', 'difficulty_level'],
+        template=mcq_template
+    )
+    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
+    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
+
+    response = Generated_mcqs.invoke({
+        "summary": summary,
+        "difficulty_level": difficulty_level
+    })
+
+    response_text = response['text']
+
+    # Extract MCQs, correct answers, and options
+    questions = re.findall(r'Question: (.*?)\n', response_text)
+    correct_answers = re.findall(r'Correct answer: (.*?)\n', response_text)
+    options = re.findall(r'Options: (.*?)\n', response_text)
+    all_options = [option.split(', ') for option in options]
+
+    return questions, all_options, correct_answers
 
-    response = Generated_mcqs.invoke({
-        "summary": summary,
-        "difficulty_level": difficulty_level
-    })
-    st.write("MCQ's are following")
-    st.write(response['text'])
+def main(link, difficulty_level):
+    questions, options, correct_answers = YtToQuizz(link, difficulty_level)
+    return {
+        "Questions": questions,
+        "Options": options,
+        "Correct Answers": correct_answers
+    }
 
+iface = gr.Interface(
+    fn=main,
+    inputs=[
+        gr.inputs.Textbox(lines=2, placeholder="Enter YouTube video link"),
+        gr.inputs.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:")
+    ],
+    outputs=[
+        gr.outputs.JSON(label="MCQs Output")
+    ],
+    title="YouTube Video Subtitle to MCQs Quiz",
+    description="Generate MCQs from YouTube video subtitles"
+)
 
-def main():
-    st.title("YouTube video Subtitle to MCQ's Quizz")
-    url_link=st.text_area("Enter YouTube video link")
-    diffculity_level=st.selectbox("Select diffculity level:",["Eassy","Medium","Hard"])
-    if st.button("Generate MCQS Quizz"):
-        YtToQuizz(url_link,diffculity_level)
 if __name__ == '__main__':
-    main()
+    iface.launch()
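
The updated YtToQuizz still calls os.environ[...] and re.findall(...) even though the diff removes the import re and import os lines. A minimal sketch of the import block the new app.py would likely need, not part of the commit; keeping os and re alongside gradio is an assumption based on the calls visible in the diff:

# Hypothetical import block for the new app.py (sketch, not from the commit)
import os   # still needed for os.environ['TOGETHER_API_KEY']
import re   # still needed for the re.findall() parsing in YtToQuizz

import gradio as gr
import pytube
from youtube_transcript_api import YouTubeTranscriptApi as yt
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together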
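
For reference, a standalone sketch of the regex parsing the new YtToQuizz applies to the model response; only the patterns come from the commit, the sample response text below is invented for illustration:

import re

# Invented sample in the "Question / Correct answer / Options" layout the prompt asks for
sample_response = (
    "Question: What is the video mainly about?\n"
    "Correct answer: Summarizing subtitles\n"
    "Options: Summarizing subtitles, Editing video, Recording audio, Live streaming\n"
)

# Same patterns as in the diff: capture everything up to the next newline
questions = re.findall(r'Question: (.*?)\n', sample_response)
correct_answers = re.findall(r'Correct answer: (.*?)\n', sample_response)
options = re.findall(r'Options: (.*?)\n', sample_response)
all_options = [option.split(', ') for option in options]

print(questions)        # ['What is the video mainly about?']
print(correct_answers)  # ['Summarizing subtitles']
print(all_options)      # [['Summarizing subtitles', 'Editing video', 'Recording audio', 'Live streaming']]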