File size: 5,089 Bytes
2274cd3
 
 
 
 
 
 
5ac9a87
5a8ba9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2274cd3
5a8ba9d
2274cd3
 
 
 
5a8ba9d
 
 
 
 
 
 
 
 
 
 
 
 
 
2274cd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8fcb49
2274cd3
 
 
99814c3
2274cd3
 
 
 
 
 
 
 
 
5a8ba9d
 
 
 
 
 
 
 
 
 
 
 
2274cd3
5a8ba9d
2274cd3
 
5a8ba9d
2274cd3
 
 
 
 
 
5a8ba9d
2274cd3
 
 
 
5a8ba9d
 
 
 
 
 
2274cd3
5a8ba9d
2274cd3
5a8ba9d
 
 
 
 
 
2274cd3
 
 
5a8ba9d
 
 
2274cd3
7693546
5a8ba9d
2274cd3
5a8ba9d
 
2274cd3
 
 
 
 
7693546
 
 
 
 
 
2274cd3
5a8ba9d
2274cd3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import html
import os
import re
from urllib.parse import urlparse, parse_qs

import gradio as gr
import openai
from requests.structures import CaseInsensitiveDict
from youtube_transcript_api import YouTubeTranscriptApi

# Configure the OpenAI client from the OPENAI_KEY environment variable.
# os.getenv returns None when the variable is not set.
openai.api_key = os.getenv("OPENAI_KEY")

# Google Analytics (gtag.js) snippet stored as an HTML string.
# NOTE(review): nothing in the visible part of this file references this
# name (which also looks like a misspelling of "analytics") — confirm
# whether it is still used before renaming or removing it.
google_analtycs="""
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-S9JEXRFQJF"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'G-S9JEXRFQJF');
</script>
"""


def Prompt_T(context, lang):
    """Build the summarisation prompt for one chunk of transcript text.

    Args:
        context: Transcript text to embed between the ``=========`` fences.
        lang: Transcript language code. ``"ru"`` and ``"uk"`` select the
            Russian and Ukrainian prompts; any other value falls back to
            the English prompt.

    Returns:
        The complete prompt: instruction, fenced transcript text and a
        trailing answer label in the prompt's language.
    """
    if lang == "ru":
        instruction = "Я хочу, чтобы вы выступили в роли автора контента, который взят с  транскрипт youtube видео, его нужно преобразовать  в читаемый вид. Резюмируйте следующий текст в 50 слов:"
        answer_label = "Ответ:"
    elif lang == "uk":
        instruction = "Я хочу, щоб ви виступили в ролі автора контенту, який узятий з транскрипту youtube відео,  його треба перетворити у читабельний вигляд. Резюмуйте наступний текст у 50 слів: "
        # The Ukrainian prompt previously ended with the Russian label
        # "Ответ:"; keep the whole prompt in one language.
        answer_label = "Відповідь:"
    else:
        instruction = "I want you to act as a content writer who is working with youtube video transcript. Summarise the following text in 70 words:"
        answer_label = "Answer:"

    # The same fence (including its two-space indent) surrounds the text.
    fence = "\n  =========\n  "
    return instruction + fence + context + fence + answer_label


def split_string(string, chunk_size):
    """Cut *string* into consecutive pieces of at most *chunk_size* items.

    The last piece may be shorter than the others. An empty input yields
    an empty list.
    """
    pieces = []
    for start in range(0, len(string), chunk_size):
        stop = start + chunk_size
        pieces.append(string[start:stop])
    return pieces


def gpt_api(input_text):
    """Send *input_text* to the chat model and return the reply text.

    The prompt is passed as a single ``system`` message to the
    ``gpt-3.5-turbo`` model; the content of the first returned choice is
    handed back to the caller.
    """
    chat_messages = [{"role": "system", "content": input_text}]
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content

# Shape of a video id accepted from the URL: exactly 11 URL-safe characters
# (the same length the id was previously assumed to have).
_VIDEO_ID_RE = re.compile(r"[A-Za-z0-9_-]{11}")


def _extract_video_id(video_url):
    """Return the video id from a YouTube watch URL, or None if absent/invalid."""
    if "youtube.com/watch" not in video_url:
        return None
    # Read the ``v`` query parameter instead of slicing the tail of the URL,
    # so extra parameters (``&t=...``) or fragments do not corrupt the id.
    candidates = parse_qs(urlparse(video_url).query).get("v", [])
    if candidates and _VIDEO_ID_RE.fullmatch(candidates[0]):
        return candidates[0]
    return None


def generate(video_url, request: gr.Request):
    """Summarise a YouTube video's transcript and build its embed markup.

    Args:
        video_url: URL typed by the user, expected to look like
            ``https://www.youtube.com/watch?v=<id>``.
        request: Gradio request object. Accepted so the function matches
            the callback signature; it is not used here.

    Returns:
        A ``(summary_html, embed_html)`` pair. For empty input the pair is
        ``("", "<div></div>")``. When the URL is not recognised or the
        transcript cannot be fetched, both items are short error messages.
    """
    video_url = (video_url or "").strip()

    # Nothing entered (e.g. the initial page load): show the empty base page.
    if not video_url:
        return "", "<div></div>"

    # Reject anything that is not a watch URL carrying a well-formed id.
    # Validating the id also keeps untrusted text out of the iframe markup.
    video_id = _extract_video_id(video_url)
    if video_id is None:
        return "Неверный URL", "Ошибка"

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    except Exception as exc:
        print("list_transcripts failed:", exc)
        return "No access for transcript ", "Error transcript_list"

    # Use the language of the (last listed) auto-generated transcript;
    # fall back to English when none is auto-generated.
    lang_video = "en"
    for transcript in transcript_list:
        if transcript.is_generated:
            lang_video = transcript.language_code
        print("transcript.language_code=", transcript.language_code)

    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_video])
    except Exception as exc:
        print("get_transcript failed:", exc)
        return "No access for transcript", "Error transcript"

    # Each segment's text is followed by a single space.
    transcript_text = "".join(segment["text"] + " " for segment in segments)

    print("Transcript:", transcript_text)
    print("Transcript lenght:", len(transcript_text))
    print("===============================================")

    # Russian and Ukrainian transcripts are sent in smaller chunks.
    chunk_size = 5000 if lang_video in ("ru", "uk") else 12000
    chunks = split_string(transcript_text, chunk_size)

    print("++++++++++++++++++++++++++++++++++++++")
    for chunk in chunks:
        print(chunk)

    # One summary paragraph per chunk. The model output is escaped because
    # it is derived from untrusted transcript text and is rendered as HTML.
    paragraphs = []
    for chunk in chunks:
        answer = gpt_api(Prompt_T(chunk, lang_video))
        paragraphs.append("<p>" + html.escape(answer) + "</p>")

    html_embed = '<iframe width="450" height="250" src="https://www.youtube.com/embed/' + video_id + '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>'
    html_content = "<h6>" + "<br>" + "".join(paragraphs) + "</h6>"
    return html_content, html_embed

# Browser-tab title and CSS overrides (hide the footer, add top padding).
title = "YouTube Summarise (en,ua,ru)"
css = """
footer {visibility: hidden}
.gradio-container {padding-top: 100px}
"""

# Page layout: a row holding the URL input + button next to the video embed,
# with the summary rendered underneath.
with gr.Blocks(css=css, title=title) as demo:
    gr.HTML("<h1>A simple way to summarise   the YouTube video </h1>")
    with gr.Row():
        with gr.Column():
            input_d = gr.Textbox(label="YouTube video URL", placeholder="https://www.youtube.com/watch?v=XXXXXXXX", value="")
            greet_btn = gr.Button("Summarise")
        # Use gr.HTML for the outputs, the same component as the heading
        # above, rather than the legacy gr.outputs namespace.
        dt_2 = gr.HTML()
    dt_1 = gr.HTML()
    # Order matches generate()'s return value: (summary_html, embed_html).
    dt = [dt_1, dt_2]
    greet_btn.click(generate, inputs=input_d, outputs=dt)
    # Also run once on page load so the outputs start from the empty state.
    demo.load(generate, inputs=input_d, outputs=dt)

demo.launch(share=False, debug=True)