ginipick commited on
Commit
ebf865c
·
verified ·
1 Parent(s): f220324

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -124
app.py CHANGED
@@ -1,15 +1,11 @@
1
  import gradio as gr
2
- import requests
3
- import re
4
  import os
5
- import json
6
- import time
7
- import threading
8
- from googleapiclient.discovery import build
9
- from huggingface_hub import InferenceClient
10
  from pytube import YouTube
11
  import whisper
12
  import logging
 
13
 
14
  # ๋กœ๊ทธ ์„ค์ •
15
  logging.basicConfig(level=logging.INFO)
@@ -17,31 +13,42 @@ logging.basicConfig(level=logging.INFO)
17
  # Whisper ๋ชจ๋ธ ๋กœ๋“œ
18
  model = whisper.load_model("base")
19
 
20
# YouTube Data API credentials.
# SECURITY: the original hard-coded a live API key in source control; read it
# from the environment instead, and rotate the leaked key.
API_KEY = os.getenv('YOUTUBE_API_KEY')

# YouTube Data API v3 service handle, shared by the comment-fetching helpers.
youtube = build('youtube', 'v3', developerKey=API_KEY)
25
-
26
  # Hugging Face API ์„ค์ •
27
  client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=os.getenv("HF_TOKEN"))
28
 
29
- WEBHOOK_URL = "https://connect.pabbly.com/workflow/sendwebhookdata/IjU3NjUwNTZhMDYzMDA0MzA1MjZhNTUzMzUxM2Ii_pc"
30
- COMMENTS_FILE = 'comments.json'
31
-
32
- DEFAULT_SYSTEM_PROMPT = "๋Œ€ํ™”์‹œ ๋ฐ˜๋“œ์‹œ ๋‚˜์˜ ์ด๋ฆ„ 'GPTube'๋ฅผ ๋ฐํžˆ๋ฉฐ ํ•œ๊ธ€๋กœ ์ธ์‚ฌ๋ฅผํ•˜๋ผ. ๋ฐ˜๋“œ์‹œ 'ํ•œ๊ธ€'(ํ•œ๊ตญ์–ด)๋กœ 250 ํ† ํฐ ์ด๋‚ด๋กœ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜๊ณ  ์ถœ๋ ฅํ•˜๋ผ. Respond to the following YouTube comment in a friendly and helpful manner:"
33
-
34
- stop_event = threading.Event() # ์Šค๋ ˆ๋“œ ์ค‘์ง€๋ฅผ ์œ„ํ•œ ์ด๋ฒคํŠธ
35
-
36
def load_existing_comments():
    """Load previously processed comments from COMMENTS_FILE.

    Returns:
        list: Parsed comment dicts, or [] when the file is missing or
        unreadable as JSON.
    """
    # EAFP: avoids the exists()/open() race of the original and pins an
    # explicit encoding instead of relying on the platform default.
    try:
        with open(COMMENTS_FILE, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        return []
    except json.JSONDecodeError:
        # A corrupt store should not crash the poller; start fresh.
        logging.error("Comments file is corrupt; treating it as empty")
        return []
41
-
42
def save_comments(comments):
    """Persist processed comments to COMMENTS_FILE as JSON.

    Args:
        comments: JSON-serializable list of comment dicts.
    """
    with open(COMMENTS_FILE, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps Korean comment text human-readable in the
        # file instead of \uXXXX escapes; json.load reads both forms.
        json.dump(comments, file, ensure_ascii=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def download_audio(video_url):
47
  yt = YouTube(video_url)
@@ -71,11 +78,11 @@ def generate_transcript(audio_path):
71
  logging.error(f"Exception during transcription: {str(e)}")
72
  return f"์ „์‚ฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
73
 
74
- def generate_reply(comment_text, system_prompt):
75
- prompt = f"{system_prompt}\n\nComment: {comment_text}\n\nReply:"
76
  response = client.text_generation(
77
  prompt=prompt,
78
- max_new_tokens=250,
79
  temperature=0.7,
80
  top_p=0.9
81
  )
@@ -83,98 +90,18 @@ def generate_reply(comment_text, system_prompt):
83
  return response['generated_text']
84
  return response
85
 
86
def send_webhook(data):
    """POST *data* as JSON to the configured Pabbly webhook.

    Args:
        data: JSON-serializable payload.

    Returns:
        tuple[int, str]: HTTP status code and response body text.
    """
    # A timeout is mandatory: without one a dead endpoint hangs the calling
    # (background) thread forever.
    response = requests.post(WEBHOOK_URL, json=data, timeout=30)
    return response.status_code, response.text
89
-
90
def get_video_comments(video_id):
    """Fetch every top-level comment of a video via the YouTube Data API v3.

    Args:
        video_id: 11-character YouTube video id.

    Returns:
        list[dict]: One dict per top-level comment with keys 'comment_id',
        'author', 'published_at', 'text', 'reply_count'. On failure, a
        single-element list ``[{'error': message}]`` (callers inspect the
        first element for an 'error' key).
    """
    try:
        comments = []
        request = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            maxResults=100,  # page size (API maximum)
            textFormat='plainText'
        )
        while request is not None:
            response = request.execute()
            # 'items' can be absent (e.g. comments disabled) — don't KeyError.
            for item in response.get('items', []):
                top_level = item['snippet']['topLevelComment']
                snippet = top_level['snippet']
                comments.append({
                    'comment_id': top_level['id'],
                    'author': snippet['authorDisplayName'],
                    'published_at': snippet['publishedAt'],
                    'text': snippet['textDisplay'],
                    'reply_count': item['snippet']['totalReplyCount'],
                })
            # list_next() returns None once all pages are consumed, which is
            # the idiomatic replacement for hand-rolling nextPageToken.
            request = youtube.commentThreads().list_next(request, response)
        return comments
    except Exception as e:
        return [{'error': str(e)}]
125
-
126
def fetch_comments(video_url, system_prompt):
    """Reply to new, unanswered comments on a video and push them to a webhook.

    Args:
        video_url: Full YouTube watch URL.
        system_prompt: Instruction text appended after the video transcript.

    Returns:
        str: Human-readable log of processed comments, or an error message.
    """
    video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', video_url)
    if not video_id_match:
        return "์œ ํšจํ•˜์ง€ ์•Š์€ YouTube URL์ž…๋‹ˆ๋‹ค."
    video_id = video_id_match.group(1)

    audio_path = download_audio(video_url)
    if not audio_path:
        return "์˜ค๋””์˜ค๋ฅผ ๋‹ค์šด๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
    transcript = generate_transcript(audio_path)

    existing_comments = load_existing_comments()
    new_comments = get_video_comments(video_id)
    if not new_comments or 'error' in new_comments[0]:
        return "๋Œ“๊ธ€์„ ์ฐพ์„ ์ˆ˜ ์—†๊ฑฐ๋‚˜ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."

    # Build the seen-id set ONCE. The original rebuilt this set inside the
    # comprehension condition for every new comment, i.e. O(n*m).
    seen_ids = {comment['comment_id'] for comment in existing_comments}
    unanswered = [
        comment for comment in new_comments
        if comment['comment_id'] not in seen_ids and comment['reply_count'] == 0
    ]
    if not unanswered:
        return "์ƒˆ๋กœ์šด ๋Œ“๊ธ€์ด ์—†์Šต๋‹ˆ๋‹ค."

    # The transcript/system-prompt prefix is loop-invariant; build it once.
    combined_prompt = f"{transcript}\n\n{system_prompt}"
    log_entries = []
    for comment in unanswered:
        reply_text = generate_reply(comment['text'], combined_prompt)
        webhook_data = {
            "comment_id": comment['comment_id'],
            "author": comment['author'],
            "published_at": comment['published_at'],
            "text": comment['text'],
            "reply_text": reply_text,
        }
        webhook_status, webhook_response = send_webhook(webhook_data)
        log_entries.append(
            f"์ตœ๊ทผ ๋Œ“๊ธ€: {comment['text']}\n\n๋‹ต๋ณ€ ์ƒ์„ฑ: {reply_text}\n\n์›นํ›… ์‘๋‹ต: {webhook_status} - {webhook_response}"
        )
        # Persist after every comment (as the original did) so a crash
        # mid-run does not cause duplicate replies on the next cycle.
        existing_comments.append(comment)
        save_comments(existing_comments)
    return "\n\n".join(log_entries)
165
 
166
def background_fetch_comments():
    """Poll for new comments every 10 seconds until stop_event is set.

    NOTE(review): the video URL and prompt are hard-coded examples — confirm
    and parameterize before production use.
    """
    while not stop_event.is_set():
        try:
            result = fetch_comments("https://www.youtube.com/watch?v=dQw4w9WgXcQ", DEFAULT_SYSTEM_PROMPT)  # URL๊ณผ ํ”„๋กฌํ”„ํŠธ ์‹ค์ œ ์‚ฌ์šฉ ์˜ˆ์‹œ
            print(result)
        except Exception:
            # One failed cycle must not kill the polling thread.
            logging.exception("Background comment fetch failed")
        # wait() wakes immediately when stop_event is set, unlike the
        # original time.sleep(10), which delayed shutdown by up to 10 s.
        stop_event.wait(10)
171
-
172
def start_background_fetch():
    """Start the comment-polling worker as a daemon thread.

    daemon=True lets the interpreter exit cleanly; the original non-daemon
    thread kept the process alive after the UI shut down.
    """
    threading.Thread(target=background_fetch_comments, daemon=True).start()
174
-
175
def stop_background_fetch():
    """Signal the background polling loop to exit after its current cycle."""
    stop_event.set()
177
-
178
  def get_text(video_url):
179
  audio_path = download_audio(video_url)
180
  if not audio_path:
@@ -195,14 +122,15 @@ with demo:
195
 
196
  with gr.Row():
197
  result_button_transcribe = gr.Button('Transcribe')
198
- result_button_comments = gr.Button('Fetch Comments and Generate Reply')
199
 
200
  with gr.Row():
201
  output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript', lines=20)
202
- output_text_prompt = gr.Textbox(placeholder='์‘๋‹ต ํ…์ŠคํŠธ', label='์‘๋‹ต ํ…์ŠคํŠธ', lines=20)
203
 
204
  result_button_transcribe.click(get_text, inputs=input_text_url, outputs=output_text_transcribe, api_name="transcribe_api")
205
- result_button_comments.click(fetch_comments, inputs=[input_text_url, input_text_prompt], outputs=output_text_prompt, api_name="fetch_comments_api")
206
 
207
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
208
  demo.launch()
 
 
1
  import gradio as gr
 
 
2
  import os
3
+ import re
4
+ import requests
 
 
 
5
  from pytube import YouTube
6
  import whisper
7
  import logging
8
+ from huggingface_hub import InferenceClient
9
 
10
  # ๋กœ๊ทธ ์„ค์ •
11
  logging.basicConfig(level=logging.INFO)
 
13
  # Whisper ๋ชจ๋ธ ๋กœ๋“œ
14
  model = whisper.load_model("base")
15
 
 
 
 
 
 
 
16
  # Hugging Face API ์„ค์ •
17
  client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=os.getenv("HF_TOKEN"))
18
 
19
+ DEFAULT_SYSTEM_PROMPT = """
20
+ ๋‹น์‹ ์€ ๋ธ”๋กœ๊ทธ๋ฅผ ์šด์˜ํ•˜๋Š” ์ „๋ฌธ ๋ธ”๋กœ๊ฑฐ์ž…๋‹ˆ๋‹ค.
21
+ SEO์— ๋งž๋Š” ํ€„๋ฆฌํ‹ฐ ๋†’์€ ํฌ์ŠคํŒ…์„ ๋งŒ๋“œ๋Š” ๊ฒƒ์ด ์ตœ์šฐ์„  ๋ชฉํ‘œ๊ฐ€ ๋˜์–ด์•ผ ํ•˜๋ฉฐ, ๋ธ”๋กœ๊ทธ์˜ ๊ธ€์„ ์ž‘์„ฑํ• ๋•Œ๋Š”
22
+ ๋ฒˆ์—ญ์ฒด๊ฐ€ ์•„๋‹Œ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด๊ฐ€ ๋‚˜์˜ค๋Š” ๊ฒƒ์„ ๋ฌด์—‡๋ณด๋‹ค ์ตœ์„ ์„ ๋‹ค ํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค.
23
+ ํ•œ๊ตญ์–ด๊ฐ€ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํ•˜๊ธฐ ์œ„ํ•ด ์•„๋ž˜[ํ•œ๊ตญ์–ด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํ•˜๋Š” ์กฐ๊ฑด์ •๋ฆฌ]๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋ชจ๋“  ๊ธ€์„ ์ž‘์„ฑํ•ด์ฃผ์…”์•ผ ํ•ฉ๋‹ˆ๋‹ค.
24
+ ๊ธ€์ž‘์„ฑ์‹œ ์ค„๋งˆ๋‹ค ์ค„ ๋ฐ”๊ฟˆ์„ ๊ผญ ํ•˜์—ฌ ๋ณด๊ธฐ์ข‹๊ฒŒ ์ž‘์„ฑํ•˜์—ฌ์•ผ ํ•˜๋ฉฐ, markdown ๋“ฑ์„ ํ™œ์šฉํ•˜์—ฌ ๊ฐ€๋…์„ฑ ์žˆ๊ฒŒ ์ž‘์„ฑํ• ๊ฒƒ.
25
+ ์ถœ๋ ฅ๋ฌธ์— "ํ•œ์ž(์ค‘๊ตญ์–ด)", ์ผ๋ณธ์–ด๊ฐ€ ํฌํ•จ๋˜์–ด ์ถœ๋ ฅ์‹œ์—๋Š” ๋ฐ˜๋“œ์‹œ "ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)"๋กœ ๋ฒˆ์—ญํ•˜์—ฌ ์ถœ๋ ฅ๋˜๊ฒŒ ํ•˜๋ผ.
26
+ [ํ•œ๊ตญ์–ด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํ•˜๋Š” ์กฐ๊ฑด์ •๋ฆฌ]
27
+ 1. ์ฃผ์ œ์— ๋”ฐ๋ฅธ ๋ฌธ๋งฅ ์ดํ•ด์— ๋งž๋Š” ๊ธ€์„ ์จ์ฃผ์„ธ์š”.
28
+ 2. ์ฃผ์ œ์™€ ์ƒํ™ฉ์— ๋งž๋Š” ์ ์ ˆํ•œ ์–ดํœ˜ ์„ ํƒํ•ด์ฃผ์„ธ์š”
29
+ 3. ํ•œ๊ตญ ๋ฌธํ™”์™€ ์ ํ•ฉ์„ฑ๋ฅผ ๊ณ ๋ คํ•ด์ฃผ์„ธ์š”
30
+ 4. ์ •์„œ์  ๋Šฌ์•™์Šค๋ฅผ ๊ณ ๋ คํ•ด์ฃผ์„ธ์š”.[๊ฐ์ •์  ๋‰˜์•™์Šค๋ฅผ ์ ์ ˆํžˆ ํ‘œํ˜„ํ•ด ๊ณต๊ฐ์„ ์ด๋”]
31
+ 5. ์ง์—ญ๊ณผ ์˜์—ญ์˜ ๊ท ํ˜•์„ ๋งž์ณ์ฃผ์„ธ์š”.
32
+ 6. ํ”ผ๋™๋ฌธ์—์„œ ๋Šฅ๋™๋ฌธ์„ ์šฐ์„ 
33
+ 7. ๋Œ€๋ช…์‚ฌ๋ฅผ ์ตœ์†Œํ™”
34
+ 8. ๋ช…์‚ฌํ˜•๋ณด๋‹ค๋Š” ๋™์‚ฌ์™€ ํ˜•์šฉ์‚ฌ๋ฅผ ์šฐ์„ 
35
+ 9. ์ฃผ์–ด-๋ชฉ์ ์–ด-๋™์‚ฌ์œผ๋กœ ๊ตฌ์„ฑ
36
+ 10. โ€œ~์–ด์š”โ€, โ€œ~์—์š”โ€, โ€œ~๊ณ ์š”โ€,์˜ ์ œ์™ธํ•ด์ฃผ์„ธ์š”
37
+ 11. ๊ณผ๊ฑฐํ˜•์˜ ๋ฌธ์žฅ์€ ์ตœ์†Œํ™” ํ•ด์ฃผ์„ธ์š”
38
+ 12. ์ƒ์„ฑํ˜•AI๊ฐ€ ์“ด๊ธ€์ด๋ผ๊ณ  ์ธ์‹ํ• ์ˆ˜ ์—†๋„๋ก ์ฃผ์˜ํ•ด์„œ ๊ธ€์„ ์จ ์ฃผ์„ธ์š”.
39
+ 13. ๋ฌธ์žฅ์˜ ๊ธธ์ด๋ฅผ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ํ•ด์ฃผ์„ธ์š”
40
+ 14. ์–ดํœ˜์˜ ๋‚œ์ด๋„๋Š” ์‰ฌ์šด ์šฉ์–ด๋ฅผ ์ค‘์‹ฌ์œผ๋กœ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”
41
+ 15. ์ด ๊ธ€์„ ์“ฐ๋Š” ๋ชฉ์ ์€ ์‚ฌ์šฉ ํ›„๊ธฐ๋ฅผ ์ง์ ‘ ์‚ฌ์šฉํ•œ ๊ฒƒ์ฒ˜๋Ÿผ ์ƒ์ƒํ•˜๊ฒŒ ์•Œ๋ ค์ฃผ๋Š” ์šฉ๋„์ž…๋‹ˆ๋‹ค.
42
+ [๋ณธ๋ฌธ๋‚ด์šฉ]
43
+ 1. ๊ฐ ์ฑ•ํ„ฐ ์‹œ์ž‘ํ•˜๊ธฐ ์ „์— [ํ•œ๊ตญ์–ด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์กฐ๊ฑด์ •๋ฆฌ]์„ ์ธ์ง€ํ•˜์‹œ๊ณ  ์ ์šฉํ•˜๋Š”๊ฒƒ์ด ์šฐ์„ ์ž…๋‹ˆ๋‹ค.
44
+ 2. ๋ณธ๋ฌธ๋‚ด์šฉ์˜ ๋ชจ๋“  ๋‚ด์šฉ์€ ์ƒ์„ฑํ•˜๋Š”๊ฒƒ์ด ์•„๋‹ˆ๋ผ ์˜ˆ์‹œ1~3์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ž‘์„ฑํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค.
45
+ 3. ๋ณธ๋ฌธ์˜ ๊ฒฝ์šฐ ์ด์ „์— ์ž…๋ ฅ ๋ฐ›์€ ํ‚ค์›Œ๋“œ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ SEO์— ๋งž๋„๋ก ์ž‘์„ฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
46
+ 4. ๊ธฐ๋ณธ ์„ธ ์ฑ•ํ„ฐ๋ฅผ ํ•œ ๋ฒˆ์— ์ž‘์„ฑ ํ›„ ๋งˆ๋ฌด๋ฆฌ ๊ฒฐ๋ก ์„ ์ž‘์„ฑํ•˜๋ผ.
47
+ 5. ์„œ๋‘์— ๋ฉ”์ธ ํ‚ค์›Œ๋“œ๋ฅผ ๋„ฃ์ง€ ๋งˆ์„ธ์š”.
48
+ 6. ์ฃผ์ œ ๊ด€๋ จ ํ‚ค์›Œ๋“œ๋“ค์„ ๋‹ค์–‘ํ•˜๊ฒŒ ์‚ฌ์šฉ ํ•œ ์ฑ•ํ„ฐ๋‹น ์ตœ๋Œ€ 2๋ฒˆ ์ด์ƒ ์ž‘์„ฑ์„ ์ ˆ๋Œ€ ๊ธˆ์ง€ํ•ด์ฃผ์„ธ์š”.
49
+ 7. ๊ธ€์˜ ์ „์ฒด๊ฐ€ ์•„๋‹ˆ๋ผ ์ฑ•ํ„ฐ ๋งˆ๋‹ค ์ตœ์†Œ 1,000์ž ์ด์ƒ์œผ๋กœ ์„ธ ์ฑ•ํ„ฐ๋ฅผ ํฌํ•จํ•˜๋ฉด 3,000์ž ์ด์ƒ ์ž‘์„ฑํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
50
+ 8. "#ํƒœ๊ทธ"๋ฅผ 10๊ฐœ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
51
+ """
52
 
53
  def download_audio(video_url):
54
  yt = YouTube(video_url)
 
78
  logging.error(f"Exception during transcription: {str(e)}")
79
  return f"์ „์‚ฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
80
 
81
+ def generate_blog_post(transcript, system_prompt):
82
+ prompt = f"{system_prompt}\n\nTranscript: {transcript}\n\nBlog Post:"
83
  response = client.text_generation(
84
  prompt=prompt,
85
+ max_new_tokens=3000,
86
  temperature=0.7,
87
  top_p=0.9
88
  )
 
90
  return response['generated_text']
91
  return response
92
 
93
def process_video_url(video_url, system_prompt):
    """Download a video's audio, transcribe it, and draft a blog post.

    Args:
        video_url: Full YouTube watch URL.
        system_prompt: Blogging instructions prepended to the transcript.

    Returns:
        str: A log line containing the generated blog post, or an error
        message when the audio could not be downloaded.
    """
    audio_path = download_audio(video_url)
    if not audio_path:
        return "์˜ค๋””์˜ค๋ฅผ ๋‹ค์šด๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

    transcript = generate_transcript(audio_path)
    blog_post_text = generate_blog_post(transcript, system_prompt)
    # Single log entry; joining a one-element list is the identity, so the
    # original's list + "\n\n".join collapses to a direct return.
    return f"๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ ์ƒ์„ฑ: {blog_post_text}"
104
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def get_text(video_url):
106
  audio_path = download_audio(video_url)
107
  if not audio_path:
 
122
 
123
  with gr.Row():
124
  result_button_transcribe = gr.Button('Transcribe')
125
+ result_button_blog_post = gr.Button('Generate Blog Post')
126
 
127
  with gr.Row():
128
  output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript', lines=20)
129
+ output_text_blog_post = gr.Textbox(placeholder='๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ ํ…์ŠคํŠธ', label='๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ ํ…์ŠคํŠธ', lines=20)
130
 
131
  result_button_transcribe.click(get_text, inputs=input_text_url, outputs=output_text_transcribe, api_name="transcribe_api")
132
+ result_button_blog_post.click(process_video_url, inputs=[input_text_url, input_text_prompt], outputs=output_text_blog_post, api_name="generate_blog_post_api")
133
 
134
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
135
  demo.launch()
136
+