Update app.py
app.py
CHANGED
@@ -1,15 +1,11 @@
 import gradio as gr
-import requests
-import re
 import os
-import
-import
-import threading
-from googleapiclient.discovery import build
-from huggingface_hub import InferenceClient
+import re
+import requests
 from pytube import YouTube
 import whisper
 import logging
+from huggingface_hub import InferenceClient

 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -17,31 +13,42 @@ logging.basicConfig(level=logging.INFO)
 # Load the Whisper model
 model = whisper.load_model("base")

-# YouTube API key
-API_KEY = 'AIzaSyDUz3wkGal0ewRtPlzeMit88bV4hS4ZIVY'
-
-# Build the YouTube API service
-youtube = build('youtube', 'v3', developerKey=API_KEY)
-
 # Hugging Face API setup
 client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=os.getenv("HF_TOKEN"))

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+DEFAULT_SYSTEM_PROMPT = """
+You are a professional blogger who runs a blog.
+Producing high-quality, SEO-friendly posts must be your top priority, and when writing a blog post
+you must above all make sure the text reads as natural Korean rather than as a translation.
+To keep the Korean natural, write every post based on the [Conditions for natural Korean] below.
+Insert a line break after each sentence so the post is easy to read, and use markdown for readability.
+If the output would contain Chinese characters or Japanese, always translate it into Korean before printing.
+[Conditions for natural Korean]
+1. Write text that fits the context of the topic.
+2. Choose vocabulary appropriate to the topic and situation.
+3. Take Korean culture into account.
+4. Consider emotional nuance. [Express emotion appropriately to build empathy.]
+5. Balance literal and free translation.
+6. Prefer active over passive sentences.
+7. Minimize pronouns.
+8. Prefer verbs and adjectives over noun forms.
+9. Structure sentences as subject-object-verb.
+10. Avoid the endings "~어요", "~에요", "~고요".
+11. Minimize past-tense sentences.
+12. Write carefully so the text cannot be recognized as written by generative AI.
+13. Keep sentences concise.
+14. Use simple, easy vocabulary.
+15. The purpose of this post is to describe a usage review as vividly as if you had used the product yourself.
+[Body]
+1. Before starting each chapter, review and apply the [Conditions for natural Korean] first.
+2. Do not cover everything in the body; base it on items 1~3 above.
+3. Write the body around the previously provided keywords so that it is SEO-friendly.
+4. Write the three main chapters in one pass, then finish with a concluding section.
+5. Do not put the main keyword in the title.
+6. Use a variety of topic-related keywords, and never use any one of them more than twice per chapter.
+7. Each chapter must be at least 1,000 characters, so the three chapters together must run to 3,000 characters or more.
+8. Add 10 "#hashtags".
+"""

 def download_audio(video_url):
     yt = YouTube(video_url)
@@ -71,11 +78,11 @@ def generate_transcript(audio_path):
         logging.error(f"Exception during transcription: {str(e)}")
         return f"An error occurred during transcription: {str(e)}"

-def generate_reply(comment_text, system_prompt):
-    prompt = f"{system_prompt}\n\
+def generate_blog_post(transcript, system_prompt):
+    prompt = f"{system_prompt}\n\nTranscript: {transcript}\n\nBlog Post:"
     response = client.text_generation(
         prompt=prompt,
-        max_new_tokens=
+        max_new_tokens=3000,
         temperature=0.7,
         top_p=0.9
     )
@@ -83,98 +90,18 @@ def generate_reply(comment_text, system_prompt):
     return response['generated_text']
     return response

-def send_webhook(data):
-    response = requests.post(WEBHOOK_URL, json=data)
-    return response.status_code, response.text
-
-def get_video_comments(video_id):
-    try:
-        comments = []
-        request = youtube.commentThreads().list(
-            part='snippet',
-            videoId=video_id,
-            maxResults=100,  # limit on how many comments to read
-            textFormat='plainText'
-        )
-        response = request.execute()
-        while request is not None:
-            for item in response['items']:
-                snippet = item['snippet']['topLevelComment']['snippet']
-                comment = {
-                    'comment_id': item['snippet']['topLevelComment']['id'],
-                    'author': snippet['authorDisplayName'],
-                    'published_at': snippet['publishedAt'],
-                    'text': snippet['textDisplay'],
-                    'reply_count': item['snippet']['totalReplyCount']
-                }
-                comments.append(comment)
-            if 'nextPageToken' in response:
-                request = youtube.commentThreads().list(
-                    part='snippet',
-                    videoId=video_id,
-                    pageToken=response['nextPageToken'],
-                    maxResults=100,  # limit on how many comments to read
-                    textFormat='plainText'
-                )
-                response = request.execute()
-            else:
-                break
-        return comments
-    except Exception as e:
-        return [{'error': str(e)}]
-
-def fetch_comments(video_url, system_prompt):
+def process_video_url(video_url, system_prompt):
     log_entries = []
-
-    if
-
-
-
-
-
-
-
-        existing_comments = load_existing_comments()
-        new_comments = get_video_comments(video_id)
-
-        if not new_comments or 'error' in new_comments[0]:
-            return "Could not find any comments, or an error occurred."
-
-        recent_new_comments = [c for c in new_comments if c['comment_id'] not in {c['comment_id'] for c in existing_comments} and c['reply_count'] == 0]
-
-        if recent_new_comments:
-            for most_recent_comment in recent_new_comments:
-                combined_prompt = f"{transcript}\n\n{system_prompt}"
-                reply_text = generate_reply(most_recent_comment['text'], combined_prompt)
-                webhook_data = {
-                    "comment_id": most_recent_comment['comment_id'],
-                    "author": most_recent_comment['author'],
-                    "published_at": most_recent_comment['published_at'],
-                    "text": most_recent_comment['text'],
-                    "reply_text": reply_text
-                }
-                webhook_status, webhook_response = send_webhook(webhook_data)
-                log_entries.append(f"Recent comment: {most_recent_comment['text']}\n\nGenerated reply: {reply_text}\n\nWebhook response: {webhook_status} - {webhook_response}")
-                existing_comments.append(most_recent_comment)
-            save_comments(existing_comments)
-        else:
-            log_entries.append("There are no new comments.")
-    else:
-        log_entries.append("Invalid YouTube URL.")
+    audio_path = download_audio(video_url)
+    if not audio_path:
+        return "Could not download the audio."
+
+    transcript = generate_transcript(audio_path)
+    blog_post_text = generate_blog_post(transcript, system_prompt)
+
+    log_entries.append(f"Blog post generated: {blog_post_text}")
     return "\n\n".join(log_entries)

-def background_fetch_comments():
-    while not stop_event.is_set():
-        result = fetch_comments("https://www.youtube.com/watch?v=dQw4w9WgXcQ", DEFAULT_SYSTEM_PROMPT)  # example URL and prompt settings
-        print(result)
-        time.sleep(10)
-
-def start_background_fetch():
-    threading.Thread(target=background_fetch_comments).start()
-
-def stop_background_fetch():
-    stop_event.set()
-
 def get_text(video_url):
     audio_path = download_audio(video_url)
     if not audio_path:
@@ -195,14 +122,15 @@ with demo:

     with gr.Row():
         result_button_transcribe = gr.Button('Transcribe')
-
+        result_button_blog_post = gr.Button('Generate Blog Post')

     with gr.Row():
         output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript', lines=20)
-
+        output_text_blog_post = gr.Textbox(placeholder='Blog post text', label='Blog post text', lines=20)

     result_button_transcribe.click(get_text, inputs=input_text_url, outputs=output_text_transcribe, api_name="transcribe_api")
-
+    result_button_blog_post.click(process_video_url, inputs=[input_text_url, input_text_prompt], outputs=output_text_blog_post, api_name="generate_blog_post_api")

 # Launch the interface
 demo.launch()
+
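
Several helpers referenced above (download_audio, generate_transcript) are only partially visible in the unchanged context lines. For orientation, a minimal sketch of what generate_transcript presumably looks like given the openai-whisper model loaded at the top of the file; the try-branch body is an assumption, only the function name, the except-branch lines, and whisper.load_model("base") appear in the diff.

# Minimal sketch (assumed body) of generate_transcript.
import logging
import whisper

model = whisper.load_model("base")

def generate_transcript(audio_path):
    try:
        result = model.transcribe(audio_path)  # openai-whisper returns a dict with a "text" field
        return result["text"]
    except Exception as e:
        logging.error(f"Exception during transcription: {str(e)}")
        return f"An error occurred during transcription: {str(e)}"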
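One detail a reviewer might flag in the kept context lines: InferenceClient.text_generation returns a plain string by default (a structured object only when details=True), so return response['generated_text'] would raise a TypeError when indexing a string, and the following return response is unreachable. A minimal sketch of how the helper could be simplified, assuming the default non-streaming call; this is a suggestion, not the committed code.

# Suggested cleanup (not part of this commit): return the string directly.
def generate_blog_post(transcript, system_prompt):
    prompt = f"{system_prompt}\n\nTranscript: {transcript}\n\nBlog Post:"
    response = client.text_generation(
        prompt=prompt,
        max_new_tokens=3000,
        temperature=0.7,
        top_p=0.9,
    )
    return response  # already a str when details=False (the default)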
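Because both .click() handlers register an api_name, the Space also exposes them as API endpoints. A minimal sketch of calling them from Python with gradio_client, assuming the app is running locally on Gradio's default port; the local URL, the example video URL, and the short prompt are placeholders, not part of the commit.

# Minimal sketch: calling the endpoints registered via api_name in this diff.
from gradio_client import Client

api = Client("http://127.0.0.1:7860")  # or the Space URL once deployed

# Maps to api_name="transcribe_api" (one input: the video URL)
transcript = api.predict(
    "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    api_name="/transcribe_api",
)

# Maps to api_name="generate_blog_post_api" (two inputs: video URL and system prompt)
blog_post = api.predict(
    "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    "Write a natural, SEO-friendly blog post.",  # the app's DEFAULT_SYSTEM_PROMPT could be passed instead
    api_name="/generate_blog_post_api",
)

print(blog_post)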