ginipick committed on
Commit
42c866b
·
verified ·
1 Parent(s): 81ad2ad

Upload app (2).py

Browse files
Files changed (1) hide show
  1. app (2).py +208 -0
app (2).py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import re
4
+ import os
5
+ import json
6
+ import time
7
+ import threading
8
+ from googleapiclient.discovery import build
9
+ from huggingface_hub import InferenceClient
10
+ from pytube import YouTube
11
+ import whisper
12
+ import logging
13
+
14
# Logging configuration
logging.basicConfig(level=logging.INFO)

# Load the Whisper model once at import time ("base" checkpoint)
model = whisper.load_model("base")

# YouTube API key.
# The YOUTUBE_API_KEY environment variable takes precedence when it is set.
# NOTE(review): the literal fallback below is a credential committed to source
# control; it is kept only so existing deployments keep working. It should be
# rotated and the fallback removed once the environment variable is configured.
API_KEY = os.getenv("YOUTUBE_API_KEY", 'AIzaSyDUz3wkGal0ewRtPlzeMit88bV4hS4ZIVY')

# Build the YouTube API service client
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Hugging Face API settings (token is read from the HF_TOKEN environment variable)
client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=os.getenv("HF_TOKEN"))

# Endpoint that receives each generated reply, and the JSON file that records
# the comments already handled
WEBHOOK_URL = "https://connect.pabbly.com/workflow/sendwebhookdata/IjU3NjUwNTZhMDYzMDA0MzA1MjZhNTUzMzUxM2Ii_pc"
COMMENTS_FILE = 'comments.json'

DEFAULT_SYSTEM_PROMPT = "λŒ€ν™”μ‹œ λ°˜λ“œμ‹œ λ‚˜μ˜ 이름 'GPTube'λ₯Ό 밝히며 ν•œκΈ€λ‘œ 인사λ₯Όν•˜λΌ. λ°˜λ“œμ‹œ 'ν•œκΈ€'(ν•œκ΅­μ–΄)둜 250 토큰 μ΄λ‚΄λ‘œ 닡변을 μƒμ„±ν•˜κ³  좜λ ₯ν•˜λΌ. Respond to the following YouTube comment in a friendly and helpful manner:"

stop_event = threading.Event()  # event used to stop the background thread
35
+
36
def load_existing_comments():
    """Return the comments recorded in ``COMMENTS_FILE``.

    Returns:
        The decoded JSON content of the file, or an empty list when the file
        does not exist or does not contain valid JSON.
    """
    try:
        # Open directly instead of checking os.path.exists first, so there is
        # no window in which the file can disappear between check and open.
        with open(COMMENTS_FILE, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        return []
    except json.JSONDecodeError:
        # An empty or truncated file would otherwise raise on every call.
        logging.warning("Could not parse %s; treating it as empty.", COMMENTS_FILE)
        return []
41
+
42
def save_comments(comments):
    """Write ``comments`` to ``COMMENTS_FILE`` as JSON.

    The data is first written to a sibling temporary file and then moved over
    the destination with ``os.replace``, so an interrupted or failed write
    does not leave ``COMMENTS_FILE`` truncated.

    Args:
        comments: JSON-serialisable object to store.
    """
    tmp_path = f"{COMMENTS_FILE}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(comments, file)
        os.replace(tmp_path, COMMENTS_FILE)
    finally:
        # Only present here if the dump or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
45
+
46
def download_audio(video_url, max_bytes=30000000):
    """Download the first audio-only stream of a YouTube video.

    The stream is saved into the current directory and, when it is within the
    size limit, the file is given an ``.mp3`` extension. The file is only
    renamed; its contents are not converted.

    Args:
        video_url: URL passed to ``pytube.YouTube``.
        max_bytes: Largest accepted file size in bytes.

    Returns:
        Path of the renamed audio file, or ``None`` when the video has no
        audio-only stream or the download exceeds ``max_bytes``.
    """
    yt = YouTube(video_url)
    audio = yt.streams.filter(only_audio=True).first()
    if audio is None:
        # first() yields None when the filter matches no stream.
        logging.error('No audio-only stream is available for this video.')
        return None
    audio_path = audio.download(output_path=".")

    file_stats = os.stat(audio_path)
    logging.info(f'Size of audio file in Bytes: {file_stats.st_size}')

    if file_stats.st_size > max_bytes:  # Check the file size limit
        logging.error('Videos for transcription on this space are limited to about 1.5 hours. Please contact support for more information.')
        # The rejected download is never used, so do not leave it on disk.
        os.remove(audio_path)
        return None

    base, _ext = os.path.splitext(audio_path)
    new_file = base + '.mp3'
    # os.replace overwrites a leftover file from an earlier run on every
    # platform, whereas os.rename raises on Windows if the target exists.
    os.replace(audio_path, new_file)
    return new_file
62
+
63
def generate_transcript(audio_path):
    """Transcribe an audio file with the module-level Whisper ``model``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The stripped transcript text. If the path is empty or missing, or the
        transcription raises, the error is logged and an error message string
        is returned instead of raising.
    """
    try:
        path_is_usable = bool(audio_path) and os.path.exists(audio_path)
        if not path_is_usable:
            raise ValueError("μœ νš¨ν•œ μ˜€λ””μ˜€ 파일 κ²½λ‘œκ°€ μ•„λ‹™λ‹ˆλ‹€.")

        transcription = model.transcribe(audio_path)
        text = transcription['text']
        return text.strip()
    except Exception as exc:
        detail = str(exc)
        logging.error(f"Exception during transcription: {detail}")
        return f"전사 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {detail}"
73
+
74
def generate_reply(comment_text, system_prompt):
    """Generate a reply to a comment with the module-level inference ``client``.

    Args:
        comment_text: Text of the comment to answer.
        system_prompt: Instructions placed before the comment in the prompt.

    Returns:
        The ``'generated_text'`` entry when the client returns a dict that
        contains it; otherwise the client's return value unchanged.
    """
    generation_options = {
        "max_new_tokens": 250,
        "temperature": 0.7,
        "top_p": 0.9,
    }
    full_prompt = f"{system_prompt}\n\nComment: {comment_text}\n\nReply:"
    result = client.text_generation(prompt=full_prompt, **generation_options)

    has_generated_text = isinstance(result, dict) and 'generated_text' in result
    return result['generated_text'] if has_generated_text else result
85
+
86
def send_webhook(data, timeout=10):
    """POST ``data`` as JSON to ``WEBHOOK_URL``.

    Args:
        data: JSON-serialisable payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on an unresponsive server.

    Returns:
        A ``(status_code, response_text)`` tuple.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.post(WEBHOOK_URL, json=data, timeout=timeout)
    return response.status_code, response.text
89
+
90
def get_video_comments(video_id, max_results=100):
    """Fetch every top-level comment thread of a video, following all pages.

    Args:
        video_id: YouTube video id passed as ``videoId``.
        max_results: Number of comment threads requested per page.

    Returns:
        A list of dicts with the keys ``comment_id``, ``author``,
        ``published_at``, ``text`` and ``reply_count``. If any step raises,
        the exception is logged and ``[{'error': <message>}]`` is returned.
    """
    try:
        comments = []
        page_token = None
        while True:
            # One place builds the request for both the first and later pages.
            params = {
                'part': 'snippet',
                'videoId': video_id,
                'maxResults': max_results,  # number of comments read per page
                'textFormat': 'plainText',
            }
            if page_token is not None:
                params['pageToken'] = page_token
            response = youtube.commentThreads().list(**params).execute()

            for item in response['items']:
                top_level = item['snippet']['topLevelComment']
                snippet = top_level['snippet']
                comments.append({
                    'comment_id': top_level['id'],
                    'author': snippet['authorDisplayName'],
                    'published_at': snippet['publishedAt'],
                    'text': snippet['textDisplay'],
                    'reply_count': item['snippet']['totalReplyCount'],
                })

            if 'nextPageToken' not in response:
                break
            page_token = response['nextPageToken']
        return comments
    except Exception as e:
        # Callers only receive the message; log the traceback so the failure
        # is not lost.
        logging.exception("Failed to fetch comments for video %s", video_id)
        return [{'error': str(e)}]
125
+
126
def fetch_comments(video_url, system_prompt):
    """Reply to the unanswered, not-yet-handled comments of a YouTube video.

    The video's audio is downloaded and transcribed, its comments are fetched,
    and for each comment that has no replies and is not recorded in the
    comments file a reply is generated and posted to the webhook.

    Args:
        video_url: YouTube video URL; the 11-character video id is extracted
            from it with a regular expression.
        system_prompt: Instructions appended after the transcript to form the
            prompt given to ``generate_reply``.

    Returns:
        A string: the log entries joined by blank lines, or a single error
        message when the audio download or the comment fetch fails.
    """
    log_entries = []
    video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', video_url)
    if not video_id_match:
        log_entries.append("μœ νš¨ν•˜μ§€ μ•Šμ€ YouTube URLμž…λ‹ˆλ‹€.")
        return "\n\n".join(log_entries)

    video_id = video_id_match.group(1)
    audio_path = download_audio(video_url)
    if not audio_path:
        return "μ˜€λ””μ˜€λ₯Ό λ‹€μš΄λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€."

    transcript = generate_transcript(audio_path)

    existing_comments = load_existing_comments()
    new_comments = get_video_comments(video_id)

    if not new_comments or 'error' in new_comments[0]:
        return "λŒ“κΈ€μ„ 찾을 수 μ—†κ±°λ‚˜ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."

    # Build the lookup set once instead of once per candidate comment.
    handled_ids = {c['comment_id'] for c in existing_comments}
    recent_new_comments = [
        c for c in new_comments
        if c['comment_id'] not in handled_ids and c['reply_count'] == 0
    ]

    if not recent_new_comments:
        log_entries.append("μƒˆλ‘œμš΄ λŒ“κΈ€μ΄ μ—†μŠ΅λ‹ˆλ‹€.")
        return "\n\n".join(log_entries)

    # The prompt prefix is the same for every comment of this video.
    combined_prompt = f"{transcript}\n\n{system_prompt}"
    for most_recent_comment in recent_new_comments:
        comment_text = most_recent_comment['text']
        reply_text = generate_reply(comment_text, combined_prompt)
        webhook_data = {
            "comment_id": most_recent_comment['comment_id'],
            "author": most_recent_comment['author'],
            "published_at": most_recent_comment['published_at'],
            "text": comment_text,
            "reply_text": reply_text
        }
        webhook_status, webhook_response = send_webhook(webhook_data)
        log_entries.append(f"졜근 λŒ“κΈ€: {comment_text}\n\nλ‹΅λ³€ 생성: {reply_text}\n\nμ›Ήν›… 응닡: {webhook_status} - {webhook_response}")
        existing_comments.append(most_recent_comment)
        # Persist after every reply so that a failure on a later comment does
        # not cause the ones already answered to be answered again.
        save_comments(existing_comments)

    return "\n\n".join(log_entries)
165
+
166
def background_fetch_comments(video_url="https://www.youtube.com/watch?v=dQw4w9WgXcQ", system_prompt=None, interval=10):
    """Poll a video for new comments until ``stop_event`` is set.

    Args:
        video_url: Video to poll; defaults to the example URL.
        system_prompt: Prompt passed to ``fetch_comments``; defaults to
            ``DEFAULT_SYSTEM_PROMPT`` when ``None``.
        interval: Seconds to wait between polls.
    """
    if system_prompt is None:
        system_prompt = DEFAULT_SYSTEM_PROMPT

    while not stop_event.is_set():
        try:
            result = fetch_comments(video_url, system_prompt)
            print(result)
        except Exception:
            # Keep the poller alive: an unhandled exception here would end the
            # thread without any visible trace.
            logging.exception("Background comment fetch failed; retrying after the polling interval.")
        # Event.wait returns as soon as the event is set, so a stop request is
        # honoured without waiting out the whole interval.
        stop_event.wait(interval)
171
+
172
# Most recently started polling thread, if any.
_background_thread = None


def start_background_fetch():
    """Start the background comment poller in a new thread.

    If a poller started by this function is still running and has not been
    asked to stop, nothing is started. If it was asked to stop, this call
    waits for it to finish, then clears ``stop_event`` (otherwise the new
    thread would exit immediately) and starts a fresh poller.
    """
    global _background_thread

    previous = _background_thread
    if previous is not None and previous.is_alive():
        if not stop_event.is_set():
            # Already polling; a second thread would answer comments twice.
            return
        previous.join()

    stop_event.clear()
    _background_thread = threading.Thread(target=background_fetch_comments)
    _background_thread.start()
174
+
175
def stop_background_fetch():
    """Ask the background polling loop to stop by setting ``stop_event``."""
    stop_event.set()
177
+
178
def get_text(video_url):
    """Download a video's audio and return its transcript.

    Args:
        video_url: YouTube video URL.

    Returns:
        The string produced by ``generate_transcript``, or an error message
        when ``download_audio`` returns no path.
    """
    downloaded_path = download_audio(video_url)
    if downloaded_path:
        return generate_transcript(downloaded_path)
    return "μ˜€λ””μ˜€λ₯Ό λ‹€μš΄λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€."
185
+
186
# Gradio interface definition: URL and prompt inputs, two action buttons,
# and one output box per action.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>GPTube</center></h1>")

    with gr.Row():
        input_text_url = gr.Textbox(
            placeholder='YouTube video URL',
            label='YouTube URL',
        )
        input_text_prompt = gr.Textbox(
            placeholder='μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ',
            label='μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ',
            value=DEFAULT_SYSTEM_PROMPT,
            lines=5,
        )

    with gr.Row():
        result_button_transcribe = gr.Button('Transcribe')
        result_button_comments = gr.Button('Fetch Comments and Generate Reply')

    with gr.Row():
        output_text_transcribe = gr.Textbox(
            placeholder='Transcript of the YouTube video.',
            label='Transcript',
            lines=20,
        )
        output_text_prompt = gr.Textbox(
            placeholder='응닡 ν…μŠ€νŠΈ',
            label='응닡 ν…μŠ€νŠΈ',
            lines=20,
        )

    # Wire each button to its handler and expose it under a named API endpoint.
    result_button_transcribe.click(
        get_text,
        inputs=input_text_url,
        outputs=output_text_transcribe,
        api_name="transcribe_api",
    )
    result_button_comments.click(
        fetch_comments,
        inputs=[input_text_url, input_text_prompt],
        outputs=output_text_prompt,
        api_name="fetch_comments_api",
    )

# Launch the interface
demo.launch()