Spaces:

ginipick
/

tube

Sleeping

App Files Files Community

ginipick committed on Jul 19, 2024

Commit

42c866b

verified ·

1 Parent(s): 81ad2ad

Upload app (2).py

Browse files

Files changed (1) hide show

app (2).py +208 -0

app (2).py ADDED Viewed

	@@ -0,0 +1,208 @@

+import gradio as gr
+import requests
+import re
+import os
+import json
+import time
+import threading
+from googleapiclient.discovery import build
+from huggingface_hub import InferenceClient
+from pytube import YouTube
+import whisper
+import logging
# Logging configuration.
logging.basicConfig(level=logging.INFO)

# Load the Whisper "base" model once, at import time.
model = whisper.load_model("base")

# YouTube Data API key.
# SECURITY: a live key is committed here in plain text. Supply it through the
# YOUTUBE_API_KEY environment variable instead; the literal is kept only as a
# fallback so existing deployments keep working, and should be rotated and
# removed from source control.
API_KEY = os.getenv("YOUTUBE_API_KEY") or 'AIzaSyDUz3wkGal0ewRtPlzeMit88bV4hS4ZIVY'

# Build the YouTube Data API v3 service client.
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Hugging Face Inference API client; the access token is read from HF_TOKEN.
client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=os.getenv("HF_TOKEN"))

# Webhook that receives every generated reply. It can be overridden through
# the WEBHOOK_URL environment variable; the literal is the fallback.
WEBHOOK_URL = os.getenv("WEBHOOK_URL") or "https://connect.pabbly.com/workflow/sendwebhookdata/IjU3NjUwNTZhMDYzMDA0MzA1MjZhNTUzMzUxM2Ii_pc"

# JSON file that stores the comments that were already answered.
COMMENTS_FILE = 'comments.json'

# Default instruction text that is placed in front of every comment.
DEFAULT_SYSTEM_PROMPT = "대화시 반드시 나의 이름 'GPTube'를 밝히며 한글로 인사를하라. 반드시 '한글'(한국어)로 250 토큰 이내로 답변을 생성하고 출력하라. Respond to the following YouTube comment in a friendly and helpful manner:"

# Event used to ask the background polling thread to stop.
stop_event = threading.Event()
def load_existing_comments():
    """Return the comments previously stored in ``COMMENTS_FILE``.

    Returns:
        The list decoded from the JSON file. An empty list is returned when
        the file does not exist, cannot be decoded (for example because it is
        empty or truncated), or does not contain a JSON list.
    """
    try:
        # Open directly instead of checking os.path.exists first: the file
        # could disappear between the check and the open.
        with open(COMMENTS_FILE, 'r', encoding='utf-8') as file:
            stored = json.load(file)
    except FileNotFoundError:
        return []
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        logging.error("Ignoring unreadable comments file %s: %s", COMMENTS_FILE, exc)
        return []
    if not isinstance(stored, list):
        logging.error("Ignoring comments file %s: expected a JSON list", COMMENTS_FILE)
        return []
    return stored
def save_comments(comments):
    """Write *comments* to ``COMMENTS_FILE`` as JSON.

    The data is first written to a sibling ``.tmp`` file and then moved over
    the target with ``os.replace``. A failed or interrupted write therefore
    leaves the previous file contents untouched instead of a truncated file.

    Args:
        comments: JSON-serialisable object to store.

    Raises:
        TypeError: If *comments* is not JSON-serialisable (from ``json.dump``).
        OSError: If the file cannot be written or replaced.
    """
    tmp_path = f"{COMMENTS_FILE}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(comments, file)
        os.replace(tmp_path, COMMENTS_FILE)
    except Exception:
        # Do not leave a partial temporary file behind; re-raise the error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def download_audio(video_url, max_bytes=30000000):
    """Download the first audio-only stream of a YouTube video.

    The file is downloaded into the current directory and then renamed so
    that it carries an ``.mp3`` extension (the content is not re-encoded).

    Args:
        video_url: URL of the YouTube video.
        max_bytes: Largest accepted file size in bytes.

    Returns:
        Path of the renamed audio file, or ``None`` when the video has no
        audio-only stream or the download exceeds *max_bytes*.
    """
    yt = YouTube(video_url)
    audio = yt.streams.filter(only_audio=True).first()
    if audio is None:
        # .first() yields None when the filter matches nothing.
        logging.error('No audio-only stream is available for %s', video_url)
        return None

    audio_path = audio.download(output_path=".")
    size = os.stat(audio_path).st_size
    logging.info(f'Size of audio file in Bytes: {size}')

    if size > max_bytes:
        logging.error('Videos for transcription on this space are limited to about 1.5 hours. Please contact support for more information.')
        # The rejected download is of no further use; do not let it pile up.
        try:
            os.remove(audio_path)
        except OSError as exc:
            logging.warning('Could not remove oversized download %s: %s', audio_path, exc)
        return None

    base, _ = os.path.splitext(audio_path)
    new_file = base + '.mp3'
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename fails on Windows when the same video was downloaded before.
    os.replace(audio_path, new_file)
    return new_file
def generate_transcript(audio_path):
    """Transcribe an audio file with the module-level Whisper ``model``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The stripped transcript text. On any failure, including a missing or
        empty path, no exception is raised: the error is logged and a Korean
        error message containing the exception text is returned instead.
    """
    try:
        if not audio_path or not os.path.exists(audio_path):
            raise ValueError("유효한 오디오 파일 경로가 아닙니다.")
        return model.transcribe(audio_path)['text'].strip()
    except Exception as e:
        # logging.exception records the traceback as well as the message.
        logging.exception("Exception during transcription: %s", e)
        return f"전사 중 오류가 발생했습니다: {e}"
def generate_reply(comment_text, system_prompt):
    """Generate a reply to one comment with the module-level inference client.

    Args:
        comment_text: Text of the comment to answer.
        system_prompt: Instruction text placed in front of the comment.

    Returns:
        The ``'generated_text'`` entry when the client returns a dict that
        contains it; otherwise whatever the client returned, unchanged.
    """
    sections = (f"{system_prompt}", f"Comment: {comment_text}", "Reply:")
    generation = client.text_generation(
        prompt="\n\n".join(sections),
        max_new_tokens=250,
        temperature=0.7,
        top_p=0.9,
    )
    has_text_field = isinstance(generation, dict) and 'generated_text' in generation
    return generation['generated_text'] if has_text_field else generation
def send_webhook(data, timeout=10):
    """POST *data* as JSON to ``WEBHOOK_URL``.

    Args:
        data: JSON-serialisable payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            endpoint.

    Returns:
        A ``(status_code, response_text)`` tuple.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.post(WEBHOOK_URL, json=data, timeout=timeout)
    return response.status_code, response.text
def get_video_comments(video_id):
    """Collect the top-level comments of a video, following every result page.

    Args:
        video_id: YouTube video ID.

    Returns:
        A list of dicts with the keys ``comment_id``, ``author``,
        ``published_at``, ``text`` and ``reply_count``. If anything fails,
        the error is logged and a single-element list ``[{'error': message}]``
        is returned instead; comments gathered before the failure are
        discarded.
    """
    try:
        comments = []
        threads = youtube.commentThreads()
        request = threads.list(
            part='snippet',
            videoId=video_id,
            maxResults=100,  # number of comment threads requested per page
            textFormat='plainText'
        )
        # list_next builds the request for the following page from the
        # previous request/response pair and returns None after the last page.
        while request is not None:
            response = request.execute()
            for item in response['items']:
                top_level = item['snippet']['topLevelComment']
                snippet = top_level['snippet']
                comments.append({
                    'comment_id': top_level['id'],
                    'author': snippet['authorDisplayName'],
                    'published_at': snippet['publishedAt'],
                    'text': snippet['textDisplay'],
                    'reply_count': item['snippet']['totalReplyCount']
                })
            request = threads.list_next(request, response)
        return comments
    except Exception as e:
        logging.exception("Failed to fetch comments for video %s", video_id)
        return [{'error': str(e)}]
def fetch_comments(video_url, system_prompt):
    """Reply to the new, unanswered comments of a YouTube video.

    Steps: extract the video ID from the URL, download and transcribe the
    audio, fetch the comments, keep those that are not yet stored and have no
    replies, generate a reply for each, send it to the webhook, and store the
    handled comments.

    Args:
        video_url: URL of the YouTube video.
        system_prompt: Instruction text; it is appended to the transcript to
            form the prompt prefix for every reply.

    Returns:
        A human-readable log string: one entry per handled comment, or a
        single status message when there was nothing to do or a step failed.
    """
    video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', video_url or "")
    if not video_id_match:
        return "유효하지 않은 YouTube URL입니다."
    video_id = video_id_match.group(1)

    audio_path = download_audio(video_url)
    if not audio_path:
        return "오디오를 다운로드할 수 없습니다."
    transcript = generate_transcript(audio_path)

    existing_comments = load_existing_comments()
    new_comments = get_video_comments(video_id)
    if not new_comments or 'error' in new_comments[0]:
        return "댓글을 찾을 수 없거나 오류가 발생했습니다."

    # Build the set of known IDs once instead of once per candidate comment.
    known_ids = {c['comment_id'] for c in existing_comments}
    recent_new_comments = [
        c for c in new_comments
        if c['comment_id'] not in known_ids and c['reply_count'] == 0
    ]
    if not recent_new_comments:
        return "새로운 댓글이 없습니다."

    # The prompt prefix is the same for every comment.
    combined_prompt = f"{transcript}\n\n{system_prompt}"
    log_entries = []
    handled_any = False
    try:
        for comment in recent_new_comments:
            reply_text = generate_reply(comment['text'], combined_prompt)
            webhook_data = {
                "comment_id": comment['comment_id'],
                "author": comment['author'],
                "published_at": comment['published_at'],
                "text": comment['text'],
                "reply_text": reply_text
            }
            webhook_status, webhook_response = send_webhook(webhook_data)
            log_entries.append(f"최근 댓글: {comment['text']}\n\n답변 생성: {reply_text}\n\n웹훅 응답: {webhook_status} - {webhook_response}")
            existing_comments.append(comment)
            handled_any = True
    finally:
        # Persist progress even when a later comment fails, so comments that
        # were already answered are not answered again on the next run.
        if handled_any:
            save_comments(existing_comments)
    return "\n\n".join(log_entries)
def background_fetch_comments(video_url="https://www.youtube.com/watch?v=dQw4w9WgXcQ", system_prompt=None, interval=10):
    """Run ``fetch_comments`` repeatedly until ``stop_event`` is set.

    Args:
        video_url: Video to poll; defaults to the example URL.
        system_prompt: Prompt passed to ``fetch_comments``; ``None`` selects
            ``DEFAULT_SYSTEM_PROMPT``.
        interval: Seconds to wait between two polls.
    """
    if system_prompt is None:
        system_prompt = DEFAULT_SYSTEM_PROMPT
    while not stop_event.is_set():
        try:
            result = fetch_comments(video_url, system_prompt)
        except Exception:
            # One failed poll must not terminate the polling loop.
            logging.exception("Background comment fetch failed; retrying in %s seconds", interval)
        else:
            print(result)
        # Unlike time.sleep, Event.wait returns as soon as the event is set,
        # so a stop request takes effect without waiting out the interval.
        stop_event.wait(interval)
def start_background_fetch():
    """Start ``background_fetch_comments`` in a new thread.

    ``stop_event`` is cleared first: once it has been set by a stop request
    it stays set, and a newly started polling loop would exit immediately.
    """
    stop_event.clear()
    threading.Thread(target=background_fetch_comments).start()
def stop_background_fetch():
    """Request the background polling loop to stop by setting ``stop_event``."""
    stop_event.set()
def get_text(video_url):
    """Download the audio of a YouTube video and return its transcript.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The result of ``generate_transcript`` for the downloaded file, or a
        Korean error message when ``download_audio`` yields no file path.
    """
    downloaded = download_audio(video_url)
    if downloaded:
        return generate_transcript(downloaded)
    return "오디오를 다운로드할 수 없습니다."
# Gradio interface definition.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>GPTube</center></h1>")

    # Inputs: the video URL and the editable system prompt.
    with gr.Row():
        input_text_url = gr.Textbox(
            placeholder='YouTube video URL',
            label='YouTube URL',
        )
        input_text_prompt = gr.Textbox(
            placeholder='시스템 프롬프트',
            label='시스템 프롬프트',
            value=DEFAULT_SYSTEM_PROMPT,
            lines=5,
        )

    # Action buttons.
    with gr.Row():
        result_button_transcribe = gr.Button('Transcribe')
        result_button_comments = gr.Button('Fetch Comments and Generate Reply')

    # Outputs: the transcript and the reply log.
    with gr.Row():
        output_text_transcribe = gr.Textbox(
            placeholder='Transcript of the YouTube video.',
            label='Transcript',
            lines=20,
        )
        output_text_prompt = gr.Textbox(
            placeholder='응답 텍스트',
            label='응답 텍스트',
            lines=20,
        )

    # Wire each button to its handler.
    result_button_transcribe.click(
        get_text,
        inputs=input_text_url,
        outputs=output_text_transcribe,
        api_name="transcribe_api",
    )
    result_button_comments.click(
        fetch_comments,
        inputs=[input_text_url, input_text_prompt],
        outputs=output_text_prompt,
        api_name="fetch_comments_api",
    )

# Launch the interface.
demo.launch()