#!/usr/bin/env python
# coding: utf-8

# In[23]:


# In[24]:


# import subprocess

# try:
#     result = subprocess.run(["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
#     if result.returncode == 0:
#         print("FFmpeg version:")
#         print(result.stdout.split('\n')[0])  # Print the first line of the version output
#     else:
#         print("Error checking FFmpeg version:")
#         print(result.stderr)
# except FileNotFoundError:
#     print("FFmpeg is not installed or not found in PATH.")


# In[25]:

# Standard library
import os
import json
from datetime import datetime, timedelta
from urllib.parse import urlparse, parse_qs

# Third-party (some of these are only needed by commented-out cells,
# e.g. the Whisper ASR experiment above)
import gradio as gr
import requests
import yaml
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from openai import OpenAI
import speech_recognition as sr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from youtube_transcript_api.formatters import TextFormatter


# In[26]:

openai_api_key = os.environ["OPENAI_API_KEY"]
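
# The key is read from the environment, so export it before launching. A sketch of
# the expected setup (not enforced by this script):
#
#   export OPENAI_API_KEY="sk-..."            # shell
#
# or, in Python, before this cell runs:
#
#   import os
#   os.environ.setdefault("OPENAI_API_KEY", "sk-...")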

# In[27]:


# transcription = pipeline(
#     "automatic-speech-recognition",
#     model="openai/whisper-medium")
# result = transcription("2024_dairy.wav", return_timestamps=True)
# print(result["text"])


# In[28]:


def is_youtube_url(url):
    try:
        # Parse the URL
        parsed_url = urlparse(url)
        
        # Check if the domain is YouTube
        if parsed_url.netloc in ["www.youtube.com", "youtube.com", "m.youtube.com", "youtu.be"]:
            # For standard YouTube URLs, ensure it has a 'v' parameter
            if "youtube.com" in parsed_url.netloc:
                return "v" in parse_qs(parsed_url.query)
            # For shortened YouTube URLs (youtu.be), check the path
            elif "youtu.be" in parsed_url.netloc:
                return len(parsed_url.path.strip("/")) > 0
        return False
    except Exception as e:
        return False

def get_youtube_transcript(youtube_url):
    try:
        # Extract the video ID from either URL form:
        #   https://www.youtube.com/watch?v=<id>  or  https://youtu.be/<id>
        parsed_url = urlparse(youtube_url)
        if "youtu.be" in parsed_url.netloc:
            video_id = parsed_url.path.strip("/")
        else:
            video_id = (parse_qs(parsed_url.query).get("v") or [None])[0]

        if not video_id:
            return "Invalid YouTube URL. Please provide a valid URL."

        # Fetch the transcript. If your network requires a proxy, pass it explicitly,
        # e.g. proxies={"https": "http://localhost:8080"}.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Format the transcript as plain text
        formatter = TextFormatter()
        formatted_transcript = formatter.format_transcript(transcript)

        return formatted_transcript

    except Exception as e:
        return f"An error occurred: {str(e)}"

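# Quick local sanity checks for is_youtube_url (no network needed). The video id is a
# placeholder; the transcript call is left commented out because it needs network
# access and captions enabled on the video.
assert is_youtube_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
assert is_youtube_url("https://youtu.be/dQw4w9WgXcQ")
assert not is_youtube_url("https://example.com/watch?v=abc")
# print(get_youtube_transcript("https://www.youtube.com/watch?v=dQw4w9WgXcQ")[:200])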

# In[29]:


def check_subtitles(video_id):
    try:
        transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
        print(f"Available transcripts: {transcripts}")
        return True
    except TranscriptsDisabled:
        print("Subtitles are disabled for this video.")
        return False
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False

# Test
video_id = "Um017R5Kr3A"  # Replace with your YouTube video ID
check_subtitles(video_id)


# In[30]:


# Set up the OpenAI API client
client = OpenAI(api_key=openai_api_key)

### Curify Digest ###

# Function to fetch webpage, render it, and generate summary/perspectives
def process_webpage(url):
    try:
        if is_youtube_url(url):
            rendered_content = get_youtube_transcript(url)
        else:
            # Fetch and parse webpage
            response = requests.get(url, timeout=15)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            for script in soup(["script", "style"]):
                script.decompose()  # Remove script and style tags
            rendered_content = soup.get_text(separator="\n").strip().replace("\n\n", "")

        text_content = rendered_content[:2000]  # Limit content length for processing

        # Generate summary and perspectives
        summary_prompt = f"Summarize the following content:\n{text_content}\n Please use the language of the originial content"
        perspectives_prompt = f"Generate a reflective review for the following content:\n{text_content}\n Please output the perspectives in no more than 5 very concise bullet points. Please use the language of the originial content"

        summary_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": summary_prompt}],
            max_tokens=500,
        )
        perspectives_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": perspectives_prompt}],
            max_tokens=500,
        )

        summary = summary_response.choices[0].message.content.strip()
        perspectives = perspectives_response.choices[0].message.content.strip()

        return rendered_content, summary, perspectives
    except Exception as e:
        return f"Error fetching or processing content: {str(e)}",  "", ""

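# Usage sketch for process_webpage (commented out because it makes network and OpenAI
# API calls; the URL is a placeholder). It returns the rendered text, a summary, and a
# few reflective bullet points.
#
#   content, summary, perspectives = process_webpage("https://en.wikipedia.org/wiki/Large_language_model")
#   print(summary)
#   print(perspectives)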

# In[31]:


# Function for chatbot interaction
def chat_with_ai(chat_history, user_input, content):
    try:
        messages = [{"role": "system", "content": "You are a helpful assistant."}]
        
        # Add chat history
        for user, bot in chat_history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})

        # Add user input with webpage content
        messages.append({"role": "user", "content": f"Based on this content: {content}\n\n{user_input}"})
        
        # Call OpenAI API
        ai_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=300,
        )
        reply = ai_response.choices[0].message.content.strip()
        chat_history.append((user_input, reply))
        return chat_history
    except Exception as e:
        return chat_history + [(user_input, f"Error: {str(e)}")]

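# A minimal sketch of calling chat_with_ai outside the UI. chat_history is a list of
# (user, assistant) tuples, matching gr.Chatbot; `content` is whatever process_webpage
# rendered. Commented out to avoid an API call at import time.
#
#   history = []
#   history = chat_with_ai(history, "What is the main argument?", content)
#   for user_msg, bot_msg in history:
#       print(f"User: {user_msg}\nAssistant: {bot_msg}")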

# In[32]:


def generate_reflection(chat_history):
    """
    Generate a reflection based on the chat history.

    Args:
        chat_history (list of tuples): List of (user_input, ai_reply) pairs.

    Returns:
        str: A reflective summary generated by AI.
    """
    try:
        messages = [{"role": "system", "content": "You are a professional content summarizer. Generate thoughtful reflections."}]
        
        # Add conversation to messages
        for user, bot in chat_history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})
        
        # Prompt for reflection
        messages.append({"role": "user", "content": "Please provide a concise, reflective summary of this conversation."})
        
        # Call OpenAI API
        ai_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=200,
        )
        reflection = ai_response.choices[0].message.content.strip()
        return reflection
    except Exception as e:
        return f"Error generating reflection: {str(e)}"

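# Usage sketch (commented out: it calls the OpenAI API). It reuses the same
# (user, assistant) tuple history produced by chat_with_ai above.
#
#   reflection = generate_reflection(history)
#   print(reflection)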

# In[33]:



def post_to_linkedin(access_token, reflection, visibility="PUBLIC"):
    """
    Post a reflection to LinkedIn.

    Args:
        access_token (str): LinkedIn API access token.
        reflection (str): The content to post.
        visibility (str): Visibility setting ("PUBLIC" or "CONNECTIONS"). Defaults to "PUBLIC".

    Returns:
        str: Confirmation or error message.
    """
    try:
        url = "https://api.linkedin.com/v2/ugcPosts"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
        }
        # Author URN: set LINKEDIN_PERSON_ID to your own LinkedIn person id
        # instead of hard-coding it here.
        your_linkedin_person_id = os.environ.get("LINKEDIN_PERSON_ID", "")
        payload = {
            "author": f"urn:li:person:{your_linkedin_person_id}",
            "lifecycleState": "PUBLISHED",
            "visibility": {"com.linkedin.ugc.MemberNetworkVisibility": visibility},
            "specificContent": {
                "com.linkedin.ugc.ShareContent": {
                    "shareCommentary": {
                        "text": reflection
                    },
                    "shareMediaCategory": "NONE"
                }
            }
        }
        
        response = requests.post(url, headers=headers, json=payload)
        if response.status_code == 201:
            return "Reflection successfully posted to LinkedIn!"
        else:
            return f"Failed to post to LinkedIn. Error: {response.json()}"
    except Exception as e:
        return f"Error posting to LinkedIn: {str(e)}"

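# Usage sketch for post_to_linkedin. The token is a placeholder (an OAuth 2.0 access
# token with the w_member_social scope is assumed); the call is commented out so the
# script does not post anything on import.
#
#   linkedin_token = os.environ.get("LINKEDIN_ACCESS_TOKEN", "")
#   print(post_to_linkedin(linkedin_token, reflection, visibility="CONNECTIONS"))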
# JavaScript for copying text to the clipboard
copy_to_clipboard_js = """
function copyToClipboard(text) {
    navigator.clipboard.writeText(text).then(() => {
        alert("Text copied to clipboard!");
    }).catch(err => {
        alert("Failed to copy text: " + err);
    });
}
"""

# In[36]:


### Gradio Demo ###
with gr.Blocks() as demo:
    gr.Markdown("## Curify Digest: Consume and interact with content")

    with gr.Row():
        # Column 1: Webpage rendering
        with gr.Column():
            gr.Markdown("## Render Webpage")
            url_input = gr.Textbox(label="Enter URL")
            # Shared Button: Fetch content, show webpage, and summary/perspectives
            fetch_btn = gr.Button("Fetch and Process Webpage")
            text_output = gr.Textbox(label="Webpage Content", lines=7)
            summary_output = gr.Textbox(label="Summary", lines=5)
            perspectives_output = gr.Textbox(label="Perspectives", lines=5)
            
        # Column 2: Interactive chatbot
        with gr.Column():
            gr.Markdown("## Interactive Chatbot")
            chatbot_history_gr = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
            chatbot_btn = gr.Button("Chat")
            
        # Column 3: Generate reflections
        with gr.Column():
            reflection_btn = gr.Button("Generate reflection")
            reflection_output = gr.Textbox(label="Reflections", lines=5)
            # Copy-to-clipboard button. <script> blocks injected through gr.HTML are
            # generally not executed by the browser, so the handler is inlined in the
            # button's onclick attribute instead.
            copy_button_html = """
            <button onclick="
                var tb = document.querySelector('textarea[aria-label=Reflections]');
                if (tb) {
                    navigator.clipboard.writeText(tb.value)
                        .then(function () { alert('Text copied to clipboard!'); })
                        .catch(function (err) { alert('Failed to copy text: ' + err); });
                } else {
                    alert('Reflections textbox not found.');
                }
            ">Copy to clipboard</button>
            """
            gr.HTML(copy_button_html)
        
    # Event wiring (kept directly under the Blocks context rather than inside the Row)
    fetch_btn.click(
        process_webpage,
        inputs=url_input,
        outputs=[text_output, summary_output, perspectives_output],
    )

    chatbot_btn.click(
        chat_with_ai,
        inputs=[chatbot_history_gr, user_input, text_output],
        outputs=chatbot_history_gr,
    )

    reflection_btn.click(
        generate_reflection,
        inputs=chatbot_history_gr,
        outputs=reflection_output,
    )

demo.launch(share=True)
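
# share=True creates a temporary public Gradio link. To serve without a public link,
# e.g. on a fixed host/port (standard gr.Blocks.launch options):
#
#   demo.launch(server_name="0.0.0.0", server_port=7860)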


# In[ ]: