#!/usr/bin/env python
# coding: utf-8
"""Curify Digest.

Fetch a webpage or a YouTube transcript, generate an OpenAI summary and
reflective perspectives, chat about the content, and optionally post a
reflection to LinkedIn — all wired into a Gradio UI.
"""

from datetime import datetime, timedelta
from urllib.parse import parse_qs, urlparse
import json
import os

import gradio as gr
import numpy as np
import openai
import pandas as pd
import requests
import speech_recognition as sr
import yaml
from bs4 import BeautifulSoup
from openai import OpenAI
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from youtube_transcript_api import TranscriptsDisabled, YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# Fails fast with KeyError if the key is not configured in the environment.
openai_api_key = os.environ["OPENAI_API_KEY"]

# Shared OpenAI client for all chat-completion calls below.
client = OpenAI(api_key=openai_api_key)

# Hosts we recognize as YouTube video URLs.
_YOUTUBE_HOSTS = {"www.youtube.com", "youtube.com", "m.youtube.com", "youtu.be"}


def is_youtube_url(url):
    """Return True if *url* points at a YouTube video (watch URL or youtu.be link)."""
    try:
        parsed_url = urlparse(url)
        if parsed_url.netloc not in _YOUTUBE_HOSTS:
            return False
        if "youtube.com" in parsed_url.netloc:
            # Standard watch URLs must carry a ?v=<id> parameter.
            return "v" in parse_qs(parsed_url.query)
        # Shortened youtu.be/<id> URLs keep the id in the path.
        return len(parsed_url.path.strip("/")) > 0
    except Exception:
        return False


def _extract_video_id(youtube_url):
    """Return the video id from a youtube.com or youtu.be URL, or None."""
    parsed_url = urlparse(youtube_url)
    if "youtube.com" in parsed_url.netloc:
        ids = parse_qs(parsed_url.query).get("v")
        return ids[0] if ids else None
    if "youtu.be" in parsed_url.netloc:
        return parsed_url.path.strip("/") or None
    return None


def get_youtube_transcript(youtube_url):
    """Fetch a video's transcript as plain text, or return an error-message string.

    Fixes vs. original: also accepts youtu.be short links (which
    is_youtube_url already treated as valid), and drops the hard-coded
    ``http://localhost:8080`` HTTPS proxy that was almost certainly
    leftover debugging configuration and broke fetching elsewhere.
    """
    try:
        video_id = _extract_video_id(youtube_url)
        if not video_id:
            return "Invalid YouTube URL. Please provide a valid URL."

        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Format the transcript as plain text.
        formatter = TextFormatter()
        return formatter.format_transcript(transcript)
    except Exception as e:
        return f"An error occurred: {str(e)}"


def check_subtitles(video_id):
    """Diagnostic helper: True if any transcript is listed for *video_id*."""
    try:
        transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
        print(f"Available transcripts: {transcripts}")
        return True
    except TranscriptsDisabled:
        print("Subtitles are disabled for this video.")
        return False
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False


def _complete(prompt, max_tokens):
    """Run a single-turn GPT-4o completion and return the stripped message text."""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content.strip()


### Curify Digest ###

def process_webpage(url):
    """Fetch *url* (webpage or YouTube video) and return (content, summary, perspectives).

    On failure returns (error_message, "", "").
    """
    try:
        if is_youtube_url(url):
            rendered_content = get_youtube_transcript(url)
        else:
            # Fetch and parse the webpage; a timeout keeps the UI responsive
            # on dead hosts (the original call could hang indefinitely).
            response = requests.get(url, timeout=30)
            soup = BeautifulSoup(response.text, "html.parser")
            # Drop non-content tags before extracting text.  (The original
            # also built an unused `prettify()` copy — removed.)
            for script in soup(["script", "style"]):
                script.decompose()
            rendered_content = soup.get_text(separator="\n").strip().replace("\n\n", "")

        text_content = rendered_content[:2000]  # limit content length for processing

        # Generate summary and perspectives ("originial" typo fixed in prompts).
        summary = _complete(
            f"Summarize the following content:\n{text_content}\n"
            " Please use the language of the original content",
            max_tokens=500,
        )
        perspectives = _complete(
            f"Generate a reflective review for the following content:\n{text_content}\n"
            " Please output the perspectives in no more than 5 very concise bullet points."
            " Please use the language of the original content",
            max_tokens=500,
        )
        return rendered_content, summary, perspectives
    except Exception as e:
        return f"Error fetching or processing content: {str(e)}", "", ""


def chat_with_ai(chat_history, user_input, content):
    """Answer *user_input* about *content* and append the turn to *chat_history*.

    Args:
        chat_history: list of (user, assistant) tuples (Gradio Chatbot format).
        user_input: the new question.
        content: page/transcript text the answer should be grounded in.

    Returns:
        The updated history; API failures are surfaced as an assistant reply
        rather than raised, so the UI never crashes.
    """
    try:
        messages = [{"role": "system", "content": "You are a helpful assistant."}]

        # Replay prior turns so the model has conversational context.
        for user, bot in chat_history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})

        # Attach the source content to the new question.
        messages.append(
            {"role": "user", "content": f"Based on this content: {content}\n\n{user_input}"}
        )

        ai_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=300,
        )
        reply = ai_response.choices[0].message.content.strip()
        chat_history.append((user_input, reply))
        return chat_history
    except Exception as e:
        return chat_history + [(user_input, f"Error: {str(e)}")]


def generate_reflection(chat_history):
    """Generate a reflection based on the chat history.

    Args:
        chat_history (list of tuples): List of (user_input, ai_reply) pairs.

    Returns:
        str: A reflective summary generated by AI, or an error message.
    """
    try:
        messages = [
            {
                "role": "system",
                "content": "You are a professional content summarizer. Generate thoughtful reflections.",
            }
        ]
        for user, bot in chat_history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})

        messages.append(
            {
                "role": "user",
                "content": "Please provide a concise, reflective summary of this conversation.",
            }
        )

        ai_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=200,
        )
        return ai_response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error generating reflection: {str(e)}"


def post_to_linkedin(access_token, reflection, visibility="PUBLIC"):
    """Post a reflection to LinkedIn via the ugcPosts API.

    Args:
        access_token (str): LinkedIn API access token.
        reflection (str): The content to post.
        visibility (str): "PUBLIC" or "CONNECTIONS". Defaults to "PUBLIC".

    Returns:
        str: Confirmation or error message.
    """
    try:
        url = "https://api.linkedin.com/v2/ugcPosts"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
        }
        # TODO(review): 'jay' is a placeholder — replace with the real
        # numeric LinkedIn person id for the authenticated member.
        your_linkedin_person_id = 'jay'
        payload = {
            "author": f"urn:li:person:{your_linkedin_person_id}",
            "lifecycleState": "PUBLISHED",
            "visibility": {"com.linkedin.ugc.MemberNetworkVisibility": visibility},
            "specificContent": {
                "com.linkedin.ugc.ShareContent": {
                    "shareCommentary": {"text": reflection},
                    "shareMediaCategory": "NONE",
                }
            },
        }
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        if response.status_code == 201:
            return "Reflection successfully posted to LinkedIn!"
        return f"Failed to post to LinkedIn. Error: {response.json()}"
    except Exception as e:
        return f"Error posting to LinkedIn: {str(e)}"


# JavaScript for copying text to the clipboard (defined but not yet wired
# into the UI — custom_js below is still an empty placeholder).
copy_to_clipboard_js = """
function copyToClipboard(text) {
    navigator.clipboard.writeText(text).then(() => {
        alert("Text copied to clipboard!");
    }).catch(err => {
        alert("Failed to copy text: " + err);
    });
}
"""


### Gradio Demo ###
with gr.Blocks() as demo:
    gr.Markdown("## Curify Digest: Consume and interact with content")
    with gr.Row():
        # Column 1: Webpage rendering.
        with gr.Column():
            gr.Markdown("## Render Webpage")
            url_input = gr.Textbox(label="Enter URL")
            # Shared button: fetch content, show webpage, and summary/perspectives.
            fetch_btn = gr.Button("Fetch and Process Webpage")
            text_output = gr.Textbox(label="Webpage Content", lines=7)
            summary_output = gr.Textbox(label="Summary", lines=5)
            perspectives_output = gr.Textbox(label="Perspectives", lines=5)

        # Column 2: Interactive chatbot.
        with gr.Column():
            gr.Markdown("## Interactive Chatbot")
            chatbot_history_gr = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
            chatbot_btn = gr.Button("Chat")

        # Column 3: Generate reflections.
        with gr.Column():
            reflection_btn = gr.Button("Generate reflection")
            reflection_output = gr.Textbox(label="Reflections", lines=5)

    # Custom HTML and JS for copy-to-clipboard (placeholder, intentionally empty).
    custom_js = """
    """
    gr.HTML(custom_js)

    fetch_btn.click(
        process_webpage,
        inputs=url_input,
        outputs=[text_output, summary_output, perspectives_output],
    )
    chatbot_btn.click(
        chat_with_ai,
        inputs=[chatbot_history_gr, user_input, text_output],
        outputs=chatbot_history_gr,
    )
    reflection_btn.click(
        generate_reflection,
        inputs=chatbot_history_gr,
        outputs=reflection_output,
    )


if __name__ == "__main__":
    # Transcript-availability smoke test (was an unconditional notebook cell),
    # then launch the UI.  Guarded so importing this module has no side effects
    # beyond client construction.
    check_subtitles("Um017R5Kr3A")
    demo.launch(share=True)