# Source: Hugging Face Spaces - Curify / Digest (Space status: Sleeping)
# File size: 11,729 Bytes

#!/usr/bin/env python
# coding: utf-8

# In[23]:


# In[24]:


# import subprocess

# try:
#     result = subprocess.run(["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
#     if result.returncode == 0:
#         print("FFmpeg version:")
#         print(result.stdout.split('\n')[0])  # Print the first line of the version output
#     else:
#         print("Error checking FFmpeg version:")
#         print(result.stderr)
# except FileNotFoundError:
#     print("FFmpeg is not installed or not found in PATH.")


# In[25]:

from urllib.parse import urlparse, parse_qs
import gradio as gr
import requests
from bs4 import BeautifulSoup
import openai
from openai import OpenAI
import speech_recognition as sr
from transformers import pipeline

from transformers.pipelines.audio_utils import ffmpeg_read

from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from youtube_transcript_api.formatters import TextFormatter

from urllib.parse import urlparse, parse_qs
import json

import os
import yaml
import pandas as pd
import numpy as np

from datetime import datetime, timedelta


# In[26]:

# Read the OpenAI API key from the environment. Indexing os.environ raises
# KeyError at startup when OPENAI_API_KEY is not set.
openai_api_key = os.environ["OPENAI_API_KEY"]

# In[27]:


# transcription = pipeline(
#     "automatic-speech-recognition",
#     model="openai/whisper-medium")
# result = transcription("2024_dairy.wav", return_timestamps=True)
# print(result["text"])


# In[28]:


def is_youtube_url(url):
    """Return True when *url* points at a single YouTube video.

    Accepted forms are ``youtube.com`` / ``www.youtube.com`` /
    ``m.youtube.com`` URLs that carry a non-empty ``v`` query parameter, and
    ``youtu.be`` short links with a non-empty path. Anything else, including
    input that cannot be parsed, yields False.
    """
    try:
        parts = urlparse(url)
        host = parts.netloc

        # Guard clause: only these exact hosts are treated as YouTube.
        if host not in ("www.youtube.com", "youtube.com", "m.youtube.com", "youtu.be"):
            return False

        if "youtube.com" in host:
            # Standard watch URLs identify the video through ?v=<id>.
            return "v" in parse_qs(parts.query)

        if "youtu.be" in host:
            # Short links carry the video id in the path.
            return bool(parts.path.strip("/"))

        return False
    except Exception:
        return False

def get_youtube_transcript(youtube_url, proxies=None):
    """Fetch the transcript of a YouTube video as plain text.

    The video id is taken from the ``v`` query parameter; for ``youtu.be``
    short links (which have no ``v`` parameter) it is taken from the first
    path segment, so that every URL accepted by ``is_youtube_url`` can be
    resolved here as well.

    Args:
        youtube_url (str): URL of the video.
        proxies (dict | None): Proxy mapping forwarded to
            ``YouTubeTranscriptApi.get_transcript``. When omitted, the
            previously hard-coded ``{"https": "http://localhost:8080"}`` is
            used so existing callers behave as before.

    Returns:
        str: The formatted transcript, or a human-readable error message
        (``"Invalid YouTube URL. ..."`` or ``"An error occurred: ..."``).
        Errors are reported as strings rather than raised.
    """
    try:
        parsed_url = urlparse(youtube_url)

        # Prefer the explicit ?v=<id> parameter, as before.
        query_ids = parse_qs(parsed_url.query).get("v")
        if query_ids:
            video_id = query_ids[0]
        elif parsed_url.netloc == "youtu.be":
            # Short links look like https://youtu.be/<id>[/...]
            video_id = parsed_url.path.strip("/").split("/")[0]
        else:
            video_id = ""

        if not video_id:
            return "Invalid YouTube URL. Please provide a valid URL."

        if proxies is None:
            # NOTE(review): this default assumes a proxy listening on
            # localhost:8080 - confirm that it exists where the app is deployed.
            proxies = {"https": "http://localhost:8080"}

        transcript = YouTubeTranscriptApi.get_transcript(video_id, proxies=proxies)

        # Flatten the transcript entries into plain text.
        return TextFormatter().format_transcript(transcript)

    except Exception as e:
        return f"An error occurred: {str(e)}"


# In[29]:


def check_subtitles(video_id):
    """Report whether transcripts can be listed for a YouTube video.

    Prints the available transcripts on success, or the reason for the
    failure otherwise.

    Args:
        video_id (str): YouTube video id.

    Returns:
        bool: True when listing the transcripts succeeded, False when
        subtitles are disabled or any other error occurred.
    """
    has_subtitles = False
    try:
        listing = YouTubeTranscriptApi.list_transcripts(video_id)
        print(f"Available transcripts: {listing}")
        has_subtitles = True
    except TranscriptsDisabled:
        print("Subtitles are disabled for this video.")
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
    return has_subtitles

# Smoke test left over from the notebook: this runs every time the module is
# executed and calls check_subtitles (a YouTubeTranscriptApi lookup) for a
# fixed video before the app is built.
video_id = "Um017R5Kr3A"  # Replace with your YouTube video ID
check_subtitles(video_id)


# In[30]:


# Set up the OpenAI API client shared by all functions below.
client = OpenAI(api_key=openai_api_key)

### Curify Digest ###

# Function to fetch webpage, render it, and generate summary/perspectives
def process_webpage(url):
    """Fetch a web page or YouTube transcript and generate a summary and perspectives.

    URLs recognised by ``is_youtube_url`` are resolved to their transcript;
    any other URL is downloaded and reduced to its visible text. The first
    2000 characters of that text are sent to the ``gpt-4o`` chat model twice:
    once for a summary and once for a short reflective review.

    Args:
        url (str): Address of the page or video to process.

    Returns:
        tuple: ``(content, summary, perspectives)``. When anything fails, the
        first element carries an error message and the other two are empty
        strings; no exception is raised.
    """
    try:
        if is_youtube_url(url):
            rendered_content = get_youtube_transcript(url)
            # get_youtube_transcript reports failures as plain strings; show
            # them as-is instead of asking the model to summarise an error.
            if rendered_content.startswith(("An error occurred:", "Invalid YouTube URL.")):
                return rendered_content, "", ""
        else:
            # NOTE(review): the URL is fetched server-side exactly as typed by
            # the user - consider restricting reachable hosts if this app is
            # exposed publicly.
            # A timeout keeps an unresponsive host from blocking the handler.
            response = requests.get(url, timeout=30)
            # HTTP error pages are reported as errors rather than summarised.
            response.raise_for_status()

            # Hand BeautifulSoup the raw bytes so it can detect the document's
            # own encoding instead of relying on the HTTP header guess.
            soup = BeautifulSoup(response.content, "html.parser")

            for tag in soup(["script", "style"]):
                tag.decompose()  # Remove script and style tags

            # Keep one text fragment per line and drop blank lines, so that
            # adjacent fragments are not glued together.
            fragments = (line.strip() for line in soup.get_text(separator="\n").splitlines())
            rendered_content = "\n".join(fragment for fragment in fragments if fragment)

        text_content = rendered_content[:2000]  # Limit content length for processing

        # Generate summary and perspectives
        summary_prompt = f"Summarize the following content:\n{text_content}\n Please use the language of the originial content"
        perspectives_prompt = f"Generate a reflective review for the following content:\n{text_content}\n Please output the perspectives in no more than 5 very concise bullet points. Please use the language of the originial content"

        summary_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": summary_prompt}],
            max_tokens=500,
        )
        perspectives_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": perspectives_prompt}],
            max_tokens=500,
        )

        summary = summary_response.choices[0].message.content.strip()
        perspectives = perspectives_response.choices[0].message.content.strip()

        return rendered_content, summary, perspectives
    except Exception as e:
        return f"Error fetching or processing content: {str(e)}",  "", ""


# In[31]:


# Function for chatbot interaction
def chat_with_ai(chat_history, user_input, content):
    """Answer a user question about *content* and return the updated chat history.

    Args:
        chat_history (list | None): Previous ``(user, bot)`` message pairs.
            ``None`` or an empty list starts a new conversation.
        user_input (str): The user's new question.
        content (str): Page or transcript text the question refers to; it is
            prepended to the question sent to the model.

    Returns:
        list: A new list holding the previous pairs plus
        ``(user_input, reply)``. When the API call fails, the reply is an
        ``"Error: ..."`` message instead of raising. The list passed in is
        never modified.
    """
    # Work on a copy so the caller's list is left untouched and a missing
    # history (None) cannot break either the loop or the error path.
    # NOTE(review): confirm what gr.Chatbot passes before the first message.
    history = list(chat_history) if chat_history else []

    try:
        messages = [{"role": "system", "content": "You are a helpful assistant."}]

        # Replay the earlier turns so the model sees the whole conversation.
        for user, bot in history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})

        # Add user input with webpage content
        messages.append({"role": "user", "content": f"Based on this content: {content}\n\n{user_input}"})

        # Call OpenAI API
        ai_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=300,
        )
        reply = ai_response.choices[0].message.content.strip()
    except Exception as e:
        reply = f"Error: {str(e)}"

    history.append((user_input, reply))
    return history


# In[32]:


def generate_reflection(chat_history):
    """
    Ask the model for a short reflective summary of a conversation.

    Args:
        chat_history (list of tuples): (user_input, ai_reply) pairs, oldest first.

    Returns:
        str: The model's reflection, or an "Error generating reflection: ..."
        message when building the request or calling the API fails.
    """
    try:
        conversation = [
            {"role": "system", "content": "You are a professional content summarizer. Generate thoughtful reflections."}
        ]

        # Replay each exchange as a user turn followed by an assistant turn.
        for question, answer in chat_history:
            conversation.extend((
                {"role": "user", "content": question},
                {"role": "assistant", "content": answer},
            ))

        # Final instruction asking for the reflection itself.
        conversation.append(
            {"role": "user", "content": "Please provide a concise, reflective summary of this conversation."}
        )

        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
            max_tokens=200,
        )
        return completion.choices[0].message.content.strip()
    except Exception as err:
        return f"Error generating reflection: {str(err)}"


# In[33]:


import requests

def post_to_linkedin(access_token, reflection, visibility="PUBLIC", person_id="jay"):
    """
    Post a reflection to LinkedIn through the UGC Posts API.

    Args:
        access_token (str): LinkedIn API access token.
        reflection (str): The content to post.
        visibility (str): Visibility setting ("PUBLIC" or "CONNECTIONS"). Defaults to "PUBLIC".
        person_id (str): Id used to build the author URN
            ``urn:li:person:<person_id>``. Defaults to the placeholder the
            function previously hard-coded; replace it with a real person id.

    Returns:
        str: Confirmation or error message. Failures are reported in the
        returned string rather than raised.
    """
    try:
        url = "https://api.linkedin.com/v2/ugcPosts"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
            # Protocol version header expected by LinkedIn's REST API.
            "X-Restli-Protocol-Version": "2.0.0",
        }
        payload = {
            "author": f"urn:li:person:{person_id}",
            "lifecycleState": "PUBLISHED",
            "visibility": {"com.linkedin.ugc.MemberNetworkVisibility": visibility},
            "specificContent": {
                "com.linkedin.ugc.ShareContent": {
                    "shareCommentary": {
                        "text": reflection
                    },
                    "shareMediaCategory": "NONE"
                }
            }
        }

        # A timeout keeps an unresponsive API from blocking the caller.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        if response.status_code == 201:
            return "Reflection successfully posted to LinkedIn!"

        # Error bodies are not guaranteed to be JSON; fall back to raw text so
        # the real failure reason is still reported.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        return f"Failed to post to LinkedIn. Error: {detail}"
    except Exception as e:
        return f"Error posting to LinkedIn: {str(e)}"

# JavaScript source (kept as a Python string) defining copyToClipboard(text),
# which writes `text` to the browser clipboard and shows an alert on success
# or failure.
# NOTE(review): nothing in the visible part of this file references this
# constant - the Gradio layout below embeds its own copy script instead.
copy_to_clipboard_js = """
function copyToClipboard(text) {
    navigator.clipboard.writeText(text).then(() => {
        alert("Text copied to clipboard!");
    }).catch(err => {
        alert("Failed to copy text: " + err);
    });
}
"""

# In[36]:


### Gradio Demo ###
# Build the three-column Gradio UI: fetch/summarise on the left, chat in the
# middle, reflection on the right. Widgets are created in the order they
# should appear, so statement order and nesting define the layout.
with gr.Blocks() as demo:
    gr.Markdown("## Curify Digest: Consume and interact with content")

    with gr.Row():
        # Column 1: URL input plus the fetched text, summary and perspectives
        with gr.Column():
            gr.Markdown("## Render Webpage")
            url_input = gr.Textbox(label="Enter URL")
            # Single button: fetch the content and generate summary/perspectives
            fetch_btn = gr.Button("Fetch and Process Webpage")
            text_output = gr.Textbox(label="Webpage Content", lines=7)
            summary_output = gr.Textbox(label="Summary", lines=5)
            perspectives_output = gr.Textbox(label="Perspectives", lines=5)
            
        # Column 2: Interactive chatbot about the fetched content
        with gr.Column():
            gr.Markdown("## Interactive Chatbot")
            chatbot_history_gr = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
            chatbot_btn = gr.Button("Chat")
            
        # Column 3: Generate a reflection from the chat history
        with gr.Column():
            reflection_btn = gr.Button("Generate reflection")
            reflection_output = gr.Textbox(label="Reflections", lines=5)
            # Custom HTML and JS for copy-to-clipboard: the button looks up the
            # "Reflections" textarea by its aria-label and copies its value.
            # NOTE(review): confirm that a <script> injected through gr.HTML is
            # executed and that the textarea really carries this aria-label in
            # the Gradio version in use; otherwise the button does nothing.
            custom_js = """
            <script>
            function copyToClipboard() {
                const textbox = document.querySelector("textarea[aria-label='Reflections']");
                if (textbox) {
                    navigator.clipboard.writeText(textbox.value).then(() => {
                        alert("Text copied to clipboard!");
                    }).catch(err => {
                        alert("Failed to copy text: " + err);
                    });
                }
            }
            </script>
            <button onclick="copyToClipboard()">Copy to clipboard</button>
            """
            gr.HTML(custom_js)
        
        # Fetch button: URL -> (content, summary, perspectives) textboxes
        fetch_btn.click(
            process_webpage, 
            inputs=url_input, 
            outputs=[text_output, summary_output, perspectives_output],
            ) 
    
        # Chat button: the fetched content in text_output is passed along with
        # the question; the returned history replaces the chatbot contents.
        chatbot_btn.click(
            chat_with_ai, 
            inputs=[chatbot_history_gr, user_input, text_output], 
            outputs=chatbot_history_gr,
            )

        # Reflection button: summarise the whole chat history
        reflection_btn.click(
            generate_reflection, 
            inputs=chatbot_history_gr, 
            outputs=reflection_output,
            )

# Start the app as soon as the module is executed (there is no __main__
# guard), passing share=True to request a public share link.
# NOTE(review): confirm share=True is wanted in the deployment environment.
demo.launch(share=True)


# In[ ]: