# Digest / app.py
# qqwjq1981's picture
# Update app.py
# f7abd62 verified
# raw
# history blame
# 11.7 kB
#!/usr/bin/env python
# coding: utf-8
# In[23]:
# In[24]:
# import subprocess
# try:
# result = subprocess.run(["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
# if result.returncode == 0:
# print("FFmpeg version:")
# print(result.stdout.split('\n')[0]) # Print the first line of the version output
# else:
# print("Error checking FFmpeg version:")
# print(result.stderr)
# except FileNotFoundError:
# print("FFmpeg is not installed or not found in PATH.")
# In[25]:
from urllib.parse import urlparse, parse_qs
import gradio as gr
import requests
from bs4 import BeautifulSoup
import openai
from openai import OpenAI
import speech_recognition as sr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from youtube_transcript_api.formatters import TextFormatter
from urllib.parse import urlparse, parse_qs
import json
import os
import yaml
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# In[26]:
# Read the OpenAI API key from the environment; indexing os.environ raises
# KeyError at start-up when OPENAI_API_KEY is not set.
openai_api_key = os.environ["OPENAI_API_KEY"]
# In[27]:
# transcription = pipeline(
# "automatic-speech-recognition",
# model="openai/whisper-medium")
# result = transcription("2024_dairy.wav", return_timestamps=True)
# print(result["text"])
# In[28]:
def is_youtube_url(url):
    """Return True if *url* looks like a link to a YouTube video.

    Two URL shapes are recognised:

    * ``youtube.com`` hosts (bare, ``www.`` or ``m.``) that carry a non-empty
      ``v`` query parameter;
    * ``youtu.be`` short links with a non-empty path.

    The host is compared via ``urlparse(...).hostname``, so the check is
    case-insensitive and ignores an explicit port or userinfo.

    Args:
        url: Candidate URL. Anything that is not a ``str`` yields ``False``.

    Returns:
        bool: ``True`` for a recognised video URL, ``False`` otherwise.
    """
    if not isinstance(url, str):
        return False

    youtube_hosts = ("www.youtube.com", "youtube.com", "m.youtube.com")
    try:
        parsed_url = urlparse(url)
        host = parsed_url.hostname or ""
        if host in youtube_hosts:
            # parse_qs drops blank values, so "?v=" does not count as a video.
            return "v" in parse_qs(parsed_url.query)
        if host == "youtu.be":
            # Short links carry the video ID in the path.
            return bool(parsed_url.path.strip("/"))
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False
    return False
def get_youtube_transcript(youtube_url):
    """Fetch the transcript of a YouTube video as plain text.

    The video ID is taken from the first path segment for ``youtu.be`` short
    links and from the ``v`` query parameter for every other URL.

    Args:
        youtube_url: URL of the video.

    Returns:
        str: The formatted transcript on success. Failures are reported as
        strings too: ``"Invalid YouTube URL. Please provide a valid URL."``
        when no video ID can be extracted, or ``"An error occurred: ..."``
        when fetching or formatting raises.
    """
    try:
        # Parse the video ID from the URL
        parsed_url = urlparse(youtube_url)
        if parsed_url.hostname == "youtu.be":
            # Short links have no "v" parameter; the ID is the path itself.
            video_id = parsed_url.path.strip("/").split("/")[0]
        else:
            video_id = next(iter(parse_qs(parsed_url.query).get("v", [])), "")
        if not video_id:
            return "Invalid YouTube URL. Please provide a valid URL."

        # Fetch the transcript.
        # NOTE(review): requests are routed through a hard-coded proxy on
        # localhost:8080 - confirm one is actually listening in deployment.
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id, proxies={"https": "http://localhost:8080"}
        )

        # Format the transcript as plain text
        return TextFormatter().format_transcript(transcript)
    except Exception as e:
        # Callers display the returned string, so report instead of raising.
        return f"An error occurred: {str(e)}"
# In[29]:
def check_subtitles(video_id):
    """Report whether the transcripts of a YouTube video can be listed.

    The outcome is printed to stdout in every case.

    Args:
        video_id: YouTube video ID passed to ``list_transcripts``.

    Returns:
        bool: ``True`` when listing succeeded; ``False`` when subtitles are
        disabled or any other exception was raised.
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        print(f"Available transcripts: {available}")
    except TranscriptsDisabled:
        print("Subtitles are disabled for this video.")
        return False
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
        return False
    return True
# Smoke test: this call sits at module level, so it runs every time the file
# is loaded and prints whether transcripts can be listed for the sample video.
video_id = "Um017R5Kr3A"  # Replace with your YouTube video ID
check_subtitles(video_id)
# In[30]:
# Set up the OpenAI API client used by the completion calls below.
client = OpenAI(api_key=openai_api_key)
### Curify Digest ###
# Function to fetch webpage, render it, and generate summary/perspectives
def process_webpage(url):
    """Fetch a web page or YouTube transcript and generate a summary and review.

    YouTube video URLs are resolved to their transcript; any other URL is
    downloaded and reduced to its visible text. Only the first 2000 characters
    of the content are sent to the model.

    Args:
        url: Address of a web page or YouTube video.

    Returns:
        tuple[str, str, str]: ``(content, summary, perspectives)``. When
        something fails, the first element carries the error message and the
        other two are empty strings.
    """
    try:
        url = (url or "").strip()
        if not url:
            return "Error fetching or processing content: no URL provided", "", ""

        if is_youtube_url(url):
            rendered_content = get_youtube_transcript(url)
            # get_youtube_transcript reports failures as plain strings; show
            # them as-is rather than asking the model to summarize an error.
            if rendered_content.startswith(("Invalid YouTube URL", "An error occurred:")):
                return rendered_content, "", ""
        else:
            rendered_content = _fetch_page_text(url)

        text_content = rendered_content[:2000]  # Limit content length for processing
        if not text_content.strip():
            return "Error fetching or processing content: no readable text found", "", ""

        # Generate summary and perspectives
        summary_prompt = (
            f"Summarize the following content:\n{text_content}\n"
            " Please use the language of the originial content"
        )
        perspectives_prompt = (
            f"Generate a reflective review for the following content:\n{text_content}\n"
            " Please output the perspectives in no more than 5 very concise bullet points."
            " Please use the language of the originial content"
        )
        summary = _ask_model(summary_prompt)
        perspectives = _ask_model(perspectives_prompt)
        return rendered_content, summary, perspectives
    except Exception as e:
        # UI boundary: the caller displays whatever string comes back.
        return f"Error fetching or processing content: {str(e)}", "", ""


def _fetch_page_text(url):
    """Download *url* and return its visible text, one non-empty line per row."""
    # A timeout keeps an unresponsive host from hanging the request forever;
    # raise_for_status turns HTTP error pages into an error instead of content.
    response = requests.get(url, timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    for tag in soup(["script", "style"]):
        tag.decompose()  # Remove script and style tags
    stripped_lines = (line.strip() for line in soup.get_text(separator="\n").splitlines())
    # Drop blank lines but keep a newline between the remaining ones so that
    # neighbouring pieces of text are not glued together.
    return "\n".join(line for line in stripped_lines if line)


def _ask_model(prompt):
    """Send a single-turn *prompt* to gpt-4o and return the stripped reply text."""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500,
    )
    return (response.choices[0].message.content or "").strip()
# In[31]:
# Function for chatbot interaction
def chat_with_ai(chat_history, user_input, content):
    """Answer a question about the fetched content and extend the chat history.

    Args:
        chat_history: Earlier ``(user_message, ai_reply)`` pairs, or ``None``
            when the conversation has not started. It is never modified.
        user_input: The new question. A blank question is ignored.
        content: Text the answer should be based on; it is embedded in the
            prompt together with the question.

    Returns:
        list: A new history list: the earlier pairs followed by
        ``(user_input, reply)``. When the API call fails, the reply is
        ``"Error: <message>"``.
    """
    # Work on a copy so the caller's list is untouched, and treat None as empty.
    history = list(chat_history or [])
    if not user_input or not user_input.strip():
        return history

    try:
        messages = [{"role": "system", "content": "You are a helpful assistant."}]
        # Add chat history
        for user, bot in history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})
        # Add user input with webpage content
        messages.append({"role": "user", "content": f"Based on this content: {content}\n\n{user_input}"})
        # Call OpenAI API
        ai_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=300,
        )
        reply = (ai_response.choices[0].message.content or "").strip()
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        return history + [(user_input, f"Error: {str(e)}")]
    return history + [(user_input, reply)]
# In[32]:
def generate_reflection(chat_history):
    """
    Generate a reflection based on the chat history.
    Args:
        chat_history (list of tuples): List of (user_input, ai_reply) pairs.
            May be empty or None, in which case no API call is made.
    Returns:
        str: A reflective summary generated by AI, a short notice when there
        is no conversation yet, or "Error generating reflection: ..." when
        building the request or calling the API fails.
    """
    if not chat_history:
        # Nothing to summarize - skip the API call entirely.
        return "No conversation to reflect on yet."

    try:
        messages = [{"role": "system", "content": "You are a professional content summarizer. Generate thoughtful reflections."}]
        # Add conversation to messages
        for user, bot in chat_history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})
        # Prompt for reflection
        messages.append({"role": "user", "content": "Please provide a concise, reflective summary of this conversation."})
        # Call OpenAI API
        ai_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=200,
        )
        return (ai_response.choices[0].message.content or "").strip()
    except Exception as e:
        # UI boundary: the caller displays the returned string.
        return f"Error generating reflection: {str(e)}"
# In[33]:
import requests
def post_to_linkedin(access_token, reflection, visibility="PUBLIC", person_id="jay"):
    """
    Post a reflection to LinkedIn.
    Args:
        access_token (str): LinkedIn API access token.
        reflection (str): The content to post.
        visibility (str): Visibility setting ("PUBLIC" or "CONNECTIONS"). Defaults to "PUBLIC".
        person_id (str): ID used to build the author URN
            ("urn:li:person:<person_id>"). The default is the placeholder this
            function previously hard-coded; pass the real member ID.
    Returns:
        str: Confirmation or error message.
    """
    # Reject obviously unusable input before making a network call.
    if not access_token:
        return "Error posting to LinkedIn: access token is missing."
    if not reflection or not str(reflection).strip():
        return "Error posting to LinkedIn: reflection text is empty."

    try:
        url = "https://api.linkedin.com/v2/ugcPosts"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
            # LinkedIn's UGC Post API documentation requires this header.
            "X-Restli-Protocol-Version": "2.0.0",
        }
        payload = {
            "author": f"urn:li:person:{person_id}",
            "lifecycleState": "PUBLISHED",
            "visibility": {"com.linkedin.ugc.MemberNetworkVisibility": visibility},
            "specificContent": {
                "com.linkedin.ugc.ShareContent": {
                    "shareCommentary": {
                        "text": reflection
                    },
                    "shareMediaCategory": "NONE"
                }
            }
        }
        # The timeout keeps an unresponsive API from blocking the caller forever.
        response = requests.post(url, headers=headers, json=payload, timeout=15)
        if response.status_code == 201:
            return "Reflection successfully posted to LinkedIn!"

        # Error bodies are not guaranteed to be JSON; fall back to the raw text
        # so the real failure is still reported.
        try:
            details = response.json()
        except ValueError:
            details = response.text
        return f"Failed to post to LinkedIn. Error: {details}"
    except Exception as e:
        return f"Error posting to LinkedIn: {str(e)}"
# JavaScript source, kept as a Python string, that defines
# copyToClipboard(text): it writes `text` to the clipboard and shows an alert
# on success or failure.
# NOTE(review): nothing in the visible part of this file references this
# string - confirm it is still needed.
copy_to_clipboard_js = """
function copyToClipboard(text) {
navigator.clipboard.writeText(text).then(() => {
alert("Text copied to clipboard!");
}).catch(err => {
alert("Failed to copy text: " + err);
});
}
"""
# In[36]:
### Gradio Demo ###
# Three-column layout: fetch and summarize content, chat about it, then turn
# the conversation into a reflection that can be copied to the clipboard.
with gr.Blocks() as demo:
    gr.Markdown("## Curify Digest: Consume and interact with content")
    with gr.Row():
        # Column 1: Webpage rendering
        with gr.Column():
            gr.Markdown("## Render Webpage")
            url_input = gr.Textbox(label="Enter URL")
            # Shared Button: Fetch content, show webpage, and summary/perspectives
            fetch_btn = gr.Button("Fetch and Process Webpage")
            text_output = gr.Textbox(label="Webpage Content", lines=7)
            summary_output = gr.Textbox(label="Summary", lines=5)
            perspectives_output = gr.Textbox(label="Perspectives", lines=5)
        # Column 2: Interactive chatbot
        with gr.Column():
            gr.Markdown("## Interactive Chatbot")
            chatbot_history_gr = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
            chatbot_btn = gr.Button("Chat")
        # Column 3: Generate reflections
        with gr.Column():
            reflection_btn = gr.Button("Generate reflection")
            # elem_id gives the copy button below a stable selector to target.
            reflection_output = gr.Textbox(label="Reflections", lines=5, elem_id="reflection_output")
            # Copy-to-clipboard button. The handler is inlined in the onclick
            # attribute because <script> tags placed inside gr.HTML are not
            # executed, which would leave a separately defined function
            # undefined when the button is clicked.
            custom_js = """
            <button onclick="
                const textbox = document.querySelector('#reflection_output textarea');
                if (textbox) {
                    navigator.clipboard.writeText(textbox.value).then(() => {
                        alert('Text copied to clipboard!');
                    }).catch(err => {
                        alert('Failed to copy text: ' + err);
                    });
                }
            ">Copy to clipboard</button>
            """
            gr.HTML(custom_js)

    # Event wiring must happen inside the Blocks context.
    fetch_btn.click(
        process_webpage,
        inputs=url_input,
        outputs=[text_output, summary_output, perspectives_output],
    )
    chatbot_btn.click(
        chat_with_ai,
        inputs=[chatbot_history_gr, user_input, text_output],
        outputs=chatbot_history_gr,
    )
    reflection_btn.click(
        generate_reflection,
        inputs=chatbot_history_gr,
        outputs=reflection_output,
    )

demo.launch(share=True)
# In[ ]: