"""Gradio app that turns an article URL into a short narrated slide video.

Pipeline: fetch the article, summarise it with an OpenAI chat model, narrate
the summary with gTTS, render the summary as PNG slides carrying a logo, and
mux slides and narration into an MP4 with ffmpeg.
"""

import os
import shutil
import subprocess
import tempfile
import textwrap
from typing import List, Optional, Tuple

import ffmpeg
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gtts import gTTS
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from PIL import Image, ImageDraw, ImageFont

# OpenAI LLM
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
summary_prompt = PromptTemplate.from_template("""
Provide a crisp, promotional-style summary (under 50 words) of the following:

{text}

Summary:
""")
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)

# Slide canvas geometry and styling shared by every rendered frame.
SLIDE_SIZE = (1280, 720)
SLIDE_BACKGROUND = (20, 30, 60)
LINE_SPACING = 20
LOGO_MARGIN = 30


# Extract article content
def extract_main_content(url: str) -> Optional[str]:
    """Download *url* and return its main paragraph text.

    Navigation, header, footer, aside, script, style and noscript elements
    are dropped; of the remaining ``<p>`` elements only those longer than 60
    characters are kept, and at most the first 20 of them are joined with
    newlines.

    Returns:
        The joined paragraph text, or ``None`` when no paragraph qualifies.

    Raises:
        requests.RequestException: on network failure or an HTTP error
            status (so an error page is not mistaken for the article).
    """
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, "html.parser")
    for tag in soup(["nav", "header", "footer", "aside", "script", "style", "noscript"]):
        tag.decompose()
    texts = (p.get_text() for p in soup.find_all("p"))
    paras = [text for text in texts if len(text) > 60]
    return "\n".join(paras[:20]) or None


# Convert uploaded PNG logo to local use
def get_uploaded_logo() -> str:
    """Copy the bundled logo PNG to a fresh temporary file and return its path.

    The caller owns the returned file and is responsible for deleting it.

    Raises:
        OSError: if the source logo cannot be read.
    """
    from_path = "CSHARP logo.png"
    # Open the source first so a missing logo does not leave an empty temp
    # file behind; both handles are closed when the block exits.
    with open(from_path, "rb") as src, \
            tempfile.NamedTemporaryFile(delete=False, suffix=".png") as dst:
        shutil.copyfileobj(src, dst)
    return dst.name


def _load_logo() -> Image.Image:
    """Return the logo as an RGBA image scaled for the slide corner."""
    logo_path = get_uploaded_logo()
    try:
        with Image.open(logo_path) as raw_logo:
            # convert() loads the pixel data, so the file can be closed
            # and removed afterwards.
            logo = raw_logo.convert("RGBA")
    finally:
        os.remove(logo_path)
    logo_width = min(180, int(0.15 * SLIDE_SIZE[0]))
    # Preserve the aspect ratio; never let the height collapse to zero.
    logo_height = max(1, int(logo.size[1] * (logo_width / logo.size[0])))
    return logo.resize((logo_width, logo_height))


def _render_slide(slide_text: str, font, logo: Image.Image) -> Image.Image:
    """Draw *slide_text* centred on a new canvas and stamp *logo* bottom-right."""
    width, height = SLIDE_SIZE
    img = Image.new("RGB", SLIDE_SIZE, color=SLIDE_BACKGROUND)
    draw = ImageDraw.Draw(img)

    lines = slide_text.split("\n")
    line_sizes = [draw.textbbox((0, 0), line, font=font) for line in lines]
    total_height = (
        sum(box[3] - box[1] for box in line_sizes)
        + (len(lines) - 1) * LINE_SPACING
    )
    # Centre the text block vertically, keeping at least a 20px top margin.
    y = max((height - total_height) // 2, 20)
    for line, box in zip(lines, line_sizes):
        line_width = box[2] - box[0]
        line_height = box[3] - box[1]
        draw.text(((width - line_width) // 2, y), line, font=font, fill="white")
        y += line_height + LINE_SPACING

    logo_width, logo_height = logo.size
    # The logo doubles as its own alpha mask so transparency is respected.
    img.paste(
        logo,
        (width - logo_width - LOGO_MARGIN, height - logo_height - LOGO_MARGIN),
        logo,
    )
    return img


# Create image slides from text chunks
def create_slides(text, duration, output_folder, max_lines=6) -> List[Tuple[str, float]]:
    """Render *text* as a sequence of PNG slides in *output_folder*.

    The text is wrapped at 36 characters and grouped into slides of at most
    *max_lines* lines. *duration* is split evenly across the slides.

    Args:
        text: Text to display.
        duration: Total video duration in seconds.
        output_folder: Existing directory that receives ``slide_<i>.png``.
        max_lines: Maximum number of wrapped lines per slide.

    Returns:
        A list of ``(frame_path, seconds_on_screen)`` tuples, one per slide.
        Always contains at least one entry: empty text yields a single
        blank slide showing only the logo.
    """
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font = ImageFont.truetype(font_path, 48)
    # Load and scale the logo once instead of once per slide.
    logo = _load_logo()

    chunks = textwrap.wrap(text, width=36)
    slides = [
        "\n".join(chunks[i:i + max_lines])
        for i in range(0, len(chunks), max_lines)
    ]
    if not slides:
        # Avoid dividing by zero below when there is nothing to wrap.
        slides = [""]
    per_slide_time = duration / len(slides)

    slide_paths = []
    for i, slide_text in enumerate(slides):
        frame_path = os.path.join(output_folder, f"slide_{i}.png")
        _render_slide(slide_text, font, logo).save(frame_path)
        slide_paths.append((frame_path, per_slide_time))
    return slide_paths


def _concat_quote(path: str) -> str:
    """Quote *path* for an ffmpeg concat list (escape embedded single quotes)."""
    return "'" + path.replace("'", "'\\''") + "'"


# Generate AV summary
def url_to_av_summary(url, duration) -> Tuple[str, Optional[str]]:
    """Build a narrated slide video summarising the article at *url*.

    Args:
        url: Article URL to summarise.
        duration: Target slideshow duration in seconds.

    Returns:
        ``(summary, video_path)`` on success. When the article cannot be
        fetched or parsed, or the model returns no summary, the first item
        is an error message and the second is ``None``.

    Raises:
        subprocess.CalledProcessError: if the ffmpeg slideshow render fails
            (its stderr is attached to the exception).
    """
    try:
        content = extract_main_content(url)
    except requests.RequestException as exc:
        return f"Failed to fetch article: {exc}", None
    if not content:
        return "Failed to extract article content.", None

    raw_summary = summary_chain.invoke({"text": content[:3000]})["text"]
    summary = raw_summary.replace('"', '')[:300]
    if not summary.strip():
        return "Failed to generate a summary.", None

    # Only the final video must outlive this call; close the descriptor
    # straight away so ffmpeg can overwrite the file.
    fd, final_video = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)

    # All intermediates live in one directory that is removed afterwards.
    work_dir = tempfile.mkdtemp()
    try:
        audio_path = os.path.join(work_dir, "narration.mp3")
        gTTS(text=summary).save(audio_path)

        slides = create_slides(summary, duration, work_dir)

        concat_txt_path = os.path.join(work_dir, "slides.txt")
        with open(concat_txt_path, "w") as f:
            for path, seconds in slides:
                f.write(f"file {_concat_quote(path)}\n")
                f.write(f"duration {seconds}\n")
            # The concat demuxer needs the last file repeated for its
            # duration to take effect.
            f.write(f"file {_concat_quote(slides[-1][0])}\n")

        concat_img = os.path.join(work_dir, "video_input.mp4")
        subprocess.run(
            [
                "ffmpeg", "-y", "-f", "concat", "-safe", "0",
                "-i", concat_txt_path,
                "-vsync", "vfr", "-pix_fmt", "yuv420p", concat_img,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,  # fail here, with stderr, rather than at the mux step
        )

        video_input = ffmpeg.input(concat_img)
        audio_input = ffmpeg.input(audio_path)
        ffmpeg.output(
            video_input, audio_input, final_video,
            vcodec='libx264', acodec='aac', pix_fmt='yuv420p',
            shortest=None,  # emitted as a bare -shortest flag
        ).run(overwrite_output=True, quiet=True)
    except BaseException:
        # Do not leave an empty/partial output file behind on failure.
        if os.path.exists(final_video):
            os.remove(final_video)
        raise
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)

    return summary, final_video


iface = gr.Interface(
    fn=url_to_av_summary,
    inputs=[
        gr.Textbox(label="Article URL"),
        gr.Radio([5, 10], label="Video Duration (sec)", value=5),
    ],
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Video(label="Generated AV Summary"),
    ],
    title="🎞️ AV Summary Generator (Multislide with Uploaded Logo)",
    description="Generates a 5/10 sec video summary from article URL with large text, uploaded logo, and slide animation.",
)

if __name__ == '__main__':
    iface.launch()