import gradio as gr
from transformers import pipeline
import concurrent.futures
# Load the summarization model pipeline
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
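# For reference, a single pipeline call returns a list of dicts; the output
# shown below is illustrative, not an actual model result:
#   summarizer("Some long passage of text ...")
#   -> [{'summary_text': 'A short summary of the passage.'}]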
# Split long text into fixed-size chunks and summarize the chunks in parallel
def summarize_text(text):
    # Chunk size in characters, used as a rough proxy for the model's token
    # limit (summarization models typically accept around 1024-2048 tokens)
    max_chunk_size = 1024
    # Slice the text into chunks; inputs at or under max_chunk_size
    # naturally yield a single chunk
    text_chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
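    # For example, a 2500-character input yields three chunks:
    # text[0:1024], text[1024:2048], and text[2048:2500]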
    # Summarize each chunk in parallel using a thread pool
    with concurrent.futures.ThreadPoolExecutor() as executor:
        summaries = list(executor.map(lambda chunk: summarizer(chunk)[0]['summary_text'], text_chunks))
    # Combine the chunk summaries into one
    full_summary = " ".join(summaries)
    return full_summary
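
# Quick local sanity check (hypothetical sample text; left commented out so
# it does not run when the Space starts):
# sample = "Paste a long article here ... " * 20
# print(summarize_text(sample))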
# Set up the Gradio interface
interface = gr.Interface(
    fn=summarize_text,
    inputs="text",
    outputs="text",
    title="Text Summarizer",
    description="Enter long text to get a concise summary.",
)
# Launch the Gradio interface; share=True also creates a temporary public link
interface.launch(share=True)