"""Gradio front end for DocSummarizer.

Builds a small web UI in which the user pastes text (or types the path of a
.txt/.pdf file), picks summarization options, and receives the summary
produced by ``summarize.summarize_text``.  Example documents are discovered
in ``EXAMPLES_DIR`` at import time and offered as clickable examples.
"""
import logging
import os

import gradio as gr

from summarize import summarize_text
from pdf2text import convert_PDF_to_Text

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

EXAMPLES_DIR = "examples"


def load_examples(examples_dir=EXAMPLES_DIR):
    """Map example names to the paths of the ``.txt`` files in a directory.

    Args:
        examples_dir: Directory to scan; defaults to ``EXAMPLES_DIR``.

    Returns:
        A dict of ``{file name without the .txt extension: file path}``.
        Empty when ``examples_dir`` is missing or is not a directory.
    """
    name_to_path = {}
    # isdir (not exists) so a stray *file* called "examples" cannot make
    # os.listdir raise NotADirectoryError at import time.
    if os.path.isdir(examples_dir):
        # Sorted so the examples appear in the same order on every platform.
        for filename in sorted(os.listdir(examples_dir)):
            # splitext removes only the final extension; str.replace would
            # also eat ".txt" occurring in the middle of the file name.
            stem, extension = os.path.splitext(filename)
            if extension == ".txt":
                name_to_path[stem] = os.path.join(examples_dir, filename)
    logger.info("Loaded %d examples", len(name_to_path))
    return name_to_path


def get_example_text(example_name, name_to_path):
    """Return the contents of the named example file.

    Args:
        example_name: Key to look up in ``name_to_path``.
        name_to_path: Mapping as returned by ``load_examples``.

    Returns:
        The file contents decoded as UTF-8, or ``""`` when the name is
        unknown or the file no longer exists.
    """
    path = name_to_path.get(example_name)
    if not path:
        return ""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        # The file was removed after the directory was scanned.
        return ""


name_to_path = load_examples()


def summarize_interface(input_text, summary_length, summary_type, use_ocr):
    """Summarize pasted text, or the file that the input names.

    Args:
        input_text: Literal text to summarize, or the path of an existing
            .pdf / text file on the machine running the app.
        summary_length: Forwarded unchanged to ``summarize_text``.
        summary_type: Forwarded unchanged to ``summarize_text``.
        use_ocr: Forwarded to ``convert_PDF_to_Text`` as ``use_ocr``.

    Returns:
        The value returned by ``summarize_text``, or a human-readable
        message when the input is empty or processing raises.
    """
    # Surrounding whitespace (e.g. a trailing newline from a paste) must not
    # stop a path from being recognised; the text itself is passed on as-is.
    candidate_path = (input_text or "").strip()
    if not candidate_path:
        return "⚠️ No input provided: enter text or a txt/pdf file path."

    # NOTE(security): the branches below read ANY file the server process can
    # access, chosen by whoever uses the UI. Restrict this to an allow-listed
    # directory before exposing the app to untrusted users.
    try:
        if candidate_path.lower().endswith(".pdf") and os.path.isfile(candidate_path):
            source_text = convert_PDF_to_Text(candidate_path, use_ocr=use_ocr)
        elif os.path.isfile(candidate_path):
            with open(candidate_path, "r", encoding="utf-8") as f:
                source_text = f.read()
        else:
            source_text = input_text
        return summarize_text(source_text, summary_length, summary_type)
    except Exception as e:
        # UI boundary: log the traceback and show the user a message instead
        # of letting the request fail.
        logger.exception("Summarization failed:")
        return f"❌ Summarization failed: {e}"


def _load_example_into_textbox(example_name):
    """Return the text of the clicked example so it replaces its name."""
    return get_example_text(example_name, name_to_path)


example_names = list(name_to_path.keys())
default_example = example_names[0] if example_names else None

# NOTE(review): the source reached review with its line breaks collapsed, so
# the nesting of the widgets below is a reconstruction -- confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("# DocSummarizer 使用 AI 自動摘要你的文件 📄")

    with gr.Row():
        input_textbox = gr.Textbox(
            label="Text to Summarize (or PDF path)",
            lines=15,
            placeholder="輸入或貼上文字,或提供 txt/pdf 檔案路徑",
        )
        with gr.Column():
            summary_length = gr.Slider(50, 1000, value=250, label="Summary Length")
            summary_type = gr.Radio(
                choices=["map", "map-reduce"],
                value="map-reduce",
                label="Summarization Strategy",
            )
            use_ocr = gr.Checkbox(label="Use OCR for PDF", value=False)
            submit_button = gr.Button("Summarize")

    output_textbox = gr.Textbox(label="Summarized Output", lines=15)

    submit_button.click(
        fn=summarize_interface,
        inputs=[input_textbox, summary_length, summary_type, use_ocr],
        outputs=output_textbox,
    )

    if default_example:
        with gr.Row():
            # Without caching, gr.Examples only calls `fn` when run_on_click
            # is set, and it needs `outputs` to know where to put the result.
            # Lacking both, a click merely typed the example's *name* into
            # the textbox instead of loading the example's text.
            gr.Examples(
                examples=[[name] for name in example_names],
                inputs=input_textbox,
                outputs=input_textbox,
                label="📚 範例檔案",
                fn=_load_example_into_textbox,
                cache_examples=False,
                run_on_click=True,
            )

demo.launch()