File size: 1,705 Bytes
15c7809
936dbab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15c7809
 
 
936dbab
 
 
15c7809
936dbab
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
from PyPDF2 import PdfReader
from docx import Document
import os

def process_pdf(pdf_file, token):
    """Rewrite the text of an uploaded PDF with an LLM and save it as a Word file.

    Args:
        pdf_file: The uploaded PDF, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds the path.
        token: Hugging Face access token handed to ``InferenceClient``. An
            object exposing the token string as a ``token`` attribute is
            accepted as well.

    Returns:
        A ``(text, path)`` tuple. On success ``text`` is the rewritten text
        and ``path`` points to the generated ``rewritten.docx``. On any
        failure ``text`` is an ``"Error: ..."`` message and ``path`` is
        ``None``; exceptions are never propagated to the caller.
    """
    import tempfile

    # Clicking the button without an upload passes None; report that plainly
    # instead of surfacing an AttributeError message.
    if pdf_file is None:
        return "Error: no PDF file was uploaded", None

    try:
        # Accept a plain path as well as a file-like object carrying `.name`.
        # Path-like values are checked first because `Path.name` is only the
        # final path component, not the full path.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        # Extract text from PDF; a page without a text layer may yield an
        # empty or missing result, which is treated as an empty string.
        pdf_reader = PdfReader(pdf_path)
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            return "Error: no extractable text found in the PDF", None

        # Initialize LLM client
        from huggingface_hub import InferenceClient
        client = InferenceClient(token=getattr(token, "token", token))

        # Generate rewritten text
        response = client.text_generation(
            prompt=f"Rewrite this text clearly and concisely while preserving all key information:\n\n{text}",
            model="meta-llama/Llama-3.3-70B-Instruct",
            max_new_tokens=2000
        )

        # Create Word document. Each call writes into its own temporary
        # directory so concurrent requests cannot overwrite each other's
        # output, while the download keeps the "rewritten.docx" file name.
        # The directory is not removed here; cleanup is left to the host.
        doc = Document()
        doc.add_paragraph(response)
        output_dir = tempfile.mkdtemp(prefix="rewritten_")
        output_path = os.path.join(output_dir, "rewritten.docx")
        doc.save(output_path)

        return response, output_path

    except Exception as e:
        # UI boundary: turn any failure into a message for the output textbox.
        return f"Error: {str(e)}", None

def _process_with_login(pdf_file, oauth_token: gr.OAuthToken):
    """Forward the upload and the signed-in user's access token to process_pdf.

    Gradio fills parameters annotated with ``gr.OAuthToken`` itself, so the
    token must not be listed in the event's ``inputs``. Listing the login
    button there would only pass the button's label text.
    """
    return process_pdf(pdf_file, oauth_token.token)


with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# PDF to Word Converter")
        gr.Markdown("Upload a PDF to get rewritten text in Word format")
        login_btn = gr.LoginButton("Sign in with HF")

    with gr.Column():
        # type="filepath" replaces type="file", which recent Gradio releases
        # reject; uploads are limited to PDFs.
        file_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
        process_btn = gr.Button("Process PDF")
        text_output = gr.Textbox(label="Rewritten Text", interactive=False)
        # gr.File is the output component that offers a returned file path
        # for download.
        file_output = gr.File(label="Download Word File", interactive=False)

    process_btn.click(
        fn=_process_with_login,
        inputs=[file_input],
        outputs=[text_output, file_output]
    )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()