Spaces:
Runtime error
Runtime error
File size: 1,705 Bytes
15c7809 936dbab 15c7809 936dbab 15c7809 936dbab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
from PyPDF2 import PdfReader
from docx import Document
import os
def process_pdf(pdf_file, token):
    """Rewrite the text of an uploaded PDF with an LLM and save it as a Word file.

    Args:
        pdf_file: The uploaded PDF. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object that exposes the path as ``.name``.
        token: Hugging Face access token, forwarded to ``InferenceClient``.

    Returns:
        ``(rewritten_text, docx_path)`` on success. On any failure the
        exception is not propagated; ``("Error: <message>", None)`` is
        returned instead so the message can be shown in the text output.
    """
    try:
        if pdf_file is None:
            raise ValueError("no PDF file was uploaded")
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = pdf_file
        else:
            pdf_path = pdf_file.name

        # Extract text from PDF. `extract_text()` may yield None for pages
        # without a text layer, which would break `str.join`.
        pdf_reader = PdfReader(pdf_path)
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            # Avoid sending an empty prompt to the model (e.g. scanned PDFs).
            raise ValueError("no extractable text found in the PDF")

        # Initialize LLM client
        from huggingface_hub import InferenceClient
        client = InferenceClient(token=token)

        # Generate rewritten text
        # NOTE(review): confirm this model is served for the text-generation
        # task by the configured inference provider.
        response = client.text_generation(
            prompt=f"Rewrite this text clearly and concisely while preserving all key information:\n\n{text}",
            model="meta-llama/Llama-3.3-70B-Instruct",
            max_new_tokens=2000,
        )

        # Create Word document
        doc = Document()
        doc.add_paragraph(response)

        # Save into a per-request directory so concurrent requests do not
        # overwrite each other's output, while keeping the download file name.
        import tempfile
        output_dir = tempfile.mkdtemp(prefix="pdf2docx_")
        output_path = os.path.join(output_dir, "rewritten.docx")
        doc.save(output_path)
        return response, output_path
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app.
        return f"Error: {str(e)}", None
def _process_with_login(pdf_file, oauth_token: gr.OAuthToken | None):
    """Run ``process_pdf`` with the signed-in user's Hugging Face token.

    Gradio fills ``oauth_token`` from the type annotation (it is not listed
    in the event ``inputs``); it is ``None`` when nobody is signed in.
    """
    if oauth_token is None:
        return "Error: please sign in with Hugging Face first.", None
    return process_pdf(pdf_file, oauth_token.token)


# Build the UI: a sidebar with the title and login button, and a main column
# with the upload, the trigger button and the two outputs.
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# PDF to Word Converter")
        gr.Markdown("Upload a PDF to get rewritten text in Word format")
        login_btn = gr.LoginButton("Sign in with HF")
    with gr.Column():
        # "filepath" hands the handler a path string; "file" is not an
        # accepted value for `type` in current Gradio releases.
        file_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
        process_btn = gr.Button("Process PDF")
        text_output = gr.Textbox(label="Rewritten Text", interactive=False)
        # A File component used as an output offers the saved .docx for download.
        file_output = gr.File(label="Download Word File", interactive=False)
    # The login button is not an input: the token reaches the handler through
    # the `gr.OAuthToken` annotation on `_process_with_login`.
    process_btn.click(
        fn=_process_with_login,
        inputs=[file_input],
        outputs=[text_output, file_output],
    )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()