sblumenf committed on
Commit
b2971fd
·
verified ·
1 Parent(s): 89bd724

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import gradio as gr
2
- from maker_pdf import PDF
3
  import os
4
 
5
  def convert_pdf(input_file, output_format):
6
  """
7
  Convert a PDF file to the specified format.
8
-
9
  Args:
10
  input_file: Uploaded PDF file.
11
  output_format: Desired output format (Markdown, HTML, JSON).
@@ -13,21 +13,21 @@ def convert_pdf(input_file, output_format):
13
  Returns:
14
  Path to the converted file.
15
  """
 
 
 
16
  # Check the output format and define the output file path
17
- output_file_path = f"output.{output_format.lower()}"
18
 
19
  if output_format == "Markdown (.md)":
20
- # Placeholder: Replace with actual PDF to Markdown conversion logic
21
  with open(output_file_path, "w") as f:
22
- f.write("# Sample Markdown\nThis is a placeholder for PDF to Markdown conversion.")
23
  elif output_format == "HTML (.html)":
24
- # Placeholder: Replace with actual PDF to HTML conversion logic
25
  with open(output_file_path, "w") as f:
26
- f.write("<html><body><h1>Sample HTML</h1><p>This is a placeholder for PDF to HTML conversion.</p></body></html>")
27
  elif output_format == "JSON (.json)":
28
- # Placeholder: Replace with actual PDF to JSON conversion logic
29
  with open(output_file_path, "w") as f:
30
- f.write("{\"sample\": \"This is a placeholder for PDF to JSON conversion.\"}")
31
  else:
32
  return "Unsupported output format!"
33
 
 
1
  import gradio as gr
2
+ from marker import PDF
3
  import os
4
 
5
  def convert_pdf(input_file, output_format):
6
  """
7
  Convert a PDF file to the specified format.
8
+
9
  Args:
10
  input_file: Uploaded PDF file.
11
  output_format: Desired output format (Markdown, HTML, JSON).
 
13
  Returns:
14
  Path to the converted file.
15
  """
16
+ # Ensure input file is processed correctly
17
+ pdf = PDF(input_file.name)
18
+
19
  # Check the output format and define the output file path
20
+ output_file_path = f"output.{output_format.split(' ')[0].lower()}"
21
 
22
  if output_format == "Markdown (.md)":
 
23
  with open(output_file_path, "w") as f:
24
+ f.write(pdf.to_markdown())
25
  elif output_format == "HTML (.html)":
 
26
  with open(output_file_path, "w") as f:
27
+ f.write(pdf.to_html())
28
  elif output_format == "JSON (.json)":
 
29
  with open(output_file_path, "w") as f:
30
+ f.write(pdf.to_json())
31
  else:
32
  return "Unsupported output format!"
33