Witold Wydmański committed on
Commit
5415ed9
·
1 Parent(s): c914e02

feat: return file instead of text

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -27,17 +27,17 @@ def tesseract_ocr(image, progress=gr.Progress()):
27
  img.load()
28
  text = pytesseract.image_to_string(img)
29
  text_res.append(text)
30
- return text
31
 
 
 
 
32
 
33
- if __name__=="__main__":
34
- #make sure that flagged/ dir is created
35
- os.chdir("/code")
36
 
 
37
  iface = gr.Interface(
38
  fn=tesseract_ocr,
39
  inputs=[gr.File(label="PDF file")],
40
- outputs=gr.Textbox(label="Text"),
41
  title="PDF to Text Converter",
42
  description="Converts a PDF file to text using Tesseract OCR.",
43
  ).queue(concurrency_count=10)
 
27
  img.load()
28
  text = pytesseract.image_to_string(img)
29
  text_res.append(text)
 
30
 
31
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as file:
32
+ file.write("\n".join(text_res))
33
+ return file.name
34
 
 
 
 
35
 
36
+ if __name__=="__main__":
37
  iface = gr.Interface(
38
  fn=tesseract_ocr,
39
  inputs=[gr.File(label="PDF file")],
40
+ outputs=gr.File(label="Text file", type="file", encoding="utf-8"),
41
  title="PDF to Text Converter",
42
  description="Converts a PDF file to text using Tesseract OCR.",
43
  ).queue(concurrency_count=10)