ikraamkb committed on
Commit
6dfac5c
·
verified ·
1 Parent(s): af32fa4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -1
app.py CHANGED
@@ -18,7 +18,7 @@ app = FastAPI()
18
  # -------------------------
19
  # Extraction Functions
20
  # -------------------------
21
- def extract_text_from_pdf(file_bytes):
22
  try:
23
  with fitz.open(stream=file_bytes, filetype="pdf") as doc:
24
  return "\n".join([page.get_text() for page in doc])
@@ -56,7 +56,44 @@ def extract_text_from_xlsx(file_bytes):
56
  return "\n".join(text)
57
  except Exception as e:
58
  return f"❌ XLSX extraction error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
 
 
 
 
 
 
 
 
 
 
 
60
  # -------------------------
61
  # Main Logic
62
  # -------------------------
 
18
  # -------------------------
19
  # Extraction Functions
20
  # -------------------------
21
+ """def extract_text_from_pdf(file_bytes):
22
  try:
23
  with fitz.open(stream=file_bytes, filetype="pdf") as doc:
24
  return "\n".join([page.get_text() for page in doc])
 
56
  return "\n".join(text)
57
  except Exception as e:
58
  return f"❌ XLSX extraction error: {e}"
59
+ """
60
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF document.

    Args:
        pdf_file: A filesystem path, raw PDF bytes, or a binary file-like
            object exposing ``read()``. Raw bytes are accepted so callers
            written against the earlier bytes-based version keep working.

    Returns:
        The text of all pages joined with newlines, or a string starting
        with ``"Error reading PDF: "`` if the document could not be read.
    """
    try:
        if hasattr(pdf_file, "read"):
            # In-memory data is handed to PyMuPDF through ``stream``,
            # so drain file-like objects to bytes first.
            pdf_file = pdf_file.read()
        if isinstance(pdf_file, (bytes, bytearray)):
            opened = fitz.open(stream=pdf_file, filetype="pdf")
        else:
            opened = fitz.open(pdf_file)
        with opened as doc:
            return "\n".join(page.get_text("text") for page in doc)
    except Exception as e:
        # Best-effort extraction: failures are reported as text, not raised.
        return f"Error reading PDF: {e}"
69
+
70
def extract_text_from_docx(docx_file):
    """Extract the text of all non-blank paragraphs of a DOCX document.

    Args:
        docx_file: A filesystem path, a binary file-like object, or the raw
            bytes of the document (wrapped in an in-memory buffer).

    Returns:
        The non-blank paragraph texts joined with newlines, or a string
        starting with ``"Error reading DOCX: "`` if the document could not
        be read, matching the other extraction functions in this file.
    """
    import io

    try:
        if isinstance(docx_file, (bytes, bytearray)):
            docx_file = io.BytesIO(docx_file)
        doc = docx.Document(docx_file)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    except Exception as e:
        # Same best-effort contract as the PDF/PPTX/XLSX extractors:
        # report the failure as text instead of propagating it.
        return f"Error reading DOCX: {e}"
73
+
74
def extract_text_from_pptx(pptx_file):
    """Extract the text of every text-bearing shape in a PPTX presentation.

    Args:
        pptx_file: A filesystem path, a binary file-like object, or the raw
            bytes of the presentation (wrapped in an in-memory buffer).

    Returns:
        The shape texts in slide order joined with newlines, or a string
        starting with ``"Error reading PPTX: "`` if the presentation could
        not be read.
    """
    import io

    try:
        if isinstance(pptx_file, (bytes, bytearray)):
            pptx_file = io.BytesIO(pptx_file)
        presentation = pptx.Presentation(pptx_file)
        # Only shapes exposing a ``text`` attribute contribute; others
        # are skipped.
        return "\n".join(
            shape.text
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
    except Exception as e:
        # Best-effort extraction: failures are reported as text, not raised.
        return f"Error reading PPTX: {e}"
85
 
86
def extract_text_from_xlsx(xlsx_file):
    """Extract the cell values of every worksheet in an XLSX workbook.

    Each row becomes one output line with its non-empty cell values
    separated by single spaces. Rows without any value yield an empty line.

    Args:
        xlsx_file: A filesystem path, a binary file-like object, or the raw
            bytes of the workbook (wrapped in an in-memory buffer).

    Returns:
        The row lines of all worksheets joined with newlines, or a string
        starting with ``"Error reading XLSX: "`` if the workbook could not
        be read.
    """
    import io

    try:
        if isinstance(xlsx_file, (bytes, bytearray)):
            xlsx_file = io.BytesIO(xlsx_file)
        wb = openpyxl.load_workbook(xlsx_file)
        lines = []
        # ``worksheets`` excludes chart sheets, which have no rows to read.
        for ws in wb.worksheets:
            for row in ws.iter_rows(values_only=True):
                # Skip only truly empty cells; a plain truthiness test would
                # also discard legitimate values such as 0 and False.
                lines.append(
                    " ".join(
                        str(cell)
                        for cell in row
                        if cell is not None and cell != ""
                    )
                )
        return "\n".join(lines)
    except Exception as e:
        # Best-effort extraction: failures are reported as text, not raised.
        return f"Error reading XLSX: {e}"
97
  # -------------------------
98
  # Main Logic
99
  # -------------------------