Mdean77 committed on
Commit
1be02f9
·
1 Parent(s): 5e4b78a

Adjusted extraction so it does not capture the first two pages twice if not a protocol.

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -67,7 +67,7 @@ async def on_chat_start():
67
 
68
 
69
  for page in doc.pages():
70
- if page.number in [0, 1, 2]:
71
  extracted_text += page.get_text()
72
  elif page.number in range(start_page-1, end_page):
73
  # print(page.get_text(clip=rect))
 
67
 
68
 
69
  for page in doc.pages():
70
+ if (start_page != 1 and page.number in [0, 1, 2]):
71
  extracted_text += page.get_text()
72
  elif page.number in range(start_page-1, end_page):
73
  # print(page.get_text(clip=rect))