Daemontatox committed on
Commit
4adaaf8
·
verified ·
1 Parent(s): b956b25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -171
app.py CHANGED
@@ -430,175 +430,67 @@ The output should be formatted as a JSON instance that conforms to the JSON sche
430
  }
431
  ```"""
432
  ),
433
- "Aramco test": (
434
- """You are a high-performance document parsing assistant, optimized for speed and accuracy. Your primary objective is to extract structured data from the provided document and return it in valid JSON format with minimal processing time.
435
-
436
- Guidelines for Speed Optimization:
437
- 1. Process the document with minimal computation and only extract the required fields.
438
- 2. Use null for any fields that are missing or not clearly identifiable.
439
- 3. Avoid redundant checks or deep parsing; rely on the most straightforward extraction methods.
440
- 4. Always return ONLY valid JSON—no additional text, explanations, or formatting errors.
441
- 5. Focus on precision for key-value pairs; skip over ambiguous or irrelevant information.
442
-
443
- Document-Specific JSON Structures:
444
-
445
- 1. **Invoice**:
446
- - Extract financial and customer details efficiently.
447
- - JSON format:
448
- ```json
449
- {
450
- "invoice": {
451
- "date": null,
452
- "dueDate": null,
453
- "accountNumber": null,
454
- "invoiceNumber": null,
455
- "customerContact": null,
456
- "kintecContact": null,
457
- "accountsContact": null,
458
- "periodEnd": null,
459
- "contractNo": null,
460
- "specialistsName": null,
461
- "rpoNumber": null,
462
- "assignmentProject": null,
463
- "workLocation": null,
464
- "expenses": null,
465
- "regularHours": null,
466
- "overtime": null,
467
- "mobilisationAllowance": null,
468
- "dailyHousing": null,
469
- "opPipTechnical": null,
470
- "code": null,
471
- "vatBasis": null,
472
- "vatRate": null,
473
- "vatAmount": null,
474
- "totalExclVat": null,
475
- "totalInclVat": null
476
- }
477
- }
478
- ```
479
-
480
- 2. **Timesheet**:
481
- - Extract time tracking and approval data swiftly.
482
- - JSON format:
483
- ```json
484
- {
485
- "timesheet": {
486
- "Year": null,
487
- "RPO_Number": null,
488
- "PMC_Name": null,
489
- "Project_Location": null,
490
- "Project_and_Package": null,
491
- "Month": null,
492
- "Timesheet_Details": [
493
- {
494
- "Week": null,
495
- "Regular_Hours": null,
496
- "Overtime_Hours": null,
497
- "Total_Hours": null,
498
- "Comments": null
499
- },
500
- {
501
- "Week": null,
502
- "Regular_Hours": null,
503
- "Overtime_Hours": null,
504
- "Total_Hours": null,
505
- "Comments": null
506
- }
507
- ],
508
- "Monthly_Totals": {
509
- "Regular_Hours": null,
510
- "Overtime_Hours": null,
511
- "Total_Hours": null
512
- },
513
- "reviewedBy": {
514
- "name": null,
515
- "position": null,
516
- "date": null
517
- },
518
- "approvedBy": {
519
- "name": null,
520
- "position": null,
521
- "date": null
522
- }
523
- }
524
- }
525
- ```
526
-
527
- 3. **Purchase Order**:
528
- - Extract contract and pricing details with minimal overhead.
529
- - JSON format:
530
- ```json
531
- {
532
- "purchaseOrder": {
533
- "contractNo": null,
534
- "relPoNo": null,
535
- "version": null,
536
- "title": null,
537
- "startDate": null,
538
- "endDate": null,
539
- "costCenter": null,
540
- "purchasingGroup": null,
541
- "contractor": null,
542
- "location": null,
543
- "workDescription": null,
544
- "pricing": {
545
- "regularRate": null,
546
- "overtimeRate": null,
547
- "totalBudget": null
548
- }
549
- }
550
- }
551
- ```
552
-
553
- 4. **Travel Booking**:
554
- - Extract essential travel and employee data efficiently.
555
- - JSON format:
556
- ```json
557
- {
558
- "travelBooking": {
559
- "requestId": null,
560
- "approvalStatus": null,
561
- "employee": {
562
- "name": null,
563
- "id": null,
564
- "email": null,
565
- "firstName": null,
566
- "lastName": null,
567
- "gradeCodeGroup": null
568
- },
569
- "defaultManager": {
570
- "name": null,
571
- "email": null
572
- },
573
- "sender": {
574
- "name": null,
575
- "email": null
576
- },
577
- "travel": {
578
- "startDate": null,
579
- "endDate": null,
580
- "requestPolicy": null,
581
- "requestType": null,
582
- "employeeType": null,
583
- "travelActivity": null,
584
- "tripType": null
585
- },
586
- "cost": {
587
- "companyCode": null,
588
- "costObject": null,
589
- "costObjectId": null
590
- },
591
- "transport": {
592
- "type": null,
593
- "comments": null
594
- },
595
- "changeRequired": null,
596
- "comments": null
597
- }
598
- }
599
- ```
600
-
601
- Ensure your parsing method balances accuracy and speed, prioritizing quick turnaround without compromising JSON validity or structural integrity.
602
  """
603
  )
604
  }
@@ -675,10 +567,9 @@ with gr.Blocks() as demo:
675
  choices=[
676
  "NOC Timesheet",
677
  "NOC Basic",
678
- "NOC Structured test",
679
  "Aramco Full structured",
680
  "Aramco Timesheet only",
681
- "Aramco test"
682
  ],
683
  value="Options"
684
  )
 
430
  }
431
  ```"""
432
  ),
433
+ "NOC Invoice": (
434
+ """You are a highly accurate data extraction system. Your task is to analyze the provided image of an invoice and extract all data, paying close attention to the structure and formatting of the document. Organize the extracted data in a clear, structured format, such as JSON. Do not invent any information. If a field cannot be read with high confidence, indicate that with "UNCLEAR" or a similar designation. Be as specific as possible, and do not summarize or combine fields unless explicitly indicated. Do not provide any additional information that is not present in the document.
435
+
436
+ Here's the expected output format, in JSON, with all required fields:
437
+
438
+ ```json
439
+ {
440
+ "invoiceDetails": {
441
+ "pleaseQuote": "string", // Text related to quoting
442
+ "invoiceNumber": "string",
443
+ "workPeriod": "string",
444
+ "invoiceDate": "string",
445
+ "assignmentReference": "string"
446
+ },
447
+ "from": {
448
+ "companyName": "string",
449
+ "addressLine1": "string",
450
+ "addressLine2": "string",
451
+ "city": "string",
452
+ "postalCode": "string",
453
+ "country": "string"
454
+ },
455
+ "to": {
456
+ "companyName": "string",
457
+ "office": "string",
458
+ "floor": "string",
459
+ "building":"string",
460
+ "addressLine1": "string",
461
+ "poBox": "string",
462
+ "city": "string"
463
+ },
464
+ "services": [
465
+ {
466
+ "serviceDetails": "string",
467
+ "fromDate": "string",
468
+ "toDate": "string",
469
+ "currency": "string",
470
+ "fx": "string", //If it is present
471
+ "noOfDays": "number or string (if range)",
472
+ "rate": "number",
473
+ "total": "number"
474
+ },
475
+ // ... more service entries
476
+ ],
477
+ "totals": {
478
+ "subTotal": "number",
479
+ "tax": "number",
480
+ "totalDue": "number"
481
+ },
482
+ "bankDetails": {
483
+ "bankName": "string",
484
+ "descriptionReferenceField": "string",
485
+ "bankAddress": "string",
486
+ "swiftBicCode": "string",
487
+ "ibanNumber": "string",
488
+ "accountNumber": "string",
489
+ "beneficiaryName": "string",
490
+ "accountCurrency": "string",
491
+ "expectedAmount": "string"
492
+ }
493
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  """
495
  )
496
  }
 
567
  choices=[
568
  "NOC Timesheet",
569
  "NOC Basic",
 
570
  "Aramco Full structured",
571
  "Aramco Timesheet only",
572
+ "NOC Invoice"
573
  ],
574
  value="Options"
575
  )