# from flair.data import Sentence
# from flair.models import SequenceTagger
# import streamlit as st

# # load tagger
# tagger = SequenceTagger.load("flair/ner-english-large")

# # make example sentence
# text = st.text_area("Enter the text to detect its named entities")
# sentence = Sentence(text)

# # predict NER tags
# tagger.predict(sentence)

# # print sentence
# print(sentence)

# # print predicted NER spans
# print('The following NER tags are found:')
# # iterate over entities and print
# for entity in sentence.get_spans('ner'):
#     print(entity)



import easyocr
import cv2
import numpy as np
import requests
from PIL import Image
import streamlit as st
import os

# Read the Hugging Face API key from the "api_key" environment variable
key = os.environ.get("api_key")

API_URL = "https://api-inference.huggingface.co/models/flair/ner-english-large"
headers = {"Authorization": f"Bearer {key}"}

## Image uploading function ##
def image_upload_and_ocr(reader):
    uploaded_file = st.file_uploader(label=':red[**Please upload a business card** :sunglasses:]',
                                     type=['jpeg', 'jpg', 'png', 'webp'])
    if uploaded_file is None:
        st.stop()  # halt the script until the user uploads an image

    image = Image.open(uploaded_file).convert('RGB')
    image = image.resize((640, 480))
    image = np.array(image)  # EasyOCR and OpenCV both expect a numpy array

    detections = reader.readtext(image)  # list of (bbox, text, confidence) tuples
    text = ' '.join(item[1] for item in detections)
    return text, detections, image
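# For reference, each EasyOCR detection looks roughly like this
# (illustrative values, not real output):
#
#   ([[14, 10], [210, 10], [210, 42], [14, 42]], 'ACME Corp', 0.97)
#
# i.e. four corner points (top-left, top-right, bottom-right, bottom-left),
# the recognized text, and a confidence score.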


def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
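# The Inference API returns a list of entity predictions; a trimmed example of
# the expected shape (values are illustrative):
#
#   [{"entity_group": "ORG", "word": "ACME Corp", "score": 0.99, "start": 0, "end": 9},
#    {"entity_group": "PER", "word": "Jane Doe", "score": 0.98, "start": 11, "end": 19}]
#
# Note the API may instead return an error dict (e.g. while the model is still
# loading), so callers may want to check for a list before parsing.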

def get_ner_from_transformer(output):
    # Group the predicted entities by type (e.g. PER, ORG, LOC)
    named_entities = {}
    for entity in output:
        entity_type = entity['entity_group']
        entity_text = entity['word']

        if entity_type not in named_entities:
            named_entities[entity_type] = []

        named_entities[entity_type].append(entity_text)

    return named_entities

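# Example of the grouped result for a business card (illustrative only):
#
#   {'PER': ['Jane Doe'], 'ORG': ['ACME Corp'], 'LOC': ['Boston']}
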
###  DRAWING DETECTION FUNCTION  ###
def drawing_detection(image, detections):
    # Draw a bounding box and label for each detected text region
    for detection in detections:
        points = detection[0]  # four corner points of the bounding box
        x1, y1 = int(points[0][0]), int(points[0][1])  # top-left corner
        x2, y2 = int(points[2][0]), int(points[2][1])  # bottom-right corner

        # Draw the bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Add the detected text above the box
        text = detection[1]
        cv2.putText(image, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

    # Render the annotated image once, after all boxes are drawn
    st.image(image, caption='Detected text on the card', width=710)
    return image

# Load the EasyOCR reader
reader = easyocr.Reader(['en'])
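# Streamlit re-runs this whole script on every interaction, so the reader above
# is re-created each time. A possible optimization (a sketch, assuming the
# st.cache_resource decorator available in recent Streamlit versions):
#
# @st.cache_resource
# def load_reader():
#     return easyocr.Reader(['en'])
#
# reader = load_reader()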

st.title("_Business_ card data extractor using OpenCV and Streamlit :sunglasses:")
result, detections, image = image_upload_and_ocr(reader)
drawing_image = drawing_detection(image, detections)

 
output = query({
    "inputs": result,
})

named_entities = get_ner_from_transformer(output)
st.write(named_entities)