File size: 3,157 Bytes
582a7fc
 
 
854ac8d
582a7fc
 
854ac8d
582a7fc
 
 
854ac8d
582a7fc
 
854ac8d
582a7fc
 
854ac8d
582a7fc
 
 
 
 
 
 
 
 
 
 
 
 
74fc7fc
ae91b49
74fc7fc
8a46e51
 
 
 
 
ae91b49
 
74fc7fc
2085f26
582a7fc
 
 
 
 
 
 
8a46e51
 
 
582a7fc
 
 
 
74fc7fc
 
 
 
002ac57
 
 
 
 
 
 
 
 
 
 
 
3e4abdd
002ac57
 
 
 
 
582a7fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a46e51
582a7fc
 
 
 
 
002ac57
74fc7fc
 
 
 
002ac57
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# from flair.data import Sentence
# from flair.models import SequenceTagger
# import streamlit as st

# # load tagger
# tagger = SequenceTagger.load("flair/ner-english-large")

# # make example sentence
# text=st.text_area("Enter the text to detect it's named entities")
# sentence = Sentence(text)

# # predict NER tags
# tagger.predict(sentence)

# # print sentence
# print(sentence)

# # print predicted NER spans
# print('The following NER tags are found:')
# # iterate over entities and print
# for entity in sentence.get_spans('ner'):
#     print(entity)



import easyocr
import cv2
import requests
import re
from PIL import Image
import streamlit as st
# import os


# Load the EasyOCR reader (English-only model).
# NOTE(review): this runs at import time and may download model weights on first use.
reader = easyocr.Reader(['en'])


# key=os.environ.getattribute("api_key")
# print(key)
# Hosted HuggingFace inference endpoint for the flair English NER model.
API_URL = "https://api-inference.huggingface.co/models/flair/ner-english-large"
# NOTE(review): the HF API usually expects "Bearer <token>" in the Authorization
# header — confirm st.secrets["api_key"] already includes the "Bearer " prefix.
headers = {"Authorization": st.secrets["api_key"]}

## Image uploading function ##
def image_upload_and_ocr(reader):
    """Prompt the user for a business-card image and OCR it.

    Parameters
    ----------
    reader : easyocr.Reader
        Initialized EasyOCR reader used to extract text.

    Returns
    -------
    tuple
        ``(result, result2)`` where ``result`` is all detected text joined
        into one space-separated string and ``result2`` is the raw
        ``reader.readtext`` detection list. Returns ``(None, None)`` when
        no file has been uploaded yet (the original code raised
        UnboundLocalError in that case).
    """
    # Default both outputs so the function is safe before any upload happens.
    result = None
    result2 = None
    uploaded_file=st.file_uploader(label=':red[**please upload a busines card** :sunglasses:]',type=['jpeg','jpg','png','webp'])
    if uploaded_file is not None:
        image=Image.open(uploaded_file)
        # Normalize size so drawing/display downstream is consistent.
        image=image.resize((640,480))
        result2 = reader.readtext(image)
        # Each detection is (bbox, text, confidence); keep only the text.
        texts = [item[1] for item in result2]
        result=' '.join(texts)
    return result,result2


def query(payload):
    """POST *payload* to the hosted NER model and return the decoded JSON reply."""
    api_response = requests.post(API_URL, json=payload, headers=headers)
    return api_response.json()

def get_ner_from_transformer(output):
    """Group NER results from the HF inference API by entity type.

    Parameters
    ----------
    output : list[dict]
        API response items, each with an ``'entity_group'`` (e.g. ``'PER'``,
        ``'ORG'``) and a ``'word'`` key.

    Returns
    -------
    tuple
        ``(entity_type, named_entities)`` where ``named_entities`` maps each
        entity group to the list of words tagged with it, and ``entity_type``
        is the group of the *last* item processed (kept for interface
        compatibility with callers). ``entity_type`` is ``None`` for empty
        input — the original raised UnboundLocalError in that case.
    """
    named_entities = {}
    entity_type = None  # stays None when `output` is empty instead of raising
    for entity in output:
        entity_type = entity['entity_group']
        # setdefault replaces the membership-check-then-append pattern.
        named_entities.setdefault(entity_type, []).append(entity['word'])
    return entity_type, named_entities

        
    ###  DRAWING DETECTION FUNCTION  ###
def drawing_detection(image):
    """Draw OCR bounding boxes and labels, displaying the result in Streamlit.

    NOTE(review): despite its name, the call site passes the easyocr
    ``readtext`` detection *list* (``result2``) as ``image`` — this function
    then hands that same list to ``cv2.rectangle``/``cv2.putText``, which
    expect an image array, so the drawing calls look broken. It likely needs
    the actual image as a separate argument — confirm against the caller.
    """
    # Draw bounding boxes around the detected text regions
    for detection in image:
        # Extract the bounding box coordinates
        points = detection[0]  # List of points defining the bounding box
        x1, y1 = int(points[0][0]), int(points[0][1])  # Top-left corner
        # easyocr boxes list 4 corners clockwise from top-left; index 2 is
        # the corner opposite index 0.
        x2, y2 = int(points[2][0]), int(points[2][1])  # Bottom-right corner
        
        # Draw the bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    
        # Add the detected text
        text = detection[1]
        cv2.putText(image, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        # NOTE(review): st.image is inside the loop, so the picture is
        # re-rendered once per detection — probably meant to run after the loop.
        st.image(image,caption='Detected text on the card ',width=710)
    return image



# --- Streamlit page flow: upload -> OCR -> annotate -> NER -> display ---
st.title("_Business_ card data extractor using opencv and streamlit :sunglasses:")

# OCR the uploaded card: joined text plus raw easyocr detections.
result,result2=image_upload_and_ocr(reader)

# Render the detections (return value unused; kept for parity).
drawing_image = drawing_detection(result2)

# Send the extracted text to the hosted NER model.
output = query({"inputs": result})

# Show the grouped named entities on the page.
entity_type,named_entities= get_ner_from_transformer(output)
st.write(entity_type)
st.write(named_entities)