# NOTE: the following header ("Spaces: Runtime error", commit hashes, line-number
# column) was page chrome captured by the scraper, not part of the program.
# from flair.data import Sentence
# from flair.models import SequenceTagger
# import streamlit as st
# # load tagger
# tagger = SequenceTagger.load("flair/ner-english-large")
# # make example sentence
# text=st.text_area("Enter the text to detect it's named entities")
# sentence = Sentence(text)
# # predict NER tags
# tagger.predict(sentence)
# # print sentence
# print(sentence)
# # print predicted NER spans
# print('The following NER tags are found:')
# # iterate over entities and printx
# for entity in sentence.get_spans('ner'):
# print(entity)
import re

import cv2
import easyocr
import numpy as np
import requests
import streamlit as st
from PIL import Image
# import os
# Load the EasyOCR reader
# Create the EasyOCR reader once at import time (model load is slow, so it
# must not happen per-request inside a function).
reader = easyocr.Reader(['en'])
# key=os.environ.getattribute("api_key")
# print(key)
# Hugging Face Inference API endpoint for the flair large English NER model.
API_URL = "https://api-inference.huggingface.co/models/flair/ner-english-large"
# NOTE(review): the secret is sent as-is; the HF API usually expects
# "Bearer <token>" — confirm the stored secret already includes that prefix.
headers = {"Authorization": st.secrets["api_key"]}
## Image uploading function ##
def image_upload_and_ocr(reader):
    """Let the user upload a business-card image and OCR it.

    Args:
        reader: an initialised ``easyocr.Reader``.

    Returns:
        (result, result2): the detected text joined into one string, and the
        raw EasyOCR detections ``[(points, text, confidence), ...]``.
        The script run is stopped (no return) while no file is uploaded.
    """
    uploaded_file = st.file_uploader(
        label=':red[**please upload a busines card** :sunglasses:]',
        type=['jpeg', 'jpg', 'png', 'webp'],
    )
    if uploaded_file is None:
        # Halt this Streamlit run cleanly: the original fell through and
        # returned None, crashing the `result, result2 = ...` unpacking
        # at module level on every fresh page load.
        st.stop()
    image = Image.open(uploaded_file)
    image = image.resize((640, 480))
    # readtext() accepts a path, bytes, or a numpy array — not a PIL Image —
    # so convert explicitly before OCR.
    result2 = reader.readtext(np.array(image))
    texts = [item[1] for item in result2]
    result = ' '.join(texts)
    return result, result2
def query(payload, timeout=30):
    """POST *payload* to the HF Inference API and return the decoded JSON.

    Args:
        payload: JSON-serialisable body, e.g. ``{"inputs": "<text>"}``.
        timeout: seconds before giving up — without it, ``requests.post``
            can hang indefinitely on a stalled connection.

    Returns:
        The parsed JSON response (a list of entities on success, or an
        error dict from the API on failure).
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()
def get_ner_from_transformer(output):
    """Group NER entities returned by the inference API by entity type.

    Args:
        output: list of dicts with ``'entity_group'`` and ``'word'`` keys,
            as returned by the flair NER endpoint.

    Returns:
        (entity_type, named_entities): the type of the *last* entity seen
        (kept for interface compatibility; ``None`` when *output* is empty —
        the original raised UnboundLocalError in that case) and a dict
        mapping each entity type to its words in input order.
    """
    entity_type = None
    named_entities = {}
    for entity in output:
        entity_type = entity['entity_group']
        # setdefault replaces the manual "if key not in dict" dance.
        named_entities.setdefault(entity_type, []).append(entity['word'])
    return entity_type, named_entities
### DRAWING DETECTION FUNCTION ###
def drawing_detection(image):
    """Draw OCR bounding boxes + labels and show the result in Streamlit.

    Args:
        image: despite the name, this is the EasyOCR detection *list*
            ``[(points, text, confidence), ...]`` — not pixel data.
            (Name kept for interface compatibility.)

    Returns:
        The numpy canvas with boxes and text drawn on it.
    """
    # BUG FIX: the original passed the detection list itself to
    # cv2.rectangle/cv2.putText, which cv2 rejects. Draw on a blank canvas
    # matching the 640x480 resize performed at upload time instead.
    canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    for detection in image:
        points = detection[0]  # four corner points of the bounding box
        x1, y1 = int(points[0][0]), int(points[0][1])  # top-left corner
        x2, y2 = int(points[2][0]), int(points[2][1])  # bottom-right corner
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
        text = detection[1]
        cv2.putText(canvas, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    st.image(canvas, caption='Detected text on the card ', width=710)
    return canvas
st.title("_Business_ card data extractor using opencv and streamlit :sunglasses:")
# image_upload_and_ocr returns None on a fresh page load (no file chosen yet);
# guard the unpacking so the app doesn't crash with a TypeError before the
# first upload. (Also drops the stray trailing "|" that broke parsing, and
# fixes the `darwing_image` typo.)
ocr_output = image_upload_and_ocr(reader)
if ocr_output is not None:
    result, result2 = ocr_output
    drawing_image = drawing_detection(result2)
    output = query({"inputs": result})
    entity_type, named_entities = get_ner_from_transformer(output)
    st.write(entity_type)
    st.write(named_entities)