parth parekh
committed on
Commit
·
cd11250
1
Parent(s):
5a43803
added a more efficient way for this
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ from pydantic import BaseModel
|
|
| 3 |
import torch
|
| 4 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 5 |
from torch.nn.functional import softmax
|
|
|
|
| 6 |
|
| 7 |
app = FastAPI(
|
| 8 |
title="Contact Information Detection API",
|
|
@@ -35,15 +36,42 @@ detector = ContactDetector()
|
|
| 35 |
# Request body accepted by the /detect_contact endpoint.
class TextInput(BaseModel):
    # The raw text that the endpoint passes to the detector.
    text: str
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# POST /detect_contact (version before the regex pre-check was added).
# Asks the module-level `detector` for a probability and a yes/no verdict for
# the submitted text and returns both alongside the text itself. Any exception
# raised by the detector is reported as HTTP 500 with the exception message as
# the detail.
@app.post("/detect_contact", summary="Detect contact information in text")
async def detect_contact(input: TextInput):
    try:
        text = input.text
        score = detector.detect_contact_info(text)
        flagged = detector.is_contact_info(text)
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
    else:
        return {
            "text": text,
            "contact_probability": score,
            "is_contact_info": flagged
        }
|
|
|
|
| 3 |
import torch
|
| 4 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 5 |
from torch.nn.functional import softmax
|
| 6 |
+
import re
|
| 7 |
|
| 8 |
app = FastAPI(
|
| 9 |
title="Contact Information Detection API",
|
|
|
|
| 36 |
# Request body accepted by the /detect_contact endpoint.
class TextInput(BaseModel):
    # The raw text that is checked by the regex patterns and, if none match,
    # passed to the detector.
    text: str
|
| 38 |
|
| 39 |
+
|
| 40 |
+
# Contact-information patterns, compiled once at import time instead of being
# rebuilt on every request. All of them are matched case-insensitively.
_CONTACT_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # Email address. The TLD class is letters only: the previous
        # `[A-Z|a-z]` also accepted a literal "|" character.
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        # Phone number: 3-3-4 digits, optionally separated by "-" or ".".
        r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',
        # ZIP code, optionally ZIP+4.
        # NOTE(review): this matches any standalone 5-digit number, not only
        # postal codes - confirm that is acceptable for callers.
        r'\b\d{5}(?:[-\s]\d{4})?\b',
        # Street address: a house number, some words, then a street suffix.
        # The `\b` in front of the suffix list makes the suffix a whole word;
        # without it "st" matched the tail of words such as "best" or "rest".
        r'\b\d+\s+[\w\s]+'
        r'\b(?:street|st|avenue|ave|road|rd|highway|hwy|square|sq|trail|trl'
        r'|drive|dr|court|ct|park|parkway|pkwy|circle|cir|boulevard|blvd)\b'
        r'\s*(?:[a-z]+\s*\d{1,3})?'
        r'(?:,\s*(?:apt|bldg|dept|fl|hngr|lot|pier|rm|ste|unit|#)\s*[a-z0-9-]+)?'
        r'(?:,\s*[a-z]+\s*[a-z]{2}\s*\d{5}(?:-\d{4})?)?',
        # Website URL with an explicit http/https scheme.
        r'(?:http|https)://(?:www\.)?[a-zA-Z0-9-]+\.[a-zA-Z]{2,}(?:/[^\s]*)?',
    )
)


def check_regex_patterns(text):
    """Return True if *text* contains something that looks like contact info.

    The text is searched for an email address, a phone number, a ZIP code, a
    street address, or an http(s) URL.

    Args:
        text: The string to search.

    Returns:
        True as soon as any pattern matches anywhere in ``text``; False when
        none of them match (including for an empty string).
    """
    return any(pattern.search(text) for pattern in _CONTACT_PATTERNS)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
# POST /detect_contact.
# Runs the regex pre-check first; a hit is answered immediately with
# probability 1.0 and method "regex". Otherwise the module-level `detector`
# supplies the probability and the yes/no verdict, reported with method
# "model". Any exception raised along the way is reported as HTTP 500 with the
# exception message as the detail.
@app.post("/detect_contact", summary="Detect contact information in text")
async def detect_contact(input: TextInput):
    try:
        text = input.text

        if not check_regex_patterns(text):
            # No pattern matched: fall back to the model.
            score = detector.detect_contact_info(text)
            flagged = detector.is_contact_info(text)
            verdict = {
                "contact_probability": score,
                "is_contact_info": flagged,
                "method": "model"
            }
        else:
            verdict = {
                "contact_probability": 1.0,
                "is_contact_info": True,
                "method": "regex"
            }

        return {"text": text, **verdict}
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
|