parth parekh committed on
Commit
cd11250
·
1 Parent(s): 5a43803

Added a more efficient way to do this

Browse files
Files changed (1) hide show
  1. app.py +29 -1
app.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
3
  import torch
4
  from transformers import BertTokenizer, BertForSequenceClassification
5
  from torch.nn.functional import softmax
 
6
 
7
  app = FastAPI(
8
  title="Contact Information Detection API",
@@ -35,15 +36,42 @@ detector = ContactDetector()
35
  class TextInput(BaseModel):
36
  text: str
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  @app.post("/detect_contact", summary="Detect contact information in text")
39
  async def detect_contact(input: TextInput):
40
  try:
 
 
 
 
 
 
 
 
 
 
41
  probability = detector.detect_contact_info(input.text)
42
  is_contact = detector.is_contact_info(input.text)
43
  return {
44
  "text": input.text,
45
  "contact_probability": probability,
46
- "is_contact_info": is_contact
 
47
  }
48
  except Exception as e:
49
  raise HTTPException(status_code=500, detail=str(e))
 
3
  import torch
4
  from transformers import BertTokenizer, BertForSequenceClassification
5
  from torch.nn.functional import softmax
6
+ import re
7
 
8
  app = FastAPI(
9
  title="Contact Information Detection API",
 
36
  class TextInput(BaseModel):
37
  text: str
38
 
39
+
40
# Patterns are compiled once at import time instead of being rebuilt on every
# request. All of them are matched case-insensitively.
_CONTACT_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # Email. The TLD class is [A-Za-z]; the previous [A-Z|a-z] also
        # accepted a literal "|" as a TLD character.
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        # Phone number: 3-3-4 digits, optionally separated by "-" or ".".
        r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',
        # ZIP code: 5 digits with an optional 4-digit extension.
        r'\b\d{5}(?:[-\s]\d{4})?\b',
        # Street address: house number, words, a street-type keyword, then
        # optional unit and city/state/ZIP parts.
        r'\b\d+\s+[\w\s]+(?:street|st|avenue|ave|road|rd|highway|hwy|square|sq|trail|trl|drive|dr|court|ct|park|parkway|pkwy|circle|cir|boulevard|blvd)\b\s*(?:[a-z]+\s*\d{1,3})?(?:,\s*(?:apt|bldg|dept|fl|hngr|lot|pier|rm|ste|unit|#)\s*[a-z0-9-]+)?(?:,\s*[a-z]+\s*[a-z]{2}\s*\d{5}(?:-\d{4})?)?',
        # Website URL with an explicit http/https scheme.
        r'(?:http|https)://(?:www\.)?[a-zA-Z0-9-]+\.[a-zA-Z]{2,}(?:/[^\s]*)?',
    )
)


def check_regex_patterns(text: str) -> bool:
    """Return True if *text* matches any known contact-information pattern.

    The patterns cover email addresses, phone numbers, ZIP codes, street
    addresses and http(s) URLs. Matching is case-insensitive and uses
    ``search``, so a pattern may match anywhere inside *text*.

    Args:
        text: The string to scan.

    Returns:
        True as soon as one pattern matches, otherwise False.
    """
    return any(pattern.search(text) for pattern in _CONTACT_PATTERNS)
53
+
54
+
55
@app.post("/detect_contact", summary="Detect contact information in text")
async def detect_contact(input: TextInput):
    """Report whether the submitted text contains contact information.

    A regex pre-check runs first; when it matches, the model is skipped and
    the response reports a probability of 1.0 with ``"method": "regex"``.
    Otherwise the module-level ``detector`` is queried and the response
    carries its probability and decision with ``"method": "model"``.

    Args:
        input: Request body holding the ``text`` to analyse.

    Returns:
        A dict with the keys ``text``, ``contact_probability``,
        ``is_contact_info`` and ``method``.

    Raises:
        HTTPException: With status 500 and the error message as detail when
            any step fails.
    """
    try:
        # First, check with regex patterns so obvious hits skip the model.
        if check_regex_patterns(input.text):
            return {
                "text": input.text,
                "contact_probability": 1.0,
                "is_contact_info": True,
                "method": "regex",
            }

        # If no regex patterns match, use the model.
        # NOTE(review): these two calls presumably each run inference on the
        # same text; confirm against ContactDetector whether one can be
        # derived from the other.
        probability = detector.detect_contact_info(input.text)
        is_contact = detector.is_contact_info(input.text)
        return {
            "text": input.text,
            "contact_probability": probability,
            "is_contact_info": is_contact,
            "method": "model",
        }
    except Exception as e:
        # Chain the original exception so the server-side traceback keeps
        # the root cause of the 500.
        raise HTTPException(status_code=500, detail=str(e)) from e