Spaces:

VidhitMakvana1
/

Contact-Sharing-Recognizer-API

Sleeping

App Files Community

parth parekh committed on Sep 23, 2024

Commit

dd174fa

1 Parent(s): fa5ad45

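Added a preprocessing step that removes punctuation (except @ and .) from the input text before the regex check and the model prediction, to improve accuracy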

Browse files

Files changed (2) hide show

app.py +8 -4
test.py +57 -35

app.py CHANGED Viewed

@@ -12,6 +12,9 @@ app = FastAPI(
     docs_url="/"
 )
 class TextInput(BaseModel):
     text: str
@@ -36,8 +39,10 @@ def check_regex_patterns(text):
 @app.post("/detect_contact", summary="Detect contact information in text")
 async def detect_contact(input: TextInput):
     try:
         # First, check with regex patterns
-        if check_regex_patterns(input.text):
             return {
                 "text": input.text,
                 "contact_probability": 1.0,
@@ -45,9 +50,8 @@ async def detect_contact(input: TextInput):
                 "method": "regex"
             }
-     # If no regex patterns match, use the model
-        # Probability of containing contact info
-        is_contact = predict(input.text)  # You can adjust this threshold as needed
         return {
             "text": input.text,
             "contact_probability": 0.98,

     docs_url="/"
 )
+def preprocess_text(text):
     # Return `text` with punctuation stripped, for use by the detectors.
     # The pattern deletes every character that is not a word character
     # (letter, digit, underscore), whitespace, '@' or '.'.
     # NOTE(review): this also deletes '+', '-', '(' and ')', which commonly
     # appear in phone numbers -- confirm that check_regex_patterns and the
     # model do not rely on those characters.
+    # Remove all punctuation except for @ and . which are often used in email addresses
+    return re.sub(r'[^\w\s@.]', '', text)
 class TextInput(BaseModel):
     text: str
 @app.post("/detect_contact", summary="Detect contact information in text")
 async def detect_contact(input: TextInput):
     try:
+        preprocessed_text = preprocess_text(input.text)
         # First, check with regex patterns
+        if check_regex_patterns(preprocessed_text):
             return {
                 "text": input.text,
                 "contact_probability": 1.0,
                 "method": "regex"
             }
+        # If no regex patterns match, use the model
+        is_contact = predict(preprocessed_text)
         return {
             "text": input.text,
             "contact_probability": 0.98,

test.py CHANGED Viewed

@@ -1,46 +1,68 @@
-import requests
 import json
 test_texts = [
-    "My email is [email protected]",
-    "Call me at (123) 456-7890",
-    "I live at 123 Main St, New York, NY 10001",
-    "Let's meet at the park tomorrow",
-    "My phone number is 555-1234",
-    "You can reach me on Skype: user123",
-    "Reach me at one two three dot four five six dot seven eight nine zero",
-    "My handle is at_symbol_user_123 on that bird app",
-    "Drop me a line: first_name (dot) last_name [at] big_search_engine (dot) com",
-    "Ring me: area code seven-seven-seven then half a dozen, a quartet, and two pairs",
-    "Find me on the gram: @cool_user_2023",
-    "I'm on that professional network, just search for John Doe from Acme Corp",
-    "Send a raven to Winterfell, care of the Stark family",
-    "Ping me on IRC: /msg CoolDude42",
-    "You can find me at one two three Fake Street, Anytown, State of Confusion",
-    "My digits are the first ten prime numbers in order",
-    "Contact info: tango alpha november golf oscar at yankee alpha hotel oscar oscar dot charlie oscar mike",
-    "Beep me at 555 (not a real area code) then 867-5309",
-    "I'm on that app where messages disappear, username: GhostWriter99",
-    "Reach out via electronic mail to 'surname underscore initial' at that fruit company dot com",
-    "Call me maybe? Area code is square root of 169, then 555-CHAT",
 ]
 url = "https://vidhitmakvana1-contact-sharing-recognizer-api.hf.space/detect_contact"
-for text in test_texts:
     payload = {"text": text}
     headers = {"Content-Type": "application/json"}
-    response = requests.post(url, data=json.dumps(payload), headers=headers)
-    if response.status_code == 200:
-        result = response.json()
-        print(f"Text: {result['text']}")
-        print(f"Contact Probability: {result['contact_probability']:.4f}")
-        print(f"Is Contact Info: {result['is_contact_info']}")
-        print("---")
-    else:
-        print(f"Error for text: {text}")
-        print(f"Status code: {response.status_code}")
-        print(f"Response: {response.text}")
-        print("---")

+import asyncio
+import aiohttp
 import json
+from tqdm.asyncio import tqdm
 test_texts = [
+    "You can reach me at triple eight, then the square of 7, followed by 2^10",
+    "Drop a line to first_name [underscore] last_name at that company with a fruit logo dot com",
+    "Find me on the platform where professionals connect: J. Doe, Senior Developer at TechCorp",
+    "Message me on that app with the ghost icon: @ShadowWhisperer2023",
+    "Contact via carrier pigeon: coordinates 40.7128° N, 74.0060° W",
+    "Ping me on the federated network: @[email protected]",
+    "My contact is the reverse of moc.elpmaxe@eodnhoj",
+    "Reach out using morse: -... -.-- -....- . -- .- .. .-..",
+    "Find me on the platform with blue checkmarks: @RealJohnDoe (parody)",
+    "Send a message to username 'l33tc0d3r' on that platform for developers",
+    "You can locate me at the place where the streets have no name, in the city of angels",
+    "My digits are the Fibonacci sequence up to 21, concatenated",
+    "Contact: foxtrot oscar oscar at bravo alpha romeo dot charlie oscar mike",
+    "Beep me at the number you get when you multiply 555 by 1.5, then add 867-5309",
+    "I'm on that app where you share shortvideos: @Dancing2023",
+    "Reach out via electronic mail to 'lastnamefirstinitial' at that search engine company dot com",
+    "Call me at the number you get when you solve this equation: 2x + 5 = 13, then 555-MATH",
+    "My handle on that photo-sharing app is @SunsetSnapper_42",
+    "You can find me at the intersection of Binary Boulevard and Algorithm Avenue",
+    "Contact info: romeo oscar charlie kilo echo tango mike alpha november at zulu uniform lima uniform dot india oscar",
 ]
 url = "https://vidhitmakvana1-contact-sharing-recognizer-api.hf.space/detect_contact"
+async def process_text(session, text):
     # POST one test sentence to the module-level `url` and return the
     # decoded JSON body, or None when the status is not 200.
     # `session` is the object whose .post() is used as an async context
     # manager (main() passes an aiohttp.ClientSession).
     payload = {"text": text}
     headers = {"Content-Type": "application/json"}
     # The payload is serialized by hand and sent as the raw request body.
+    async with session.post(url, data=json.dumps(payload), headers=headers) as response:
+        if response.status == 200:
+            result = await response.json()
+            return result
+        else:
             # Non-200: print the failing text, status and body for diagnosis,
             # then signal the failure to the caller with None.
+            print(f"Error for text: {text}")
+            print(f"Status code: {response.status}")
+            print(f"Response: {await response.text()}")
+            return None
+async def main():
     # Send every entry of test_texts to the API through one shared
     # aiohttp session, print each successful response, and print an
     # accuracy figure at the end.
+    async with aiohttp.ClientSession() as session:
         # One process_text coroutine per test sentence, awaited together
         # through tqdm's gather wrapper.
+        tasks = [process_text(session, text) for text in test_texts]
+        results = await tqdm.gather(*tasks)
+    correct_predictions = 0
     # The total includes failed requests (None results), so every failure
     # lowers the reported accuracy.
+    total_predictions = len(results)
     # Results are paired with test_texts by position; the loop body only
     # reads `result`.
+    for text, result in zip(test_texts, results):
+        if result:
+            print(f"Text: {result['text']}")
+            print(f"Contact Probability: {result['contact_probability']:.4f}")
+            print(f"Is Contact Info: {result['is_contact_info']}")
+            print("---")
+            # Assuming all texts in test_texts are actually contact information
+            if result['is_contact_info']:
+                correct_predictions += 1
     # NOTE(review): raises ZeroDivisionError if results is empty, i.e. if
     # test_texts is ever emptied.
+    accuracy = correct_predictions / total_predictions
+    print(f"Accuracy: {accuracy:.2f}")
+if __name__ == "__main__":
+    asyncio.run(main())