parth parekh committed
Commit ddaad57 · Parent(s): 838063e

added batch processing endpoint

Files changed (4)
  1. __pycache__/test.cpython-312.pyc +0 -0
  2. app.py +44 -6
  3. load_test.py +67 -0
  4. predictor.py +18 -0
__pycache__/test.cpython-312.pyc ADDED
Binary file (11.6 kB)
 
app.py CHANGED
@@ -3,11 +3,11 @@ from pydantic import BaseModel
 import torch
 from torch.nn.functional import softmax
 import re
-from predictor import predict
+from predictor import predict, batch_predict  # Assuming batch_predict is in predictor module
 
 app = FastAPI(
     title="Contact Information Detection API",
-    description="API for detecting contact information in text great thanks to xxparthparekhxx/ContactShieldAI for the model",
+    description="API for detecting contact information in text, great thanks to xxparthparekhxx/ContactShieldAI for the model",
     version="1.0.0",
     docs_url="/"
 )
@@ -19,6 +19,8 @@ def preprocess_text(text):
 class TextInput(BaseModel):
     text: str
 
+class BatchTextInput(BaseModel):
+    texts: list[str]
 
 def check_regex_patterns(text):
     patterns = [
@@ -34,8 +36,6 @@ def check_regex_patterns(text):
         return True
     return False
 
-
-
 @app.post("/detect_contact", summary="Detect contact information in text")
 async def detect_contact(input: TextInput):
     try:
@@ -45,7 +45,6 @@ async def detect_contact(input: TextInput):
         if check_regex_patterns(preprocessed_text):
             return {
                 "text": input.text,
-                "contact_probability": 1.0,
                 "is_contact_info": True,
                 "method": "regex"
             }
@@ -54,9 +53,48 @@ async def detect_contact(input: TextInput):
         is_contact = predict(preprocessed_text)
         return {
             "text": input.text,
-            "contact_probability": 0.98,
             "is_contact_info": is_contact == 1,
             "method": "model"
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/batch_detect_contact", summary="Detect contact information in batch of texts")
+async def batch_detect_contact(inputs: BatchTextInput):
+    try:
+        # Preprocess all texts
+        preprocessed_texts = [preprocess_text(text) for text in inputs.texts]
+
+        # First, use regex to check patterns
+        regex_results = [check_regex_patterns(text) for text in preprocessed_texts]
+
+        # For texts where regex doesn't detect anything, use the model
+        texts_for_model = [text for text, regex_match in zip(preprocessed_texts, regex_results) if not regex_match]
+        if texts_for_model:
+            model_results = batch_predict(texts_for_model)
+        else:
+            model_results = []
+
+        # Prepare final results
+        results = []
+        model_idx = 0
+        for i, text in enumerate(preprocessed_texts):
+            if regex_results[i]:
+                results.append({
+                    "text": inputs.texts[i],
+                    "is_contact_info": True,
+                    "method": "regex"
+                })
+            else:
+                is_contact = model_results[model_idx]
+                results.append({
+                    "text": inputs.texts[i],
+                    "is_contact_info": is_contact == 1,
+                    "method": "model"
+                })
+                model_idx += 1
+
+        return results
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
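
For context, a client call against the new endpoint might look like the sketch below. This is a hypothetical example, not part of the commit; the Space URL is borrowed from load_test.py, and the response shape follows the handler above.

import requests

# Hypothetical call to the new batch endpoint (URL assumed from load_test.py)
response = requests.post(
    "https://vidhitmakvana1-contact-sharing-recognizer-api.hf.space/batch_detect_contact",
    json={"texts": ["call me at 555-123-4567", "nice weather today"]},
)
# Each entry reports is_contact_info and whether regex or the model decided
for item in response.json():
    print(item["is_contact_info"], item["method"])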
load_test.py ADDED
@@ -0,0 +1,67 @@
+import asyncio
+import aiohttp
+import json
+from tqdm.asyncio import tqdm
+import time
+from test import test_texts
+
+url = "https://vidhitmakvana1-contact-sharing-recognizer-api.hf.space/detect_contact"
+concurrent_requests = 2
+
+async def process_text(session, text, semaphore):
+    payload = {"text": text}
+    headers = {"Content-Type": "application/json"}
+
+    async with semaphore:
+        start_time = time.time()
+        while True:
+            async with session.post(url, data=json.dumps(payload), headers=headers) as response:
+                if response.status == 200:
+                    result = await response.json()
+                    end_time = time.time()
+                    result['response_time'] = end_time - start_time
+                    return result
+                elif response.status == 429:
+                    print("Rate limit exceeded. Waiting for 60 seconds before retrying...")
+                    await asyncio.sleep(60)
+                else:
+                    print(f"Error for text: {text}")
+                    print(f"Status code: {response.status}")
+                    print(f"Response: {await response.text()}")
+                    return None
+
+async def main():
+    semaphore = asyncio.Semaphore(concurrent_requests)
+    async with aiohttp.ClientSession() as session:
+        tasks = [process_text(session, text, semaphore) for text in test_texts * 36]
+        results = await tqdm.gather(*tasks)
+
+    correct_predictions = 0
+    total_predictions = len(results)
+    total_response_time = 0
+
+    for result in results:
+        if result:
+            print(f"Text: {result['text']}")
+            print(f"Contact Probability: {result.get('contact_probability', 'n/a')}")  # field removed from the API response in this commit
+            print(f"Is Contact Info: {result['is_contact_info']}")
+            print(f"Response Time: {result['response_time']:.4f} seconds")
+            print("---")
+
+            if result['is_contact_info']:
+                correct_predictions += 1
+
+            total_response_time += result['response_time']
+
+    accuracy = correct_predictions / total_predictions
+    average_response_time = total_response_time / total_predictions
+    print(f"Accuracy: {accuracy:.2f}")
+    print(f"Average Response Time: {average_response_time:.4f} seconds")
+
+if __name__ == "__main__":
+    while True:
+        start_time = time.time()
+        asyncio.run(main())
+        end_time = time.time()
+        total_time = end_time - start_time
+        print(f"\nTotal execution time: {total_time:.2f} seconds")
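
Note that load_test.py still drives the single-text /detect_contact route. A variant exercising the new batch endpoint could be appended along these lines; this is a sketch reusing the script's imports and session, and process_batch is a hypothetical helper, not part of the commit.

batch_url = "https://vidhitmakvana1-contact-sharing-recognizer-api.hf.space/batch_detect_contact"

async def process_batch(session, texts):
    # One request carries the whole batch, matching the BatchTextInput schema
    async with session.post(batch_url, json={"texts": texts}) as response:
        if response.status == 200:
            return await response.json()
        print(f"Batch request failed with status {response.status}")
        return None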
predictor.py CHANGED
@@ -104,6 +104,24 @@ def predict(text):
     # Return predicted class
     return torch.argmax(outputs, dim=1).item()
 
+def batch_predict(texts):
+    with torch.inference_mode():  # Use inference mode for performance
+        # Tokenize and convert each text to a tensor of token ids
+        inputs = [torch.tensor(text_pipeline(text)) for text in texts]
+
+        # Pad to a common length, at least max(FILTER_SIZES); padding only to the filter size would break torch.stack for longer, unequal sequences
+        max_len = max(max(FILTER_SIZES), max(len(seq) for seq in inputs))
+        padded_inputs = torch.stack([
+            torch.cat([seq, torch.zeros(max_len - len(seq), dtype=torch.long)]) if len(seq) < max_len else seq
+            for seq in inputs
+        ]).to(device)
+
+        # Pass the batch through the scripted model
+        outputs = scripted_model(padded_inputs)
+
+        # Return predicted classes for each sentence
+        predictions = torch.argmax(outputs, dim=1).cpu().numpy()
+        return predictions
 
 # Test the sentences
 for i, sentence in enumerate(test_sentences, 1):
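
As a quick sanity check, the batched path can be compared against the existing single-text path using the same test_sentences the module already loops over. A sketch, assuming it runs inside predictor.py where predict, batch_predict, and test_sentences are in scope:

# Sketch: flag any disagreement between the single and batched paths
batch_preds = batch_predict(test_sentences)
for sentence, batched in zip(test_sentences, batch_preds):
    single = predict(sentence)
    if single != batched:
        print(f"Mismatch for {sentence!r}: single={single}, batch={batched}")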