parth parekh committed
Commit: 838063e · Parent(s): e43c18e
testing out torch jit
predictor.py (+18 -10)
predictor.py
CHANGED
@@ -82,21 +82,29 @@ test_sentences = [
     "Lets do '42069' tonight it will be really fun what do you say ?"
 ]
 
+# JIT Script the model for faster inference
+scripted_model = torch.jit.script(model)
 
-# …
+# Preallocate padding tensor to avoid repeated memory allocation
+MAX_LEN = max(FILTER_SIZES)
+padding_tensor = torch.zeros(1, MAX_LEN, dtype=torch.long).to(device)
+
+# Prediction function using JIT and inference optimizations
 def predict(text):
-    with torch.no_grad():
-        inputs = torch.tensor([text_pipeline(text)])
-        …
-        …
-        …
-        inputs = torch.cat([inputs, …
-        …
-        …
+    with torch.inference_mode():  # Use inference mode instead of no_grad
+        inputs = torch.tensor([text_pipeline(text)]).to(device)
+
+        # Perform padding if necessary
+        if inputs.size(1) < MAX_LEN:
+            inputs = torch.cat([inputs, padding_tensor[:, :MAX_LEN - inputs.size(1)]], dim=1)
+
+        # Pass inputs through the scripted model
+        outputs = scripted_model(inputs)
+
+        # Return predicted class
         return torch.argmax(outputs, dim=1).item()
 
 
-
 # Test the sentences
 for i, sentence in enumerate(test_sentences, 1):
    prediction = predict(sentence)
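
For reference, here is a minimal, self-contained sketch of the pattern this commit applies: compile the model once with torch.jit.script, preallocate the padding tensor, and run predictions under torch.inference_mode(). The ToyClassifier, vocabulary size, and token ids below are hypothetical stand-ins for the real model, FILTER_SIZES, device, and text_pipeline that predictor.py defines elsewhere.

import torch
import torch.nn as nn

# Hypothetical stand-ins for objects defined elsewhere in predictor.py
FILTER_SIZES = [3, 4, 5]
device = torch.device("cpu")

class ToyClassifier(nn.Module):
    def __init__(self, vocab_size: int = 100, embed_dim: int = 16, num_classes: int = 2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Mean-pool the token embeddings, then classify
        return self.fc(self.embedding(x).mean(dim=1))

model = ToyClassifier().to(device).eval()

# Compile once up front; TorchScript removes per-call Python overhead
scripted_model = torch.jit.script(model)

# Allocate the padding tensor a single time instead of on every prediction
MAX_LEN = max(FILTER_SIZES)
padding_tensor = torch.zeros(1, MAX_LEN, dtype=torch.long).to(device)

def predict(token_ids):
    # inference_mode disables autograd bookkeeping more aggressively than no_grad
    with torch.inference_mode():
        inputs = torch.tensor([token_ids]).to(device)
        if inputs.size(1) < MAX_LEN:
            # Right-pad with a slice of the preallocated zeros
            inputs = torch.cat([inputs, padding_tensor[:, :MAX_LEN - inputs.size(1)]], dim=1)
        outputs = scripted_model(inputs)
        return torch.argmax(outputs, dim=1).item()

print(predict([5, 9, 23]))  # prints a class index, e.g. 0 or 1

The name FILTER_SIZES suggests a CNN text classifier whose widest convolution filter needs at least MAX_LEN tokens, which is presumably why short inputs are padded up to that length before the forward pass.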