nishan-chatterjee committed
Commit 468c17d · 1 Parent(s): 933881f
additional comments

inference.py +18 -2
inference.py CHANGED

@@ -3,6 +3,7 @@ import numpy as np
 import networkx as nx
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
+# Function to make logits consistent based on the hierarchy matrix R
 def _make_logits_consistent(x, R):
     c_out = x.unsqueeze(1) + 10
     c_out = c_out.expand(len(x), R.shape[1], R.shape[1])
@@ -10,8 +11,9 @@ def _make_logits_consistent(x, R):
     final_out, _ = torch.max(R_batch * c_out, dim=2)
     return final_out - 10
 
+# Function to initialize the model, tokenizer, and hierarchy matrix
 def initialize_model():
-    model_dir = "."
+    # Define the hierarchy graph
     G = nx.DiGraph()
     edges = [
         ("ROOT", "Logos"),
@@ -29,12 +31,17 @@ def initialize_model():
     ]
     G.add_edges_from(edges)
 
+    # The model and tokenizer are saved in the current directory
+    model_dir = "."
+    # Load the model and tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_dir)
     model = AutoModelForSequenceClassification.from_pretrained(model_dir)
 
+    # Set device to GPU if available
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
 
+    # Create the hierarchy matrix R based on the graph structure
     A = nx.to_numpy_array(G).transpose()
     R = np.zeros(A.shape)
     np.fill_diagonal(R, 1)
@@ -47,7 +54,9 @@ def initialize_model():
 
     return tokenizer, model, R, G, device
 
+# Function to predict persuasion labels for a given text
 def predict_persuasion_labels(text, tokenizer, model, R, G, device):
+    # Tokenize and encode the input text
     encoding = tokenizer.encode_plus(
         text,
         add_special_tokens=True,
@@ -58,17 +67,23 @@ def predict_persuasion_labels(text, tokenizer, model, R, G, device):
         return_attention_mask=True,
         return_tensors="pt",
     )
-
+
+    # Forward pass through the model
     with torch.no_grad():
         outputs = model(
             input_ids=encoding["input_ids"].to(device),
             attention_mask=encoding["attention_mask"].to(device),
         )
+
+    # Make logits consistent based on the hierarchy matrix R
     logits = _make_logits_consistent(outputs.logits, R)
     logits[:, 0] = -1.0
     logits = logits > 0.0
+
+    # Get the complete predicted hierarchy of labels
     complete_predicted_hierarchy = np.array(G.nodes)[logits[0].cpu().nonzero()].flatten().tolist()
 
+    # Get the child-only labels (labels without any successors)
     child_only_labels = []
     for label in complete_predicted_hierarchy:
         if not list(G.successors(label)):
@@ -78,6 +93,7 @@ def predict_persuasion_labels(text, tokenizer, model, R, G, device):
 
 tokenizer, model, R, G, device = initialize_model()
 
+# Main inference function
 def inference(text):
     return predict_persuasion_labels(text, tokenizer, model, R, G, device)
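Note on the `_make_logits_consistent` hunks above: this is the max-constraint used in coherent hierarchical multi-label classification (as in C-HMCNN). Each label's logit is replaced by the maximum over itself and every label that R marks as its descendant, so an ancestor can never score below its children; the +10/-10 shift keeps the compared values positive so that the zeros in R never win the max. A minimal runnable sketch follows; the three-label hierarchy, the "Leaf" label, and the R_batch line are illustrative assumptions (only the ("ROOT", "Logos") edge is visible above, and the line defining R_batch falls outside the hunks):

import numpy as np
import torch

# Hypothetical three-label slice of the hierarchy: ROOT -> Logos -> Leaf
# (indices 0, 1, 2). R[i, j] = 1 when j is i itself or a descendant of i.
R = np.array([
    [1.0, 1.0, 1.0],  # ROOT: every label sits below it
    [0.0, 1.0, 1.0],  # Logos: itself and Leaf
    [0.0, 0.0, 1.0],  # Leaf: itself only
])

def make_logits_consistent(x, R):
    # Same max-constraint as _make_logits_consistent in inference.py; the
    # R_batch construction here is an assumption, since that line is not shown.
    n = R.shape[1]
    c_out = (x.unsqueeze(1) + 10).expand(len(x), n, n)
    R_batch = torch.tensor(R, dtype=x.dtype).expand(len(x), n, n)
    final_out, _ = torch.max(R_batch * c_out, dim=2)
    return final_out - 10

x = torch.tensor([[-5.0, -2.0, 3.0]])  # raw logits: only the leaf is positive
print(make_logits_consistent(x, R))   # tensor([[3., 3., 3.]]): ancestors pulled up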
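The hunk above initializes R with ones on the diagonal but elides the lines that fill in the rest (old lines 41-46 / new lines 48-53). For orientation, here is a sketch of one standard way to complete such a reachability matrix, consistent with how `_make_logits_consistent` consumes R but not necessarily the repo's exact code:

import networkx as nx
import numpy as np

def build_hierarchy_matrix(G):
    # R[i, j] = 1 when node j is node i itself or reachable below it in G.
    nodes = list(G.nodes)
    index = {n: i for i, n in enumerate(nodes)}
    R = np.zeros((len(nodes), len(nodes)))
    np.fill_diagonal(R, 1)
    for n in nodes:
        for d in nx.descendants(G, n):
            R[index[n], index[d]] = 1
    return R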
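Side note for a future cleanup (not part of this commit): `tokenizer.encode_plus` is deprecated in recent transformers releases; calling the tokenizer directly returns the same BatchEncoding. The hunk elides some of the keyword arguments (old lines 54-57), so only the visible ones are carried over here:

# Equivalent modern form of the encode_plus call above.
encoding = tokenizer(
    text,
    add_special_tokens=True,
    return_attention_mask=True,
    return_tensors="pt",
)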
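Two small reading aids for the prediction hunks: `logits[:, 0] = -1.0` forces the ROOT node below the decision threshold so it is never emitted as a label, and `logits > 0.0` on the constrained logits is equivalent to thresholding sigmoid probabilities at 0.5. Hypothetical usage of the module's entry point (the example sentence is illustrative; the exact return value depends on code elided from the hunks and on the trained checkpoint):

# Hypothetical call into inference.py's entry point.
labels = inference("Everyone knows this is the only sensible choice.")
print(labels)  # persuasion-technique labels predicted for the input text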