Tonic committed
Commit c339ecd · unverified · 1 Parent(s): 2c8310a

use reference code

Files changed (1):
  1. tasks/text.py +10 -15
tasks/text.py CHANGED
@@ -12,7 +12,7 @@ from .utils.emissions import tracker, clean_emissions_data, get_space_info
 
 router = APIRouter()
 
-DESCRIPTION = "ModernBERT for Climate Disinformation Detection"
+DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
 ROUTE = "/text"
 
 @router.post(ROUTE, tags=["Text Task"],
@@ -57,35 +57,30 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Set device
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+    # Model and tokenizer paths
+    path_model = 'Tonic/climate-guard-toxic-agent'
+    path_tokenizer = "answerdotai/ModernBERT-base"
+
     # Initialize tokenizer
-    tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+    tokenizer = AutoTokenizer.from_pretrained(path_tokenizer)
 
-    # Initialize model with configuration that avoids bias parameter
-    model = AutoModelForSequenceClassification.from_pretrained(
-        "Tonic/climate-guard-toxic-agent",
-        trust_remote_code=True,
-        num_labels=8,
-        problem_type="single_label_classification",
-        ignore_mismatched_sizes=True,
-        torch_dtype=torch.float16  # Use float16 for efficiency
-    ).to(device)
+    # Initialize model
+    model = AutoModelForSequenceClassification.from_pretrained(path_model).half().to(device)
 
     # Set model to evaluation mode
     model.eval()
 
-    # Tokenize function
+    # Preprocess function
     def preprocess_function(examples):
         return tokenizer(
             examples["quote"],
             truncation=True,
-            padding=True,
-            max_length=512,
             return_tensors=None
         )
 
     # Tokenize dataset
     tokenized_test = test_dataset.map(
-        preprocess_function,
+        preprocess_function,
         batched=True,
         remove_columns=test_dataset.column_names
     )
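
For context, here is a minimal standalone sketch of the code path this commit lands, assuming the torch, transformers, and datasets packages. The model and tokenizer IDs, the .half().to(device) cast, and the preprocess_function body are taken from the diff above; the two-row Dataset and the final print are hypothetical stand-ins for the real test split handled inside evaluate_text.

import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Paths as introduced by this commit.
path_model = "Tonic/climate-guard-toxic-agent"
path_tokenizer = "answerdotai/ModernBERT-base"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(path_tokenizer)

# .half() casts the loaded weights to float16, standing in for the removed
# torch_dtype=torch.float16 argument; this effectively assumes a CUDA device,
# since many float16 ops are slow or unsupported on CPU.
model = AutoModelForSequenceClassification.from_pretrained(path_model).half().to(device)
model.eval()

def preprocess_function(examples):
    # No padding or max_length, mirroring the simplified call in the diff;
    # truncation falls back to the model's maximum input length.
    return tokenizer(
        examples["quote"],
        truncation=True,
        return_tensors=None
    )

# Hypothetical two-row stand-in for the real test split.
test_dataset = Dataset.from_dict({"quote": ["Example claim one.", "Example claim two."]})
tokenized_test = test_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=test_dataset.column_names
)
print(tokenized_test)

One consequence of the simplified from_pretrained call is that trust_remote_code, num_labels, problem_type, and ignore_mismatched_sizes are no longer passed explicitly; the sketch assumes the fine-tuned checkpoint's own config already carries the classification-head settings.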