Tonic committed on
Commit
288d8d7
·
unverified ·
1 Parent(s): e33fed0

improve submission

Browse files
Files changed (2) hide show
  1. tasks/text.py +10 -24
  2. tasks/utils/emissions.py +36 -5
tasks/text.py CHANGED
@@ -1,5 +1,5 @@
1
  # tasks/text.py
2
- from fastapi import APIRouter
3
  from datetime import datetime
4
  from datasets import load_dataset
5
  from sklearn.metrics import accuracy_score
@@ -9,7 +9,7 @@ from torch.utils.data import Dataset, DataLoader
9
  import logging
10
 
11
  from .utils.evaluation import TextEvaluationRequest
12
- from .utils.emissions import tracker, clean_emissions_data, get_space_info
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.INFO)
@@ -70,27 +70,22 @@ async def evaluate_text(request: TextEvaluationRequest):
70
  }
71
 
72
  logger.info("Loading dataset")
73
- # Load dataset
74
  dataset = load_dataset(request.dataset_name)
75
-
76
- # Convert string labels to integers
77
  dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
78
-
79
- # Get test dataset
80
  test_dataset = dataset["test"]
81
 
82
  logger.info("Starting emissions tracking")
83
- # Start tracking emissions
84
- tracker.start()
85
 
86
  try:
87
- # Load model and tokenizer
88
  logger.info("Loading model and tokenizer")
89
  model_name = "Tonic/climate-guard-toxic-agent"
90
  tokenizer = AutoTokenizer.from_pretrained(model_name)
91
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(LABEL_MAPPING))
 
 
 
92
 
93
- # Prepare dataset
94
  logger.info("Preparing dataset")
95
  test_data = TextDataset(
96
  texts=test_dataset["text"],
@@ -100,7 +95,6 @@ async def evaluate_text(request: TextEvaluationRequest):
100
 
101
  test_loader = DataLoader(test_data, batch_size=16)
102
 
103
- # Model inference
104
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
105
  logger.info(f"Using device: {device}")
106
  model = model.to(device)
@@ -122,14 +116,11 @@ async def evaluate_text(request: TextEvaluationRequest):
122
  predictions.extend(predicted.cpu().numpy())
123
  ground_truth.extend(labels.cpu().numpy())
124
 
125
- # Calculate accuracy
126
  accuracy = accuracy_score(ground_truth, predictions)
127
  logger.info(f"Accuracy: {accuracy}")
128
 
129
- # Stop tracking emissions
130
- emissions_data = tracker.stop()
131
 
132
- # Prepare results
133
  results = {
134
  "username": username,
135
  "space_url": space_url,
@@ -138,7 +129,7 @@ async def evaluate_text(request: TextEvaluationRequest):
138
  "accuracy": float(accuracy),
139
  "energy_consumed_wh": float(emissions_data.energy_consumed * 1000),
140
  "emissions_gco2eq": float(emissions_data.emissions * 1000),
141
- "emissions_data": clean_emissions_data(emissions_data),
142
  "api_route": ROUTE,
143
  "dataset_config": {
144
  "dataset_name": request.dataset_name,
@@ -152,9 +143,4 @@ async def evaluate_text(request: TextEvaluationRequest):
152
 
153
  except Exception as e:
154
  logger.error(f"Error during evaluation: {str(e)}")
155
- tracker.stop()
156
- raise e
157
-
158
- except Exception as e:
159
- logger.error(f"Error in evaluate_text: {str(e)}")
160
- raise HTTPException(status_code=500, detail=str(e))
 
1
  # tasks/text.py
2
+ from fastapi import APIRouter, HTTPException
3
  from datetime import datetime
4
  from datasets import load_dataset
5
  from sklearn.metrics import accuracy_score
 
9
  import logging
10
 
11
  from .utils.evaluation import TextEvaluationRequest
12
+ from .utils.emissions import start_tracking, stop_tracking, clean_emissions_data, get_space_info
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.INFO)
 
70
  }
71
 
72
  logger.info("Loading dataset")
 
73
  dataset = load_dataset(request.dataset_name)
 
 
74
  dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
 
 
75
  test_dataset = dataset["test"]
76
 
77
  logger.info("Starting emissions tracking")
78
+ start_tracking()
 
79
 
80
  try:
 
81
  logger.info("Loading model and tokenizer")
82
  model_name = "Tonic/climate-guard-toxic-agent"
83
  tokenizer = AutoTokenizer.from_pretrained(model_name)
84
+ model = AutoModelForSequenceClassification.from_pretrained(
85
+ model_name,
86
+ num_labels=len(LABEL_MAPPING)
87
+ )
88
 
 
89
  logger.info("Preparing dataset")
90
  test_data = TextDataset(
91
  texts=test_dataset["text"],
 
95
 
96
  test_loader = DataLoader(test_data, batch_size=16)
97
 
 
98
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
99
  logger.info(f"Using device: {device}")
100
  model = model.to(device)
 
116
  predictions.extend(predicted.cpu().numpy())
117
  ground_truth.extend(labels.cpu().numpy())
118
 
 
119
  accuracy = accuracy_score(ground_truth, predictions)
120
  logger.info(f"Accuracy: {accuracy}")
121
 
122
+ emissions_data = stop_tracking()
 
123
 
 
124
  results = {
125
  "username": username,
126
  "space_url": space_url,
 
129
  "accuracy": float(accuracy),
130
  "energy_consumed_wh": float(emissions_data.energy_consumed * 1000),
131
  "emissions_gco2eq": float(emissions_data.emissions * 1000),
132
+ "emissions_data": clean_emissions_data(emissions_data.__dict__),
133
  "api_route": ROUTE,
134
  "dataset_config": {
135
  "dataset_name": request.dataset_name,
 
143
 
144
  except Exception as e:
145
  logger.error(f"Error during evaluation: {str(e)}")
146
+ stop_tracking()
 
 
 
 
 
tasks/utils/emissions.py CHANGED
@@ -1,8 +1,13 @@
 
1
  from codecarbon import EmissionsTracker
2
  import os
3
 
4
- # Initialize tracker
5
- tracker = EmissionsTracker(allow_multiple_runs=True)
 
 
 
 
6
 
7
  class EmissionsData:
8
  def __init__(self, energy_consumed: float, emissions: float):
@@ -11,8 +16,15 @@ class EmissionsData:
11
 
12
  def clean_emissions_data(emissions_data):
13
  """Remove unwanted fields from emissions data"""
14
- data_dict = emissions_data.__dict__
15
- fields_to_remove = ['timestamp', 'project_name', 'experiment_id', 'latitude', 'longitude']
 
 
 
 
 
 
 
16
  return {k: v for k, v in data_dict.items() if k not in fields_to_remove}
17
 
18
  def get_space_info():
@@ -25,4 +37,23 @@ def get_space_info():
25
  return username, space_url
26
  except Exception as e:
27
  print(f"Error getting space info: {e}")
28
- return "local-user", "local-development"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tasks/utils/emissions.py
2
  from codecarbon import EmissionsTracker
3
  import os
4
 
5
+ # Initialize tracker with correct parameters
6
+ tracker = EmissionsTracker(
7
+ project_name="frugal-ai-challenge",
8
+ output_dir=".",
9
+ log_level='warning'
10
+ )
11
 
12
  class EmissionsData:
13
  def __init__(self, energy_consumed: float, emissions: float):
 
16
 
17
  def clean_emissions_data(emissions_data):
18
  """Remove unwanted fields from emissions data"""
19
+ if isinstance(emissions_data, dict):
20
+ data_dict = emissions_data
21
+ else:
22
+ data_dict = emissions_data.__dict__
23
+
24
+ fields_to_remove = [
25
+ 'timestamp', 'project_name', 'experiment_id',
26
+ 'latitude', 'longitude', '_start_time', '_end_time'
27
+ ]
28
  return {k: v for k, v in data_dict.items() if k not in fields_to_remove}
29
 
30
  def get_space_info():
 
37
  return username, space_url
38
  except Exception as e:
39
  print(f"Error getting space info: {e}")
40
+ return "local-user", "local-development"
41
+
42
+ def start_tracking():
43
+ """Start tracking emissions"""
44
+ try:
45
+ tracker.start()
46
+ except Exception as e:
47
+ print(f"Error starting emissions tracking: {e}")
48
+
49
+ def stop_tracking():
50
+ """Stop tracking emissions and return data"""
51
+ try:
52
+ emissions = tracker.stop()
53
+ return EmissionsData(
54
+ energy_consumed=emissions.energy_consumed,
55
+ emissions=emissions.emissions
56
+ )
57
+ except Exception as e:
58
+ print(f"Error stopping emissions tracking: {e}")
59
+ return EmissionsData(energy_consumed=0.0, emissions=0.0)