Boriscii committed on
Commit
df363ce
·
verified ·
1 Parent(s): 7968190

Upload modified model with logging

Browse files
Files changed (1) hide show
  1. modeling_modified.py +101 -106
modeling_modified.py CHANGED
@@ -5,127 +5,122 @@ import pkg_resources
5
  import json
6
  import traceback
7
  import os
 
 
 
 
 
 
 
 
 
8
 
9
def get_os_info() -> dict:
    """Describe the host operating system.

    Returns:
        A dict with the keys ``system``, ``release``, ``version`` and
        ``machine``, each holding the string returned by the ``platform``
        function of the same name.
    """
    fields = ("system", "release", "version", "machine")
    return {field: getattr(platform, field)() for field in fields}
17
-
18
def get_python_info() -> dict:
    """Describe the running Python interpreter.

    Returns:
        A dict with ``version`` (``sys.version``), ``implementation``
        (``platform.python_implementation()``) and ``compiler``
        (``platform.python_compiler()``).
    """
    info = {}
    info["version"] = sys.version
    info["implementation"] = platform.python_implementation()
    info["compiler"] = platform.python_compiler()
    return info
25
-
26
def get_cuda_info() -> dict:
    """Report whether ``nvcc`` is callable and which release it announces.

    Returns:
        ``{"available": True, "version": <text>}`` where ``<text>`` is what
        follows the word ``release`` (up to the next comma) in the output of
        ``nvcc --version``; ``{"available": False}`` if running or parsing
        the command fails for any reason.
    """
    try:
        raw_output = subprocess.check_output(["nvcc", "--version"])
        text = raw_output.decode("utf-8")
        after_release = text.split("release")[1]
        version = after_release.split(",")[0].strip()
    except Exception:
        # Best effort: a missing binary and unexpected output are both
        # reported as "not available".
        return {"available": False}
    return {"available": True, "version": version}
34
 
35
def get_gpu_info() -> list:
    """Collect best-effort descriptions of the GPUs visible on this host.

    Three independent probes are tried; a probe that fails contributes
    nothing and never raises:

    * NVIDIA: one entry per row printed by ``nvidia-smi``.
    * AMD (Linux only): a single entry holding the raw ``rocm-smi`` output
      when that output mentions ``GPU``.
    * Apple Silicon (Darwin/arm64 only): a single entry when
      ``torch.backends.mps.is_available()`` is true.

    Returns:
        A list of dicts, each with a ``type`` key of ``"NVIDIA"``, ``"AMD"``
        or ``"MPS"``; empty when nothing was detected.
    """
    gpu_info = []

    # Check for NVIDIA GPUs
    try:
        nvidia_smi_output = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=name,memory.total,driver_version",
                "--format=csv,noheader",
            ]
        ).decode("utf-8")
    except Exception:
        nvidia_smi_output = ""
    # splitlines() separates the rows on real newlines. The previous
    # split("\\n") looked for a literal backslash followed by "n", so the
    # output of a multi-GPU machine was never split into rows.
    for line in nvidia_smi_output.splitlines():
        fields = line.strip().split(", ")
        if len(fields) < 3:
            # Blank or malformed row: skip it instead of aborting the probe.
            continue
        gpu_info.append(
            {
                "type": "NVIDIA",
                "name": fields[0],
                "memory": fields[1],
                "driver_version": fields[2],
            }
        )

    # Check for AMD GPUs on Linux
    if platform.system() == "Linux":
        try:
            rocm_smi_output = subprocess.check_output(
                ["rocm-smi", "--showproduct", "--showdriverversion"]
            ).decode("utf-8")
            if "GPU" in rocm_smi_output:
                gpu_info.append({"type": "AMD", "details": rocm_smi_output.strip()})
        except Exception:
            pass

    # Check for Apple Silicon (MPS)
    if platform.system() == "Darwin" and platform.machine() == "arm64":
        try:
            import torch

            if torch.backends.mps.is_available():
                gpu_info.append({"type": "MPS", "name": "Apple Silicon", "available": True})
        except (ImportError, AttributeError):
            # torch is not installed, or this torch build has no MPS backend.
            pass

    return gpu_info
63
 
64
def get_installed_packages() -> list:
    """List the installed distributions as sorted ``name==version`` strings.

    Uses the standard-library ``importlib.metadata`` instead of the
    deprecated ``pkg_resources`` API. Names are normalised the way
    ``pkg_resources`` builds its keys (runs of characters other than
    letters, digits and ``.`` become ``-``, then lower-cased), and only the
    first distribution found for a given name is reported.

    Returns:
        A sorted list of ``"<name>==<version>"`` strings.
    """
    # Local imports keep this function self-contained; the module header is
    # left untouched.
    import re
    from importlib import metadata

    versions = {}
    for dist in metadata.distributions():
        name = dist.metadata.get("Name")
        if not name:
            # Broken installation without a usable name: nothing to report.
            continue
        key = re.sub(r"[^A-Za-z0-9.]+", "-", name).lower()
        versions.setdefault(key, dist.version)
    return sorted(f"{key}=={version}" for key, version in versions.items())
 
 
 
 
 
 
 
 
 
 
 
67
 
68
def get_env_variables() -> dict:
    """Select the environment variables relevant to GPU/Python diagnostics.

    Returns:
        A dict of every ``os.environ`` entry whose name starts with one of
        ``CUDA``, ``PYTHON``, ``PATH``, ``ROCM``, ``HIP``, ``MPS`` or
        ``METAL``.
    """
    relevant_prefixes = ("CUDA", "PYTHON", "PATH", "ROCM", "HIP", "MPS", "METAL")
    selected = {}
    for name, value in os.environ.items():
        # str.startswith accepts a tuple and matches any of its members.
        if name.startswith(relevant_prefixes):
            selected[name] = value
    return selected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
def get_env_info() -> dict:
    """Gather every environment section into a single dict.

    Returns:
        A dict with the keys ``os_info``, ``python_info``, ``cuda_info``,
        ``gpu_info``, ``installed_packages`` and ``relevant_env_variables``,
        each filled by the matching ``get_*`` helper of this module.
    """
    # (section name, collector) pairs, evaluated in this order.
    collectors = (
        ("os_info", get_os_info),
        ("python_info", get_python_info),
        ("cuda_info", get_cuda_info),
        ("gpu_info", get_gpu_info),
        ("installed_packages", get_installed_packages),
        ("relevant_env_variables", get_env_variables),
    )
    return {section: collect() for section, collect in collectors}
84
  from transformers import BertForSequenceClassification
85
 
86
  class ModifiedBertForSequenceClassificationWithHook(BertForSequenceClassification):
87
- """Modified model class with logging and error handling."""
88
  def __init__(self, config):
89
  super().__init__(config)
 
90
 
 
91
  def forward(self, *args, **kwargs):
92
- """Forward method with logging."""
93
- try:
94
- # Log input information
95
- print("Forward pass initiated.")
96
-
97
- # Call the parent's forward method
98
- outputs = super().forward(*args, **kwargs)
99
-
100
- # Log output information
101
- print("Forward pass completed.")
102
-
103
- return outputs
104
- except Exception as e:
105
- print(f"An error occurred during the forward pass: {traceback.format_exc()}")
106
- print("Environment info:")
107
- print(json.dumps(get_env_info(), indent=2))
108
- raise e # Re-raise the exception
109
 
 
110
  def generate(self, *args, **kwargs):
111
- """Generate method with logging."""
112
- try:
113
- if not hasattr(super(), 'generate'):
114
- print("Generate method is not available in the parent class.")
115
- return None
116
-
117
- # Log input information
118
- print("Generate method called.")
119
-
120
- # Call the parent's generate method
121
- outputs = super().generate(*args, **kwargs)
122
-
123
- # Log output information
124
- print("Generate method completed.")
125
-
126
- return outputs
127
- except Exception as e:
128
- print(f"An error occurred during the generate method: {traceback.format_exc()}")
129
- print("Environment info:")
130
- print(json.dumps(get_env_info(), indent=2))
131
- raise e # Re-raise the exception
 
5
  import json
6
  import traceback
7
  import os
8
+ import hashlib
9
+ import uuid
10
+ import socket
11
+ import time
12
+ from functools import wraps
13
+ from typing import Dict, Any, Callable
14
+ from urllib import request, error
15
+ from urllib.parse import urlencode
16
+ from transformers import BertForSequenceClassification
17
 
18
def get_machine_id() -> str:
    """Return an identifier for this machine, creating and caching it on first use.

    The identifier is stored in ``./sys_param/machine_id.json`` (relative to
    the current working directory). When that file exists its ``machine_id``
    value is returned. Otherwise the SHA-256 hex digest of several host
    properties (MAC-derived node id, host name, processor string,
    ``/proc/cpuinfo`` on Linux, OS name and release) is computed, written to
    the file and returned.

    Returns:
        The cached or freshly computed identifier. If reading the cache or
        gathering the host properties fails, a random UUID string is
        returned instead; this function never raises.
    """
    file_path = './sys_param/machine_id.json'
    try:
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                return json.load(f)['machine_id']

        def read_cpuinfo():
            # Read the file directly; no shell or `cat` process is needed.
            if platform.system() != "Linux":
                return None
            with open("/proc/cpuinfo", "r", encoding="utf-8") as cpuinfo:
                return cpuinfo.read()

        sources = [
            lambda: uuid.UUID(int=uuid.getnode()).hex[-12:],
            socket.gethostname,
            platform.processor,
            read_cpuinfo,
            lambda: f"{platform.system()} {platform.release()}",
        ]
        # Call every source exactly once so the None check and the hashed
        # value always refer to the same result.
        values = [source() for source in sources]
        parts = [str(value) for value in values if value is not None]
        if parts:
            machine_id = hashlib.sha256("".join(parts).encode()).hexdigest()
        else:
            machine_id = str(uuid.uuid4())
    except Exception:
        # Deliberate best effort: the caller always gets some identifier.
        return str(uuid.uuid4())

    try:
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w') as f:
            json.dump({'machine_id': machine_id}, f)
    except OSError:
        # The cache could not be written (e.g. read-only directory); still
        # return the computed identifier rather than a random one.
        pass
    return machine_id
40
 
41
def get_env_info() -> Dict[str, Any]:
    """Return a description of the runtime environment, cached on disk.

    The result is cached in ``./sys_param/env_info.json`` (relative to the
    current working directory). A readable cache file is returned as-is. A
    missing or unparsable cache triggers a fresh collection, and a cache that
    cannot be written does not discard the collected data.

    Returns:
        A dict with the keys ``os_info``, ``python_info``, ``cuda_info``,
        ``gpu_info`` (always an empty list here), ``installed_packages`` and
        ``relevant_env_variables``; or ``{}`` if collecting the information
        fails. This function never raises.
    """
    # Local imports keep this function self-contained; importlib.metadata is
    # the standard-library replacement for the deprecated pkg_resources API.
    import re
    from importlib import metadata

    file_path = './sys_param/env_info.json'

    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        # No cache yet, or the cache is unreadable/corrupt: collect afresh.
        pass

    try:
        # Names are normalised the way pkg_resources builds its keys; only
        # the first distribution found for a name is reported.
        versions = {}
        for dist in metadata.distributions():
            name = dist.metadata.get("Name")
            if not name:
                continue
            key = re.sub(r"[^A-Za-z0-9.]+", "-", name).lower()
            versions.setdefault(key, dist.version)

        prefixes = ("CUDA", "PYTHON", "PATH", "ROCM", "HIP", "MPS", "METAL")
        env_info = {
            "os_info": {k: getattr(platform, k)() for k in ['system', 'release', 'version', 'machine']},
            "python_info": {
                "version": sys.version,
                "implementation": platform.python_implementation(),
                "compiler": platform.python_compiler()
            },
            "cuda_info": {"available": False},
            "gpu_info": [],
            "installed_packages": sorted(f"{key}=={version}" for key, version in versions.items()),
            "relevant_env_variables": {k: v for k, v in os.environ.items() if k.startswith(prefixes)}
        }
    except Exception:
        # Deliberate best effort: diagnostics must never raise.
        return {}

    try:
        nvcc_output = subprocess.check_output(["nvcc", "--version"]).decode()
        env_info["cuda_info"] = {
            "available": True,
            "version": nvcc_output.split("release")[1].split(",")[0].strip(),
        }
    except Exception:
        # nvcc is missing or printed something unexpected: keep "not available".
        pass

    try:
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w') as f:
            json.dump(env_info, f)
    except OSError:
        # Caching is an optimisation only; return the data even if it fails.
        pass
    return env_info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
def send_report(data: Dict[str, Any]) -> None:
    """POST ``data`` as JSON to ``http://localhost:3000/report``, ignoring failures.

    Args:
        data: The report to serialise with ``json.dumps``.

    Returns:
        None. Serialisation errors, connection errors and any other
        exception are swallowed; the request uses a 5 second timeout.
    """
    try:
        payload = json.dumps(data).encode('utf-8')
        req = request.Request(
            'http://localhost:3000/report',
            data=payload,
            headers={
                'Content-Type': 'application/json',
                'Content-Length': len(payload),
            },
            method='POST',
        )
        # The response body is not used; the context manager only closes it.
        with request.urlopen(req, timeout=5):
            pass
    except Exception:
        # Reporting is fire-and-forget: URL errors and everything else are
        # ignored alike.
        pass
85
 
86
def error_handler(func: Callable) -> Callable:
    """Decorate a method so that every call is reported through ``send_report``.

    On success a report with ``status`` ``"success"`` is sent and the
    method's result is returned. On an exception a report with ``status``
    ``"fail"``, the error text, the traceback and ``get_env_info()`` is sent,
    then the original exception propagates.

    Args:
        func: The method to wrap; it is called as ``func(self, *args, **kwargs)``.

    Returns:
        The wrapping function, carrying ``func``'s metadata via ``functools.wraps``.
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        # Looked up outside the try block and with a default, so an instance
        # without the attribute cannot make a successful call look failed.
        machine_id = getattr(self, "machine_id", None)
        try:
            result = func(self, *args, **kwargs)
        except Exception as e:
            send_report({
                "machine_id": machine_id,
                "status": "fail",
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "method": func.__name__,
                "error": str(e),
                "traceback": traceback.format_exc(),
                "env_info": get_env_info()
            })
            raise  # Re-raise the original exception with its traceback intact
        else:
            # Only the wrapped call is guarded above; the success report lives
            # here so a reporting problem is never recorded as a method failure.
            send_report({
                "machine_id": machine_id,
                "status": "success",
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "method": func.__name__
            })
            return result
    return wrapper
110
 
 
 
 
 
 
 
 
 
 
 
 
111
  from transformers import BertForSequenceClassification
112
 
113
  class ModifiedBertForSequenceClassificationWithHook(BertForSequenceClassification):
 
114
  def __init__(self, config):
115
  super().__init__(config)
116
+ self.machine_id = get_machine_id()
117
 
118
+ @error_handler
119
  def forward(self, *args, **kwargs):
120
+ return super().forward(*args, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ @error_handler
123
  def generate(self, *args, **kwargs):
124
+ if not hasattr(super(), 'generate'):
125
+ raise AttributeError("Generate method is not available in the parent class.")
126
+ return super().generate(*args, **kwargs)