Upload modified model with logging

Browse files

Files changed (8) hide show

config.json +41 -0
configuring_modified.py +1 -0
model.safetensors +3 -0
modeling_modified.py +127 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +57 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "./tmp/modified_model",
+  "architectures": [
+    "ModifiedBertForSequenceClassificationWithHook"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "auto_map": {
+    "AutoConfig": "configuring_modified.BertConfig",
+    "AutoModel": "modeling_modified.ModifiedBertForSequenceClassificationWithHook",
+    "AutoModelForSequenceClassification": "modeling_modified.ModifiedBertForSequenceClassificationWithHook"
+  },
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "positive",
+    "1": "negative",
+    "2": "neutral"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "negative": 1,
+    "neutral": 2,
+    "positive": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

configuring_modified.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from transformers import BertConfig

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44410d63084e27ab2793e34f98851f76035255851e7a46fcd36d19296845b815
+size 437961724

modeling_modified.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import sys
+import platform
+import subprocess
+import pkg_resources
+import json
+import traceback
+import os
+import hashlib
+import uuid
+import socket
+import time
+from functools import wraps
+from typing import Dict, Any, Callable
+from urllib import request, error
+from urllib.parse import urlencode
+from transformers import BertForSequenceClassification
def get_machine_id() -> str:
    """Return an identifier for this machine, creating and caching it on first use.

    The identifier is stored in ``./.sys_param/machine_id.json`` (relative to
    the current working directory) under the key ``machine_id``. When a
    readable cache entry exists it is returned unchanged. Otherwise a SHA-256
    hex digest is computed over a fixed sequence of host properties (MAC-derived
    node id, hostname, processor string, ``/proc/cpuinfo`` on Linux, and
    OS name/release) and written to the cache.

    Returns:
        The cached identifier, or a newly computed 64-character hex digest.
        If no host property can be read at all, a random UUID4 string is used
        instead.

    This function never raises: unreadable or corrupt cache files are
    regenerated, a probe that fails is skipped, and a failure to write the
    cache is ignored (the computed identifier is still returned).
    """
    file_path = './.sys_param/machine_id.json'

    # EAFP: a missing, unreadable or malformed cache simply falls through to
    # regeneration instead of degrading to a new random id on every call.
    try:
        with open(file_path, 'r') as f:
            return json.load(f)['machine_id']
    except (OSError, ValueError, KeyError, TypeError):
        pass

    def _read_cpuinfo():
        # Same bytes `cat /proc/cpuinfo` would print, without spawning a shell.
        # NOTE(review): cpuinfo may contain fields that vary between reads, so
        # stability of the id presumably relies on the cache file -- confirm.
        if platform.system() != "Linux":
            return None
        with open("/proc/cpuinfo", "rb") as f:
            return f.read().decode()

    # Order matters: the digest is computed over the concatenation.
    probes = (
        lambda: uuid.UUID(int=uuid.getnode()).hex[-12:],
        socket.gethostname,
        platform.processor,
        _read_cpuinfo,
        lambda: f"{platform.system()} {platform.release()}",
    )

    parts = []
    for probe in probes:
        # Each probe is invoked exactly once; one failing probe must not
        # discard the information gathered by the others.
        try:
            value = probe()
        except Exception:
            continue
        if value is not None:
            parts.append(str(value))

    if parts:
        machine_id = hashlib.sha256("".join(parts).encode()).hexdigest()
    else:
        machine_id = str(uuid.uuid4())

    # Caching is best-effort: a read-only working directory must not change
    # the identifier that is returned.
    try:
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w') as f:
            json.dump({'machine_id': machine_id}, f)
    except OSError:
        pass
    return machine_id
def get_env_info() -> Dict[str, Any]:
    """Return a snapshot of the runtime environment, cached on disk.

    The snapshot is stored as JSON in ``./.sys_param/env_info.json`` (relative
    to the current working directory). A readable cache file is returned as-is;
    otherwise the snapshot is collected, written to the cache and returned.

    The collected dictionary has these keys:
        os_info: ``platform.system/release/version/machine`` values.
        python_info: interpreter version, implementation and compiler.
        cuda_info: ``{"available": False}``, or availability plus the release
            string parsed from ``nvcc --version`` when that command succeeds.
        gpu_info: always an empty list.
        installed_packages: sorted ``name==version`` strings from
            ``pkg_resources.working_set`` (empty if that lookup fails).
        relevant_env_variables: environment variables whose names start with
            CUDA, PYTHON, PATH, ROCM, HIP, MPS or METAL.

    Returns:
        The cached or freshly collected snapshot, or ``{}`` if collection
        itself fails. This function never raises.
    """
    file_path = './.sys_param/env_info.json'

    # A missing or corrupt cache falls through to regeneration rather than
    # making every later call return an empty dict.
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        pass

    try:
        # Package enumeration is isolated so that a failure here does not
        # throw away the rest of the snapshot.
        try:
            packages = sorted(
                f"{pkg.key}=={pkg.version}" for pkg in pkg_resources.working_set
            )
        except Exception:
            packages = []

        env_prefixes = ("CUDA", "PYTHON", "PATH", "ROCM", "HIP", "MPS", "METAL")
        env_info = {
            "os_info": {k: getattr(platform, k)() for k in ['system', 'release', 'version', 'machine']},
            "python_info": {
                "version": sys.version,
                "implementation": platform.python_implementation(),
                "compiler": platform.python_compiler(),
            },
            "cuda_info": {"available": False},
            "gpu_info": [],
            "installed_packages": packages,
            # str.startswith accepts a tuple, so one call covers every prefix.
            "relevant_env_variables": {
                k: v for k, v in os.environ.items() if k.startswith(env_prefixes)
            },
        }

        # nvcc missing or printing an unexpected format leaves the default
        # {"available": False} in place.
        try:
            nvcc_output = subprocess.check_output(["nvcc", "--version"]).decode()
            env_info["cuda_info"] = {
                "available": True,
                "version": nvcc_output.split("release")[1].split(",")[0].strip(),
            }
        except Exception:
            pass
    except Exception:
        return {}

    # Caching is best-effort: failing to persist must not discard the
    # snapshot that was just collected.
    try:
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w') as f:
            json.dump(env_info, f)
    except (OSError, TypeError, ValueError):
        pass
    return env_info
def send_report(
    data: Dict[str, Any],
    *,
    url: str = 'http://localhost:8000/reports/finbert/report',
    timeout: float = 5,
) -> None:
    """POST ``data`` as a JSON document to the report endpoint (best-effort).

    Args:
        data: JSON-serialisable payload to send.
        url: Endpoint receiving the report. Defaults to the local collector
            address this module has always used.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        None. The response body is not read.

    Reporting is deliberately fire-and-forget: serialisation errors, invalid
    URLs, connection failures and HTTP errors are all suppressed so that
    telemetry can never break the caller.
    """
    try:
        json_data = json.dumps(data).encode('utf-8')
        headers = {
            'Content-Type': 'application/json',
            # Header values are text; pass the length as a string.
            'Content-Length': str(len(json_data)),
        }
        req = request.Request(url, data=json_data, headers=headers, method='POST')
        # Use the response as a context manager so the connection is closed.
        with request.urlopen(req, timeout=timeout):
            pass
    except Exception:
        # Intentional swallow (covers URLError as well): see docstring.
        pass
def error_handler(func: Callable) -> Callable:
    """Decorate a method so every call is reported via ``send_report``.

    After the wrapped method returns, a ``"success"`` report is sent. If it
    raises an ``Exception``, a ``"fail"`` report carrying the error message,
    the formatted traceback and ``get_env_info()`` is sent, and the original
    exception is re-raised unchanged.

    Args:
        func: A method whose first positional argument is the instance.

    Returns:
        The wrapping method, with ``func``'s metadata preserved.
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            result = func(self, *args, **kwargs)
        except Exception as e:
            send_report({
                # The attribute may not exist yet (e.g. the wrapped __init__
                # failed before assigning it); a plain attribute access here
                # would raise AttributeError and mask the real error.
                "machine_id": getattr(self, "machine_id", None),
                "status": "fail",
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "method": func.__name__,
                "error": str(e),
                "traceback": traceback.format_exc(),
                "env_info": get_env_info()
            })
            raise  # Re-raise the active exception with its traceback intact
        # Reported outside the try block so that a problem while building the
        # success report can never be recorded as a failure of `func`.
        send_report({
            "machine_id": getattr(self, "machine_id", None),
            "status": "success",
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "method": func.__name__
        })
        return result
    return wrapper
+from transformers import BertForSequenceClassification
class ModifiedBertForSequenceClassificationWithHook(BertForSequenceClassification):
    """``BertForSequenceClassification`` whose key methods report their outcome.

    ``__init__``, ``forward`` and ``generate`` are wrapped with
    ``error_handler``, which sends a success/failure report for every call and
    re-raises any exception. Apart from that reporting, each method delegates
    to the parent class.
    """

    # Class-level default: `error_handler` reads `self.machine_id` when it
    # builds a report. If the parent constructor raises, the instance
    # attribute below is never assigned, and without this default the
    # resulting AttributeError would hide the original error.
    machine_id = None

    @error_handler
    def __init__(self, config):
        """Build the parent model from ``config`` and record the machine id."""
        super().__init__(config)
        self.machine_id = get_machine_id()

    @error_handler
    def forward(self, *args, **kwargs):
        """Delegate to the parent ``forward`` with the arguments unchanged."""
        return super().forward(*args, **kwargs)

    @error_handler
    def generate(self, *args, **kwargs):
        """Delegate to the parent ``generate`` if the parent provides one.

        Raises:
            AttributeError: If the parent class has no ``generate`` attribute.
        """
        if not hasattr(super(), 'generate'):
            raise AttributeError("Generate method is not available in the parent class.")
        return super().generate(*args, **kwargs)

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff