add some print statements for simple debugging, change to char-based tokenization
Browse files- logmetric.py +19 -1
logmetric.py
CHANGED
|
@@ -126,6 +126,11 @@ class LogMetric(evaluate.Metric):
|
|
| 126 |
# The number of logentries of the reference/prediction which has more/less entries/timestamps
|
| 127 |
max_logentries = max(len(pred_logentries), len(ref_logentries))
|
| 128 |
min_logentries = min(len(pred_logentries), len(ref_logentries))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
|
| 131 |
# Case there are no timestamps in reference and none in prediction
|
|
@@ -172,20 +177,33 @@ class LogMetric(evaluate.Metric):
|
|
| 172 |
# If one entry doesn't fulfill the matching pattern property or the monotonicity property, set to 0 for whole log
|
| 173 |
if (not matchesPattern):
|
| 174 |
matchesPatternScore = 0.0
|
|
|
|
| 175 |
if (not monotonicallyIncreasing):
|
| 176 |
monotonicallyIncreasingScore = 0.0
|
|
|
|
|
|
|
| 177 |
|
| 178 |
except Exception as e:
|
| 179 |
# e.g. date format not parsable by dateutil.parser
|
| 180 |
matchesPatternScore = 0.0
|
| 181 |
monotonicallyIncreasingScore = 0.0
|
| 182 |
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
|
|
|
| 185 |
# TODO: remove later. Used only for testing purposes
|
| 186 |
assert(len(logmessage_scores) == min_logentries)
|
| 187 |
# we aggregate the bleu scores where we weight the difference in logentries with a score of 0
|
| 188 |
logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
|
|
|
|
| 189 |
# Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
|
| 190 |
return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
|
| 191 |
|
|
|
|
| 126 |
# The number of logentries of the reference/prediction which has more/less entries/timestamps
|
| 127 |
max_logentries = max(len(pred_logentries), len(ref_logentries))
|
| 128 |
min_logentries = min(len(pred_logentries), len(ref_logentries))
|
| 129 |
+
# # print("pred_logentries: ", pred_logentries)
|
| 130 |
+
# # print("ref_logentries: ", ref_logentries)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# # print("amount of timestrings: max:{}, min:{}".format(max_logentries, min_logentries))
|
| 134 |
|
| 135 |
|
| 136 |
# Case there are no timestamps in reference and none in prediction
|
|
|
|
| 177 |
# If one entry doesn't fulfill the matching pattern property or the monotonicity property, set to 0 for whole log
|
| 178 |
if (not matchesPattern):
|
| 179 |
matchesPatternScore = 0.0
|
| 180 |
+
# # print("{} doesn't match pattern {}, setting patternScore to 0".format(ts, pred_timestring_pattern))
|
| 181 |
if (not monotonicallyIncreasing):
|
| 182 |
monotonicallyIncreasingScore = 0.0
|
| 183 |
+
# # print("{} isn't monotonically increasing, setting monotonicallyIncreasingScore to 0".format(ts))
|
| 184 |
+
|
| 185 |
|
| 186 |
except Exception as e:
|
| 187 |
# e.g. date format not parsable by dateutil.parser
|
| 188 |
matchesPatternScore = 0.0
|
| 189 |
monotonicallyIncreasingScore = 0.0
|
| 190 |
|
| 191 |
+
# If the maximum length of the two log messages is below 4, BLEU doesn't work. We use exact match in this case
|
| 192 |
+
if(max(len(pred_lm),len(ref_lm)) < 4):
|
| 193 |
+
local_bleu_score = 100.0 if pred_lm == ref_lm else 0.0
|
| 194 |
+
else:
|
| 195 |
+
local_bleu_score = sacrebleu.compute(predictions=[pred_lm], references=[ref_lm], tokenize="char")["score"]
|
| 196 |
+
|
| 197 |
+
logmessage_scores.append(local_bleu_score)
|
| 198 |
+
# # print(("calculates local bleu score between :{} and {}. Result -> {}").format(repr(pred_lm),repr(ref_lm), local_bleu_score))
|
| 199 |
+
|
| 200 |
|
| 201 |
+
# # print("Ended per-entry checks. All scores: {}".format(logmessage_scores))
|
| 202 |
# TODO: remove later. Used only for testing purposes
|
| 203 |
assert(len(logmessage_scores) == min_logentries)
|
| 204 |
# we aggregate the bleu scores where we weight the difference in logentries with a score of 0
|
| 205 |
logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
|
| 206 |
+
# # print("aggregate the scores: result", logmessage_aggregated_score)
|
| 207 |
# Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
|
| 208 |
return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
|
| 209 |
|