Implement Jaccard similarity + length-difference score as the similarity score for log messages
Browse files- logmetric.py +32 -9
logmetric.py
CHANGED
|
@@ -69,7 +69,6 @@ class LogMetric(evaluate.Metric):
|
|
| 69 |
# Constant regex to get timestrings
|
| 70 |
timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
|
| 71 |
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
| 72 |
-
sentencesimilarity_metric = evaluate.load("sacrebleu")
|
| 73 |
|
| 74 |
|
| 75 |
def _info(self):
|
|
@@ -98,7 +97,7 @@ class LogMetric(evaluate.Metric):
|
|
| 98 |
# TODO: Download external resources if needed
|
| 99 |
pass
|
| 100 |
|
| 101 |
-
def getLogMetric(self, pred : str, ref : str
|
| 102 |
ref = ref.strip(' \t\n\r')
|
| 103 |
pred = pred.strip(' \t\n\r')
|
| 104 |
|
|
@@ -172,12 +171,36 @@ class LogMetric(evaluate.Metric):
|
|
| 172 |
matchesPatternScore = 0.0
|
| 173 |
monotonicallyIncreasingScore = 0.0
|
| 174 |
|
| 175 |
-
#
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
|
| 183 |
# we aggregate the bleu scores where we weight the difference in logentries with a score of 0
|
|
@@ -191,13 +214,13 @@ class LogMetric(evaluate.Metric):
|
|
| 191 |
# TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
|
| 192 |
|
| 193 |
t_before_logmetric = time.perf_counter()
|
| 194 |
-
timestamp_score = np.mean([self.getLogMetric(p,r
|
| 195 |
t_after_logmetric = time.perf_counter()
|
| 196 |
|
| 197 |
logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
|
| 198 |
|
| 199 |
return {
|
| 200 |
"score": timestamp_score,
|
| 201 |
-
"duration": logmetric_duration
|
| 202 |
}
|
| 203 |
|
|
|
|
| 69 |
# Constant regex to get timestrings
|
| 70 |
timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
|
| 71 |
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
def _info(self):
|
|
|
|
| 97 |
# TODO: Download external resources if needed
|
| 98 |
pass
|
| 99 |
|
| 100 |
+
def getLogMetric(self, pred : str, ref : str):
|
| 101 |
ref = ref.strip(' \t\n\r')
|
| 102 |
pred = pred.strip(' \t\n\r')
|
| 103 |
|
|
|
|
| 171 |
matchesPatternScore = 0.0
|
| 172 |
monotonicallyIncreasingScore = 0.0
|
| 173 |
|
| 174 |
+
# Jaccard Similarity to measure closeness of two log-messages
def get_jaccard_similarity(set1, set2):
    """Return the Jaccard similarity |set1 ∩ set2| / |set1 ∪ set2| in [0, 1].

    Two empty sets are treated as identical (score 1.0) instead of
    raising ZeroDivisionError on an empty union.
    """
    union = set1.union(set2)
    if not union:
        # Both sets empty -> the token sets are trivially identical.
        return 1.0
    intersection = set1.intersection(set2)
    return len(intersection) / len(union)
| 179 |
+
|
| 180 |
+
# A score depending on the difference in length of two sentences
def get_length_score(sentence1, sentence2):
    """Return a score in [0, 1] penalizing the difference in length.

    1.0 means equal length; 0.0 means one input is empty while the other
    is not. Inputs are sized containers (here: whitespace-token lists —
    TODO confirm callers never pass raw strings, which would score by
    character count instead). Two empty inputs count as a perfect match
    (1.0) instead of raising ZeroDivisionError on max(0, 0).
    """
    s1len = len(sentence1)
    s2len = len(sentence2)

    longer = max(s1len, s2len)
    if longer == 0:
        # Both inputs empty -> lengths are trivially identical.
        return 1.0
    return 1 - (abs(s1len - s2len) / longer)
|
| 186 |
+
|
| 187 |
+
# Combine a weighted average of different scores
def get_overall_similarity(sentence1, sentence2):
    """Blend token overlap and length closeness into one score.

    Tokenizes both sentences on whitespace, weights the Jaccard
    similarity of the token sets at 70% and the length score of the
    token lists at 30%, and scales the result to [0, 100].
    """
    tokens1 = sentence1.split()
    tokens2 = sentence2.split()

    overlap = get_jaccard_similarity(set(tokens1), set(tokens2))
    length_closeness = get_length_score(tokens1, tokens2)

    weighted = overlap * 0.7 + length_closeness * 0.3
    return weighted * 100.0
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# apply jaccard-similarity to every pred-ref pair and then take mean score * 100
|
| 199 |
+
local_score = np.mean([get_overall_similarity(p, r) for p,r in
|
| 200 |
+
zip(
|
| 201 |
+
list(map(lambda t: t[1], pred_logentries))[:min_logentries],
|
| 202 |
+
list(map(lambda t: t[1], ref_logentries))[:min_logentries]
|
| 203 |
+
)])
|
| 204 |
|
| 205 |
|
| 206 |
# we aggregate the bleu scores where we weight the difference in logentries with a score of 0
|
|
|
|
| 214 |
# TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
|
| 215 |
|
| 216 |
t_before_logmetric = time.perf_counter()
|
| 217 |
+
timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
|
| 218 |
t_after_logmetric = time.perf_counter()
|
| 219 |
|
| 220 |
logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
|
| 221 |
|
| 222 |
return {
|
| 223 |
"score": timestamp_score,
|
| 224 |
+
"duration": logmetric_duration
|
| 225 |
}
|
| 226 |
|