Move the similarity helper functions out of getLogMetric for increased efficiency
Browse files- logmetric.py +24 -24
logmetric.py
CHANGED
|
@@ -97,6 +97,30 @@ class LogMetric(evaluate.Metric):
|
|
| 97 |
# TODO: Download external resources if needed
|
| 98 |
pass
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
def getLogMetric(self, pred : str, ref : str):
|
| 101 |
ref = ref.strip(' \t\n\r')
|
| 102 |
pred = pred.strip(' \t\n\r')
|
|
@@ -170,30 +194,6 @@ class LogMetric(evaluate.Metric):
|
|
| 170 |
# e.g. date format not parsable by dateutil.parser
|
| 171 |
matchesPatternScore = 0.0
|
| 172 |
monotonicallyIncreasingScore = 0.0
|
| 173 |
-
|
| 174 |
-
# Jaccard Similarity to measure closeness of two log-messages
|
| 175 |
-
def get_jaccard_similarity(set1, set2):
|
| 176 |
-
intersection = set1.intersection(set2)
|
| 177 |
-
union = set1.union(set2)
|
| 178 |
-
return len(intersection) / len(union)
|
| 179 |
-
|
| 180 |
-
# A score depending on the difference in length of two sentences
|
| 181 |
-
def get_length_score(sentence1, sentence2):
|
| 182 |
-
s1len = len(sentence1)
|
| 183 |
-
s2len = len(sentence2)
|
| 184 |
-
|
| 185 |
-
return 1 - (abs(s1len - s2len) / max(s1len, s2len))
|
| 186 |
-
|
| 187 |
-
# Combine a weighted average of different scores
|
| 188 |
-
def get_overall_similarity(sentence1, sentence2):
|
| 189 |
-
s1split = sentence1.split()
|
| 190 |
-
s2split = sentence2.split()
|
| 191 |
-
|
| 192 |
-
jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
|
| 193 |
-
length_score = get_length_score(s1split, s2split)
|
| 194 |
-
|
| 195 |
-
return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
|
| 196 |
-
|
| 197 |
|
| 198 |
# apply jaccard-similarity to every pred-ref pair and then take mean score * 100
|
| 199 |
local_score = np.mean([get_overall_similarity(p, r) for p,r in
|
|
|
|
| 97 |
# TODO: Download external resources if needed
|
| 98 |
pass
|
| 99 |
|
| 100 |
+
# Jaccard Similarity to measure closeness of two log-messages
|
| 101 |
+
def get_jaccard_similarity(set1, set2):
    """Return the Jaccard similarity of two sets.

    The score is the size of the intersection divided by the size of the
    union.

    Args:
        set1: First set (e.g. the tokens of one log message).
        set2: Second set, or any iterable accepted by ``set.intersection``.

    Returns:
        A float between 0.0 (no shared elements) and 1.0 (equal sets).
        Two empty sets are treated as identical and score 1.0 instead of
        raising ``ZeroDivisionError``.
    """
    union = set1.union(set2)
    if not union:
        # Both inputs are empty: the ratio would be 0 / 0. Empty vs. empty
        # is a perfect match, so report full similarity.
        return 1.0

    intersection = set1.intersection(set2)
    return len(intersection) / len(union)
|
| 105 |
+
|
| 106 |
+
# A score depending on the difference in length of two sentences
|
| 107 |
+
def get_length_score(sentence1, sentence2):
    """Score how close two sequences are in length.

    Args:
        sentence1: First sized object (the caller passes a list of tokens).
        sentence2: Second sized object.

    Returns:
        ``1 - |len1 - len2| / max(len1, len2)``: 1.0 for equal lengths,
        approaching 0.0 as the lengths diverge. Two empty inputs have equal
        length and score 1.0 instead of raising ``ZeroDivisionError``.
    """
    s1len = len(sentence1)
    s2len = len(sentence2)

    longest = max(s1len, s2len)
    if longest == 0:
        # Both inputs are empty, so there is no length difference to
        # penalise and the division below would be 0 / 0.
        return 1.0

    return 1 - (abs(s1len - s2len) / longest)
|
| 112 |
+
|
| 113 |
+
# Combine a weighted average of different scores
|
| 114 |
+
def get_overall_similarity(sentence1, sentence2):
    """Combine token overlap and length closeness into one 0-100 score.

    Both sentences are split on whitespace. The Jaccard similarity of the
    resulting token sets is weighted 0.7 and the token-count length score
    is weighted 0.3; the weighted sum is scaled to a percentage.

    Args:
        sentence1: First sentence as a string.
        sentence2: Second sentence as a string.

    Returns:
        A float score; 100.0 means the sentences have the same token set
        and the same number of tokens. Two sentences that both contain no
        tokens (empty or whitespace-only) are treated as identical and
        score 100.0 rather than failing with a division by zero.
    """
    # NOTE(review): the helpers are called by bare name, so they must be
    # resolvable from this function's scope (e.g. module level) -- confirm
    # where these definitions sit relative to the class body.
    s1split = sentence1.split()
    s2split = sentence2.split()

    if not s1split and not s2split:
        # Nothing to compare on either side; both sub-scores would divide
        # by zero, so short-circuit with a perfect match.
        return 100.0

    jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
    length_score = get_length_score(s1split, s2split)

    return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
|
| 122 |
+
|
| 123 |
+
|
| 124 |
def getLogMetric(self, pred : str, ref : str):
|
| 125 |
ref = ref.strip(' \t\n\r')
|
| 126 |
pred = pred.strip(' \t\n\r')
|
|
|
|
| 194 |
# e.g. date format not parsable by dateutil.parser
|
| 195 |
matchesPatternScore = 0.0
|
| 196 |
monotonicallyIncreasingScore = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
# apply jaccard-similarity to every pred-ref pair and then take mean score * 100
|
| 199 |
local_score = np.mean([get_overall_similarity(p, r) for p,r in
|