go back to sacrebleu for in-lines computation
logmetric.py CHANGED (+12 -9)
@@ -19,6 +19,7 @@ import re
 import dateutil.parser
 import numpy as np
 from difflib import SequenceMatcher
+import sacrebleu
 
 import time
 
@@ -70,7 +71,8 @@ class LogMetric(evaluate.Metric):
     # Constant regex to get timestrings
     timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
     timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
-
+    sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")
+
 
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
@@ -113,13 +115,15 @@ class LogMetric(evaluate.Metric):
 
     # Use minimum edit distance between two sentences
     def get_overall_similarity(self, sentence1, sentence2):
-        s1split = sentence1.split()
-        s2split = sentence2.split()
+        return self.sacrebleu_metric.compute(predictions=sentence1, references=sentence2)
+
+        # s1split = sentence1.split()
+        # s2split = sentence2.split()
 
-        jaccard_score = self.get_jaccard_similarity(set(s1split), set(s2split))
-        length_score = self.get_length_score(s1split, s2split)
+        # jaccard_score = self.get_jaccard_similarity(set(s1split), set(s2split))
+        # length_score = self.get_length_score(s1split, s2split)
 
-        return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
+        # return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
 
     def getLogMetric(self, pred : str, ref : str):
         ref = ref.strip(' \t\n\r')
@@ -194,11 +198,10 @@
         monotonicallyIncreasingScore = 0.0
 
         # apply jaccard-similarity to every pred-ref pair and then take mean score * 100
-        local_score =
-            zip(
+        local_score = self.get_overall_similarity(
             list(map(lambda t: t[1], pred_logentries))[:min_logentries],
             list(map(lambda t: t[1], ref_logentries))[:min_logentries]
-        )
+        )
 
 
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
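Note on the new call: evaluate.load("evaluate-metric/sacrebleu") returns a metric module whose compute() takes a list of prediction strings and a matching list of reference strings, and it returns a dict rather than a bare number. A minimal sketch of what get_overall_similarity now yields (the inputs here are illustrative, not from the repo):

    import evaluate

    # Load the sacreBLEU wrapper from the Hugging Face Hub, as the commit does.
    sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")

    preds = ["2023-04-01 12:30:45 INFO Starting service"]
    refs = ["2023-04-01 12:30:45 INFO Starting service"]

    # compute() returns a dict: the corpus-level BLEU is under "score",
    # alongside diagnostics such as "precisions", "bp", "sys_len", "ref_len".
    result = sacrebleu_metric.compute(predictions=preds, references=refs)
    print(result["score"])  # 100.0 for an exact match

Two consequences worth noting: the score is corpus-level over all entry pairs (not the per-pair mean the surviving comment describes), and local_score now holds a dict, so the downstream aggregation presumably reads result["score"].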
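As a side illustration of the unchanged context lines, timestamp_regex captures a leading, optionally bracketed date or date-time at the start of each line (the example inputs are made up):

    import re

    timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
    timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)

    log = "[2023-04-01 12:30:45.123] INFO Starting service\n2023/04/02 date-only entry"
    # findall returns the captured timestring of every line that starts with one
    print(timestamp_pattern.findall(log))
    # ['2023-04-01 12:30:45.123', '2023/04/02']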
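The commented-out lines record the scoring this commit moves away from: a 70/30 blend of Jaccard word overlap and a length score, scaled to 0-100. The two helpers are not part of this diff; a hedged sketch, assuming get_jaccard_similarity is intersection-over-union on word sets and get_length_score is a shorter-to-longer token-count ratio:

    def get_jaccard_similarity(set1: set, set2: set) -> float:
        # Intersection-over-union of the two word sets; 1.0 for identical sets.
        if not set1 and not set2:
            return 1.0
        return len(set1 & set2) / len(set1 | set2)

    def get_length_score(s1split: list, s2split: list) -> float:
        # Assumed form: ratio of the shorter to the longer token count.
        if not s1split or not s2split:
            return 0.0
        return min(len(s1split), len(s2split)) / max(len(s1split), len(s2split))

    def get_overall_similarity_reverted(sentence1: str, sentence2: str) -> float:
        s1split = sentence1.split()
        s2split = sentence2.split()
        jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
        length_score = get_length_score(s1split, s2split)
        # Weighted blend scaled to 0-100, as in the commented-out return.
        return (jaccard_score * 0.7 + length_score * 0.3) * 100.0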