Spaces:

svenwey
/

logmetric

Sleeping

App Files Files Community

svenwey committed on Sep 14, 2024

Commit

ededaca

1 Parent(s): 1f7e97c

revert changes

Browse files

Files changed (1) hide show

logmetric.py +9 -12

logmetric.py CHANGED Viewed

@@ -19,7 +19,6 @@ import re
 import dateutil.parser
 import numpy as np
 from difflib import SequenceMatcher
-import sacrebleu
 import time
@@ -71,8 +70,7 @@ class LogMetric(evaluate.Metric):
     # Constant regex to get timestrings
     timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
     timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
-    sacrebleu = evaluate.load("evaluate-metric/sacrebleu")
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
@@ -115,15 +113,13 @@ class LogMetric(evaluate.Metric):
     # Use minimum edit distance between two sentences
     def get_overall_similarity(self, sentence1, sentence2):
-        return sacrebleu.compute(predictions=sentence1, references=sentence2)
-        # s1split = sentence1.split()
-        # s2split = sentence2.split()
-        # jaccard_score = self.get_jaccard_similarity(set(s1split), set(s2split))
-        # length_score = self.get_length_score(s1split, s2split)
-        # return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
     def getLogMetric(self, pred : str, ref : str):
         ref = ref.strip(' \t\n\r')
@@ -198,10 +194,11 @@ class LogMetric(evaluate.Metric):
                 monotonicallyIncreasingScore = 0.0
         # apply jaccard-similarity to every pred-ref pair and then take mean score * 100
-        local_score = self.get_overall_similarity(
                                         list(map(lambda t: t[1], pred_logentries))[:min_logentries],
                                         list(map(lambda t: t[1], ref_logentries))[:min_logentries]
-                                        )
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0

 import dateutil.parser
 import numpy as np
 from difflib import SequenceMatcher
 import time
     # Constant regex to get timestrings
     timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
     timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
     # Score two sentences as 0.7 * Jaccard similarity of their whitespace-token sets + 0.3 * length score, times 100
     def get_overall_similarity(self, sentence1, sentence2):
+        s1split = sentence1.split()
+        s2split = sentence2.split()
+        jaccard_score = self.get_jaccard_similarity(set(s1split), set(s2split))
+        length_score = self.get_length_score(s1split, s2split)
+        return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
     def getLogMetric(self, pred : str, ref : str):
         ref = ref.strip(' \t\n\r')
                 monotonicallyIncreasingScore = 0.0
         # apply jaccard-similarity to every pred-ref pair and then take mean score * 100
+        local_score = np.mean([self.get_overall_similarity(p, r) for p,r in
+                                       zip(
                                         list(map(lambda t: t[1], pred_logentries))[:min_logentries],
                                         list(map(lambda t: t[1], ref_logentries))[:min_logentries]
+                                        )])
         # we aggregate the similarity scores where we weight the difference in logentries with a score of 0