Spaces:
Running
Running
Joshua Lochner
committed on
Commit
·
e3d3d3f
1
Parent(s):
cfbd4d5
Use correct logger per script
Browse files
- src/evaluate.py +2 -0
- src/predict.py +1 -1
- src/preprocess.py +7 -5
src/evaluate.py
CHANGED
@@ -128,6 +128,8 @@ def calculate_metrics(labelled_words, predictions):
|
|
128 |
|
129 |
|
130 |
def main():
|
|
|
|
|
131 |
hf_parser = HfArgumentParser((
|
132 |
EvaluationArguments,
|
133 |
DatasetArguments,
|
|
|
128 |
|
129 |
|
130 |
def main():
|
131 |
+
logger.setLevel(logging.DEBUG)
|
132 |
+
|
133 |
hf_parser = HfArgumentParser((
|
134 |
EvaluationArguments,
|
135 |
DatasetArguments,
|
src/predict.py
CHANGED
@@ -393,7 +393,7 @@ def segments_to_predictions(segments, model, tokenizer):
|
|
393 |
|
394 |
def main():
|
395 |
# Test on unseen data
|
396 |
-
|
397 |
|
398 |
hf_parser = HfArgumentParser((
|
399 |
PredictArguments,
|
|
|
393 |
|
394 |
def main():
|
395 |
# Test on unseen data
|
396 |
+
logger.setLevel(logging.DEBUG)
|
397 |
|
398 |
hf_parser = HfArgumentParser((
|
399 |
PredictArguments,
|
src/preprocess.py
CHANGED
@@ -529,8 +529,7 @@ class DatasetArguments:
|
|
529 |
def main():
|
530 |
# Responsible for getting transcrips using youtube_transcript_api,
|
531 |
# then labelling it according to SponsorBlock's API
|
532 |
-
|
533 |
-
logging.getLogger().setLevel(logging.INFO) # TODO make param
|
534 |
|
535 |
# Generate final.json from sponsorTimes.csv
|
536 |
hf_parser = HfArgumentParser((
|
@@ -561,7 +560,8 @@ def main():
|
|
561 |
@lru_cache(maxsize=1)
|
562 |
def read_db():
|
563 |
if not preprocess_args.overwrite and os.path.exists(processed_db_path):
|
564 |
-
logger.info(
|
|
|
565 |
with open(processed_db_path) as fp:
|
566 |
return json.load(fp)
|
567 |
logger.info('Processing raw database')
|
@@ -700,7 +700,8 @@ def main():
|
|
700 |
progress.update()
|
701 |
|
702 |
except KeyboardInterrupt:
|
703 |
-
logger.info(
|
|
|
704 |
|
705 |
# only futures that are not done will prevent exiting
|
706 |
for future in to_process:
|
@@ -941,7 +942,8 @@ def main():
|
|
941 |
else:
|
942 |
logger.info(f'Skipping {dataset_args.excess_file}')
|
943 |
|
944 |
-
logger.info(
|
|
|
945 |
|
946 |
|
947 |
def split(arr, ratios):
|
|
|
529 |
def main():
|
530 |
# Responsible for getting transcrips using youtube_transcript_api,
|
531 |
# then labelling it according to SponsorBlock's API
|
532 |
+
logger.setLevel(logging.DEBUG)
|
|
|
533 |
|
534 |
# Generate final.json from sponsorTimes.csv
|
535 |
hf_parser = HfArgumentParser((
|
|
|
560 |
@lru_cache(maxsize=1)
|
561 |
def read_db():
|
562 |
if not preprocess_args.overwrite and os.path.exists(processed_db_path):
|
563 |
+
logger.info(
|
564 |
+
'Using cached processed database (use `--overwrite` to avoid this behaviour).')
|
565 |
with open(processed_db_path) as fp:
|
566 |
return json.load(fp)
|
567 |
logger.info('Processing raw database')
|
|
|
700 |
progress.update()
|
701 |
|
702 |
except KeyboardInterrupt:
|
703 |
+
logger.info(
|
704 |
+
'Gracefully shutting down: Cancelling unscheduled tasks')
|
705 |
|
706 |
# only futures that are not done will prevent exiting
|
707 |
for future in to_process:
|
|
|
942 |
else:
|
943 |
logger.info(f'Skipping {dataset_args.excess_file}')
|
944 |
|
945 |
+
logger.info(
|
946 |
+
f'Finished splitting: {len(sponsors)} sponsors, {len(non_sponsors)} non sponsors')
|
947 |
|
948 |
|
949 |
def split(arr, ratios):
|