Spaces:
Running
Running
Joshua Lochner
committed on
Commit
·
7dbc778
1
Parent(s):
508e8b2
Code formatting
Browse files - src/preprocess.py +3 -4
src/preprocess.py
CHANGED
|
@@ -374,14 +374,13 @@ class PreprocessArguments:
|
|
| 374 |
# 1 = At least one positive vote
|
| 375 |
|
| 376 |
max_segment_duration: float = field(
|
| 377 |
-
default=180,
|
| 378 |
# >180 => 2.8%
|
| 379 |
# >200 => 2.1%
|
| 380 |
# >250 => 1.1%
|
| 381 |
# >300 => 0.06%
|
| 382 |
metadata={'help': 'Ignore all segments whose duration in seconds is longer than this value (negative means no limit)'})
|
| 383 |
|
| 384 |
-
|
| 385 |
min_views: int = field(
|
| 386 |
default=5, metadata={'help': 'Minimum number of views a segment must have to be considered. 0 = show all'})
|
| 387 |
|
|
@@ -934,7 +933,8 @@ def main():
|
|
| 934 |
for item in items:
|
| 935 |
parsed_item = json.loads(item) # TODO add uuid
|
| 936 |
|
| 937 |
-
matches = extract_sponsor_matches_from_text(
|
|
|
|
| 938 |
|
| 939 |
if matches:
|
| 940 |
for match in matches:
|
|
@@ -948,7 +948,6 @@ def main():
|
|
| 948 |
'label': none_category
|
| 949 |
}), file=fp)
|
| 950 |
|
| 951 |
-
|
| 952 |
logger.info('Write')
|
| 953 |
# Save excess items
|
| 954 |
# excess_path = os.path.join(
|
|
|
|
| 374 |
# 1 = At least one positive vote
|
| 375 |
|
| 376 |
max_segment_duration: float = field(
|
| 377 |
+
default=180, # 3 minutes
|
| 378 |
# >180 => 2.8%
|
| 379 |
# >200 => 2.1%
|
| 380 |
# >250 => 1.1%
|
| 381 |
# >300 => 0.06%
|
| 382 |
metadata={'help': 'Ignore all segments whose duration in seconds is longer than this value (negative means no limit)'})
|
| 383 |
|
|
|
|
| 384 |
min_views: int = field(
|
| 385 |
default=5, metadata={'help': 'Minimum number of views a segment must have to be considered. 0 = show all'})
|
| 386 |
|
|
|
|
| 933 |
for item in items:
|
| 934 |
parsed_item = json.loads(item) # TODO add uuid
|
| 935 |
|
| 936 |
+
matches = extract_sponsor_matches_from_text(
|
| 937 |
+
parsed_item['extracted'])
|
| 938 |
|
| 939 |
if matches:
|
| 940 |
for match in matches:
|
|
|
|
| 948 |
'label': none_category
|
| 949 |
}), file=fp)
|
| 950 |
|
|
|
|
| 951 |
logger.info('Write')
|
| 952 |
# Save excess items
|
| 953 |
# excess_path = os.path.join(
|