Spaces:
Running
on
Zero
Running
on
Zero
Upload TMIDIX.py
Browse files
TMIDIX.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
#! /usr/bin/python3
|
2 |
|
3 |
-
|
4 |
r'''###############################################################################
|
5 |
###################################################################################
|
6 |
#
|
@@ -8,7 +7,7 @@ r'''############################################################################
|
|
8 |
# Tegridy MIDI X Module (TMIDI X / tee-midi eks)
|
9 |
# Version 1.0
|
10 |
#
|
11 |
-
# NOTE: TMIDI X Module starts after the partial MIDI.py module @ line
|
12 |
#
|
13 |
# Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
|
14 |
#
|
@@ -1458,8 +1457,6 @@ import os
|
|
1458 |
|
1459 |
import datetime
|
1460 |
|
1461 |
-
import copy
|
1462 |
-
|
1463 |
from datetime import datetime
|
1464 |
|
1465 |
import secrets
|
@@ -1476,12 +1473,12 @@ import multiprocessing
|
|
1476 |
|
1477 |
from itertools import zip_longest
|
1478 |
from itertools import groupby
|
|
|
1479 |
from collections import Counter
|
|
|
1480 |
|
1481 |
from operator import itemgetter
|
1482 |
|
1483 |
-
import sys
|
1484 |
-
|
1485 |
from abc import ABC, abstractmethod
|
1486 |
|
1487 |
from difflib import SequenceMatcher as SM
|
@@ -1493,7 +1490,7 @@ import matplotlib.pyplot as plt
|
|
1493 |
|
1494 |
import psutil
|
1495 |
|
1496 |
-
|
1497 |
|
1498 |
###################################################################################
|
1499 |
#
|
@@ -4184,6 +4181,17 @@ def advanced_score_processor(raw_score,
|
|
4184 |
basic_single_track_score.append(ev)
|
4185 |
num_tracks += 1
|
4186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4187 |
basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
|
4188 |
basic_single_track_score.sort(key=lambda x: x[1])
|
4189 |
|
@@ -4198,7 +4206,7 @@ def advanced_score_processor(raw_score,
|
|
4198 |
enhanced_single_track_score.append(event)
|
4199 |
num_patch_changes += 1
|
4200 |
|
4201 |
-
if event[0] == 'note':
|
4202 |
if event[3] != 9:
|
4203 |
event.extend([patches[event[3]]])
|
4204 |
all_score_patches.extend([patches[event[3]]])
|
@@ -11300,7 +11308,7 @@ def create_files_list(datasets_paths=['./'],
|
|
11300 |
|
11301 |
files_exts = tuple(files_exts)
|
11302 |
|
11303 |
-
for dataset_addr in tqdm.tqdm(datasets_paths):
|
11304 |
for dirpath, dirnames, filenames in os.walk(dataset_addr):
|
11305 |
for file in filenames:
|
11306 |
if file not in filez_set and file.endswith(files_exts):
|
@@ -11363,6 +11371,861 @@ def has_consecutive_trend(nums, count):
|
|
11363 |
|
11364 |
return False
|
11365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11366 |
###################################################################################
|
11367 |
# This is the end of the TMIDI X Python module
|
11368 |
###################################################################################
|
|
|
1 |
#! /usr/bin/python3
|
2 |
|
|
|
3 |
r'''###############################################################################
|
4 |
###################################################################################
|
5 |
#
|
|
|
7 |
# Tegridy MIDI X Module (TMIDI X / tee-midi eks)
|
8 |
# Version 1.0
|
9 |
#
|
10 |
+
# NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1437
|
11 |
#
|
12 |
# Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
|
13 |
#
|
|
|
1457 |
|
1458 |
import datetime
|
1459 |
|
|
|
|
|
1460 |
from datetime import datetime
|
1461 |
|
1462 |
import secrets
|
|
|
1473 |
|
1474 |
from itertools import zip_longest
|
1475 |
from itertools import groupby
|
1476 |
+
|
1477 |
from collections import Counter
|
1478 |
+
from collections import defaultdict
|
1479 |
|
1480 |
from operator import itemgetter
|
1481 |
|
|
|
|
|
1482 |
from abc import ABC, abstractmethod
|
1483 |
|
1484 |
from difflib import SequenceMatcher as SM
|
|
|
1490 |
|
1491 |
import psutil
|
1492 |
|
1493 |
+
import json
|
1494 |
|
1495 |
###################################################################################
|
1496 |
#
|
|
|
4181 |
basic_single_track_score.append(ev)
|
4182 |
num_tracks += 1
|
4183 |
|
4184 |
+
for e in basic_single_track_score:
|
4185 |
+
|
4186 |
+
if e[0] == 'note':
|
4187 |
+
e[3] = e[3] % 16
|
4188 |
+
e[4] = e[4] % 128
|
4189 |
+
e[5] = e[5] % 128
|
4190 |
+
|
4191 |
+
if e[0] == 'patch_change':
|
4192 |
+
e[2] = e[2] % 16
|
4193 |
+
e[3] = e[3] % 128
|
4194 |
+
|
4195 |
basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
|
4196 |
basic_single_track_score.sort(key=lambda x: x[1])
|
4197 |
|
|
|
4206 |
enhanced_single_track_score.append(event)
|
4207 |
num_patch_changes += 1
|
4208 |
|
4209 |
+
if event[0] == 'note':
|
4210 |
if event[3] != 9:
|
4211 |
event.extend([patches[event[3]]])
|
4212 |
all_score_patches.extend([patches[event[3]]])
|
|
|
11308 |
|
11309 |
files_exts = tuple(files_exts)
|
11310 |
|
11311 |
+
for dataset_addr in tqdm.tqdm(datasets_paths, disable=not verbose):
|
11312 |
for dirpath, dirnames, filenames in os.walk(dataset_addr):
|
11313 |
for file in filenames:
|
11314 |
if file not in filez_set and file.endswith(files_exts):
|
|
|
11371 |
|
11372 |
return False
|
11373 |
|
11374 |
+
###################################################################################
|
11375 |
+
|
11376 |
+
def escore_notes_primary_features(escore_notes):
|
11377 |
+
|
11378 |
+
#=================================================================
|
11379 |
+
|
11380 |
+
def mean(values):
|
11381 |
+
return sum(values) / len(values) if values else None
|
11382 |
+
|
11383 |
+
def std(values):
|
11384 |
+
if not values:
|
11385 |
+
return None
|
11386 |
+
m = mean(values)
|
11387 |
+
return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None
|
11388 |
+
|
11389 |
+
def skew(values):
|
11390 |
+
if not values:
|
11391 |
+
return None
|
11392 |
+
m = mean(values)
|
11393 |
+
s = std(values)
|
11394 |
+
if s is None or s == 0:
|
11395 |
+
return None
|
11396 |
+
return sum(((x - m) / s) ** 3 for x in values) / len(values)
|
11397 |
+
|
11398 |
+
def kurtosis(values):
|
11399 |
+
if not values:
|
11400 |
+
return None
|
11401 |
+
m = mean(values)
|
11402 |
+
s = std(values)
|
11403 |
+
if s is None or s == 0:
|
11404 |
+
return None
|
11405 |
+
return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3
|
11406 |
+
|
11407 |
+
def median(values):
|
11408 |
+
if not values:
|
11409 |
+
return None
|
11410 |
+
srt = sorted(values)
|
11411 |
+
n = len(srt)
|
11412 |
+
mid = n // 2
|
11413 |
+
if n % 2 == 0:
|
11414 |
+
return (srt[mid - 1] + srt[mid]) / 2.0
|
11415 |
+
return srt[mid]
|
11416 |
+
|
11417 |
+
def percentile(values, p):
|
11418 |
+
if not values:
|
11419 |
+
return None
|
11420 |
+
srt = sorted(values)
|
11421 |
+
n = len(srt)
|
11422 |
+
k = (n - 1) * p / 100.0
|
11423 |
+
f = int(k)
|
11424 |
+
c = k - f
|
11425 |
+
if f + 1 < n:
|
11426 |
+
return srt[f] * (1 - c) + srt[f + 1] * c
|
11427 |
+
return srt[f]
|
11428 |
+
|
11429 |
+
def diff(values):
|
11430 |
+
if not values or len(values) < 2:
|
11431 |
+
return []
|
11432 |
+
return [values[i + 1] - values[i] for i in range(len(values) - 1)]
|
11433 |
+
|
11434 |
+
def mad(values):
|
11435 |
+
if not values:
|
11436 |
+
return None
|
11437 |
+
m = median(values)
|
11438 |
+
return median([abs(x - m) for x in values])
|
11439 |
+
|
11440 |
+
def entropy(values):
|
11441 |
+
if not values:
|
11442 |
+
return None
|
11443 |
+
freq = {}
|
11444 |
+
for v in values:
|
11445 |
+
freq[v] = freq.get(v, 0) + 1
|
11446 |
+
total = len(values)
|
11447 |
+
ent = 0.0
|
11448 |
+
for count in freq.values():
|
11449 |
+
p_val = count / total
|
11450 |
+
ent -= p_val * math.log2(p_val)
|
11451 |
+
return ent
|
11452 |
+
|
11453 |
+
def mode(values):
|
11454 |
+
if not values:
|
11455 |
+
return None
|
11456 |
+
freq = {}
|
11457 |
+
for v in values:
|
11458 |
+
freq[v] = freq.get(v, 0) + 1
|
11459 |
+
max_count = max(freq.values())
|
11460 |
+
modes = [k for k, count in freq.items() if count == max_count]
|
11461 |
+
return min(modes)
|
11462 |
+
|
11463 |
+
|
11464 |
+
#=================================================================
|
11465 |
+
|
11466 |
+
sp_score = solo_piano_escore_notes(escore_notes)
|
11467 |
+
|
11468 |
+
dscore = delta_score_notes(sp_score)
|
11469 |
+
|
11470 |
+
seq = []
|
11471 |
+
|
11472 |
+
for d in dscore:
|
11473 |
+
seq.extend([d[1], d[2], d[4]])
|
11474 |
+
|
11475 |
+
#=================================================================
|
11476 |
+
|
11477 |
+
n = len(seq)
|
11478 |
+
if n % 3 != 0:
|
11479 |
+
seq = seq[: n - (n % 3)]
|
11480 |
+
arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]
|
11481 |
+
|
11482 |
+
#=================================================================
|
11483 |
+
|
11484 |
+
features = {}
|
11485 |
+
|
11486 |
+
delta_times = [row[0] for row in arr]
|
11487 |
+
if delta_times:
|
11488 |
+
features['delta_times_mean'] = mean(delta_times)
|
11489 |
+
features['delta_times_std'] = std(delta_times)
|
11490 |
+
features['delta_times_min'] = min(delta_times)
|
11491 |
+
features['delta_times_max'] = max(delta_times)
|
11492 |
+
features['delta_times_skew'] = skew(delta_times)
|
11493 |
+
features['delta_times_kurtosis'] = kurtosis(delta_times)
|
11494 |
+
delta_zero_count = sum(1 for x in delta_times if x == 0)
|
11495 |
+
features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
|
11496 |
+
nonzero_dt = [x for x in delta_times if x != 0]
|
11497 |
+
if nonzero_dt:
|
11498 |
+
features['delta_times_nonzero_mean'] = mean(nonzero_dt)
|
11499 |
+
features['delta_times_nonzero_std'] = std(nonzero_dt)
|
11500 |
+
else:
|
11501 |
+
features['delta_times_nonzero_mean'] = None
|
11502 |
+
features['delta_times_nonzero_std'] = None
|
11503 |
+
features['delta_times_mad'] = mad(delta_times)
|
11504 |
+
features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
|
11505 |
+
if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
|
11506 |
+
features['delta_times_entropy'] = entropy(delta_times)
|
11507 |
+
features['delta_times_range'] = max(delta_times) - min(delta_times)
|
11508 |
+
features['delta_times_median'] = median(delta_times)
|
11509 |
+
features['delta_times_quantile_25'] = percentile(delta_times, 25)
|
11510 |
+
features['delta_times_quantile_75'] = percentile(delta_times, 75)
|
11511 |
+
if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
|
11512 |
+
features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
|
11513 |
+
else:
|
11514 |
+
features['delta_times_iqr'] = None
|
11515 |
+
else:
|
11516 |
+
for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
|
11517 |
+
'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
|
11518 |
+
'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
|
11519 |
+
'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
|
11520 |
+
'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
|
11521 |
+
features[key] = None
|
11522 |
+
|
11523 |
+
#=================================================================
|
11524 |
+
|
11525 |
+
durations = [row[1] for row in arr]
|
11526 |
+
if durations:
|
11527 |
+
features['durations_mean'] = mean(durations)
|
11528 |
+
features['durations_std'] = std(durations)
|
11529 |
+
features['durations_min'] = min(durations)
|
11530 |
+
features['durations_max'] = max(durations)
|
11531 |
+
features['durations_skew'] = skew(durations)
|
11532 |
+
features['durations_kurtosis'] = kurtosis(durations)
|
11533 |
+
features['durations_mad'] = mad(durations)
|
11534 |
+
features['durations_cv'] = (features['durations_std'] / features['durations_mean']
|
11535 |
+
if features['durations_mean'] and features['durations_mean'] != 0 else None)
|
11536 |
+
features['durations_entropy'] = entropy(durations)
|
11537 |
+
features['durations_range'] = max(durations) - min(durations)
|
11538 |
+
features['durations_median'] = median(durations)
|
11539 |
+
features['durations_quantile_25'] = percentile(durations, 25)
|
11540 |
+
features['durations_quantile_75'] = percentile(durations, 75)
|
11541 |
+
if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
|
11542 |
+
features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
|
11543 |
+
else:
|
11544 |
+
features['durations_iqr'] = None
|
11545 |
+
else:
|
11546 |
+
for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
|
11547 |
+
'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
|
11548 |
+
'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
|
11549 |
+
'durations_quantile_75', 'durations_iqr']:
|
11550 |
+
features[key] = None
|
11551 |
+
|
11552 |
+
#=================================================================
|
11553 |
+
|
11554 |
+
pitches = [row[2] for row in arr]
|
11555 |
+
if pitches:
|
11556 |
+
features['pitches_mean'] = mean(pitches)
|
11557 |
+
features['pitches_std'] = std(pitches)
|
11558 |
+
features['pitches_min'] = min(pitches)
|
11559 |
+
features['pitches_max'] = max(pitches)
|
11560 |
+
features['pitches_skew'] = skew(pitches)
|
11561 |
+
features['pitches_kurtosis'] = kurtosis(pitches)
|
11562 |
+
features['pitches_range'] = max(pitches) - min(pitches)
|
11563 |
+
features['pitches_median'] = median(pitches)
|
11564 |
+
features['pitches_quantile_25'] = percentile(pitches, 25)
|
11565 |
+
features['pitches_quantile_75'] = percentile(pitches, 75)
|
11566 |
+
if len(pitches) > 1:
|
11567 |
+
dps = diff(pitches)
|
11568 |
+
features['pitches_diff_mean'] = mean(dps)
|
11569 |
+
features['pitches_diff_std'] = std(dps)
|
11570 |
+
else:
|
11571 |
+
features['pitches_diff_mean'] = None
|
11572 |
+
features['pitches_diff_std'] = None
|
11573 |
+
features['pitches_mad'] = mad(pitches)
|
11574 |
+
if len(pitches) > 2:
|
11575 |
+
peaks = sum(1 for i in range(1, len(pitches)-1)
|
11576 |
+
if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
|
11577 |
+
valleys = sum(1 for i in range(1, len(pitches)-1)
|
11578 |
+
if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
|
11579 |
+
else:
|
11580 |
+
peaks, valleys = None, None
|
11581 |
+
features['pitches_peak_count'] = peaks
|
11582 |
+
features['pitches_valley_count'] = valleys
|
11583 |
+
if len(pitches) > 1:
|
11584 |
+
x = list(range(len(pitches)))
|
11585 |
+
denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
|
11586 |
+
if denominator != 0:
|
11587 |
+
slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
|
11588 |
+
sum(x) * sum(pitches)) / denominator
|
11589 |
+
else:
|
11590 |
+
slope = None
|
11591 |
+
features['pitches_trend_slope'] = slope
|
11592 |
+
else:
|
11593 |
+
features['pitches_trend_slope'] = None
|
11594 |
+
|
11595 |
+
features['pitches_unique_count'] = len(set(pitches))
|
11596 |
+
pitch_class_hist = {i: 0 for i in range(12)}
|
11597 |
+
for p in pitches:
|
11598 |
+
pitch_class_hist[p % 12] += 1
|
11599 |
+
total_pitch = len(pitches)
|
11600 |
+
for i in range(12):
|
11601 |
+
features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None
|
11602 |
+
|
11603 |
+
max_asc = 0
|
11604 |
+
cur_asc = 0
|
11605 |
+
max_desc = 0
|
11606 |
+
cur_desc = 0
|
11607 |
+
for i in range(1, len(pitches)):
|
11608 |
+
if pitches[i] > pitches[i-1]:
|
11609 |
+
cur_asc += 1
|
11610 |
+
max_asc = max(max_asc, cur_asc)
|
11611 |
+
cur_desc = 0
|
11612 |
+
elif pitches[i] < pitches[i-1]:
|
11613 |
+
cur_desc += 1
|
11614 |
+
max_desc = max(max_desc, cur_desc)
|
11615 |
+
cur_asc = 0
|
11616 |
+
else:
|
11617 |
+
cur_asc = 0
|
11618 |
+
cur_desc = 0
|
11619 |
+
features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
|
11620 |
+
features['pitches_max_consecutive_descending'] = max_desc if pitches else None
|
11621 |
+
p_intervals = diff(pitches)
|
11622 |
+
features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
|
11623 |
+
if p_intervals:
|
11624 |
+
dc = sum(1 for i in range(1, len(p_intervals))
|
11625 |
+
if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
|
11626 |
+
features['pitches_direction_changes'] = dc
|
11627 |
+
else:
|
11628 |
+
features['pitches_direction_changes'] = None
|
11629 |
+
else:
|
11630 |
+
for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
|
11631 |
+
'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
|
11632 |
+
'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
|
11633 |
+
'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
|
11634 |
+
'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
|
11635 |
+
'pitches_median_diff', 'pitches_direction_changes'] +
|
11636 |
+
[f'pitches_pc_{i}' for i in range(12)]):
|
11637 |
+
features[key] = None
|
11638 |
+
|
11639 |
+
#=================================================================
|
11640 |
+
|
11641 |
+
overall = [x for row in arr for x in row]
|
11642 |
+
if overall:
|
11643 |
+
features['overall_mean'] = mean(overall)
|
11644 |
+
features['overall_std'] = std(overall)
|
11645 |
+
features['overall_min'] = min(overall)
|
11646 |
+
features['overall_max'] = max(overall)
|
11647 |
+
features['overall_cv'] = (features['overall_std'] / features['overall_mean']
|
11648 |
+
if features['overall_mean'] and features['overall_mean'] != 0 else None)
|
11649 |
+
else:
|
11650 |
+
for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
|
11651 |
+
features[key] = None
|
11652 |
+
|
11653 |
+
#=================================================================
|
11654 |
+
|
11655 |
+
onsets = []
|
11656 |
+
cumulative = 0
|
11657 |
+
for dt in delta_times:
|
11658 |
+
onsets.append(cumulative)
|
11659 |
+
cumulative += dt
|
11660 |
+
if onsets and durations:
|
11661 |
+
overall_piece_duration = onsets[-1] + durations[-1]
|
11662 |
+
else:
|
11663 |
+
overall_piece_duration = None
|
11664 |
+
features['overall_piece_duration'] = overall_piece_duration
|
11665 |
+
features['overall_notes_density'] = (len(arr) / overall_piece_duration
|
11666 |
+
if overall_piece_duration and overall_piece_duration > 0 else None)
|
11667 |
+
features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
|
11668 |
+
if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
|
11669 |
+
features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
|
11670 |
+
features['overall_sum_durations'] = (sum(durations) if durations else None)
|
11671 |
+
features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
|
11672 |
+
if overall_piece_duration and durations else None)
|
11673 |
+
features['overall_onset_std'] = std(onsets) if onsets else None
|
11674 |
+
|
11675 |
+
#=================================================================
|
11676 |
+
|
11677 |
+
chords_raw = []
|
11678 |
+
chords_pc = []
|
11679 |
+
current_group = []
|
11680 |
+
for i, note in enumerate(arr):
|
11681 |
+
dt = note[0]
|
11682 |
+
if i == 0:
|
11683 |
+
current_group = [i]
|
11684 |
+
else:
|
11685 |
+
if dt == 0:
|
11686 |
+
current_group.append(i)
|
11687 |
+
else:
|
11688 |
+
if len(current_group) >= 2:
|
11689 |
+
chord_notes = [arr[j][2] for j in current_group]
|
11690 |
+
chords_raw.append(tuple(sorted(chord_notes)))
|
11691 |
+
chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
|
11692 |
+
|
11693 |
+
current_group = [i]
|
11694 |
+
|
11695 |
+
if current_group and len(current_group) >= 2:
|
11696 |
+
chord_notes = [arr[j][2] for j in current_group]
|
11697 |
+
chords_raw.append(tuple(sorted(chord_notes)))
|
11698 |
+
chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
|
11699 |
+
|
11700 |
+
if chords_raw:
|
11701 |
+
chord_count = len(chords_raw)
|
11702 |
+
features['chords_count'] = chord_count
|
11703 |
+
features['chords_density'] = (chord_count / overall_piece_duration
|
11704 |
+
if overall_piece_duration and chord_count is not None else None)
|
11705 |
+
chord_sizes = [len(ch) for ch in chords_raw]
|
11706 |
+
features['chords_size_mean'] = mean(chord_sizes)
|
11707 |
+
features['chords_size_std'] = std(chord_sizes)
|
11708 |
+
features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
|
11709 |
+
features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
|
11710 |
+
features['chords_unique_raw_count'] = len(set(chords_raw))
|
11711 |
+
features['chords_unique_pc_count'] = len(set(chords_pc))
|
11712 |
+
features['chords_entropy_raw'] = entropy(chords_raw)
|
11713 |
+
features['chords_entropy_pc'] = entropy(chords_pc)
|
11714 |
+
if len(chords_raw) > 1:
|
11715 |
+
rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
|
11716 |
+
features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
|
11717 |
+
else:
|
11718 |
+
features['chords_repeat_ratio_raw'] = None
|
11719 |
+
if len(chords_pc) > 1:
|
11720 |
+
rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
|
11721 |
+
features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
|
11722 |
+
else:
|
11723 |
+
features['chords_repeat_ratio_pc'] = None
|
11724 |
+
if len(chords_raw) > 1:
|
11725 |
+
bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
|
11726 |
+
features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
|
11727 |
+
else:
|
11728 |
+
features['chords_bigram_entropy_raw'] = None
|
11729 |
+
if len(chords_pc) > 1:
|
11730 |
+
bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
|
11731 |
+
features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
|
11732 |
+
else:
|
11733 |
+
features['chords_bigram_entropy_pc'] = None
|
11734 |
+
features['chords_mode_raw'] = mode(chords_raw)
|
11735 |
+
features['chords_mode_pc'] = mode(chords_pc)
|
11736 |
+
if chords_pc:
|
11737 |
+
pc_sizes = [len(ch) for ch in chords_pc]
|
11738 |
+
features['chords_pc_size_mean'] = mean(pc_sizes)
|
11739 |
+
else:
|
11740 |
+
features['chords_pc_size_mean'] = None
|
11741 |
+
else:
|
11742 |
+
for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
|
11743 |
+
'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
|
11744 |
+
'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
|
11745 |
+
'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
|
11746 |
+
'chords_pc_size_mean']:
|
11747 |
+
features[key] = None
|
11748 |
+
|
11749 |
+
#=================================================================
|
11750 |
+
|
11751 |
+
if delta_times:
|
11752 |
+
med_dt = features['delta_times_median']
|
11753 |
+
iqr_dt = features['delta_times_iqr']
|
11754 |
+
threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
|
11755 |
+
threshold_b = percentile(delta_times, 90)
|
11756 |
+
if threshold_a is not None and threshold_b is not None:
|
11757 |
+
phrase_threshold = max(threshold_a, threshold_b)
|
11758 |
+
elif threshold_a is not None:
|
11759 |
+
phrase_threshold = threshold_a
|
11760 |
+
elif threshold_b is not None:
|
11761 |
+
phrase_threshold = threshold_b
|
11762 |
+
else:
|
11763 |
+
phrase_threshold = None
|
11764 |
+
else:
|
11765 |
+
phrase_threshold = None
|
11766 |
+
|
11767 |
+
phrases = []
|
11768 |
+
current_phrase = []
|
11769 |
+
if onsets:
|
11770 |
+
current_phrase.append(0)
|
11771 |
+
for i in range(len(onsets) - 1):
|
11772 |
+
gap = onsets[i + 1] - onsets[i]
|
11773 |
+
if phrase_threshold is not None and gap > phrase_threshold:
|
11774 |
+
phrases.append(current_phrase)
|
11775 |
+
current_phrase = []
|
11776 |
+
current_phrase.append(i + 1)
|
11777 |
+
if current_phrase:
|
11778 |
+
phrases.append(current_phrase)
|
11779 |
+
if phrases:
|
11780 |
+
phrase_note_counts = []
|
11781 |
+
phrase_durations = []
|
11782 |
+
phrase_densities = []
|
11783 |
+
phrase_mean_pitches = []
|
11784 |
+
phrase_pitch_ranges = []
|
11785 |
+
phrase_start_times = []
|
11786 |
+
phrase_end_times = []
|
11787 |
+
for phrase in phrases:
|
11788 |
+
note_count = len(phrase)
|
11789 |
+
phrase_note_counts.append(note_count)
|
11790 |
+
ph_start = onsets[phrase[0]]
|
11791 |
+
ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
|
11792 |
+
phrase_start_times.append(ph_start)
|
11793 |
+
phrase_end_times.append(ph_end)
|
11794 |
+
ph_duration = ph_end - ph_start
|
11795 |
+
phrase_durations.append(ph_duration)
|
11796 |
+
density = note_count / ph_duration if ph_duration > 0 else None
|
11797 |
+
phrase_densities.append(density)
|
11798 |
+
ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
|
11799 |
+
phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
|
11800 |
+
phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
|
11801 |
+
if len(phrases) > 1:
|
11802 |
+
phrase_gaps = []
|
11803 |
+
for i in range(len(phrases) - 1):
|
11804 |
+
gap = phrase_start_times[i + 1] - phrase_end_times[i]
|
11805 |
+
phrase_gaps.append(gap if gap > 0 else 0)
|
11806 |
+
else:
|
11807 |
+
phrase_gaps = []
|
11808 |
+
features['phrases_count'] = len(phrases)
|
11809 |
+
features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
|
11810 |
+
features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
|
11811 |
+
features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
|
11812 |
+
features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
|
11813 |
+
features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
|
11814 |
+
features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
|
11815 |
+
features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
|
11816 |
+
features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
|
11817 |
+
features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
|
11818 |
+
features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
|
11819 |
+
features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
|
11820 |
+
features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
|
11821 |
+
if phrase_gaps:
|
11822 |
+
features['phrases_avg_gap'] = mean(phrase_gaps)
|
11823 |
+
features['phrases_std_gap'] = std(phrase_gaps)
|
11824 |
+
features['phrases_min_gap'] = min(phrase_gaps)
|
11825 |
+
features['phrases_max_gap'] = max(phrase_gaps)
|
11826 |
+
else:
|
11827 |
+
features['phrases_avg_gap'] = None
|
11828 |
+
features['phrases_std_gap'] = None
|
11829 |
+
features['phrases_min_gap'] = None
|
11830 |
+
features['phrases_max_gap'] = None
|
11831 |
+
features['phrases_threshold'] = phrase_threshold
|
11832 |
+
else:
|
11833 |
+
for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
|
11834 |
+
'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
|
11835 |
+
'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
|
11836 |
+
'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
|
11837 |
+
'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
|
11838 |
+
'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
|
11839 |
+
features[key] = None
|
11840 |
+
|
11841 |
+
#=================================================================
|
11842 |
+
|
11843 |
+
return features
|
11844 |
+
|
11845 |
+
###################################################################################
|
11846 |
+
|
11847 |
+
def winsorized_normalize(data, new_range=(0, 255), clip=1.5):

    """Winsorize *data* by the IQR rule, then min-max normalize it to integers.

    Values outside [Q1 - clip*IQR, Q3 + clip*IQR] are clipped to those bounds
    (restricted to the actual data range), then the clipped values are linearly
    rescaled to new_range and rounded to ints.

    Parameters:
        data: sequence of numbers. May be empty (returns []).
        new_range: (new_min, new_max) output interval.
        clip: IQR multiplier for the winsorization fences.

    Returns:
        list[int] of the same length as data.
    """

    # Fix: the original crashed on empty input (min()/percentile of empty seq).
    if not data:
        return []

    new_min, new_max = new_range

    #=================================================================

    def percentile(values, p):
        """Linear-interpolation percentile (p in 0..100) of *values*."""

        srt = sorted(values)
        n = len(srt)
        if n == 1:
            return srt[0]
        k = (n - 1) * p / 100.0
        f = int(k)
        c = k - f
        if f + 1 < n:
            # Interpolate between the two surrounding order statistics.
            return srt[f] * (1 - c) + srt[f + 1] * c

        return srt[f]

    #=================================================================

    q1 = percentile(data, 25)
    q3 = percentile(data, 75)
    iqr = q3 - q1

    lower_bound_w = q1 - clip * iqr
    upper_bound_w = q3 + clip * iqr

    data_min = min(data)
    data_max = max(data)
    # Fences never extend beyond the observed data range.
    effective_low = max(lower_bound_w, data_min)
    effective_high = min(upper_bound_w, data_max)

    #=================================================================

    if effective_high == effective_low:

        # Degenerate fences: fall back to plain min-max over the raw data,
        # or a constant output when the data itself is constant.
        if data_max == data_min:
            return [int(new_min)] * len(data)

        normalized = [(x - data_min) / (data_max - data_min) for x in data]

        return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]

    #=================================================================

    clipped = [x if x >= effective_low else effective_low for x in data]
    clipped = [x if x <= effective_high else effective_high for x in clipped]

    normalized = [(x - effective_low) / (effective_high - effective_low) for x in clipped]

    #=================================================================

    return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
|
11904 |
+
|
11905 |
+
###################################################################################
|
11906 |
+
|
11907 |
+
def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):

    """Turn a features dict into parallel lists of raw values, int tokens and
    winsorize-normalized tokens.

    Keys are processed in sorted order. None values map to *none_token* in both
    the values and tokens lists; list/tuple values are averaged.

    Returns:
        (values, tokens, norm_tokens) - three lists of equal length.
    """

    raw_values = []
    int_tokens = []

    #=================================================================

    def _to_int_token(val):
        # Numeric scalars -> rounded magnitude; sequences -> rounded mean
        # magnitude; anything else -> bounded hash id.
        if isinstance(val, (int, float)):
            return int(round(abs(val)))
        if isinstance(val, (list, tuple)):
            return int(round(abs(sum(val) / len(val))))
        return int(abs(hash(val)) % (10 ** 8))

    #=================================================================

    for feat_name in sorted(features):

        feat_val = features[feat_name]

        if feat_val is None:
            int_tokens.append(none_token)
            raw_values.append(none_token)
            continue

        int_tokens.append(_to_int_token(feat_val))

        if isinstance(feat_val, (list, tuple)):
            raw_values.append(sum(feat_val) / len(feat_val))
        else:
            raw_values.append(feat_val)

    #=================================================================

    normalized_tokens = winsorized_normalize(int_tokens, new_range, clip)

    #=================================================================

    return raw_values, int_tokens, normalized_tokens
|
11951 |
+
|
11952 |
+
###################################################################################
|
11953 |
+
|
11954 |
+
def write_jsonl(records_dicts_list,
                file_name='data',
                file_ext='.jsonl',
                file_mode='w',
                line_sep='\n',
                verbose=True
                ):

    """Write a list of dicts to a JSON Lines file, one JSON object per line.

    Parameters:
        records_dicts_list: list of JSON-serializable dicts.
        file_name: output path; *file_ext* is appended when it has no extension.
        file_ext: extension used when file_name lacks one.
        file_mode: open() mode ('w' to overwrite, 'a' to append).
        line_sep: separator written after each record.
        verbose: print progress banners and a tqdm bar.
    """

    if verbose:
        print('=' * 70)
        print('Writing', len(records_dicts_list), 'records to jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    l_count = 0

    # Fix: removed redundant f.close() inside the with-block; the context
    # manager already closes the file.
    with open(file_name, mode=file_mode) as f:
        for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
            f.write(json.dumps(record) + line_sep)
            l_count += 1

    if verbose:
        print('=' * 70)
        print('Written total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)
|
11985 |
+
|
11986 |
+
###################################################################################
|
11987 |
+
|
11988 |
+
def read_jsonl(file_name='data',
               file_ext='.jsonl',
               verbose=True
               ):

    """Read a JSON Lines file into a list of records.

    Corrupted lines are skipped with a warning; KeyboardInterrupt returns the
    records collected so far instead of propagating.

    Parameters:
        file_name: input path; *file_ext* is appended when it has no extension.
        file_ext: extension used when file_name lacks one.
        verbose: print progress banners and a tqdm bar.

    Returns:
        list of parsed records.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    # Fix: removed redundant f.close() calls; the with-block closes the file,
    # including on the early KeyboardInterrupt return.
    with open(file_name, 'r') as f:

        records = []
        gl_count = 0

        for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                record = json.loads(line)
                records.append(record)
                gl_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stoping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', i, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', gl_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
|
12041 |
+
|
12042 |
+
###################################################################################
|
12043 |
+
|
12044 |
+
def read_jsonl_lines(lines_indexes_list,
                     file_name='data',
                     file_ext='.jsonl',
                     verbose=True
                     ):

    """Read only the given (0-based) line numbers from a JSON Lines file.

    Stops as soon as all requested lines have been read. Corrupted lines are
    skipped with a warning; KeyboardInterrupt returns the records collected
    so far.

    Parameters:
        lines_indexes_list: line indexes to load (left unmodified).
        file_name: input path; *file_ext* is appended when it has no extension.
        file_ext: extension used when file_name lacks one.
        verbose: print progress banners.

    Returns:
        list of parsed records, in file order.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    l_count = 0

    # Fix: work on a local descending-sorted copy instead of sorting the
    # caller's list in place (the original mutated the argument).
    remaining = sorted(lines_indexes_list, reverse=True)

    # Fix: removed redundant f.close() calls; the with-block closes the file.
    with open(file_name, 'r') as f:
        for current_line_number, line in tqdm.tqdm(enumerate(f)):

            try:
                if current_line_number in remaining:
                    record = json.loads(line)
                    records.append(record)
                    # Lines arrive in ascending order, so the match is always
                    # the smallest remaining index, i.e. the last element of
                    # the descending-sorted list.
                    remaining = remaining[:-1]
                    l_count += 1

                    if not remaining:
                        break

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stoping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
|
12104 |
+
|
12105 |
+
###################################################################################
|
12106 |
+
|
12107 |
+
def compute_base(x: int, n: int) -> int:

    """Return the smallest base b >= 2 such that x fits in n base-b digits,
    i.e. the smallest b with b ** n > x.

    Raises:
        ValueError: if x is negative.
    """

    if x < 0:
        raise ValueError("x must be non-negative.")
    if x == 0:
        return 2

    # Float n-th root is only an estimate (e.g. 1000 ** (1/3) ~ 9.9999...),
    # so correct it exactly with integer arithmetic in both directions.
    # Fix: the original applied a single "+1" correction, which returned
    # base 10 for compute_base(1000, 3) even though 10 ** 3 == 1000 is not
    # strictly greater than 1000, making 1000 unrepresentable in 3 digits.
    b = max(2, int(x ** (1 / n)))

    while b > 2 and (b - 1) ** n > x:
        b -= 1
    while b ** n <= x:
        b += 1

    return b
|
12120 |
+
|
12121 |
+
###################################################################################
|
12122 |
+
|
12123 |
+
def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:

    """Encode x as exactly n big-endian digits in an automatically chosen base.

    The base is picked by compute_base(x, n).

    Returns:
        (base, digits) where digits has length n, most significant first.
    """

    base = compute_base(x, n)

    # Collect least-significant digits first, then reverse to big-endian.
    rev_digits = []
    remainder = x
    for _ in range(n):
        rev_digits.append(remainder % base)
        remainder //= base

    return base, rev_digits[::-1]
|
12133 |
+
|
12134 |
+
###################################################################################
|
12135 |
+
|
12136 |
+
def decode_int_auto(base: int, digits: list[int]) -> int:

    """Decode a big-endian digit sequence in the given base back to an int.

    Raises:
        ValueError: if any digit falls outside the range 0..base-1.
    """

    result = 0

    for digit in digits:
        if not (0 <= digit < base):
            raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {digit}")

        # Horner's scheme: shift the accumulator one place and add the digit.
        result = result * base + digit

    return result
|
12146 |
+
|
12147 |
+
###################################################################################
|
12148 |
+
|
12149 |
+
def encode_int_manual(x, base, n):

    """Encode x as exactly n big-endian digits in the caller-supplied base.

    Values too large for n digits are silently truncated to the low n digits.
    """

    # Collect least-significant digits first, then flip to big-endian order.
    out = []
    value = x
    for _ in range(n):
        out.append(value % base)
        value //= base

    out.reverse()

    return out
|
12158 |
+
|
12159 |
+
###################################################################################
|
12160 |
+
|
12161 |
+
def escore_notes_pitches_chords_signature(escore_notes,
                                          max_patch=128,
                                          sort_by_counts=False,
                                          use_full_chords=False
                                          ):

    """Build a pitches/chords signature histogram for an enhanced score.

    Notes are filtered by patch, chordified, and each chord is reduced to a
    token: a single pitch (0..127), a chord-type token (>= 128 via
    ALL_CHORDS_SORTED), or drum tokens (offset past the chord range). The
    result is a list of [token, count] pairs; the entry with token -1 holds
    the number of chords that had to be fixed by check_and_fix_tones_chord.

    Parameters:
        escore_notes: enhanced score notes (e[3] = channel, e[4] = pitch,
            e[6] = patch).
        max_patch: keep only notes with patch <= max_patch % 129.
        sort_by_counts: sort the signature by descending count.
        use_full_chords: passed through to check_and_fix_tones_chord.

    Returns:
        list of [token, count] pairs, or [] when no notes survive the filter.
    """

    escore_notes = [e for e in escore_notes if e[6] <= max_patch % 129]

    if escore_notes:

        cscore = chordify_score([1000, escore_notes])

        sig = []
        dsig = []

        # Drum pitches are mapped past the pitch (0..127) and chord ranges.
        drums_offset = 321 + 128

        bad_chords_counter = 0

        for c in cscore:

            # Channel 9 is drums; shift its pitches by 128 to separate them.
            all_pitches = [e[4] if e[3] != 9 else e[4]+128 for e in c]
            chord = sorted(set(all_pitches))

            pitches = sorted([p for p in chord if p < 128], reverse=True)
            drums = [(d+drums_offset)-128 for d in chord if d > 127]

            if pitches:
                if len(pitches) > 1:
                    tones_chord = sorted(set([p % 12 for p in pitches]))

                    # Fix: narrowed the original bare except; list.index
                    # raises ValueError for an unknown chord.
                    try:
                        sig_token = ALL_CHORDS_SORTED.index(tones_chord) + 128
                    except ValueError:
                        checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
                        sig_token = ALL_CHORDS_SORTED.index(checked_tones_chord) + 128
                        bad_chords_counter += 1

                elif len(pitches) == 1:
                    sig_token = pitches[0]

                sig.append(sig_token)

            if drums:
                dsig.extend(drums)

        # Histogram of all pitch/chord and drum tokens, in first-seen order.
        sig_p = {}

        for item in sig+dsig:

            if item in sig_p:
                sig_p[item] += 1

            else:
                sig_p[item] = 1

        # Token -1 records how many chords needed fixing.
        sig_p[-1] = bad_chords_counter

        fsig = [list(v) for v in sig_p.items()]

        if sort_by_counts:
            fsig.sort(key=lambda x: x[1], reverse=True)

        return fsig

    else:
        return []
|
12228 |
+
|
12229 |
###################################################################################
|
12230 |
# This is the end of the TMIDI X Python module
|
12231 |
###################################################################################
|