asigalov61 commited on
Commit
d04f7e4
·
verified ·
1 Parent(s): 7dcbfde

Upload TMIDIX.py

Browse files
Files changed (1) hide show
  1. TMIDIX.py +872 -9
TMIDIX.py CHANGED
@@ -1,6 +1,5 @@
1
  #! /usr/bin/python3
2
 
3
-
4
  r'''###############################################################################
5
  ###################################################################################
6
  #
@@ -8,7 +7,7 @@ r'''############################################################################
8
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
9
  # Version 1.0
10
  #
11
- # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1438
12
  #
13
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
14
  #
@@ -1458,8 +1457,6 @@ import os
1458
 
1459
  import datetime
1460
 
1461
- import copy
1462
-
1463
  from datetime import datetime
1464
 
1465
  import secrets
@@ -1476,12 +1473,12 @@ import multiprocessing
1476
 
1477
  from itertools import zip_longest
1478
  from itertools import groupby
 
1479
  from collections import Counter
 
1480
 
1481
  from operator import itemgetter
1482
 
1483
- import sys
1484
-
1485
  from abc import ABC, abstractmethod
1486
 
1487
  from difflib import SequenceMatcher as SM
@@ -1493,7 +1490,7 @@ import matplotlib.pyplot as plt
1493
 
1494
  import psutil
1495
 
1496
- from collections import defaultdict
1497
 
1498
  ###################################################################################
1499
  #
@@ -4184,6 +4181,17 @@ def advanced_score_processor(raw_score,
4184
  basic_single_track_score.append(ev)
4185
  num_tracks += 1
4186
 
 
 
 
 
 
 
 
 
 
 
 
4187
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4188
  basic_single_track_score.sort(key=lambda x: x[1])
4189
 
@@ -4198,7 +4206,7 @@ def advanced_score_processor(raw_score,
4198
  enhanced_single_track_score.append(event)
4199
  num_patch_changes += 1
4200
 
4201
- if event[0] == 'note':
4202
  if event[3] != 9:
4203
  event.extend([patches[event[3]]])
4204
  all_score_patches.extend([patches[event[3]]])
@@ -11300,7 +11308,7 @@ def create_files_list(datasets_paths=['./'],
11300
 
11301
  files_exts = tuple(files_exts)
11302
 
11303
- for dataset_addr in tqdm.tqdm(datasets_paths):
11304
  for dirpath, dirnames, filenames in os.walk(dataset_addr):
11305
  for file in filenames:
11306
  if file not in filez_set and file.endswith(files_exts):
@@ -11363,6 +11371,861 @@ def has_consecutive_trend(nums, count):
11363
 
11364
  return False
11365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11366
  ###################################################################################
11367
  # This is the end of the TMIDI X Python module
11368
  ###################################################################################
 
1
  #! /usr/bin/python3
2
 
 
3
  r'''###############################################################################
4
  ###################################################################################
5
  #
 
7
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
8
  # Version 1.0
9
  #
10
+ # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1437
11
  #
12
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
13
  #
 
1457
 
1458
  import datetime
1459
 
 
 
1460
  from datetime import datetime
1461
 
1462
  import secrets
 
1473
 
1474
  from itertools import zip_longest
1475
  from itertools import groupby
1476
+
1477
  from collections import Counter
1478
+ from collections import defaultdict
1479
 
1480
  from operator import itemgetter
1481
 
 
 
1482
  from abc import ABC, abstractmethod
1483
 
1484
  from difflib import SequenceMatcher as SM
 
1490
 
1491
  import psutil
1492
 
1493
+ import json
1494
 
1495
  ###################################################################################
1496
  #
 
4181
  basic_single_track_score.append(ev)
4182
  num_tracks += 1
4183
 
4184
+ for e in basic_single_track_score:
4185
+
4186
+ if e[0] == 'note':
4187
+ e[3] = e[3] % 16
4188
+ e[4] = e[4] % 128
4189
+ e[5] = e[5] % 128
4190
+
4191
+ if e[0] == 'patch_change':
4192
+ e[2] = e[2] % 16
4193
+ e[3] = e[3] % 128
4194
+
4195
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4196
  basic_single_track_score.sort(key=lambda x: x[1])
4197
 
 
4206
  enhanced_single_track_score.append(event)
4207
  num_patch_changes += 1
4208
 
4209
+ if event[0] == 'note':
4210
  if event[3] != 9:
4211
  event.extend([patches[event[3]]])
4212
  all_score_patches.extend([patches[event[3]]])
 
11308
 
11309
  files_exts = tuple(files_exts)
11310
 
11311
+ for dataset_addr in tqdm.tqdm(datasets_paths, disable=not verbose):
11312
  for dirpath, dirnames, filenames in os.walk(dataset_addr):
11313
  for file in filenames:
11314
  if file not in filez_set and file.endswith(files_exts):
 
11371
 
11372
  return False
11373
 
11374
+ ###################################################################################
11375
+
11376
def escore_notes_primary_features(escore_notes):

    """Compute a flat dict of summary statistics ("primary features") for a score.

    The input score is first reduced to solo piano via solo_piano_escore_notes()
    and converted to delta timings via delta_score_notes() (both are helpers
    defined elsewhere in this module — presumably they keep the note-event
    layout used below; TODO confirm), then flattened into
    (delta_time, duration, pitch) triplets from which all features are derived.

    Returns a dict mapping feature name -> numeric value (or None whenever a
    feature is undefined for the given input, e.g. empty score or zero spread).
    """

    #=================================================================

    # Small self-contained statistics helpers. All of them return None (or an
    # empty list for diff()) on empty input instead of raising.

    def mean(values):
        return sum(values) / len(values) if values else None

    def std(values):
        # Population standard deviation (divides by N, not N-1).
        if not values:
            return None
        m = mean(values)
        return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None

    def skew(values):
        # Population skewness; None for constant input (std == 0).
        if not values:
            return None
        m = mean(values)
        s = std(values)
        if s is None or s == 0:
            return None
        return sum(((x - m) / s) ** 3 for x in values) / len(values)

    def kurtosis(values):
        # Excess kurtosis (normal distribution -> 0); None for constant input.
        if not values:
            return None
        m = mean(values)
        s = std(values)
        if s is None or s == 0:
            return None
        return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3

    def median(values):
        if not values:
            return None
        srt = sorted(values)
        n = len(srt)
        mid = n // 2
        if n % 2 == 0:
            return (srt[mid - 1] + srt[mid]) / 2.0
        return srt[mid]

    def percentile(values, p):
        # Linear-interpolation percentile; p is in [0, 100].
        if not values:
            return None
        srt = sorted(values)
        n = len(srt)
        k = (n - 1) * p / 100.0
        f = int(k)
        c = k - f
        if f + 1 < n:
            return srt[f] * (1 - c) + srt[f + 1] * c
        return srt[f]

    def diff(values):
        # First differences; empty list when fewer than two values.
        if not values or len(values) < 2:
            return []
        return [values[i + 1] - values[i] for i in range(len(values) - 1)]

    def mad(values):
        # Median absolute deviation.
        if not values:
            return None
        m = median(values)
        return median([abs(x - m) for x in values])

    def entropy(values):
        # Shannon entropy (base 2) of the value histogram.
        if not values:
            return None
        freq = {}
        for v in values:
            freq[v] = freq.get(v, 0) + 1
        total = len(values)
        ent = 0.0
        for count in freq.values():
            p_val = count / total
            ent -= p_val * math.log2(p_val)
        return ent

    def mode(values):
        # Most frequent value; ties broken by the smallest value, so the
        # result is deterministic.
        if not values:
            return None
        freq = {}
        for v in values:
            freq[v] = freq.get(v, 0) + 1
        max_count = max(freq.values())
        modes = [k for k, count in freq.items() if count == max_count]
        return min(modes)


    #=================================================================

    # Reduce to solo piano, then convert to delta timings (module helpers).
    sp_score = solo_piano_escore_notes(escore_notes)

    dscore = delta_score_notes(sp_score)

    # Flatten each note event into (delta_time, duration, pitch): indices
    # 1, 2, 4 of a delta-score note event.
    seq = []

    for d in dscore:
        seq.extend([d[1], d[2], d[4]])

    #=================================================================

    # Drop any trailing partial triplet, then regroup into rows of 3.
    n = len(seq)
    if n % 3 != 0:
        seq = seq[: n - (n % 3)]
    arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]

    #=================================================================

    features = {}

    # --- delta-time (inter-onset interval) statistics ---------------

    delta_times = [row[0] for row in arr]
    if delta_times:
        features['delta_times_mean'] = mean(delta_times)
        features['delta_times_std'] = std(delta_times)
        features['delta_times_min'] = min(delta_times)
        features['delta_times_max'] = max(delta_times)
        features['delta_times_skew'] = skew(delta_times)
        features['delta_times_kurtosis'] = kurtosis(delta_times)
        # A zero delta time means the note starts together with the previous
        # one, i.e. it is part of a chord.
        delta_zero_count = sum(1 for x in delta_times if x == 0)
        features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
        nonzero_dt = [x for x in delta_times if x != 0]
        if nonzero_dt:
            features['delta_times_nonzero_mean'] = mean(nonzero_dt)
            features['delta_times_nonzero_std'] = std(nonzero_dt)
        else:
            features['delta_times_nonzero_mean'] = None
            features['delta_times_nonzero_std'] = None
        features['delta_times_mad'] = mad(delta_times)
        # Coefficient of variation: std / mean, guarded against zero mean.
        features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
                                      if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
        features['delta_times_entropy'] = entropy(delta_times)
        features['delta_times_range'] = max(delta_times) - min(delta_times)
        features['delta_times_median'] = median(delta_times)
        features['delta_times_quantile_25'] = percentile(delta_times, 25)
        features['delta_times_quantile_75'] = percentile(delta_times, 75)
        if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
            features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
        else:
            features['delta_times_iqr'] = None
    else:
        # Empty score: emit every delta-time feature as None so the key set
        # of the returned dict is stable.
        for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
                    'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
                    'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
                    'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
                    'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
            features[key] = None

    #=================================================================

    # --- duration statistics ----------------------------------------

    durations = [row[1] for row in arr]
    if durations:
        features['durations_mean'] = mean(durations)
        features['durations_std'] = std(durations)
        features['durations_min'] = min(durations)
        features['durations_max'] = max(durations)
        features['durations_skew'] = skew(durations)
        features['durations_kurtosis'] = kurtosis(durations)
        features['durations_mad'] = mad(durations)
        features['durations_cv'] = (features['durations_std'] / features['durations_mean']
                                    if features['durations_mean'] and features['durations_mean'] != 0 else None)
        features['durations_entropy'] = entropy(durations)
        features['durations_range'] = max(durations) - min(durations)
        features['durations_median'] = median(durations)
        features['durations_quantile_25'] = percentile(durations, 25)
        features['durations_quantile_75'] = percentile(durations, 75)
        if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
            features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
        else:
            features['durations_iqr'] = None
    else:
        for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
                    'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
                    'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
                    'durations_quantile_75', 'durations_iqr']:
            features[key] = None

    #=================================================================

    # --- pitch statistics -------------------------------------------

    pitches = [row[2] for row in arr]
    if pitches:
        features['pitches_mean'] = mean(pitches)
        features['pitches_std'] = std(pitches)
        features['pitches_min'] = min(pitches)
        features['pitches_max'] = max(pitches)
        features['pitches_skew'] = skew(pitches)
        features['pitches_kurtosis'] = kurtosis(pitches)
        features['pitches_range'] = max(pitches) - min(pitches)
        features['pitches_median'] = median(pitches)
        features['pitches_quantile_25'] = percentile(pitches, 25)
        features['pitches_quantile_75'] = percentile(pitches, 75)
        if len(pitches) > 1:
            dps = diff(pitches)
            features['pitches_diff_mean'] = mean(dps)
            features['pitches_diff_std'] = std(dps)
        else:
            features['pitches_diff_mean'] = None
            features['pitches_diff_std'] = None
        features['pitches_mad'] = mad(pitches)
        # Local maxima/minima of the melodic contour.
        if len(pitches) > 2:
            peaks = sum(1 for i in range(1, len(pitches)-1)
                        if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
            valleys = sum(1 for i in range(1, len(pitches)-1)
                          if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
        else:
            peaks, valleys = None, None
        features['pitches_peak_count'] = peaks
        features['pitches_valley_count'] = valleys
        # Least-squares slope of pitch vs. note index (overall melodic trend).
        if len(pitches) > 1:
            x = list(range(len(pitches)))
            denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
            if denominator != 0:
                slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
                         sum(x) * sum(pitches)) / denominator
            else:
                slope = None
            features['pitches_trend_slope'] = slope
        else:
            features['pitches_trend_slope'] = None

        features['pitches_unique_count'] = len(set(pitches))
        # Normalized pitch-class (mod 12) histogram.
        pitch_class_hist = {i: 0 for i in range(12)}
        for p in pitches:
            pitch_class_hist[p % 12] += 1
        total_pitch = len(pitches)
        for i in range(12):
            features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None

        # Longest strictly ascending / descending runs (a repeat resets both).
        max_asc = 0
        cur_asc = 0
        max_desc = 0
        cur_desc = 0
        for i in range(1, len(pitches)):
            if pitches[i] > pitches[i-1]:
                cur_asc += 1
                max_asc = max(max_asc, cur_asc)
                cur_desc = 0
            elif pitches[i] < pitches[i-1]:
                cur_desc += 1
                max_desc = max(max_desc, cur_desc)
                cur_asc = 0
            else:
                cur_asc = 0
                cur_desc = 0
        features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
        features['pitches_max_consecutive_descending'] = max_desc if pitches else None
        p_intervals = diff(pitches)
        features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
        # Number of sign changes in the melodic intervals (contour turns).
        if p_intervals:
            dc = sum(1 for i in range(1, len(p_intervals))
                     if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
            features['pitches_direction_changes'] = dc
        else:
            features['pitches_direction_changes'] = None
    else:
        for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
                     'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
                     'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
                     'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
                     'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
                     'pitches_median_diff', 'pitches_direction_changes'] +
                    [f'pitches_pc_{i}' for i in range(12)]):
            features[key] = None

    #=================================================================

    # --- overall (all three columns pooled together) ----------------

    overall = [x for row in arr for x in row]
    if overall:
        features['overall_mean'] = mean(overall)
        features['overall_std'] = std(overall)
        features['overall_min'] = min(overall)
        features['overall_max'] = max(overall)
        features['overall_cv'] = (features['overall_std'] / features['overall_mean']
                                  if features['overall_mean'] and features['overall_mean'] != 0 else None)
    else:
        for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
            features[key] = None

    #=================================================================

    # Reconstruct absolute onset times from the delta times (cumulative sum,
    # excluding each note's own delta).
    onsets = []
    cumulative = 0
    for dt in delta_times:
        onsets.append(cumulative)
        cumulative += dt
    if onsets and durations:
        overall_piece_duration = onsets[-1] + durations[-1]
    else:
        overall_piece_duration = None
    features['overall_piece_duration'] = overall_piece_duration
    features['overall_notes_density'] = (len(arr) / overall_piece_duration
                                         if overall_piece_duration and overall_piece_duration > 0 else None)
    features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
                                if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
    features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
    features['overall_sum_durations'] = (sum(durations) if durations else None)
    features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
                                         if overall_piece_duration and durations else None)
    features['overall_onset_std'] = std(onsets) if onsets else None

    #=================================================================

    # --- chord statistics -------------------------------------------
    # Notes with delta time 0 are grouped with the preceding note; groups of
    # two or more simultaneous notes count as chords.

    chords_raw = []
    chords_pc = []
    current_group = []
    for i, note in enumerate(arr):
        dt = note[0]
        if i == 0:
            current_group = [i]
        else:
            if dt == 0:
                current_group.append(i)
            else:
                if len(current_group) >= 2:
                    chord_notes = [arr[j][2] for j in current_group]
                    chords_raw.append(tuple(sorted(chord_notes)))
                    chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))

                current_group = [i]

    # Flush the final pending group.
    if current_group and len(current_group) >= 2:
        chord_notes = [arr[j][2] for j in current_group]
        chords_raw.append(tuple(sorted(chord_notes)))
        chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))

    if chords_raw:
        chord_count = len(chords_raw)
        features['chords_count'] = chord_count
        features['chords_density'] = (chord_count / overall_piece_duration
                                      if overall_piece_duration and chord_count is not None else None)
        chord_sizes = [len(ch) for ch in chords_raw]
        features['chords_size_mean'] = mean(chord_sizes)
        features['chords_size_std'] = std(chord_sizes)
        features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
        features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
        features['chords_unique_raw_count'] = len(set(chords_raw))
        features['chords_unique_pc_count'] = len(set(chords_pc))
        features['chords_entropy_raw'] = entropy(chords_raw)
        features['chords_entropy_pc'] = entropy(chords_pc)
        # Ratio of immediate chord repetitions (raw pitches / pitch classes).
        if len(chords_raw) > 1:
            rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
            features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
        else:
            features['chords_repeat_ratio_raw'] = None
        if len(chords_pc) > 1:
            rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
            features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
        else:
            features['chords_repeat_ratio_pc'] = None
        # Entropy of chord-to-chord transitions (bigrams).
        if len(chords_raw) > 1:
            bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
            features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
        else:
            features['chords_bigram_entropy_raw'] = None
        if len(chords_pc) > 1:
            bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
            features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
        else:
            features['chords_bigram_entropy_pc'] = None
        # NOTE: these two are tuples, not numbers.
        features['chords_mode_raw'] = mode(chords_raw)
        features['chords_mode_pc'] = mode(chords_pc)
        if chords_pc:
            pc_sizes = [len(ch) for ch in chords_pc]
            features['chords_pc_size_mean'] = mean(pc_sizes)
        else:
            features['chords_pc_size_mean'] = None
    else:
        for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
                    'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
                    'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
                    'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
                    'chords_pc_size_mean']:
            features[key] = None

    #=================================================================

    # --- phrase segmentation ----------------------------------------
    # A phrase boundary is an onset gap larger than the threshold: the max of
    # (median + 1.5 * IQR) and the 90th percentile of the delta times.

    if delta_times:
        med_dt = features['delta_times_median']
        iqr_dt = features['delta_times_iqr']
        threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
        threshold_b = percentile(delta_times, 90)
        if threshold_a is not None and threshold_b is not None:
            phrase_threshold = max(threshold_a, threshold_b)
        elif threshold_a is not None:
            phrase_threshold = threshold_a
        elif threshold_b is not None:
            phrase_threshold = threshold_b
        else:
            phrase_threshold = None
    else:
        phrase_threshold = None

    # Each phrase is a list of note indices into arr/onsets/pitches.
    phrases = []
    current_phrase = []
    if onsets:
        current_phrase.append(0)
        for i in range(len(onsets) - 1):
            gap = onsets[i + 1] - onsets[i]
            if phrase_threshold is not None and gap > phrase_threshold:
                phrases.append(current_phrase)
                current_phrase = []
            current_phrase.append(i + 1)
        if current_phrase:
            phrases.append(current_phrase)
    if phrases:
        phrase_note_counts = []
        phrase_durations = []
        phrase_densities = []
        phrase_mean_pitches = []
        phrase_pitch_ranges = []
        phrase_start_times = []
        phrase_end_times = []
        for phrase in phrases:
            note_count = len(phrase)
            phrase_note_counts.append(note_count)
            ph_start = onsets[phrase[0]]
            ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
            phrase_start_times.append(ph_start)
            phrase_end_times.append(ph_end)
            ph_duration = ph_end - ph_start
            phrase_durations.append(ph_duration)
            # NOTE(review): density is None for a zero-length phrase; a None
            # inside phrase_densities would make mean() below raise — confirm
            # zero-length phrases cannot occur upstream.
            density = note_count / ph_duration if ph_duration > 0 else None
            phrase_densities.append(density)
            ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
            phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
            phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
        # Silent gaps between consecutive phrases (clamped at 0 for overlaps).
        if len(phrases) > 1:
            phrase_gaps = []
            for i in range(len(phrases) - 1):
                gap = phrase_start_times[i + 1] - phrase_end_times[i]
                phrase_gaps.append(gap if gap > 0 else 0)
        else:
            phrase_gaps = []
        features['phrases_count'] = len(phrases)
        features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
        features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
        features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
        features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
        features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
        features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
        features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
        features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
        features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
        features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
        features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
        features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
        if phrase_gaps:
            features['phrases_avg_gap'] = mean(phrase_gaps)
            features['phrases_std_gap'] = std(phrase_gaps)
            features['phrases_min_gap'] = min(phrase_gaps)
            features['phrases_max_gap'] = max(phrase_gaps)
        else:
            features['phrases_avg_gap'] = None
            features['phrases_std_gap'] = None
            features['phrases_min_gap'] = None
            features['phrases_max_gap'] = None
        features['phrases_threshold'] = phrase_threshold
    else:
        for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
                    'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
                    'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
                    'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
                    'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
                    'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
            features[key] = None

    #=================================================================

    return features
11844
+
11845
+ ###################################################################################
11846
+
11847
def winsorized_normalize(data, new_range=(0, 255), clip=1.5):

    """Winsorize *data* at clip * IQR beyond the quartiles, then rescale it.

    Values are clipped to [Q1 - clip*IQR, Q3 + clip*IQR] (intersected with the
    actual data range) and linearly mapped onto new_range, returning rounded
    ints. If the clipped range collapses, falls back to a plain min-max
    rescale; fully constant data maps to the lower bound of new_range.
    """

    #=================================================================

    lo_target, hi_target = new_range

    #=================================================================

    def _pct(sample, q):
        # Linear-interpolation percentile (q in [0, 100]).
        ordered = sorted(sample)
        count = len(ordered)
        if count == 1:
            return ordered[0]
        pos = (count - 1) * q / 100.0
        base = int(pos)
        frac = pos - base
        if base + 1 < count:
            return ordered[base] * (1 - frac) + ordered[base + 1] * frac

        return ordered[base]

    #=================================================================

    def _scale(fractions):
        # Map [0, 1] fractions onto the target range as rounded ints.
        return [int(round(lo_target + frac * (hi_target - lo_target))) for frac in fractions]

    #=================================================================

    q1 = _pct(data, 25)
    q3 = _pct(data, 75)
    spread = q3 - q1

    lo_raw = min(data)
    hi_raw = max(data)

    # Winsorization bounds, never extending past the observed data range.
    lo_cut = max(q1 - clip * spread, lo_raw)
    hi_cut = min(q3 + clip * spread, hi_raw)

    #=================================================================

    if hi_cut == lo_cut:

        # Degenerate winsorized range: constant data maps to the low bound,
        # otherwise fall back to an ordinary min-max normalization.
        if hi_raw == lo_raw:
            return [int(lo_target)] * len(data)

        return _scale([(v - lo_raw) / (hi_raw - lo_raw) for v in data])

    #=================================================================

    winsorized = [min(max(v, lo_cut), hi_cut) for v in data]

    return _scale([(v - lo_cut) / (hi_cut - lo_cut) for v in winsorized])
11904
+
11905
+ ###################################################################################
11906
+
11907
def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):

    """Turn a features dict into parallel lists of values and integer tokens.

    Keys are processed in sorted order. None values become none_token in both
    lists; numbers are tokenized as round(abs(x)); lists/tuples by their mean;
    anything else via a truncated hash. Returns (values, tokens, norm_tokens)
    where norm_tokens is tokens passed through winsorized_normalize().
    """

    #=================================================================

    def _to_token(val):
        # Integer token for a single non-None feature value.
        if isinstance(val, (int, float)):
            return int(round(abs(val)))

        if isinstance(val, (list, tuple)):
            return int(round(abs(sum(val) / len(val))))

        # Fallback for hashable non-numeric values (e.g. chord tuples are
        # numeric, so this branch covers strings and the like).
        return int(abs(hash(val)) % (10 ** 8))

    #=================================================================

    values = []
    tokens = []

    for key in sorted(features.keys()):

        value = features[key]

        if value is None:
            # Missing feature: sentinel in both parallel lists.
            tokens.append(none_token)
            values.append(none_token)
            continue

        tokens.append(_to_token(value))

        # Sequences are represented by their mean in the values list.
        values.append(sum(value) / len(value) if isinstance(value, (list, tuple)) else value)

    #=================================================================

    norm_tokens = winsorized_normalize(tokens, new_range, clip)

    #=================================================================

    return values, tokens, norm_tokens
11951
+
11952
+ ###################################################################################
11953
+
11954
def write_jsonl(records_dicts_list,
                file_name='data',
                file_ext='.jsonl',
                file_mode='w',
                line_sep='\n',
                verbose=True
                ):

    """Write a list of dicts to a JSON-Lines file, one JSON object per line.

    records_dicts_list: JSON-serializable dicts to write.
    file_name: output file name; file_ext is appended when it has no extension.
    file_mode: mode passed to open() ('w' to overwrite, 'a' to append).
    line_sep: separator written after each record.
    verbose: when True, prints progress and shows a tqdm progress bar.
    """

    if verbose:
        print('=' * 70)
        print('Writing', len(records_dicts_list), 'records to jsonl file...')
        print('=' * 70)

    # Append the default extension only when the caller gave a bare name.
    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    l_count = 0

    # Fix: the with-statement already closes the file; the original's explicit
    # f.close() inside the with-block was redundant and has been removed.
    with open(file_name, mode=file_mode) as f:
        for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
            f.write(json.dumps(record) + line_sep)
            l_count += 1

    if verbose:
        print('=' * 70)
        print('Written total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)
11985
+
11986
+ ###################################################################################
11987
+
11988
def read_jsonl(file_name='data',
               file_ext='.jsonl',
               verbose=True
               ):

    """Read all records from a JSON Lines file.

    Corrupted lines are skipped with a warning; a KeyboardInterrupt stops
    reading and returns the records collected so far.

    Parameters
    ----------
    file_name : str
        Input file name; file_ext is appended only if file_name has no extension.
    file_ext : str
        Extension used when file_name has none.
    verbose : bool
        Print progress information and show a progress bar.

    Returns
    -------
    list of dict
        Successfully parsed records, in file order.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    # Only append the default extension when the caller did not supply one
    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    gl_count = 0

    # The with-statement closes the file on exit (including early return);
    # no explicit close() calls are needed
    with open(file_name, 'r') as f:

        for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                record = json.loads(line)
                records.append(record)
                gl_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', i, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', gl_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12041
+
12042
+ ###################################################################################
12043
+
12044
def read_jsonl_lines(lines_indexes_list,
                     file_name='data',
                     file_ext='.jsonl',
                     verbose=True
                     ):

    """Read only the specified (0-based) line numbers from a JSON Lines file.

    Stops scanning as soon as every requested line has been read. Corrupted
    lines are skipped with a warning; a KeyboardInterrupt stops reading and
    returns the records collected so far.

    Parameters
    ----------
    lines_indexes_list : list of int
        0-based line numbers to load. The caller's list is NOT modified.
    file_name : str
        Input file name; file_ext is appended only if file_name has no extension.
    file_ext : str
        Extension used when file_name has none.
    verbose : bool
        Print progress information and show a progress bar.

    Returns
    -------
    list of dict
        Parsed records in file (ascending line-number) order.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    # Only append the default extension when the caller did not supply one
    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    l_count = 0

    # Work on a set copy: O(1) membership tests, duplicates collapse, and
    # the caller's list is left untouched (the old in-place sort mutated it)
    wanted_lines = set(lines_indexes_list)

    # The with-statement closes the file on exit (including early return)
    with open(file_name, 'r') as f:
        for current_line_number, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                if current_line_number in wanted_lines:
                    record = json.loads(line)
                    records.append(record)
                    wanted_lines.discard(current_line_number)
                    l_count += 1

                    # All requested lines found; stop scanning the file
                    if not wanted_lines:
                        break

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12104
+
12105
+ ###################################################################################
12106
+
12107
def compute_base(x: int, n: int) -> int:

    """Return the smallest base b >= 2 such that x fits in n base-b digits
    (i.e. the smallest b >= 2 with b ** n > x).

    Parameters
    ----------
    x : int
        Non-negative integer to represent.
    n : int
        Number of digits available.

    Raises
    ------
    ValueError
        If x is negative.
    """

    if x < 0:
        raise ValueError("x must be non-negative.")
    if x == 0:
        return 2

    # Float root is only a starting guess; for large x it can be off by
    # more than one in either direction, so correct it with exact integer
    # arithmetic (the old single "+= 1" fix-up could not recover from a
    # guess that was too low by two or more)
    b = max(2, int(round(x ** (1.0 / n))))

    while b ** n <= x:
        b += 1

    while b > 2 and (b - 1) ** n > x:
        b -= 1

    return b
12120
+
12121
+ ###################################################################################
12122
+
12123
def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:

    """Encode x as exactly n big-endian digits in the smallest
    sufficient base, returning (base, digits)."""

    base = compute_base(x, n)

    digits = []
    remainder = x

    # Peel off least-significant digits, then flip to big-endian order
    for _ in range(n):
        remainder, digit = divmod(remainder, base)
        digits.append(digit)

    digits.reverse()

    return base, digits
12133
+
12134
+ ###################################################################################
12135
+
12136
def decode_int_auto(base: int, digits: list[int]) -> int:

    """Reconstruct the integer represented by a big-endian digit list
    in the given base, validating each digit's range."""

    total = 0

    for digit in digits:
        if not (0 <= digit < base):
            raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {digit}")

        total = total * base + digit

    return total
12146
+
12147
+ ###################################################################################
12148
+
12149
def encode_int_manual(x, base, n):

    """Encode x as exactly n big-endian digits in the given base
    (high-order overflow beyond n digits is silently dropped)."""

    little_endian = []

    for _ in range(n):
        x, digit = divmod(x, base)
        little_endian.append(digit)

    return little_endian[::-1]
12158
+
12159
+ ###################################################################################
12160
+
12161
def escore_notes_pitches_chords_signature(escore_notes,
                                          max_patch=128,
                                          sort_by_counts=False,
                                          use_full_chords=False
                                          ):

    """Compute a pitches/chords signature (token -> count pairs) for an
    enhanced score.

    Notes are filtered by patch, chordified, and each chord converted to a
    token: a raw pitch (single note), a chord-table index + 128 (multiple
    pitches), or an offset drum token. Token counts are returned as
    [token, count] pairs, with a final [-1, n] entry holding the number of
    chords that had to be repaired against the chord table.

    Parameters
    ----------
    escore_notes : list
        Enhanced score note events; e[3] is channel, e[4] pitch, e[6] patch.
    max_patch : int
        Keep only notes with patch <= max_patch % 129.
    sort_by_counts : bool
        Sort the output by descending count instead of first-seen order.
    use_full_chords : bool
        Passed through to check_and_fix_tones_chord for chord repair.

    Returns
    -------
    list of [int, int]
        Signature as [token, count] pairs; [] if no notes survive the filter.
    """

    escore_notes = [e for e in escore_notes if e[6] <= max_patch % 129]

    # Guard clause: nothing to analyze after patch filtering
    if not escore_notes:
        return []

    cscore = chordify_score([1000, escore_notes])

    sig = []
    dsig = []

    # Shifts drum pitches into a token range above pitch and chord tokens
    drums_offset = 321 + 128

    bad_chords_counter = 0

    for c in cscore:

        # Channel 9 (drums) pitches are lifted by 128 so they separate
        # cleanly from melodic pitches in the combined chord
        all_pitches = [e[4] if e[3] != 9 else e[4] + 128 for e in c]
        chord = sorted(set(all_pitches))

        pitches = sorted([p for p in chord if p < 128], reverse=True)
        drums = [(d + drums_offset) - 128 for d in chord if d > 127]

        if pitches:
            if len(pitches) > 1:
                tones_chord = sorted(set([p % 12 for p in pitches]))

                # .index() raises ValueError for chords missing from the
                # table; catch only that (the old bare except also swallowed
                # KeyboardInterrupt and genuine bugs), repair, and count
                try:
                    sig_token = ALL_CHORDS_SORTED.index(tones_chord) + 128
                except ValueError:
                    checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
                    sig_token = ALL_CHORDS_SORTED.index(checked_tones_chord) + 128
                    bad_chords_counter += 1

            else:
                sig_token = pitches[0]

            sig.append(sig_token)

        if drums:
            dsig.extend(drums)

    # Counter preserves first-seen insertion order, matching the old
    # hand-rolled dict counting
    sig_p = Counter(sig + dsig)

    # Token -1 records how many chords needed repairing
    sig_p[-1] = bad_chords_counter

    fsig = [list(v) for v in sig_p.items()]

    if sort_by_counts:
        fsig.sort(key=lambda x: x[1], reverse=True)

    return fsig
12228
+
12229
  ###################################################################################
12230
  # This is the end of the TMIDI X Python module
12231
  ###################################################################################