add cuc
- README.md +27 -10
- matching_series.py +113 -27
- plot_cuc.py +24 -0
README.md
CHANGED
@@ -28,30 +28,47 @@ At minimum, the metric requires the original time-series and the generated time-series.
>>> metric = evaluate.load("bowdbeg/matching_series")
>>> results = metric.compute(references=references, predictions=predictions, batch_size=1000)
>>> print(results)
+{'precision_mse': 0.15642462680824154, 'f1_mse': 0.15423970232736145, 'recall_mse': 0.15211497466247828, 'index_mse': 0.1650527529752939, 'precision_mse_features': [0.14161461272391063, 0.13959801451122986, 0.13494790079336152, 0.13812467072775822, 0.13502155933085397, 0.13773603530687478, 0.13782869677371534, 0.13880373566781345, 0.1347356979110729, 0.1380613227954152], 'f1_mse_features': [0.13200523240237663, 0.1321561699583367, 0.12686344486378406, 0.12979789457435542, 0.12768556637792927, 0.1316950291866994, 0.12937893459231917, 0.13052145628415104, 0.12571029554640592, 0.12686388502130683], 'recall_mse_features': [0.12361708937664843, 0.1254676048318782, 0.11969288602958734, 0.12241798787954035, 0.12110565263179066, 0.12616166677071738, 0.12190537193383513, 0.1231719120998892, 0.1178181328089802, 0.11734651764610313], 'index_mse_features': [0.16728853331521837, 0.1673468681819004, 0.16940025907048203, 0.16828093040638223, 0.17486439883284577, 0.15779474562305962, 0.16255301663470148, 0.16224400164732194, 0.1531092505944622, 0.167645525446565], 'macro_precision_mse': 0.1376472246542006, 'macro_recall_mse': 0.121870482200897, 'macro_f1_mse': 0.12926779088076645, 'macro_index_mse': 0.1650527529752939, 'matching_precision': 0.09, 'matching_recall': 1.0, 'matching_f1': 0.1651376146788991, 'matching_precision_features': [0.1, 0.1, 0.1, 0.1, 0.09, 0.09, 0.1, 0.1, 0.1, 0.1], 'matching_recall_features': [1.0, 1.0, 1.0, 0.7, 0.9, 1.0, 0.9, 1.0, 0.9, 0.8], 'matching_f1_features': [0.18181818181818182, 0.18181818181818182, 0.18181818181818182, 0.175, 0.16363636363636364, 0.1651376146788991, 0.18, 0.18181818181818182, 0.18, 0.17777777777777778], 'macro_matching_precision': 0.098, 'macro_matching_recall': 0.92, 'macro_matching_f1': 0.1768824483365768, 'cuc': 0.1364, 'coverages': [0.10000000000000002, 0.16666666666666666, 0.3, 0.5333333333333333, 0.9], 'macro_cuc': 0.13874, 'macro_coverages': [0.10000000000000002, 0.18000000000000002, 0.31, 0.48, 0.98], 'cuc_features': [0.1428, 0.13580000000000003, 0.15250000000000002, 0.14579999999999999, 0.12990000000000002, 0.1364, 0.1459, 0.12330000000000002, 0.13580000000000003, 0.13920000000000002], 'coverages_features': [[0.10000000000000002, 0.16666666666666666, 0.3666666666666667, 0.5, 1.0], [0.10000000000000002, 0.16666666666666666, 0.26666666666666666, 0.43333333333333335, 1.0], [0.10000000000000002, 0.20000000000000004, 0.3666666666666667, 0.6, 1.0], [0.10000000000000002, 0.16666666666666666, 0.3333333333333333, 0.5333333333333333, 1.0], [0.10000000000000002, 0.20000000000000004, 0.26666666666666666, 0.4666666666666666, 0.9], [0.10000000000000002, 0.16666666666666666, 0.30000000000000004, 0.5333333333333333, 0.9], [0.10000000000000002, 0.20000000000000004, 0.3333333333333333, 0.5333333333333333, 1.0], [0.10000000000000002, 0.20000000000000004, 0.3, 0.3, 1.0], [0.10000000000000002, 0.16666666666666666, 0.26666666666666666, 0.4333333333333333, 1.0], [0.10000000000000002, 0.16666666666666666, 0.30000000000000004, 0.4666666666666666, 1.0]]}
```

### Inputs
- **predictions**: (list of list of list of float or numpy.ndarray): The generated time-series. The shape of the array should be `(num_generation, seq_len, num_features)`.
- **references**: (list of list of list of float or numpy.ndarray): The original time-series. The shape of the array should be `(num_reference, seq_len, num_features)`.
- **batch_size**: (int, optional): The batch size for computing the metric. Memory usage grows quadratically with this value. Default is None.
+- **cuc_n_calculation**: (int, optional): The number of times the coverage is computed and averaged, since it relies on random sampling. Default is 3.
+- **cuc_n_samples**: (list of int, optional): The sample sizes at which the coverage is computed. Default is $[2^0, 2^1, \ldots, 2^{\lfloor \log_2 n \rfloor - 1}, n]$, where $n$ is the number of generated instances. A full invocation is sketched below.
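
For example, both CUC parameters can be passed to `compute`. A minimal sketch, assuming the `evaluate` library is installed and using random arrays in place of real series (shapes, sample sizes, and values are illustrative):

```python
import evaluate
import numpy as np

metric = evaluate.load("bowdbeg/matching_series")
# Toy data: 64 generated and 64 reference series, 100 timesteps, 10 features.
predictions = np.random.rand(64, 100, 10)
references = np.random.rand(64, 100, 10)
results = metric.compute(
    predictions=predictions,
    references=references,
    batch_size=1000,
    cuc_n_calculation=5,              # average coverage over 5 random draws
    cuc_n_samples=[2, 4, 8, 16, 64],  # sample sizes for the coverage curve
)
print(results["coverages"], results["cuc"])
```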

### Output Values

Let prediction instances be $P = \{p_1, p_2, \ldots, p_n\}$ and reference instances be $R = \{r_1, r_2, \ldots, r_m\}$.

+- **precision_mse**: (float): Average of the MSE between each generated instance and its closest reference instance. Intuitively, this is similar to precision in classification. In the equation, $\frac{1}{n} \sum_{i=1}^{n} \min_{j} \mathrm{MSE}(p_i, r_j)$.
+- **recall_mse**: (float): Average of the MSE between each reference instance and its closest generated instance. Intuitively, this is similar to recall in classification. In the equation, $\frac{1}{m} \sum_{j=1}^{m} \min_{i} \mathrm{MSE}(p_i, r_j)$.
+- **f1_mse**: (float): Harmonic mean of precision_mse and recall_mse, similar to the F1-score in classification (see the numpy sketch after this list).
- **index_mse**: (float): Average of the MSE between the generated instance and the reference instance with the same index. In the equation, $\frac{1}{n} \sum_{i=1}^{n} \mathrm{MSE}(p_i, r_i)$.
+- **precision_mse_features**: (list of float): precision_mse computed individually for each feature.
+- **recall_mse_features**: (list of float): recall_mse computed individually for each feature.
+- **f1_mse_features**: (list of float): f1_mse computed individually for each feature.
- **index_mse_features**: (list of float): index_mse computed individually for each feature.
+- **macro_precision_mse**: (float): Average of the precision_mse_features.
+- **macro_recall_mse**: (float): Average of the recall_mse_features.
+- **macro_f1_mse**: (float): Average of the f1_mse_features.
- **macro_index_mse**: (float): Average of the index_mse_features.
+- **matching_precision**: (float): Fraction of reference instances that are the best match of at least one generated instance. In the equation, $\frac{|\{\arg\min_{j} \mathrm{MSE}(p_i, r_j) \mid i = 1, \ldots, n\}|}{m}$.
+- **matching_recall**: (float): Fraction of generated instances that are the best match of at least one reference instance. In the equation, $\frac{|\{\arg\min_{i} \mathrm{MSE}(p_i, r_j) \mid j = 1, \ldots, m\}|}{n}$.
+- **matching_f1**: (float): Harmonic mean of matching_precision and matching_recall.
+- **matching_precision_features**: (list of float): matching_precision computed individually for each feature.
+- **matching_recall_features**: (list of float): matching_recall computed individually for each feature.
+- **matching_f1_features**: (list of float): matching_f1 computed individually for each feature.
+- **macro_matching_precision**: (float): Average of the matching_precision_features.
+- **macro_matching_recall**: (float): Average of the matching_recall_features.
+- **macro_matching_f1**: (float): Average of the matching_f1_features.
+- **coverages**: (list of float): For each sample size in cuc_n_samples, the fraction of reference instances matched by a random subset of the generated instances, averaged over cuc_n_calculation draws. In the equation, $\left[ \frac{|\{\arg\min_{j} \mathrm{MSE}(p_i, r_j) \mid p_i \in \mathrm{sample}(P, s)\}|}{m} \;\text{for}\; s \in \mathrm{cuc\_n\_samples} \right]$.
+- **cuc**: (float): Area under the coverage curve, computed from coverages over cuc_n_samples with the trapezoidal rule and normalized by $\mathrm{len}(\mathrm{cuc\_n\_samples}) \cdot \max(\mathrm{cuc\_n\_samples})$.
+- **coverages_features**: (list of list of float): coverages computed individually for each feature.
+- **cuc_features**: (list of float): cuc computed individually for each feature.
+- **macro_coverages**: (list of float): Average of the coverages_features.
+- **macro_cuc**: (float): Average of the cuc_features.
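
To make these definitions concrete, the core quantities can be reproduced from a pairwise MSE matrix with plain numpy. A minimal sketch, not the module's API: the 4×4 random matrix stands in for real pairwise MSEs, and all names are illustrative.

```python
import numpy as np

# Toy pairwise-MSE matrix: mse_mean[i, j] = MSE(p_i, r_j), with n = m = 4.
rng = np.random.default_rng(0)
mse_mean = rng.random((4, 4))

best_match = np.argmin(mse_mean, axis=1)      # closest reference for each p_i
best_match_inv = np.argmin(mse_mean, axis=0)  # closest generation for each r_j

precision_mse = mse_mean[np.arange(4), best_match].mean()
recall_mse = mse_mean[best_match_inv, np.arange(4)].mean()
f1_mse = 2 / (1 / precision_mse + 1 / recall_mse)  # harmonic mean

# Matching precision/recall: fraction of references (resp. generations)
# that are the best match of at least one instance on the other side.
matching_precision = np.unique(best_match).size / mse_mean.shape[1]
matching_recall = np.unique(best_match_inv).size / mse_mean.shape[0]

print(precision_mse, recall_mse, f1_mse, matching_precision, matching_recall)
```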

#### Values from Popular Papers
<!-- *Give examples, preferably with links to leaderboards or publications, to papers that have reported this metric, along with the values they have reported.* -->
matching_series.py
CHANGED
@@ -13,8 +13,9 @@
 # limitations under the License.
 """TODO: Add a description here."""
 
+import math
 import statistics
+from typing import List, Optional, Union
 
 import datasets
 import evaluate
@@ -127,9 +128,11 @@ class matching_series(evaluate.Metric):
 
     def _compute(
         self,
+        predictions: Union[List, np.ndarray],
+        references: Union[List, np.ndarray],
         batch_size: Optional[int] = None,
+        cuc_n_calculation: int = 3,
+        cuc_n_samples: Union[List[int], str] = "auto",
     ):
         """
         Compute the scores of the module given the predictions and references
@@ -139,6 +142,8 @@ class matching_series(evaluate.Metric):
         references: list of reference
             shape: (num_reference, num_timesteps, num_features)
         batch_size: batch size to use for the computation. If None, the whole dataset is processed at once.
+        cuc_n_calculation: number of times the coverage is computed, since it relies on random sampling
+        cuc_n_samples: sample sizes to use for the coverage computation. If "auto", powers of two up to the number of predictions are used.
         Returns:
         """
         predictions = np.array(predictions)
@@ -175,47 +180,128 @@ class matching_series(evaluate.Metric):
 
         # matching mse
         # shape: (num_generation,)
+        precision_mse = mse_mean[np.arange(len(best_match)), best_match].mean()
 
         # best match for each reference time series
         # shape: (num_reference,)
         best_match_inv = np.argmin(mse_mean, axis=0)
+        recall_mse = mse_mean[best_match_inv, np.arange(len(best_match_inv))].mean()
 
+        f1_mse = 2 / (1 / precision_mse + 1 / recall_mse)
+
+        # matching precision, recall and f1
+        matching_precision = np.unique(best_match).size / len(best_match_inv)
+        matching_recall = np.unique(best_match_inv).size / len(best_match)
+        matching_f1 = 2 / (1 / matching_precision + 1 / matching_recall)
 
         # take matching for each feature and compute metrics for them
+        precision_mse_features = []
+        recall_mse_features = []
+        f1_mse_features = []
+        matching_precision_features = []
+        matching_recall_features = []
+        matching_f1_features = []
         index_mse_features = []
+        coverages_features = []
+        cuc_features = []
         for f in range(predictions.shape[-1]):
             mse_f = mse[:, :, f]
             index_mse_f = mse_f.diagonal(axis1=0, axis2=1).mean()
             best_match_f = np.argmin(mse_f, axis=-1)
+            precision_mse_f = mse_f[np.arange(len(best_match_f)), best_match_f].mean()
             best_match_inv_f = np.argmin(mse_f, axis=0)
+            recall_mse_f = mse_f[best_match_inv_f, np.arange(len(best_match_inv_f))].mean()
+            f1_mse_f = 2 / (1 / precision_mse_f + 1 / recall_mse_f)
+            precision_mse_features.append(precision_mse_f)
+            recall_mse_features.append(recall_mse_f)
+            f1_mse_features.append(f1_mse_f)
             index_mse_features.append(index_mse_f)
+
+            matching_precision_f = np.unique(best_match_f).size / len(best_match_f)
+            matching_recall_f = np.unique(best_match_inv_f).size / len(best_match_inv_f)
+            matching_f1_f = 2 / (1 / matching_precision_f + 1 / matching_recall_f)
+            matching_precision_features.append(matching_precision_f)
+            matching_recall_features.append(matching_recall_f)
+            matching_f1_features.append(matching_f1_f)
+
+            coverages_f, cuc_f = self.compute_cuc(best_match_f, len(references), cuc_n_calculation, cuc_n_samples)
+            coverages_features.append(coverages_f)
+            cuc_features.append(cuc_f)
+
+        macro_precision_mse = statistics.mean(precision_mse_features)
+        macro_recall_mse = statistics.mean(recall_mse_features)
+        macro_f1_mse = statistics.mean(f1_mse_features)
         macro_index_mse = statistics.mean(index_mse_features)
+
+        macro_matching_precision = statistics.mean(matching_precision_features)
+        macro_matching_recall = statistics.mean(matching_recall_features)
+        macro_matching_f1 = statistics.mean(matching_f1_features)
+
+        # cuc
+        coverages, cuc = self.compute_cuc(best_match, len(references), cuc_n_calculation, cuc_n_samples)
+
+        macro_cuc = statistics.mean(cuc_features)
+        macro_coverages = [statistics.mean(c) for c in zip(*coverages_features)]
+
         return {
+            "precision_mse": precision_mse,
+            "f1_mse": f1_mse,
+            "recall_mse": recall_mse,
             "index_mse": index_mse,
+            "precision_mse_features": precision_mse_features,
+            "f1_mse_features": f1_mse_features,
+            "recall_mse_features": recall_mse_features,
             "index_mse_features": index_mse_features,
+            "macro_precision_mse": macro_precision_mse,
+            "macro_recall_mse": macro_recall_mse,
+            "macro_f1_mse": macro_f1_mse,
             "macro_index_mse": macro_index_mse,
+            "matching_precision": matching_precision,
+            "matching_recall": matching_recall,
+            "matching_f1": matching_f1,
+            "matching_precision_features": matching_precision_features,
+            "matching_recall_features": matching_recall_features,
+            "matching_f1_features": matching_f1_features,
+            "macro_matching_precision": macro_matching_precision,
+            "macro_matching_recall": macro_matching_recall,
+            "macro_matching_f1": macro_matching_f1,
+            "cuc": cuc,
+            "coverages": coverages,
+            "macro_cuc": macro_cuc,
+            "macro_coverages": macro_coverages,
+            "cuc_features": cuc_features,
+            "coverages_features": coverages_features,
         }
+
+    def compute_cuc(
+        self,
+        match: np.ndarray,
+        n_reference: int,
+        n_calculation: int,
+        n_samples: Union[List[int], str],
+    ):
+        """
+        Compute Coverage Under Curve
+        Args:
+            match: best match for each generated time series
+            n_reference: number of reference time series
+            n_calculation: number of times the coverage is computed, since it relies on random sampling
+            n_samples: sample sizes to use for the coverage computation. If "auto", powers of two up to the number of predictions are used.
+        Returns:
+        """
+        n_generation = len(match)
+        if n_samples == "auto":
+            exp = int(math.log2(n_generation))
+            n_samples = [int(2**i) for i in range(exp)]
+            n_samples.append(n_generation)
+        assert isinstance(n_samples, list) and all(isinstance(n, int) for n in n_samples)
+
+        coverages = []
+        for n_sample in n_samples:
+            coverage = 0
+            for _ in range(n_calculation):
+                sample = np.random.choice(match, size=n_sample, replace=False)  # type: ignore
+                coverage += len(np.unique(sample)) / n_reference
+            coverages.append(coverage / n_calculation)
+        cuc = np.trapz(coverages, n_samples) / len(n_samples) / max(n_samples)
+        return coverages, cuc
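
For intuition about the normalization at the end of compute_cuc, here is a small trace with illustrative numbers only (plain numpy, not part of the module):

```python
import numpy as np

# With 10 generations, the "auto" schedule is [2**0, 2**1, 2**2, 10].
n_samples = [1, 2, 4, 10]
coverages = [0.1, 0.18, 0.31, 0.98]  # illustrative averaged coverages

# Area under the coverage curve via the trapezoidal rule, normalized by the
# number of sample sizes and the largest sample size, as in compute_cuc above.
cuc = np.trapz(coverages, n_samples) / len(n_samples) / max(n_samples)
print(float(cuc))
```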
plot_cuc.py
ADDED
@@ -0,0 +1,24 @@
+import json
+from argparse import ArgumentParser
+
+import matplotlib.pyplot as plt
+
+parser = ArgumentParser()
+parser.add_argument("input", type=str, help="Input file of json data, output of matching_series")
+parser.add_argument("output", type=str, help="Output file of the plot")
+args = parser.parse_args()
+
+with open(args.input, "r") as f:
+    data = json.load(f)
+
+coverages = data["coverages"]
+# x assumes the default "auto" sample sizes; the last point approximates the
+# true final sample count (the number of generations) with the next power of two.
+x = [2**i for i in range(len(coverages))]
+y = coverages
+
+fig, ax = plt.subplots()
+ax.plot(x, y, "o-")
+ax.set_xscale("log", base=2)
+ax.set_xlabel("Number of generations")
+ax.set_ylabel("Coverage")
+plt.savefig(args.output)
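
plot_cuc.py expects the metric output as JSON, so the results dict has to be serialized first. A minimal end-to-end sketch; file names and data shapes are illustrative, and `default=float` is used because some returned values may be numpy scalars:

```python
import json

import evaluate
import numpy as np

metric = evaluate.load("bowdbeg/matching_series")
predictions = np.random.rand(32, 50, 4)  # (num_generation, seq_len, num_features)
references = np.random.rand(32, 50, 4)   # (num_reference, seq_len, num_features)
results = metric.compute(predictions=predictions, references=references)

with open("results.json", "w") as f:
    json.dump(results, f, default=float)  # numpy scalars -> plain floats
# Then plot the coverage curve:
#   python plot_cuc.py results.json coverage.png
```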