|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
from scipy.optimize import curve_fit |
|
|
|
def parse_file(file_path):
    """Parse a training log into a list of per-line records.

    Every non-blank line is split on whitespace and read positionally:
    field 0 is ``<key>:<step>/<total>``, field 1 is ``<key>:<loss>`` and
    field 3 is ``<key>:<value>ms``.  A line is treated as a validation
    record when the substring ``'val'`` occurs in field 1, and as a
    training record otherwise.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of dicts in file order, one per parsed line, with keys
        ``'step'`` (int), ``'loss'`` (float), ``'step_avg'`` (float, with
        the ``ms`` suffix removed) and ``'is_train'`` (bool).

    Raises:
        IndexError: If a non-blank line has fewer than four fields, or a
            field that is read lacks the ``:`` separator.
        ValueError: If a numeric field cannot be converted.
    """
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            if not parts:
                # Blank lines (e.g. a trailing newline) carry no record and
                # would otherwise fail on parts[0].
                continue

            # "step:12/500" -> 12
            step = int(parts[0].split(':')[1].split('/')[0])
            is_train = 'val' not in parts[1]
            loss = float(parts[1].split(':')[1])
            # "step_avg:123.4ms" -> 123.4
            step_avg = float(parts[3].split(':')[1].replace('ms', ''))

            data.append({
                'step': step,
                'loss': loss,
                'step_avg': step_avg,
                'is_train': is_train,
            })
    return data
|
|
|
|
|
# Log file to analyse.
file_path = 'baseline_log.txt'
data = parse_file(file_path)

# Select the training records once and pull out the two series to fit.
train_records = [d for d in data if d['is_train']]
steps = np.array([d['step'] for d in train_records])
losses = np.array([d['loss'] for d in train_records])

# log10 is only finite for strictly positive inputs, so drop any point that
# cannot be placed on a log axis (step 0, non-positive loss; NaN losses also
# compare False here and are removed).
plottable = (steps > 0) & (losses > 0)
steps = steps[plottable]
losses = losses[plottable]

# The fit is done in log-log space.
log_steps = np.log10(steps)
log_losses = np.log10(losses)
|
|
|
|
|
def linear_func(x, a, b):
    """Evaluate the straight line ``a * x + b``.

    Args:
        x: Value(s) at which to evaluate the line.
        a: Slope.
        b: Intercept.

    Returns:
        The result of ``a * x + b``.
    """
    return a * x + b
|
|
|
|
|
# Fit a straight line to the data in log-log space:
#   log10(loss) = a * log10(step) + b
popt, pcov = curve_fit(linear_func, log_steps, log_losses)

# Raw data on log-log axes.
plt.loglog(steps, losses, label='Data')

# 100 log-spaced sample points spanning the observed step range.
x_fit = np.logspace(np.log10(np.min(steps)), np.log10(np.max(steps)), 100)
# Evaluate through linear_func so the drawn curve always uses the same model
# that was fitted, then map back from log10 space.
y_fit = 10 ** linear_func(np.log10(x_fit), *popt)
plt.loglog(x_fit, y_fit, label='Fitted line', color='red')

plt.title('Loss as a function of step')
plt.xlabel('Step')
plt.ylabel('Loss')
plt.legend()

print('Fitted parameters: a = {:.2f}, b = {:.2f}'.format(popt[0], popt[1]))

# Write the image to disk, then display the figure.
plt.savefig('loss_plot2.png')

plt.show()
|
|