import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit


def parse_file(file_path):
    """Parse a training log file.

    Each line is expected to look roughly like (as assumed by the field
    indices used below):
        step:<i>/<total> <train|val>_loss:<loss> <...> step_avg:<ms>ms
    """
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split()
            # "step:<i>/<total>" -> <i>
            step = int(parts[0].split(':')[1].split('/')[0])
            # Second field is either "train_loss:<x>" or "val_loss:<x>"
            is_train = 'val' not in parts[1]
            loss = float(parts[1].split(':')[1])
            # "step_avg:<x>ms" -> <x>
            step_avg = float(parts[3].split(':')[1].replace('ms', ''))
            data.append({
                'step': step,
                'loss': loss,
                'step_avg': step_avg,
                'is_train': is_train
            })
    return data


# Usage
file_path = 'baseline_log.txt'
data = parse_file(file_path)

# Extract the training steps and losses into separate arrays.
# Skip step 0 (if present) so that log10 below stays finite.
train = [d for d in data if d['is_train'] and d['step'] > 0]
steps = np.array([d['step'] for d in train])
losses = np.array([d['loss'] for d in train])

# Take the logarithm of the data: a power law is a straight line in log-log space
log_steps = np.log10(steps)
log_losses = np.log10(losses)


# Define a linear function
def linear_func(x, a, b):
    return a * x + b


# Fit the linear function to the logarithmic data
popt, pcov = curve_fit(linear_func, log_steps, log_losses)

# Create the plot
plt.loglog(steps, losses, label='Data')

# Plot the fitted line
x_fit = np.logspace(np.log10(np.min(steps)), np.log10(np.max(steps)), 100)
y_fit = 10 ** (popt[0] * np.log10(x_fit) + popt[1])
plt.loglog(x_fit, y_fit, label='Fitted line', color='red')

# Add title and labels
plt.title('Loss as a function of step')
plt.xlabel('Step')
plt.ylabel('Loss')
plt.legend()

# Print the fitted parameters
print('Fitted parameters: a = {:.2f}, b = {:.2f}'.format(popt[0], popt[1]))

# Save the plot to a file
plt.savefig('loss_plot2.png')

# Show the plot
plt.show()