"""Train an RNN encoder-decoder summarization model and log metrics to TensorBoard.

The script loads a JSON config, builds the data pipeline, trains for a fixed
number of epochs (checkpointing periodically), evaluates after every epoch,
and finally saves the model's state dict.
"""

import json
import math
import os
import sys

import jsonlines
import numpy as np
import torch
from tensorboardX import SummaryWriter
from tqdm import tqdm

# NOTE(review): the original script called ``os.chdir(DEFAULT_SAVEDIR)`` but
# never defined DEFAULT_SAVEDIR. It is read from the environment here and
# falls back to the current directory (making the chdir a no-op) -- confirm
# where this value is really supposed to come from.
DEFAULT_SAVEDIR = os.environ.get("DEFAULT_SAVEDIR", os.getcwd())

# All relative paths below (module search paths, config, checkpoints) are
# resolved against this directory, so it must be set before the local imports.
os.chdir(DEFAULT_SAVEDIR)
sys.path.append("../src/")
sys.path.append("../utils/")

from models import AttentionRNNEncoderDecoder, RNNEncoderDecoder  # noqa: E402
from preprocessing import Pipeline  # noqa: E402
# NOTE(review): ``get_accuracy`` was commented out of this import in the
# original even though the script calls it; assumed to live in ``utils``.
from utils import batchify, get_accuracy, normalize_text  # noqa: E402

NUM_EPOCHS = 10
BATCH_SIZE = 4096
MAX_LENGTH = 512  # Maximum length of input and summary texts
CHECKPOINT_EVERY = 50  # Save a checkpoint every this many training batches
CHECKPOINT_DIR = "./models"


def _mean(values) -> float:
    """Return the arithmetic mean of *values*, or NaN when it is empty."""
    if len(values) == 0:
        return float("nan")
    return float(np.array(values).mean())


def _build_model_and_optimizer(config):
    """Create the model and optimizer used by the training loop.

    NOTE(review): the original script referenced ``model`` and ``optimizer``
    without ever creating them, and the constructor signatures of the model
    classes are not visible from this file. This assumes an optional
    ``config["model"]`` section shaped like
    ``{"attention": bool, "kwargs": {...}}`` and an optional
    ``config["learning_rate"]``; the Adam optimizer is likewise a placeholder
    choice. Confirm all of this against ``models`` and the config file.
    """
    model_cfg = config.get("model", {})
    if model_cfg.get("attention", True):
        model_cls = AttentionRNNEncoderDecoder
    else:
        model_cls = RNNEncoderDecoder
    model = model_cls(**model_cfg.get("kwargs", {}))
    optimizer = torch.optim.Adam(
        model.parameters(), lr=config.get("learning_rate", 1e-3)
    )
    return model, optimizer


def _train_epoch(model, optimizer, train_set, epoch):
    """Run one training pass over *train_set*.

    Returns a ``(mean_loss, mean_accuracy)`` pair averaged over the batches
    that were actually processed.
    """
    model.train()
    losses = []
    accuracies = []
    # The progress-bar length is an estimate: it assumes ``batchify`` yields
    # roughly len(train_set) / BATCH_SIZE batches.
    num_batches = math.ceil(len(train_set) / BATCH_SIZE)
    pbar = tqdm(
        enumerate(batchify(train_set, batch_size=BATCH_SIZE)),
        total=num_batches,
    )
    for i, (source, target) in pbar:
        if i % CHECKPOINT_EVERY == 0 and i > 0:
            print("Saving checkpoint...")
            torch.save(
                {"model": model.state_dict(), "optimizer": optimizer.state_dict()},
                f"./models/{epoch}_checkpoint.pth",
            )
        optimizer.zero_grad()
        output = model(source, target)
        # The loss is the negated first element of the model output (the
        # original comments describe it as a negative log-likelihood).
        loss_value = -output[0]
        # The original computed the loss but never back-propagated it.
        loss_value.backward()
        optimizer.step()
        losses.append(loss_value.item())
        accuracies.append(get_accuracy(target, output))
    return _mean(losses), _mean(accuracies)


def _evaluate(model, eval_set):
    """Evaluate *model* on *eval_set* without teacher forcing.

    Returns a ``(mean_loss, mean_accuracy)`` pair averaged over the batches.
    """
    model.eval()
    losses = []
    accuracies = []
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for source, target in batchify(eval_set):
            # Teacher forcing is off to mimic real inference conditions.
            output = model(source, None, teacher_forcing=False)
            loss_value = -output[0]
            losses.append(loss_value.item())
            accuracies.append(get_accuracy(target, output))
    return _mean(losses), _mean(accuracies)


def main() -> None:
    """Load the config and data, then train, evaluate and save the model."""
    with open("../../data/config.json", encoding="utf-8") as f:
        config = json.load(f)

    pipeline = Pipeline(
        task="summarization",
        max_length=MAX_LENGTH,
        batch_size=BATCH_SIZE,
        lowercase=True,
        truncate=True,
    )
    pipeline.load(config["data"]["path"])
    train_set = pipeline.get_train()
    eval_set = pipeline.get_test()

    model, optimizer = _build_model_and_optimizer(config)

    # torch.save does not create missing directories.
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)

    # TensorBoard logs are written under config["save_path"].
    tb_writer = SummaryWriter(config["save_path"])
    try:
        for epoch in range(NUM_EPOCHS):
            print(f"Epoch {epoch + 1} of {NUM_EPOCHS}")
            train_loss, train_accuracy = _train_epoch(
                model, optimizer, train_set, epoch
            )
            tb_writer.add_scalar("Loss/Train", train_loss, global_step=epoch + 1)
            print(f"Epoch {epoch + 1} average loss: {train_loss}")
            tb_writer.add_scalar(
                "Accuracy/Train", train_accuracy, global_step=epoch + 1
            )

            print(f"Epoch {epoch + 1} evaluation...")
            eval_loss, eval_accuracy = _evaluate(model, eval_set)
            print(f"Epoch {epoch + 1} eval. average loss: {eval_loss}")
            tb_writer.add_scalar(
                "Loss/Validation", eval_loss, global_step=epoch + 1
            )
            print(f"Epoch {epoch + 1} eval. average accuracy: {eval_accuracy}")
            tb_writer.add_scalar(
                "Accuracy/Validation", eval_accuracy, global_step=epoch + 1
            )
    finally:
        # Flush pending events even if training is interrupted.
        tb_writer.close()

    # Save the trained weights for later use or deployment.
    torch.save(model.state_dict(), f"./models/{config['run_name']}.pth")


if __name__ == "__main__":
    main()