def create_model(
    num_classes,
    load_pretrain_weights=True,
    backbone_weights_path="/disk527/Commondisk/a804_qkf/vscodeproject/code/faster_rcnn/backbone/resnet50.pth",
    coco_weights_path="/disk527/Commondisk/a804_qkf/vscodeproject/code/faster_rcnn/backbone/fasterrcnn_resnet50_fpn_coco.pth",
):
    """Build a Faster R-CNN (ResNet50 + FPN) model with a fresh box-predictor head.

    Args:
        num_classes: Number of output classes for the new ``FastRCNNPredictor``
            head. This is the value to change when training on a custom dataset.
        load_pretrain_weights: When true, load the full-detector checkpoint at
            ``coco_weights_path`` (non-strictly) before replacing the head.
        backbone_weights_path: Checkpoint path forwarded to
            ``resnet50_fpn_backbone`` as ``pretrain_path``. Defaults to the
            location that was previously hard-coded.
        coco_weights_path: Path of the full-detector checkpoint. Defaults to the
            location that was previously hard-coded. The file is available from
            https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth

    Returns:
        The ``FasterRCNN`` model whose ``roi_heads.box_predictor`` outputs
        ``num_classes`` classes.
    """
    backbone = resnet50_fpn_backbone(
        pretrain_path=backbone_weights_path,
        norm_layer=torch.nn.BatchNorm2d,
        trainable_layers=3,
    )

    # Do NOT change the 91 here when training on your own dataset; change the
    # ``num_classes`` argument instead. The model is first built with 91
    # classes so that the pretrained checkpoint below can be loaded, and the
    # head is swapped afterwards.
    model = FasterRCNN(backbone=backbone, num_classes=91)

    if load_pretrain_weights:
        # NOTE(review): torch.load unpickles the file; only point this at
        # checkpoints from a trusted source.
        weights_dict = torch.load(coco_weights_path, map_location="cpu")
        missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
        # strict=False tolerates mismatches, so report them instead of failing.
        if missing_keys or unexpected_keys:
            print("missing_keys: ", missing_keys)
            print("unexpected_keys: ", unexpected_keys)

    # Number of input features expected by the classifier layer.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # Replace the pretrained head with a new one sized for ``num_classes``.
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
for epoch in tqdm(range(args.start_epoch, args.epochs), desc="总进度"): 训练一个epoch: 使用函数utils.train_one_epoch(); 记录指标: .append() 主要针对train_loss、learning_rate、val_map进行记录; 学习率更新: lr_scheduler.step(); 在验证集上评估:utils.evaluate(); 写入结果: with open; 保存模型权重: 权重文件要用到torch.save函数,在调用权重文件时,对应的用torch.load函数,这两个函数相关联; 循环结束 循环结束后,对训练过程的 mAP 和 loss 曲线进行绘制
[lang:Python] plot_loss
1 2 3 4 5 6 7 8 9
# Draw the training-loss / learning-rate curve once both histories have entries.
if len(train_loss) > 0 and len(learning_rate) > 0:
    from plot_curve import plot_loss_and_lr

    plot_loss_and_lr(train_loss, learning_rate)

# Draw the mAP curve once at least one value has been recorded in val_map.
if len(val_map) > 0:
    from plot_curve import plot_map

    plot_map(val_map)
# Learning-rate warm-up is set up only for the first epoch, and only when the
# caller passed warmup=True (identity check against the bool, as before).
if warmup is True and epoch == 0:
    # Number of warm-up steps: at most 1000, capped at len(data_loader) - 1.
    warmup_iters = min(1000, len(data_loader) - 1)
    # Initial learning-rate scaling factor (start from 0.001x).
    warmup_factor = 1.0 / 1000

    lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
3.循环训练
[lang:Python] for
1
for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
数据转移到指定设备,使用.to(device)
前向传播:model(images,targets)
损失求和:losses_reduced = sum(loss for loss in loss_dict_reduced.values())
更新滑动平均损失
检查损失是否有效
反向传播与参数更新:清空梯度optimizer.zero_grad()、反向传播losses.backward() 、参数更新optimizer.step()、学习率更新lr_scheduler.step()
记录指标