Implementing YOLOv3 in PyTorch

2023-10-30

1, Network structure

Darknet network structure on the left, YOLOv3 network structure on the right; a detailed walkthrough can be found at the linked reference.


2, PyTorch code implementation

darknet53.py

# -*- coding: utf-8 -*-
# @Time    : 2020/10/20 下午10:17
# @Author  : zxq
# @File    : YOLOv3_model.py
# @Software: PyCharm
from collections import OrderedDict

import torch
import torch.nn as nn


class Conv2dBatchLeaky(nn.Module):
    """ This convenience layer groups a 2D convolution, a batchnorm and a leaky ReLU.
    They are executed in a sequential manner.
    Corresponds to the "Convolutional" block in the diagram on the left.
    The smallest building block of DarkNet.
    Only the stride changes the spatial size: with stride=1 the feature map keeps its height and width.

    Args:
        in_channels (int): Number of input channels
        out_channels (int): Number of output channels
        kernel_size (int or tuple): Size of the kernel of the convolution
        stride (int or tuple): Stride of the convolution
        negative_slope (number, optional): Controls the angle of the negative slope of the leaky ReLU; Default **0.1**
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, negative_slope=0.1):
        super(Conv2dBatchLeaky, self).__init__()

        # Parameters
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        # padding of kernel_size // 2, so with stride=1 the feature height/width are unchanged
        if isinstance(kernel_size, (list, tuple)):
            self.padding = [int(ii / 2) for ii in kernel_size]
        else:
            self.padding = int(kernel_size / 2)  # floor division
        self.leaky_slope = negative_slope

        # Layers, packed into a Sequential
        self.layers = nn.Sequential(
            nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.padding, bias=False),
            nn.BatchNorm2d(self.out_channels),  # , eps=1e-6, momentum=0.01),
            nn.LeakyReLU(self.leaky_slope, inplace=True)
        )

    def forward(self, x):
        x = self.layers(x)  # the layers are packed in a Sequential, so one call does it all
        return x


class ResBlock(nn.Module):
    def __init__(self, in_channels):
        """
        Residual block.
        Each block consists of two conv+bn+leakyReLU stages.
        Channel progression of the feature map: in_channels -> in_channels//2 -> in_channels
        :param in_channels: number of channels of the input feature map x
        """
        super(ResBlock, self).__init__()
        # in_channels -> in_channels//2: the 1x1 conv reduces the channel dimension to cut parameters, which is why two convolutions are used before the residual addition.
        self.conv1 = Conv2dBatchLeaky(in_channels, in_channels // 2, kernel_size=1, stride=1, negative_slope=0.1)
        # in_channels//2 -> in_channels
        self.conv2 = Conv2dBatchLeaky(in_channels // 2, in_channels, kernel_size=3, stride=1, negative_slope=0.1)

    def forward(self, x):
        input_feature = x  # e.g. with in_channels = 64 the bottleneck below is 32 channels
        x = self.conv1(x)  # -> in_channels//2
        x = self.conv2(x)  # -> back to in_channels (64 in the example)
        x += input_feature  # residual connection: add the input to the two-conv output; the sum feeds the next block
        return x


class DarkNet(nn.Module):
    def __init__(self, layers):
        """
        DarkNet consists of 5 stages, each built from several residual blocks.
        :param layers: list, len(layers) == 5; each entry is the number of residual blocks
        in the corresponding stage, which controls the model size.
        e.g.
        Darknet53: layers == [1, 2, 8, 8, 4]
        """
        super(DarkNet, self).__init__()
        start_channel = 32  # channel count after the first convolution, fixed here  # c = 32
        self.conv = Conv2dBatchLeaky(in_channels=3, out_channels=start_channel, kernel_size=3, stride=1)  # height/width unchanged

        # Define 5 stages. Each is preceded by a stride-2 convolution that halves h/w and doubles the channels; the stage itself leaves all dimensions (h, w, c) unchanged.
        self.conv1 = Conv2dBatchLeaky(in_channels=start_channel, out_channels=start_channel * 2, kernel_size=3, stride=2)  # 32->64
        self.layer1 = self._build_layer(input_channels=start_channel * 2, num_res_block=layers[0])  # 64->64

        self.conv2 = Conv2dBatchLeaky(in_channels=start_channel * 2, out_channels=start_channel * 4, kernel_size=3, stride=2)  # ->128
        self.layer2 = self._build_layer(input_channels=start_channel * 4, num_res_block=layers[1])  # 128->128

        self.conv3 = Conv2dBatchLeaky(in_channels=start_channel * 4, out_channels=start_channel * 8, kernel_size=3, stride=2)  # ->256
        self.layer3 = self._build_layer(input_channels=start_channel * 8, num_res_block=layers[2])  # 256->256

        self.conv4 = Conv2dBatchLeaky(in_channels=start_channel * 8, out_channels=start_channel * 16, kernel_size=3, stride=2)  # ->512
        self.layer4 = self._build_layer(input_channels=start_channel * 16, num_res_block=layers[3])  # 512->512

        self.conv5 = Conv2dBatchLeaky(in_channels=start_channel * 16, out_channels=start_channel * 32, kernel_size=3, stride=2)  # ->1024
        self.layer5 = self._build_layer(input_channels=start_channel * 32, num_res_block=layers[4])  # 1024->1024

        self.output_channels = [start_channel * 2,  # 64  layer1
                                start_channel * 4,  # 128 layer2
                                start_channel * 8,  # 256
                                start_channel * 16,  # 512
                                start_channel * 32, ]  # 1024

    @staticmethod
    def _build_layer(input_channels, num_res_block=1):
        """
        Build one DarkNet stage.
        Each stage is a stack of residual blocks.
        :param input_channels: number of input channels
        :param num_res_block: number of residual blocks in the stage
        :return:
        """
        layers = []
        for i in range(0, num_res_block):
            layers.append(("res_block_{}".format(i), ResBlock(in_channels=input_channels)))
        return nn.Sequential(OrderedDict(layers))

    def forward(self, x):
        x = self.conv(x)  # [b,3,416,416] -> [b,32,416,416]

        x = self.conv1(x)  # [b,32,416,416] -> [b,64,208,208]
        x = self.layer1(x)  # dimensions unchanged

        x = self.conv2(x)  # [b,64,208,208] -> [b,128,104,104]
        x = self.layer2(x)

        x = self.conv3(x)  # [b,128,104,104] -> [b,256,52,52]
        out3 = self.layer3(x)

        out4 = self.conv4(out3)  # [b,256,52,52] -> [b,512,26,26]
        out4 = self.layer4(out4)

        out5 = self.conv5(out4)  # [b,512,26,26] -> [b,1024,13,13]
        out5 = self.layer5(out5)

        return out3, out4, out5  # [b,256,52,52], [b,512,26,26], [b,1024,13,13]


def darknet53(pretrained, **kwargs):
    model = DarkNet([1, 2, 8, 8, 4])
    if pretrained:  # False to skip; otherwise it must be a path to the weights
        if isinstance(pretrained, str):
            model.load_state_dict(torch.load(pretrained))
        else:
            raise Exception("darknet requires a pretrained weight path, got [{}]".format(pretrained))
    return model
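
A quick shape check of the backbone (a minimal smoke test, assuming the [1, 2, 8, 8, 4] layout and a 416x416 input as annotated above):

if __name__ == '__main__':
    net = darknet53(pretrained=False)
    out3, out4, out5 = net(torch.randn(1, 3, 416, 416))
    print(out3.shape, out4.shape, out5.shape)
    # expected: [1, 256, 52, 52], [1, 512, 26, 26], [1, 1024, 13, 13]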

yolov3_module.py

# -*- coding: utf-8 -*-
# @Time    : 2020/10/22 下午10:10
# @Author  : zxq
# @File    : yolov3_module.py
# @Software: PyCharm
import torch
import torch.nn as nn
import torch.nn.functional as F
import yaml

from backbone.darknet53 import Conv2dBatchLeaky, darknet53


class Conv2dBlock5L(nn.Module):
    """
    Corresponds to "Conv2D Block 5L" in the network diagram; concretely it is 6 conv+bn+leakyReLU stages.
    As for the name "5L", my guess is that the output channel count switches between c1 and c2 five times.
    Only the channel count changes; the spatial size is preserved.
    """

    def __init__(self, in_channels, out_channels):
        """
        :param in_channels: channel count of the feature map produced by DarkNet
        :param out_channels: list [c1, c2]; the channel count alternates between c1 and c2, ending at c2
        (for block_layer5 below this means alternating between in_channels//2 and in_channels)
        """
        super(Conv2dBlock5L, self).__init__()
        conv = Conv2dBatchLeaky(in_channels=in_channels, out_channels=out_channels[0], kernel_size=1, stride=1)  # 1x1 conv reduces channels to cut computation

        conv1 = Conv2dBatchLeaky(in_channels=out_channels[0], out_channels=out_channels[1], kernel_size=3, stride=1)
        conv2 = Conv2dBatchLeaky(in_channels=out_channels[1], out_channels=out_channels[0], kernel_size=1, stride=1)
        conv3 = Conv2dBatchLeaky(in_channels=out_channels[0], out_channels=out_channels[1], kernel_size=3, stride=1)
        conv4 = Conv2dBatchLeaky(in_channels=out_channels[1], out_channels=out_channels[0], kernel_size=1, stride=1)
        conv5 = Conv2dBatchLeaky(in_channels=out_channels[0], out_channels=out_channels[1], kernel_size=3, stride=1)
        self.out_channels = out_channels[1]

        # pack into a Sequential so forward stays a one-liner
        self.layers = nn.Sequential(
            conv,
            conv1,
            conv2,
            conv3,
            conv4,
            conv5
        )

    def forward(self, x):
        x = self.layers(x)
        return x


class Upsample(nn.Module):
    """ nn.Upsample is deprecated """

    def __init__(self, scale_factor, mode="nearest"):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
        return x


class YOLOv3(nn.Module):
    def __init__(self, config):
        super(YOLOv3, self).__init__()
        self.backbone = darknet53(pretrained=False)
        # num_anchors * (5+num_classes): 3 * (5+ 80) = 255
        anchors = config['yolo']['anchor']  # flat list of 9 anchor pairs: [10,13, 16,30, 33,23, 30,61, 62,45, ...]
        self.anchors = [(anchors[i], anchors[i + 1]) for i in
                        range(0, len(anchors) - 1, 2)]  # [(10,13),  (16,30),  ...]
        num_anchors = len(self.anchors) // 3  # split evenly across the 3 output scales
        num_classes = config['yolo']['classes']

        # Each output scale predicts num_anchors (= 3) boxes per cell.
        # Per-location output size at each scale: COCO: 3 x (5+80) = 255; the diagram shows 3 x (5+20) = 75.
        self.final_out_channels = num_anchors * (5 + num_classes)

        # 1, stride 32
        # output_channels[-1] is the channel count of DarkNet's last stage; this branch works at the scale of stage 5's output.
        self.block_layer5 = Conv2dBlock5L(in_channels=self.backbone.output_channels[-1], out_channels=[512, 1024])
        # YOLO head: a 1x1 conv that simply maps the channels to self.final_out_channels
        self.conv1x1_out5 = nn.Conv2d(in_channels=self.block_layer5.out_channels, out_channels=self.final_out_channels,
                                      kernel_size=1, stride=1, padding=0, bias=True)

        # 2, stride 16
        # Corresponds to Conv2D + UpSampling2D in the diagram: the conv changes the channel count, the upsample changes the spatial size.
        # channels: -> 256
        self.conv5 = Conv2dBatchLeaky(in_channels=self.block_layer5.out_channels, out_channels=256, kernel_size=1,
                                      stride=1)
        # upSample: 13x13 -> 26x26
        self.up_sample = Upsample(scale_factor=2, mode='nearest')
        # concat up_sample4 + backbone.out4
        in_channels = self.backbone.output_channels[-2] + 256  # 512+256=768
        # yolo layer 4
        self.block_layer4 = Conv2dBlock5L(in_channels=in_channels, out_channels=[256, 512])  # 768->512
        self.conv1x1_out4 = nn.Conv2d(in_channels=self.block_layer4.out_channels, out_channels=self.final_out_channels,
                                      kernel_size=1, stride=1, padding=0, bias=True)

        # 3, stride 8
        self.conv4 = Conv2dBatchLeaky(in_channels=self.block_layer4.out_channels, out_channels=128, kernel_size=1,
                                      stride=1)  # 512 -> 128
        # up_sample3: 26x26 -> 52x52
        # concat: up_sample3 + backbone.out3
        in_channels = self.backbone.output_channels[-3] + 128  # 256+128=384
        # yolo layer 3
        self.block_layer3 = Conv2dBlock5L(in_channels=in_channels, out_channels=[128, 256])  # channels: -> 256
        self.conv1x1_out3 = nn.Conv2d(in_channels=self.block_layer3.out_channels, out_channels=self.final_out_channels,
                                      kernel_size=1, stride=1, padding=0, bias=True)

    def forward(self, x):
        backbone_out3, backbone_out4, backbone_out5 = self.backbone(x)  # [b,256,52,52], [b,512,26,26], [b,1024,13,13]

        # stride 32
        block_out5 = self.block_layer5(backbone_out5)  # [b,1024,13,13]; c, h, w all unchanged
        yolo_out5 = self.conv1x1_out5(block_out5)  # [b,1024,13,13] -> [b,255,13,13]; the separate 3x3 conv is folded into the block above, and this 1x1 conv emits the fixed output channels

        # stride 16
        x = self.conv5(block_out5)  # [b,1024,13,13] -> [b,256,13,13]
        x = self.up_sample(x)  # [b,256,13,13] -> [b,256,26,26]
        x = torch.cat([backbone_out4, x], 1)  # backbone_out4: [b,512,26,26], x: [b,256,26,26] -> [b,768,26,26]
        block_out4 = self.block_layer4(x)  # [b,768,26,26] -> [b,512,26,26] (the diagram shows 256 here)
        yolo_out4 = self.conv1x1_out4(block_out4)  # [b,512,26,26] -> [b,255,26,26]

        # stride 8
        x = self.conv4(block_out4)  # [b,512,26,26] -> [b,128,26,26]
        x = self.up_sample(x)  # [b,128,26,26] -> [b,128,52,52]
        x = torch.cat([backbone_out3, x], 1)  # backbone_out3: [b,256,52,52], x: [b,128,52,52] -> [b,384,52,52]
        block_out3 = self.block_layer3(x)  # [b,384,52,52] -> [b,256,52,52]
        yolo_out3 = self.conv1x1_out3(block_out3)  # [b,256,52,52] -> [b,255,52,52]

        return yolo_out3, yolo_out4, yolo_out5


if __name__ == '__main__':
    cfg_dict = yaml.load(open('./config/cfg.yaml'), Loader=yaml.SafeLoader)
    yolo_module = YOLOv3(config=cfg_dict)
    x = torch.Tensor(4, 3, 416, 416)
    output3, output4, output5 = yolo_module(x)
    print(output3.shape, output4.shape, output5.shape)
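
The post does not include ./config/cfg.yaml. A minimal config consistent with the keys the code reads (yolo.anchor as a flat list of 9 width/height pairs, yolo.classes) might look like the following; the anchor values shown are the standard COCO anchors from the YOLOv3 paper, assumed here:

# config/cfg.yaml (assumed contents)
yolo:
  classes: 80
  anchor: [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]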

yolov3_loss.py

# -*- coding: utf-8 -*-
# @Time    : 2020/10/23 下午10:10
# @Author  : zxq
# @File    : yolov3_loss.py
# @Software: PyCharm

import math

import torch
import torch.nn as nn
import numpy as np

from utils.utils import bbox_iou


class YOLOLoss(nn.Module):
    def __init__(self, image_size, num_classes, anchors):
        super(YOLOLoss, self).__init__()
        self.image_size = image_size  # original image size: (w, h)
        self.num_classes = num_classes  # number of object classes
        self.anchors = anchors  # [[w1, h1], [w2, h2], [w3, h3]], measured on the original image
        self.num_anchors = len(anchors)
        self.bbox_attrs = 5 + num_classes  # attributes per box: (x, y, w, h, conf, c0, c1, ..., c79)

        self.ignore_threshold = 0.5
        self.lambda_xy = 2.5
        self.lambda_wh = 2.5
        self.lambda_conf = 1.0
        self.lambda_cls = 1.0

        self.bce_loss = nn.BCELoss()
        self.mse_loss = nn.MSELoss()

    def forward(self, input, targets=None):
        """

        :param input: [b, c, h, w]
        :param targets: [b, num_gt, num_attr]. attr = [cls, x_ratio, y_ratio, w_ratio, h_ratio]; the values are ratios relative to the image, e.g. x_ratio = x / img_w
        :return:
        """
        batch_size = input.shape[0]
        in_h = input.shape[2]
        in_w = input.shape[3]
        stride_h = self.image_size[1] / in_h  # downsampling factor along height
        stride_w = self.image_size[0] / in_w
        # The image was downscaled, so the anchors must be scaled by the same factor to live on the feature map.
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]  # anchors rescaled to this YOLO output layer

        # [b,c,h,w] -> [b,num_anchors, bbox_attr,h,w] -> [b,num_anchors, h,w, bbox_attr]
        prediction = input.view(batch_size, self.num_anchors, self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4,
                                                                                                   2).contiguous()

        # Get output attributes
        # [b,num_anchors,h,w,bbox_attr] -> [b, num_anchors, h, w]; center offsets relative to the cell's top-left corner, in (0,1)
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])  # -> [b, num_anchors, h, w]  center y
        w = prediction[..., 2]  # -> [b, num_anchors, h, w]  raw width
        h = prediction[..., 3]  # -> [b, num_anchors, h, w]  raw height
        conf = torch.sigmoid(prediction[..., 4])  # objectness probability
        pred_cls = torch.sigmoid(prediction[..., 5:])  # [b, num_anchors, h, w, num_classes] class probabilities; the sigmoid is required because BCELoss expects inputs in [0, 1]

        # train
        if targets is not None:
            mask, noobj_mask, tx, ty, tw, th, tconf, tcls = \
                self.build_target(targets, scaled_anchors, in_w, in_h, self.ignore_threshold)

            device = prediction.device  # keep the targets on the same device as the predictions
            mask, noobj_mask = mask.to(device), noobj_mask.to(device)
            tx, ty, tw, th = tx.to(device), ty.to(device), tw.to(device), th.to(device)
            tconf, tcls = tconf.to(device), tcls.to(device)

            # loss
            # 1 location loss
            # x.shape: [b, num_anchors,h,w]. mask.shape: [b, num_anchors,h,w]
            loss_x = self.bce_loss(x * mask, tx * mask)  # x*mask: predicted offsets; tx: target offsets. mask==1 marks the best-anchor positions
            loss_y = self.bce_loss(y * mask, ty * mask)
            loss_w = self.mse_loss(w * mask, tw * mask)
            loss_h = self.mse_loss(h * mask, th * mask)
            # 2 object loss
            # mask==1 marks cells that contain an object; noobj_mask==1 marks cells that do not.
            loss_conf = self.bce_loss(conf * mask, mask) + 0.5 * self.bce_loss(conf * noobj_mask, noobj_mask * 0.0)
            # 3 class loss
            # pred_cls.shape: [2,3,52,52,80], mask.shape: [2,3,52,52]
            loss_cls = self.bce_loss(pred_cls[mask == 1], tcls[mask == 1])  # pred_cls[mask == 1].shape: [num_obj, 80]

            #  total loss = losses * weight
            loss = (loss_x + loss_y) * self.lambda_xy + \
                   (loss_w + loss_h) * self.lambda_wh + \
                   loss_conf * self.lambda_conf + \
                   loss_cls * self.lambda_cls

            return loss, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item()

        # detect
        else:
            pass  # inference-time decoding is not implemented in this post

    def build_target(self, target, anchors, in_w, in_h, ignore_threshold):
        """

        :param target: [b, num_gt, num_attr]. attr = [cls, x_ratio, y_ratio, w_ratio, h_ratio]; annotated gt box info
        :param anchors: list [(w1, h1), (w2, h2), (w3, h3)]; anchors at the feature-map scale
        :param in_w: width of the prediction feature map
        :param in_h: height of the prediction feature map
        :param ignore_threshold: IoU threshold between a gt box and the 3 anchor boxes, used to pick
        anchors that fit the object; e.g. an elongated object should not be trained with a tall, narrow anchor.
        :return:
        mask: mask[b, best_anchor_index, gj, gi] = 1; positions set to 1 mark the best anchor for that cell
        noobj_mask: noobj_mask[b, anchor_ious > ignore_threshold, gj, gi] = 0; positions that stay 1 have no object
        tx: tx[b, best_anchor_index, gj, gi] = gx - gi; offset from the top-left corner of cell (gi, gj) -- the network learns offsets
        ty: ty[b, best_anchor_index, gj, gi] = gy - gj
        tw: tw[b, best_anchor_index, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16); the network learns log(gw / anchor_w)
        th: th[b, best_anchor_index, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
        tconf: tconf[b, best_n, gj, gi] = 1
        tcls: tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1
        """

        batch_size = target.shape[0]

        mask = torch.zeros(batch_size, self.num_anchors, in_h, in_w,
                           requires_grad=False)  # [b, num_anchors, h, w], e.g. [2, 3, 52, 52]
        noobj_mask = torch.ones(batch_size, self.num_anchors, in_h, in_w, requires_grad=False)  # [b, num_anchors, h, w]
        tx = torch.zeros(batch_size, self.num_anchors, in_h, in_w, requires_grad=False)  # [b, num_anchors, h, w]
        ty = torch.zeros(batch_size, self.num_anchors, in_h, in_w, requires_grad=False)  # [b, num_anchors, h, w]
        tw = torch.zeros(batch_size, self.num_anchors, in_h, in_w, requires_grad=False)  # [b, num_anchors, h, w]
        th = torch.zeros(batch_size, self.num_anchors, in_h, in_w, requires_grad=False)  # [b, num_anchors, h, w]
        tconf = torch.zeros(batch_size, self.num_anchors, in_h, in_w, requires_grad=False)  # [b, num_anchors, h, w]
        # [b, num_anchors, h, w, num_cls]
        tcls = torch.zeros(batch_size, self.num_anchors, in_h, in_w, self.num_classes,
                           requires_grad=False)  # [2, 3, 52, 52, 80]
        for b in range(batch_size):  # iterate over the images in the batch
            for t in range(target.shape[1]):  # iterate over all the objects in the image
                if target[b, t].sum() == 0:  # all-zero rows are padding: per-image object counts differ, so targets were zero-padded when batched
                    continue

                # The stored x/y/w/h ratios are relative to the original image;
                # recover the gt box at the feature-map scale.
                gx = target[b, t, 1] * in_w  # float; gt center x at the feature-map scale, e.g. 0.3282 * 52 = 17.06
                gy = target[b, t, 2] * in_h  # e.g. 0.7696 * 52 = 40.02
                gw = target[b, t, 3] * in_w  # gt width at the feature-map scale, e.g. 0.4632 * 52 = 24.08
                gh = target[b, t, 4] * in_h  # e.g. 12.59
                # Get grid box indices
                # 17.06, 40.02 -> 17, 40
                gi = int(gx)  # floor of gx on the feature map
                gj = int(gy)  # (gi, gj) is the grid cell that contains the object

                # Get shape of gt box
                # tensor([ 0.0000,  0.0000, gw, gh]) -> tensor([[ 0.0000,  0.0000, gw, gh]])
                gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)  # tensor([[ 0.0000,  0.0000, 24.0841, 12.5948]])
                # Get shape of anchor box
                # -> (3, 4); each row is like [0., 0., 2.2, 3.4]: anchor w/h anchored at the origin
                anchor_box = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)),
                                                               np.array(anchors)), 1))
                # Calculate iou between gt and anchor shapes
                """
                gt_box = tensor([[0.0000, 0.0000, gw, gh]])
                anchor_box = tensor([[0.0000, 0.0000, 2.2000, 3.4000],
                                    [0.0000, 0.0000, 4.2000, 5.1000],
                                    [0.0000, 0.0000, 2.3000, 6.5000]])
                """
                anchor_ious = bbox_iou(gt_box, anchor_box)  # gt_box.shape: (1,4). anchor_box.shape: (3,4)
                # Where the overlap is larger than threshold set mask to zero (ignore)
                noobj_mask[b, anchor_ious > ignore_threshold, gj, gi] = 0  # noobj_mask==1 means no object; a larger ignore_threshold leaves more positions treated as background
                # Find the best matching anchor box
                best_anchor_index = np.argmax(anchor_ious)

                # masks
                mask[b, best_anchor_index, gj, gi] = 1  # mark the best-matching anchor
                # Coordinates tx, ty
                tx[b, best_anchor_index, gj, gi] = gx - gi  # offset from the cell's top-left corner
                ty[b, best_anchor_index, gj, gi] = gy - gj
                # Width and height tw, th
                tw[b, best_anchor_index, gj, gi] = math.log(gw / anchors[best_anchor_index][0] + 1e-16)
                th[b, best_anchor_index, gj, gi] = math.log(gh / anchors[best_anchor_index][1] + 1e-16)
                # object
                tconf[b, best_anchor_index, gj, gi] = 1
                # One-hot encoding of label
                tcls[b, best_anchor_index, gj, gi, int(target[b, t, 0])] = 1

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls


if __name__ == '__main__':
    loss_module = YOLOLoss(image_size=(416, 416), num_classes=80, anchors=[[116, 90], [156, 198], [373, 326]])
    net_output = torch.rand(2, 255, 52, 52) * 10  # simulated head output; note a 52x52 map is the stride-8 head (out3), while the anchors above are the stride-32 set -- fine for a smoke test
    target1 = torch.FloatTensor([[16, 0.328250, 0.769577, 0.463156, 0.242207],
                                 [1, 0.128828, 0.375258, 0.249063, 0.733333],
                                 [0, 0.521430, 0.258251, 0.021172, 0.060869]])
    target2 = torch.FloatTensor([[59, 0.510930, 0.442073, 0.978141, 0.872188],
                                 [77, 0.858305, 0.073521, 0.074922, 0.059833],
                                 [0, 0.569492, 0.285235, 0.024547, 0.122254]])
    # [b, num_gt, num_attr]. [b, num_gt, cls, x_ratio, y_ratio, w_ratio, h_ratio]
    targets = torch.cat((target1.unsqueeze(0), target2.unsqueeze(0)), 0)  # [2, 3, 5]
    loss = loss_module(input=net_output, targets=targets)  # [b, num_gt, cls, x_r, y_r, w_r, h_r]
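
The helper bbox_iou is imported from utils.utils, which the post does not include. A minimal sketch consistent with how it is called above (box rows in (cx, cy, w, h) form, one IoU per anchor row) could be:

import torch

def bbox_iou(box1, box2):
    """IoU between box1 [1, 4] and each row of box2 [n, 4]; boxes are (cx, cy, w, h)."""
    # convert center/size form to corner coordinates
    b1_x1, b1_y1 = box1[:, 0] - box1[:, 2] / 2, box1[:, 1] - box1[:, 3] / 2
    b1_x2, b1_y2 = box1[:, 0] + box1[:, 2] / 2, box1[:, 1] + box1[:, 3] / 2
    b2_x1, b2_y1 = box2[:, 0] - box2[:, 2] / 2, box2[:, 1] - box2[:, 3] / 2
    b2_x2, b2_y2 = box2[:, 0] + box2[:, 2] / 2, box2[:, 1] + box2[:, 3] / 2
    # clamp the intersection rectangle to non-negative width/height
    inter_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(min=0)
    inter_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(min=0)
    inter = inter_w * inter_h
    union = box1[:, 2] * box1[:, 3] + box2[:, 2] * box2[:, 3] - inter
    return inter / (union + 1e-16)  # shape [n]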

train.py

import torch
import yaml

from yolov3_module import YOLOv3

if __name__ == '__main__':
    cfg_dict = yaml.load(open('./config/cfg.yaml'), Loader=yaml.SafeLoader)
    yolo_module = YOLOv3(config=cfg_dict)
    x = torch.Tensor(4, 3, 416, 416)
    output3, output4, output5 = yolo_module(x)
    print(output3.shape, output4.shape, output5.shape)

    # YOLO loss with 3 scales
    yolo_loss = []
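    # --- Hedged sketch of the intended continuation; the "from yolov3_loss import YOLOLoss"
    # path and the small-to-large anchor ordering in cfg.yaml are assumptions, not from the post ---
    from yolov3_loss import YOLOLoss

    anchors = cfg_dict['yolo']['anchor']
    anchor_pairs = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors) - 1, 2)]
    # one loss head per scale; YOLOv3 conventionally pairs the smallest anchors with the
    # finest map (output3, 52x52) and the largest with the coarsest (output5, 13x13)
    for group in (anchor_pairs[0:3], anchor_pairs[3:6], anchor_pairs[6:9]):
        yolo_loss.append(YOLOLoss(image_size=(416, 416),
                                  num_classes=cfg_dict['yolo']['classes'],
                                  anchors=group))
    # a training step would then sum the three heads' losses:
    #   total = sum(yolo_loss[i](out, targets)[0]
    #               for i, out in enumerate((output3, output4, output5)))
    #   total.backward()  # then optimizer.step()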

To be continued...
