神经网络量化----TensorRT深刻解读

from torch import nn
import torch

# Identity module that replaces a BN layer once it has been folded
class DummyModule(nn.Module):
    """Pass-through placeholder: ``forward`` returns its argument untouched."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Identity mapping: the very same object is handed back.
        passthrough = x
        return passthrough

# BN fold
def bn_folding(conv, bn):
    """Return a new ``nn.Conv2d`` equivalent to ``bn(conv(x))`` in eval mode.

    The BatchNorm statistics and affine parameters are folded into the
    convolution weight and bias:

        w_fold = w * gamma / sqrt(running_var + eps)
        b_fold = beta + (b - running_mean) * gamma / sqrt(running_var + eps)

    Args:
        conv: the ``nn.Conv2d`` whose output feeds ``bn``.
        bn: the ``nn.BatchNorm2d`` to fold. It must carry running statistics.

    Returns:
        A fresh ``nn.Conv2d`` (always with a bias) that copies the stride,
        padding, dilation, groups and padding mode of ``conv``.

    Raises:
        ValueError: if ``bn`` has no running statistics to fold.
    """
    if bn.running_mean is None or bn.running_var is None:
        raise ValueError('bn_folding requires a BatchNorm layer with running statistics')

    # The fold is pure parameter arithmetic; keep it out of the autograd graph.
    with torch.no_grad():
        mean = bn.running_mean
        std = torch.sqrt(bn.running_var + bn.eps)
        # affine=False BatchNorm has no weight/bias: behave as gamma=1, beta=0.
        gamma = bn.weight if bn.weight is not None else torch.ones_like(mean)
        beta = bn.bias if bn.bias is not None else torch.zeros_like(mean)
        scale = gamma / std

        if conv.bias is not None:
            b = conv.bias
        else:
            b = mean.new_zeros(mean.shape)

        # One scale factor per output channel, broadcast over (in, kH, kW).
        w_fold = conv.weight * scale.reshape([conv.out_channels, 1, 1, 1])
        b_fold = beta + (b - mean) * scale

    bnfold_conv = nn.Conv2d(conv.in_channels,
                            conv.out_channels,
                            conv.kernel_size,
                            conv.stride,
                            conv.padding,
                            dilation=conv.dilation,
                            groups=conv.groups,
                            bias=True,
                            padding_mode=conv.padding_mode)
    bnfold_conv.weight.data = w_fold
    bnfold_conv.bias.data = b_fold
    return bnfold_conv

def model_bn_folding(model):
    """Fold each BatchNorm2d that directly follows a Conv2d, in place.

    Children are visited in ``named_children()`` order. When a ``BatchNorm2d``
    comes immediately after a ``Conv2d`` in that order, the conv is replaced
    by the result of ``bn_folding`` and the BN by a ``DummyModule``. Every
    other child is recursed into.

    BN must come right after the convolution: a BatchNorm2d with no
    immediately preceding Conv2d is left untouched, because folding it across
    another layer (e.g. conv -> relu -> bn) would change the network output.

    Args:
        model: the ``nn.Module`` to rewrite (mutated in place).

    Returns:
        The same ``model`` object.
    """
    pending_name = None
    pending_conv = None
    for name, child in list(model.named_children()):
        if isinstance(child, nn.BatchNorm2d):
            if pending_conv is not None:
                model._modules[pending_name] = bn_folding(pending_conv, child)
                model._modules[name] = DummyModule()
            # A conv can absorb at most one BN.
            pending_name = pending_conv = None
        elif isinstance(child, nn.Conv2d):
            pending_name, pending_conv = name, child
        else:
            # Any other layer breaks the conv -> BN adjacency.
            pending_name = pending_conv = None
            model_bn_folding(child)
    return model

2.conv和ReLU的融合

新建一个module将卷积和ReLU包含在内了。

import torch
from torch import nn

import torch.nn.functional as F
from quant_utils import ConvRelu, LinearRelu, DummyModule

# device = torch.device("cpu")
# Device the wrapper modules created in this file are moved to: the first
# CUDA GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def model_relu_folding(model):
    """Wrap Conv2d/Linear children and fuse the ReLU that follows them, in place.

    Children are visited in ``named_children()`` order:

    * every ``nn.Conv2d`` is replaced by ``ConvRelu(child, is_relu=0)`` and
      every ``nn.Linear`` by ``LinearRelu(child, is_relu=0)``;
    * when an ``nn.ReLU`` is met, the most recently seen conv/linear is
      re-wrapped with ``is_relu=1`` and the ReLU itself is replaced by a
      ``DummyModule``;
    * a ReLU with no conv/linear seen before it in the same container is
      left untouched instead of crashing;
    * any other child is recursed into.

    The remembered conv/linear is not cleared by intervening modules.
    NOTE(review): presumably this is so that a ReLU separated from its conv
    by a DummyModule left behind by BN folding is still fused - confirm
    before tightening the adjacency rule.

    Args:
        model: the ``nn.Module`` to rewrite (mutated in place).

    Returns:
        The same ``model`` object.
    """
    children = list(model.named_children())

    name_temp = None
    child_temp = None
    is_conv = True
    for name, child in children:
        print(name, '   ', child)
        if isinstance(child, nn.ReLU):
            if child_temp is None:
                # Nothing to fuse this ReLU into; keep it as a real layer.
                continue
            wrapper_cls = ConvRelu if is_conv else LinearRelu
            model._modules[name_temp] = wrapper_cls(child_temp, is_relu=1).to(device)
            model._modules[name] = DummyModule().to(device)

        elif isinstance(child, nn.Conv2d):
            name_temp = name
            child_temp = child
            model._modules[name] = ConvRelu(child, is_relu=0).to(device)
            is_conv = True

        elif isinstance(child, nn.Linear):
            name_temp = name
            child_temp = child
            model._modules[name] = LinearRelu(child, is_relu=0).to(device)
            is_conv = False

        else:
            model_relu_folding(child)
    return model

quant_utils.py

在ConvRelu中，使用register_buffer申请了权重和激活值量化相关的变量，并仿照model.train()切换状态的方式设置了若干mode（保存在self.mode中），用来执行TensorRT算法的不同阶段。

weight_quant（）：逐输出通道统计权重绝对值的最大值，并据此计算量化的scale。

initial_activate_max（）：统计激活值的最值，这个需要在校准集上跑一遍才能统计出的。

initial_histograms（）：统计激活值的直方图，这个也需要跑一遍校准集，需要注意的一点，如果有ReLU的话，0值对应的直方图数量很多，会减小其他值的权重，由于采用对称映射，0几乎无误差，所以将0值对应的直方图设置为0.

get_optimal_threshold（）：计算KL散度，获取最佳的阈值。

from torch import nn
import torch
import torch.nn.functional as F
import copy
from collections import OrderedDict
import numpy as np


# Number of bins in each activation histogram (also the size of the
# 'activate_distubution' buffer). It is odd, which get_optimal_threshold()
# asserts, so that one bin is centered on zero.
INTERVAL_NUM = 4001
QUANTIZE_NUM = 127    # 7bit
# NOTE(review): QUANTIZE_NUM and STATISTIC are not referenced by any live code
# in the portion of the file shown here - confirm before removing.
STATISTIC = 1.0


# Default device for tensors created by the quantization helpers: the first
# CUDA GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Identity module that replaces a ReLU layer once it has been fused
class DummyModule(nn.Module):
    """No-op layer: ``forward`` hands its input straight back."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Nothing is computed; the caller gets the same object it passed in.
        unchanged = x
        return unchanged
    
    
# Module that replaces a conv layer (optionally fused with ReLU)
class ConvRelu(nn.Module):
    """Conv2d (optionally fused with ReLU) instrumented for TRT-style calibration.

    The module reuses the weight and bias of an existing ``nn.Conv2d`` and
    keeps all calibration state (weight scales, activation maximum, activation
    histogram, thresholds) in registered buffers. ``forward`` always computes
    the floating-point convolution; ``self.mode`` selects which statistics are
    collected from its output.

    Args:
        conv: the ``nn.Conv2d`` to wrap; its parameters are shared, not copied.
        is_relu: truthy to apply ReLU after the convolution.
        bits: quantization bit width.
        threshold: upper clip applied to the collected activation maximum.
    """

    def __init__(self, conv, is_relu=0, bits=8, threshold=204800):
        super(ConvRelu, self).__init__()

        self.threshold = threshold
        self.bits = bits
        self.is_relu = is_relu
        self.kernel_size = conv.kernel_size
        self.stride = conv.stride
        self.padding = conv.padding
        self.dilation = conv.dilation
        self.groups = conv.groups
        self.bias = conv.bias
        self.weight = conv.weight
        # Modes accepted by forward(): TRT_weight_quant,
        # TRT_activate_collection_max, TRT_activate_collection_hist,
        # TRT_activate_KL, Normal_TRT. Any other value (including the
        # 'Normal' entry of the original mode list) raises ValueError.
        self.mode = 'TRT_weight_quant'

        self.register_buffer('quant_num', torch.tensor((1 << bits) - 1))

        # Activation calibration state.
        self.register_buffer('activate_flag', torch.zeros(1))
        self.register_buffer('activate_distubution', torch.zeros(INTERVAL_NUM))
        self.register_buffer('activate_distubution_edges', torch.zeros(INTERVAL_NUM+1))
        self.register_buffer('activate_max', torch.zeros(1))
        self.register_buffer('th', torch.zeros(1))
        self.register_buffer('optimal_th', torch.zeros(1))
        # Weight calibration state (one entry per output channel).
        self.register_buffer('weight_flag', torch.zeros(1))
        self.register_buffer('weight_scale', torch.zeros(conv.weight.data.shape[0]))
        self.register_buffer('weight_zero', torch.zeros(conv.weight.data.shape[0]))
        self.register_buffer('weight_max', torch.zeros(conv.weight.data.shape[0]))

    def initial_activate_max(self, input):
        """Update the running max of ``|input|`` and the clipped threshold ``th``."""
        peak = torch.max(torch.abs(input.detach()))
        self.activate_max = torch.max(self.activate_max, peak)
        # Avoid unusually large activations by clipping with self.threshold.
        # torch.clamp keeps ``th`` a tensor; the builtin min() could return
        # the plain number, which cannot be assigned to a registered buffer.
        self.th = torch.clamp(self.activate_max, max=float(self.threshold))

    def weight_quant(self):
        """Compute per-output-channel weight max, scale and zero flag."""
        weight = self.weight.detach()
        # Marks that weight statistics were computed, so a module reached
        # several times is not processed repeatedly.
        self.weight_flag = torch.ones(1, device=weight.device)

        # max |w| per output channel, kept broadcastable as (out, 1, 1, 1).
        weight_threshold = weight.abs().reshape(weight.shape[0], -1).max(dim=1)[0].reshape(-1, 1, 1, 1)
        self.weight_max = weight_threshold

        # Channels whose weights are all (nearly) zero get scale 0 and flag 1.
        negligible = weight_threshold < 0.0001
        q_max = (1 << (self.bits - 1)) - 1
        self.weight_scale = torch.where(negligible,
                                        torch.zeros_like(weight_threshold),
                                        q_max / weight_threshold)
        self.weight_zero = negligible.to(weight_threshold.dtype)

    def initial_histograms(self, input):
        """Accumulate a histogram of ``input`` over the range ``[-th, th]``."""
        # Values outside [-th, th] are ignored by np.histogram, so they do not
        # fall into the statistics.
        values = input.detach().cpu().numpy().flatten()
        th = self.th.cpu().item()
        num_bins = self.activate_distubution.shape[0]
        hist, hist_edges = np.histogram(values, bins=num_bins, range=(-th, th))

        target_device = self.activate_distubution.device
        self.activate_distubution += torch.from_numpy(hist).to(
            device=target_device, dtype=self.activate_distubution.dtype)
        # Clear the central bin (the one containing zero) after each update.
        self.activate_distubution[num_bins // 2] = 0
        self.activate_distubution_edges = torch.from_numpy(hist_edges).to(target_device)

    def plot_hist(self, optimal_th=None):
        """Print and plot the accumulated histogram; optionally mark ``optimal_th``."""
        edges = self.activate_distubution_edges.cpu().numpy()[:-1]
        counts = self.activate_distubution.cpu().numpy()
        print('hist: ', counts)
        print('hist_edge: ', edges)
        import matplotlib.pyplot as plt
        plt.plot(edges, counts)
        if optimal_th is not None:
            plt.plot(optimal_th, 0, 'om')
            plt.annotate('optimal_th', xy=(optimal_th, 0), xytext=(optimal_th+1, 10000), arrowprops=dict(arrowstyle='->'))
        plt.ylabel('activate distubution')
        plt.show()

    def get_optimal_threshold(self):
        """Run ``calibrate`` on the accumulated histogram and store ``optimal_th``."""
        target_device = self.activate_distubution.device
        # Marks that the activation threshold was computed, so a module
        # reached several times is not processed repeatedly.
        self.activate_flag = torch.ones(1, device=target_device)

        length = self.activate_distubution.shape[0]
        assert (length % 2 == 1)
        hist = self.activate_distubution.cpu().numpy()
        hist_edge = self.activate_distubution_edges.cpu().numpy()
        num_quantized_bins = self.quant_num.cpu().item()

        optimal_threshold = calibrate(hist, hist_edge, num_quantized_bins)
        self.optimal_th = torch.tensor(optimal_threshold).to(target_device)
        print('th: ', self.th)
        print('optimal_th: ', self.optimal_th)
        self.plot_hist(optimal_th=optimal_threshold)

    def forward(self, x):
        """Apply the convolution (and ReLU), collecting statistics per ``mode``.

        Raises:
            ValueError: if ``self.mode`` is not one of the supported modes.
        """
        assert self.training is False

        # Keyword arguments: F.conv2d's positional order is
        # (stride, padding, dilation, groups), so passing groups positionally
        # after padding would land in the dilation slot. Instances pickled
        # before ``dilation`` was stored fall back to the default of 1.
        x = F.conv2d(x, self.weight, self.bias,
                     stride=self.stride,
                     padding=self.padding,
                     dilation=getattr(self, 'dilation', 1),
                     groups=self.groups)
        if self.is_relu:
            x = F.relu(x)

        if self.mode == 'TRT_activate_collection_max':
            # Collect the activation maximum and clipped threshold.
            self.initial_activate_max(x)
        elif self.mode == 'TRT_activate_collection_hist':
            # Collect the activation histogram.
            self.initial_histograms(x)
        elif self.mode not in ('TRT_activate_KL', 'Normal_TRT', 'TRT_weight_quant'):
            raise ValueError("mode error")
        return x

以下代码是第二个参考代码中调用的C++代码，该C++代码有点错误，处理边界存在叠加，问题在：merge hist into num_quantized_bins bins部分，注意区分（已修改）。

def calibrate(hist, hist_edge, num_quantized_bins=255):
    """Choose the clipping threshold with the smallest KL divergence.

    For every symmetric window of bins centered on the middle bin of ``hist``
    (from ``num_quantized_bins // 2`` bins on each side up to the full
    histogram), a reference distribution ``p`` and a quantized approximation
    ``q`` are built and compared with ``ComputeEntropy``.

    Args:
        hist: 1-D numpy array of bin counts.
        hist_edge: 1-D numpy array of bin edges; must have ``hist.size + 1``
            entries.
        num_quantized_bins: number of quantization levels ``q`` is merged into.

    Returns:
        The entry of ``hist_edge`` at the upper edge of the window whose
        divergence is smallest.

    Raises:
        ValueError: propagated from ``_smooth_distribution`` when ``p`` has no
            non-zero entry (for example an empty histogram).
    """
    num_bins = hist.size

    assert num_bins + 1 == hist_edge.size
    zero_bin_idx = num_bins // 2
    num_half_quantized_bins = num_quantized_bins // 2
    num_candidates = zero_bin_idx + 1 - num_half_quantized_bins
    thresholds = np.zeros(num_candidates)
    divergence = np.zeros(num_candidates)

    for i in range(num_half_quantized_bins, zero_bin_idx + 1):
        slot = i - num_half_quantized_bins
        p_bin_index_start = zero_bin_idx - i
        p_bin_index_stop = zero_bin_idx + i + 1
        thresholds[slot] = hist_edge[p_bin_index_stop]

        window = p_bin_index_stop - p_bin_index_start
        sliced_nd_hist = np.zeros(window)
        p = np.zeros(window)

        # Entries 1.. of both arrays copy the window's bins. Entry 0 of
        # sliced_nd_hist stays 0, so the bin at p_bin_index_start only
        # contributes to p, where it is summed with all bins left of the
        # window. Bins right of the window are added to the last entry of p.
        p[1:] = hist[p_bin_index_start + 1:p_bin_index_stop]
        sliced_nd_hist[1:] = hist[p_bin_index_start + 1:p_bin_index_stop]
        p[0] = np.sum(hist[:p_bin_index_start + 1])
        p[-1] = p[-1] + np.sum(hist[p_bin_index_stop:])

        # How many source bins are merged into one quantized bin.
        num_merged_bins = sliced_nd_hist.size // num_quantized_bins
        # Merge the window into num_quantized_bins bins; the remainder that
        # does not divide evenly is added to the last quantized bin.
        quantized_bins = np.zeros(num_quantized_bins)
        for j in range(num_quantized_bins):
            start = j * num_merged_bins
            stop = (j + 1) * num_merged_bins
            quantized_bins[j] = np.sum(sliced_nd_hist[start:stop])
        quantized_bins[-1] = quantized_bins[-1] + np.sum(sliced_nd_hist[num_quantized_bins * num_merged_bins:])

        # Expand the quantized bins back to p.size entries, spreading each
        # quantized count evenly over the positions where p is non-zero.
        q = np.zeros(window)
        is_nonzeros = (p != 0).astype(np.int64)
        for j in range(num_quantized_bins):
            start = j * num_merged_bins
            stop = q.size if (j == num_quantized_bins - 1) else (j + 1) * num_merged_bins
            norm = is_nonzeros[start:stop].sum()
            if norm != 0:
                q[start:stop] = float(quantized_bins[j]) / float(norm)
        q[p == 0] = 0

        p = _smooth_distribution(p)
        try:
            q = _smooth_distribution(q)
        except ValueError:
            # q is all zeros for this window (no mass inside it): this
            # candidate cannot be the optimum, but must not abort the search.
            divergence[slot] = np.inf
            continue
        divergence[slot] = ComputeEntropy(p, q)

    min_kl_divergence = np.argmin(divergence)
    return thresholds[min_kl_divergence]
        
            
            
            
def _smooth_distribution(p, eps=0.0001):
    
    is_zeros = (p == 0).astype(np.float32)
    is_nonzeros = (p != 0).astype(np.float32)
    n_zeros = is_zeros.sum()
    n_nonzeros = p.size - n_zeros
    if not n_nonzeros:
        raise ValueError('The discrete probability distribution is malformed. All entries are 0.')
    eps1 = eps * float(n_zeros) / float(n_nonzeros)
    assert eps1 < 1.0, 'n_zeros=%d, n_nonzeros=%d, eps1=%f' % (n_zeros, n_nonzeros, eps1)
    hist = p.astype(np.float32)
    hist += eps * is_zeros + (-eps1) * is_nonzeros
    assert (hist <= 0).sum() == 0
    return hist

#from scipy import *
def ComputeEntropy(p, q):
    """Return the KL divergence ``sum(p * log(p / q))`` of two histograms.

    Both inputs are normalized to sum to 1 before the divergence is computed.
    They must have the same number of entries.
    """
    assert p.size == q.size
    p_norm = p / np.sum(p)
    q_norm = q / np.sum(q)
    log_ratio = np.lib.scimath.log(p_norm / q_norm)
    return np.sum(p_norm * log_ratio)

3.调用示例

import torch
import sys
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
sys.path.append('vgg')
from VggNet import * 
from datetime import datetime
from torch.utils.data import DataLoader

from torchvision import datasets,transforms

from ConvReluFold import model_relu_folding

from ConvBNFold import model_bn_folding
from quant_utils import ConvRelu, LinearRelu, DummyModule, TRT_Quantizer

# Load the whole pickled model (BN and ReLU already folded, per the file name)
# and switch it to eval mode.
# NOTE(review): torch.load of a full model unpickles arbitrary objects - only
# load checkpoints from a trusted source.
model = torch.load('./model/vgg0.904_bnrelufold.pth')
model.eval()

'''---------------------------------------------------------------------------------------'''
'''---------------------- TRT_weight_quant ------------------------------------'''
# Stage 1: weight quantization statistics.
TRT_Quantizer(model, mode='TRT_weight_quant')

'''---------------------------------------------------------------------------------------'''
'''---------------------- TRT_activate_collection_max ------------------------------------'''
# Stage 2: run calibration batches to collect activation maxima.
TRT_Quantizer(model, mode='TRT_activate_collection_max')

model.eval()
correct = 0.0
total = 0
num = 0
# NOTE(review): train_loader is not defined in this excerpt - it must be
# created before this point.
with torch.no_grad():  
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) 
  
        outputs = model(inputs)
        pred = outputs.argmax(dim = 1)  # predicted class index per sample
        total += inputs.size(0)
        correct += torch.eq(pred,labels).sum().item()
        num += 1
        # Stop after 21 batches.
        if num > 20:
            break
# NOTE(review): the message says "10000 test images", but the loop above
# iterates train_loader and stops after 21 batches.
print('Accuracy of the network on the 10000 test images: %.2f %%' % (100.0 * correct / total))

'''---------------------------------------------------------------------------------------'''
'''---------------------- TRT_activate_collection_hist ------------------------------------'''
# Stage 3: run the same batches again to collect activation histograms.
TRT_Quantizer(model, mode='TRT_activate_collection_hist')
correct = 0.0
total = 0
num = 0
with torch.no_grad():  # no backpropagation is needed for this pass over the training set
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) 
  
        outputs = model(inputs)
        pred = outputs.argmax(dim = 1)  
        total += inputs.size(0)
        correct += torch.eq(pred,labels).sum().item()
        num += 1
        # Stop after 21 batches.
        if num > 20:
            break
print('Accuracy of the network on the 10000 test images: %.2f %%' % (100.0 * correct / total))

'''---------------------------------------------------------------------------------------'''
'''---------------------- TRT_activate_KL ------------------------------------'''
# Stage 4: KL-divergence calibration of the activation thresholds.
TRT_Quantizer(model, mode='TRT_activate_KL')

注意：在使用融合后的模型时，必须import之前的model，否则会报错：缺少某个组件。（相当于在之前的方法的基础上新建了方法，所以还需要导入之前的方法才行）。

总结

这次编程让我对pytorch的了解又加深了一步，另外之前学的C++现在派上了用场，否则关于直方图那部分还真的不好编写，真的是学无止境呀。

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)

神经网络量化

Pytorch

c

算法

python

神经网络量化----TensorRT深刻解读的相关文章

Python range() 和 zip() 对象类型

我了解功能如何range and zip 可以在 for 循环中使用然而我期望range 输出一个列表很像seq在 Unix shell 中如果我运行以下代码 a range 10 print a 输出是range 10 表明它不是一
使用 Thread.Sleep() 时，异步编程如何与线程一起工作？

假设前言在之前的问题中我们注意到Thread Sleep阻塞线程参见什么时候使用Task Delay 什么时候使用Thread Sleep https stackoverflow com questions 20082221 whe
Unpivot Pandas 数据

我目前有一个DataFrame布置为 Jan Feb Mar Apr 2001 1 12 12 19 2002 9 2003 我想将数据逆透视使其看起来像 Date Value Jan 2001 1 Feb 2001 1 Mar 200
c++11 中的 std::thread 问题

我在尝试从标准模板库编译具有多线程的程序时遇到一些麻烦当我尝试编译以下程序时它返回一个晦涩的错误 include
我应该使用函数还是无状态函子？

这两段代码做同样的事情如您所见它将用于排序函数哪个更好我通常写后一种但我看到一些程序员像以前那样做 struct val lessthan binary function
Python Pandas DateOffset 使用另一列中的值

我以为这会很容易但下面的内容并不适合我想要的只是尝试通过使用另一列中的值将天数添加到预先存在的日期时间列来计算新的日期列我下面的偏移列只有 1 位数字 df new date df orig date apply lambda x
允许 .NET WebApi 忽略 DOCTYPE 声明

我正在尝试通过 WebApi 方法将 XML 反序列化为对象我有以下课程 XmlRoot IsNullable false public class MyObject XmlElement Name public string Name
将文本文件转换为 plink PED 和 MAP 格式

我有以下数据其中的一小部分名为 short2 pre snp tumor txt rs987435 C G 1 1 1 0 2 rs345783 C G 0 0 1 0 0 rs955894 G T 1 1 2 2 1 rs608879
如何使用包含的转换的排名来比较两个标准转换序列

include
在 C# 中加密并在 Flex 中解密

我需要解密 Flex 中的一些数据这些数据是用 C 加密并写入文件的为了简单起见我选择使用 as3crypto As3 库和 Bruce Schneier C 库 AS3 as3加密链接 http code google com p
如何带参数调用外部程序？

我想在我的代码中调用一个 Windows 程序并使用代码本身确定的参数我不想调用外部函数或方法而是调用 WinXP 环境中的实际 exe 或批处理脚本文件 C 或 C 将是首选语言但如果使用任何其他语言更容易完成此操作请告诉我
gcc 中的“假设”子句

gcc 最新版本 4 8 4 9 是否有类似于以下的假设子句 assume 内置icc支持吗例如 assume n 8 0 从 gcc 4 8 2 开始 gcc 中没有 assume 的等效项我不知道为什么这会非常有用马夫索建议
修改公共属性的访问修饰符是否是重大更改？

如果我将公共属性的 setter 的访问修饰符从私有更改为公共是否会导致引用它的其他程序集发生任何重大更改 UPDATE 这个问题是我 2012 年 1 月博客的主题 https ericlippert com 2012 01 09 ev
如何从标准输入读取一行，阻塞直到找到换行符？

我试图从命令行的标准输入一次读取任意长度的一行我不确定是否能够包含 GNU readline 并且更喜欢使用库函数我读过的文档表明getline应该可以工作但在我的实验中它不会阻塞我的示例程序 include
更新插入 MongoDB 时如何防止出现“_t”字段？

我有一个应用程序它使用 MongoDB 的 C 驱动程序将 Upsert 插入 MongoDB 数据库当我打电话给Update函数我无法指定我要更新的类型然后 t字段插入元素的类型这是我用来更新插入的代码 collection U
jquery ajax“发布”调用

我是 jQuery 和 Ajax 的新手并且在发布方面遇到问题我正在使用 jQuery Ajax post 调用将数据保存到数据库当我尝试保存数据时它将 null 传递给我的 C 方法 jQuery 看起来像这样 functio
为了清楚起见，是否应该在返回类型上使用无用的类型限定符？

当我们的头文件中有原型时我们的静态分析工具会抱怨返回类型上有无用的类型限定符例如 const int foo 我们这样定义它是因为该函数返回一个永远不会改变的常量认为 API 看起来更清晰const到位为了清楚起见我觉得这类似于
当另一个进程使用 std::fstream 写入文件时从文件读取[重复]

这个问题在这里已经有答案了我需要从文件中逐行读取它是由 std getline 完成的另一个进程的问题是一直向其附加数据然后我需要读取新行例如文件一开始包含10行我的程序读取了10行那么我的程序应该等待过了一会儿另一个进
如何在Asp.Net Core中自定义开发者异常页面？

这常见于ConfigureStartup cs 文件的方法具有如下所示的代码 if env IsDevelopment app UseDeveloperExceptionPage new DeveloperExceptionPageOpti
如何将 c_uint 的 ctypes 数组转换为 numpy 数组

我有以下 ctypes 数组 data ctypes c uint 100 我想创建一个 numpy 数组np data包含来自 ctypes 数组数据的整数值 ctypes 数组显然稍后会填充值我看到numpy中有一个ctypes接口

随机推荐

linux的链接方式

linux的硬链接和软链接 1 链接的概念 Linux链接分两种一种被称为硬链接 Hard Link 另一种被称为软链接也叫符号链接 Symbolic Link 默认情况下 ln命令产生硬链接 2 硬链接在Linux文件系统当中保存在
年入50万，程序员的第二条赛道

大家好我是厂长我有个朋友叫佩佩这几年我亲眼见证了她从月薪6千到年入百万她曾经靠一条短视频带货就赚了30万佣金最近我看到她在做小红书无货源电商这个风口项目两个月就做到了单店30万的战绩她说2023年是小红书电商元年 0粉
回味C语言

虽然在实际上没怎么使用C语言了但是看到C语言的书总是忍不住想看一下喜欢这种至简却又有着强大能力的语言读完书随手写的一些笔记略有些简单书还是很喜欢的推荐给大家 C专家编程第一章 C 穿越时空的迷雾原型决定C语言不支持函数重载
小程序面试题

1 简单描述一下微信小程序的相关文件类型答 WXML WeiXin Markup Language 是框架设计的一套标签语言结合基础组件事件系统可以构建出页面的结构内部主要是微信自己定义的套组件 WXSS WeiXin Styl
mysql 利用binlog增量备份，还原实例（日志备份数据库）

一什么是增量备份增量备份就是将新增加的数据进行备份假如你一个数据库有10G的数据每天会增加10M的数据数据库每天都要备份一次这么多数据是不是都要备份呢还是只要备份增加的数据呢很显然我只要备份增加的数据这样减少服务器的
C++ 调用python

本文代码已在vs2017上验证 c 调用python需要三类文件这些文件都可以在python安装目录下找到 1 include文件夹位于python目录下 2 dll文件位于python目录下如python37 dll 3 lib文
超分辨率概述

1 什么是超分辨率增强 Video super resolution is the task of upscaling a video from a low resolution to a high resolution 超分辨率 Supe
Git & GitHub 入门6：用好commit message

git log 可以查看所有的 commit messages 修改repo中的文件内容后 add该文件直接运行命令git commit进入message编辑状态可以输入多行commit message说明完成后点击ECS键退出编辑
Gin-swaggo为gin框架提供Swagger 文档

官方 https github com swaggo gin swagger 开始使用为API方法增加注释加在controller api 层 See Declarative Comments Format 运行下面命令下载swgo g
L2-4 部落PTA

在一个社区里每个人都有自己的小圈子还可能同时属于很多不同的朋友圈我们认为朋友的朋友都算在一个部落里于是要请你统计一下在一个给定社区中到底有多少个互不相交的部落并且检查任意两个人是否属于同一个部落输入格式输入在第一行给出一个
hadoop3.2.1编译安装

基础环境 centos 7 7 三台 hadoop需要的环境 Requirements Unix System JDK 1 8 Maven 3 3 or later ProtocolBuffer 2 5 0 CMake 3 1 or new
echart 折线图设置y轴单位_如何让echarts中y轴的单位位于数值的右上角

展开全部 1 创建折线图的数据区包括年份和数据 2 仅选择数据区创建折线图插入选项卡图表62616964757a686964616fe78988e69d8331333363396364工具组折线图 3 得到的折线图x坐标不满足要求
c++可变参数模板函数

可变参数模版函数类型一致可变参数使用头文件 cstdarg va list arg ptr 开头指针 va start arg ptr n 从开头开始读取n个 va arg arg ptr T 根据数据类型取出数据 va end ar
jdk1.8升级后 sun.io.CharToByteConverter 错误处理

项目工程中用到jdk1 6相关方法可以使用但是升级到jdk1 8以后编译出现java lang NoClassDefFoundError sun io CharToByteConverter错误后经查询是jdk1 8版本中已经从s
前端02：CSS选择器等基础知识

CSS基础选择器设置字体样式文本样式 CSS的三种引入方式能使用Chrome调试工具调试样式 HTML专注做结构呈现样式交给CSS 即结构 HTML 和样式CSS相分离 CSS主要由量分布构成选择器以及一条或多条声明选择器给谁
深度学习10篇文章之Interleaved Group Convolution

本文主要讲解Ting Zhang的Interleaved Group Convolutions for Deep Neural Networks 该文对Group convolution有较为详细的讲解 Abstract 文章开篇引出了 I
新昌中学2021高考成绩查询,2021绍兴市地区高考成绩排名查询,绍兴市高考各高中成绩喜报榜单...

距离2018年高考还有不到一个月的时间了很多人在准备最后冲刺的同时也在关心高考成绩 2018各地区高考成绩排名查询高考各高中成绩喜报榜单尚未公布下面是往年各地区高考成绩排名查询高考各高中成绩喜报榜单想要了解同学可以参考下同时关
轻松学懂图（下）——Dijkstra和Bellman-Ford算法

概述在上一篇文章中讲述了Kruskal和Prim算法用于得到最小生成树今天将会介绍两种得到最短路径的算法 Dijlkstra和Bellman Ford算法 Dijkstra算法算法的特点属于单源最短路径算法什么是单源呢通俗的说
前端使用自定义指令实现埋点【vue3】

vue项目有时候会需要进行数据采集记录用户行为习惯而且很多页面都会使用到所以用vue自定义指令来实现埋点功能埋点的几种方式页面埋点浏览次数及时长等点击埋点每一次点击行为曝光埋点统计区域是否被用户浏览 import cre
神经网络量化----TensorRT深刻解读

神经网络量化 TensorRT深刻解读目录神经网络量化 TensorRT深刻解读前言一 TensorRT简介二难点 1 架构 2 功能三实现 1 conv和ReLU的融合 2 conv和ReLU的融合 quant utils

神经网络量化----TensorRT深刻解读

神经网络量化----TensorRT深刻解读

前言

一、TensorRT简介

二、难点

1.架构

2.功能

三、实现

1.conv和BN的融合

2.conv和ReLU的融合

quant_utils.py

3.调用示例

总结

神经网络量化----TensorRT深刻解读 的相关文章

随机推荐

热门标签

神经网络量化----TensorRT深刻解读的相关文章