# 1.blobFromImage

Mat cv::dnn::blobFromImage	(	
    InputArray 	image,
    double 	scalefactor = 1.0,
    const Size & 	size = Size(),
    const Scalar & 	mean = Scalar(),
    bool 	swapRB = false,
    bool 	crop = false,
    int 	ddepth = CV_32F 
)	

OpenCV中的DNN模块包含blobFromImage方法对输入神经网络的图像进行处理,blobFromImage函数执行的操作及顺序为:

  • 1.先相对于原图像中心进行resize和crop(crop=True时按中心裁剪)
  • 2.再减去均值mean(注意:减均值发生在缩放之前,因此mean按0-255量纲给出)
  • 3.按scalefactor对像素值进行缩放(如scalefactor=1/255时将0-255映射到0-1)
  • 4.可选的通道交换(swapRB=True时交换R与B通道,对OpenCV的BGR图像即BGR->RGB)
  • 返回一个NCHW布局的四维数组

# 2.与torchvision对比

torchvision中可以使用transforms实现输入神经网络图像数据的标准化,主要通过ToTensor和Normalize两个方法实现

ToTensor

  • 输入为uint8型的numpy数组或PIL.Image
  • 1.输入数据的transpose,HWC->CHW
  • 2.将像素值范围从0-255->0-1

Normalize

  • 输入的标准化

# 3.示例

通过一段简单的程序介绍cv2.dnn.blobFromImage执行的操作与torchvision中的ToTensor+Normalize效果等同

import cv2
import torch
import torchvision
from torchvision import transforms
import numpy as np
from PIL import Image

# Demo: cv2.dnn.blobFromImage is equivalent to torchvision ToTensor + Normalize.
# Random uint8 HWC image (4x4, 3 channels) stands in for a real photo.
dd = np.random.randint(0, 255, (4, 4, 3), dtype=np.uint8)
print(f"dd: {dd}")

print("===>>> by torch")
tt = torch.tensor(dd)
print(f"tt: {tt}")

tp = Image.fromarray(dd)

trans = transforms.Compose([
    transforms.ToTensor(),  # HWC -> CHW, scales 0-255 -> 0-1
    transforms.Normalize((.5, .5, .5), (1., 1., 1.)),  # (x - mean) / std
])

trans_tt = trans(tp)
print(trans_tt)
print(f"trans_tt.shape: {trans_tt.shape}")


print("===>>> by opencv")
# blobFromImage subtracts the mean BEFORE applying scalefactor, so the mean is
# given on the 0-255 scale (127.5) while Normalize above uses 0.5 on the 0-1
# scale: (x - 127.5) * 1/255 == x/255 - 0.5.
# swapRB=False keeps the channel order; crop=False resizes without cropping.
blob = cv2.dnn.blobFromImage(dd, 1/255, (4, 4), (127.5, 127.5, 127.5), False, False)
print(f"blob: {blob}")
print(f"blob.shape: {blob.shape}")  # NCHW: (1, 3, 4, 4) -- fixed label typo "blog"

上述代码的输出为:

tensor([[[-0.2294,  0.0412, -0.0020, -0.3667],
         [ 0.2020, -0.3078, -0.4373,  0.2373],
         [ 0.2333,  0.3118,  0.4255,  0.0176],
         [-0.3471,  0.1235, -0.3745,  0.3196]],

        [[ 0.4216,  0.3314,  0.4647,  0.4529],
         [ 0.4765,  0.0451, -0.3627,  0.4098],
         [ 0.3196, -0.3275, -0.4020,  0.4569],
         [-0.4255,  0.4216,  0.3980, -0.2882]],

        [[ 0.1314,  0.3392, -0.2098,  0.2686],
         [ 0.4255,  0.3980, -0.4608,  0.0216],
         [ 0.4176,  0.0294,  0.4216, -0.4961],
         [-0.1745, -0.4451,  0.1941,  0.0098]]])
trans_tt.shape: torch.Size([3, 4, 4])
===>>> by opencv
blob: [[[[-0.22941178  0.04117647 -0.00196078 -0.36666667]
   [ 0.2019608  -0.30784315 -0.43725494  0.23725492]
   [ 0.23333335  0.31176472  0.42549023  0.01764706]
   [-0.34705883  0.12352942 -0.3745098   0.31960785]]

  [[ 0.42156866  0.33137256  0.4647059   0.4529412 ]
   [ 0.47647062  0.04509804 -0.3627451   0.40980396]
   [ 0.31960785 -0.327451   -0.40196082  0.45686278]
   [-0.42549023  0.42156866  0.39803925 -0.2882353 ]]

  [[ 0.13137256  0.3392157  -0.20980394  0.26862746]
   [ 0.42549023  0.39803925 -0.46078435  0.02156863]
   [ 0.4176471   0.02941177  0.42156866 -0.49607846]
   [-0.17450981 -0.44509807  0.19411767  0.00980392]]]]
blob.shape: (1, 3, 4, 4)

可见两者的结果是一致的。

# 参考资料