CRNN,Opencv,Pytorch 字符识别

文章目录
0. 前言
至于CRNN网络的细节这里就不再多言了,网上有很多关于crnn的介绍,这里直接讲一下代码的实现流程
1. 数据集准备
CRNN是识别文本的网络,所以我们首先需要构建数据集,使用26个小写字母以及0到9十个数字,一共有36个字符,从这36个字符中随机选择4到9个字符(这里要说明一下,网上很多关于crnn的训练集中每张图片中的字符个数是一样的,这就具有很大的局限性 。所以本文使用4到9随机选择字符个数构建图片 。)
生成数据集代码如下:
# make_dataset.py -- generate synthetic text-line images for CRNN training.
#
# Each image contains a random string of 4-9 characters drawn from a
# 36-symbol alphabet (a-z, 0-9), rendered with OpenCV and distorted with
# imgaug.  Labels are stored as space-separated alphabet indices in
# train.txt / val.txt, one "path idx idx ..." line per image.
import os
import random

import cv2
import numpy as np
import imgaug.augmenters as iaa

# Alphabet: 26 lowercase letters followed by the 10 digits (36 classes).
CHARS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
         'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


def get_img():
    """Render one random augmented text image.

    Returns:
        (img, lab): img is an un-normalized uint8 numpy image resized to
        32x200; lab is the list of alphabet indices of the rendered text.
    """
    # Variable-length labels (4..9 chars) avoid the fixed-length limitation
    # of many CRNN toy datasets.
    k = random.randint(4, 9)
    select = random.choices(CHARS, k=k)
    lab = [CHARS.index(ch) for ch in select]
    text = "".join(select)

    font = cv2.FONT_HERSHEY_COMPLEX
    # White 50x250 canvas with the text drawn in black.
    # (fix: the original post was corrupted by an injected URL here)
    src = np.ones(shape=(50, 250, 3)).astype('uint8') * 255
    src = cv2.putText(src, text, (20, 27), font, 1, (0, 0, 0), 2)

    seq = iaa.Sequential([
        iaa.Multiply((0.5, 1.5)),            # brightness jitter
        iaa.GaussianBlur(sigma=(0, 1.0)),    # sigma sampled in [0, 1]
        iaa.Crop(percent=(0, 0.06)),
        iaa.Grayscale(alpha=(0, 1)),
        iaa.Affine(
            scale=(0.95, 1.05),              # mild scale change
            mode=iaa.ia.ALL,
            cval=(100, 255),
        ),
        iaa.Resize({"height": 32, "width": 200}),  # CRNN input size
    ])
    # imgaug expects a batch axis; add it, augment, then drop it again.
    src = np.expand_dims(src, axis=0)
    src = seq(images=src)[0]
    return src, lab


def _write_split(img_dir, list_path, count):
    """Generate `count` images into `img_dir` and list them in `list_path`."""
    # fix: create the output folder instead of requiring a manual mkdir.
    os.makedirs(img_dir, exist_ok=True)
    # fix: use a context manager so the listing file is flushed and closed.
    with open(list_path, 'w') as f:
        for i in range(count):
            img, lab = get_img()
            path = img_dir + '/' + str(i) + '.jpg'
            cv2.imwrite(path, img)
            f.write(path + ' ' + " ".join(str(c) for c in lab) + '\n')
            print(i)


_write_split('train_data', 'train.txt', 10000)
_write_split('val_data', 'val.txt', 1000)
运行上述代码之前首先需要手动新建两个空文件夹用于存放训练图像和验证图像,文件夹名字分别是 train_data 和 val_data。运行完上述代码以后会在 train_data 文件夹中保存10000张训练图像,在 val_data 文件夹中保存1000张验证图像。此外还会生成两个txt文件,分别为train.txt和val.txt。
txt文本中存放的是图片的路径及包含字符的类别,如下所示:

CRNN,Opencv,Pytorch  字符识别

文章插图
部分训练图像如下所示:
2.构建网络
构建crnn网络的代码如下所示:
# crnn.py -- CRNN text-recognition network: a depthwise-separable CNN
# backbone that collapses the image height to 1, followed by a two-layer
# bidirectional LSTM head that emits per-timestep class scores (for CTC).
import argparse, os
import torch
import torch.nn as nn


class BidirectionalLSTM(nn.Module):
    """Bidirectional LSTM followed by a linear projection.

    Operates in (seq_len, batch, features) layout, matching nn.LSTM's
    default (batch_first=False).
    """

    def __init__(self, nInput_size, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()
        self.lstm = nn.LSTM(nInput_size, nHidden, bidirectional=True)
        # * 2 because the forward and backward hidden states are concatenated.
        self.linear = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        recurrent, (hidden, cell) = self.lstm(input)
        T, b, h = recurrent.size()
        # Fold time into the batch dimension so one Linear handles all steps.
        t_rec = recurrent.view(T * b, h)
        output = self.linear(t_rec)        # [T * b, nOut]
        output = output.view(T, b, -1)     # back to (seq, batch, nOut)
        return output


class CNN(nn.Module):
    """Depthwise-separable convolutional backbone.

    Maps a (B, nChannel, 32, W) image to a (B, 512, 1, W') feature map;
    the height is reduced to 1 so each remaining column becomes one LSTM
    timestep.  Each "conv" stage is a 3x3 depthwise conv followed by a
    1x1 pointwise conv (MobileNet-style factorization).
    """

    def __init__(self, imageHeight, nChannel):
        super(CNN, self).__init__()
        assert imageHeight % 32 == 0, 'image Height has to be a multiple of 32'

        # Stage 0: nChannel -> 64, then 2x2 max-pool (H and W halved).
        self.depth_conv0 = nn.Conv2d(in_channels=nChannel, out_channels=nChannel,
                                     kernel_size=3, stride=1, padding=1, groups=nChannel)
        self.point_conv0 = nn.Conv2d(in_channels=nChannel, out_channels=64,
                                     kernel_size=1, stride=1, padding=0, groups=1)
        self.relu0 = nn.ReLU(inplace=True)
        self.pool0 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 1: 64 -> 128, then 2x2 max-pool.
        self.depth_conv1 = nn.Conv2d(in_channels=64, out_channels=64,
                                     kernel_size=3, stride=1, padding=1, groups=64)
        self.point_conv1 = nn.Conv2d(in_channels=64, out_channels=128,
                                     kernel_size=1, stride=1, padding=0, groups=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 128 -> 256 with batch norm, no pooling.
        self.depth_conv2 = nn.Conv2d(in_channels=128, out_channels=128,
                                     kernel_size=3, stride=1, padding=1, groups=128)
        self.point_conv2 = nn.Conv2d(in_channels=128, out_channels=256,
                                     kernel_size=1, stride=1, padding=0, groups=1)
        self.batchNorm2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU(inplace=True)

        # Stage 3: 256 -> 256; pooling halves H only (stride (2, 1)) so the
        # horizontal resolution -- the future sequence length -- is kept.
        self.depth_conv3 = nn.Conv2d(in_channels=256, out_channels=256,
                                     kernel_size=3, stride=1, padding=1, groups=256)
        self.point_conv3 = nn.Conv2d(in_channels=256, out_channels=256,
                                     kernel_size=1, stride=1, padding=0, groups=1)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 4: 256 -> 512 with batch norm.
        self.depth_conv4 = nn.Conv2d(in_channels=256, out_channels=256,
                                     kernel_size=3, stride=1, padding=1, groups=256)
        self.point_conv4 = nn.Conv2d(in_channels=256, out_channels=512,
                                     kernel_size=1, stride=1, padding=0, groups=1)
        self.batchNorm4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU(inplace=True)

        # Stage 5: 512 -> 512; again pool H only.
        self.depth_conv5 = nn.Conv2d(in_channels=512, out_channels=512,
                                     kernel_size=3, stride=1, padding=1, groups=512)
        self.point_conv5 = nn.Conv2d(in_channels=512, out_channels=512,
                                     kernel_size=1, stride=1, padding=0, groups=1)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 6: a 2x2 conv (no padding) collapses the remaining height
        # of 2 down to 1.
        self.depth_conv6 = nn.Conv2d(in_channels=512, out_channels=512,
                                     kernel_size=2, stride=1, padding=0, groups=512)
        self.point_conv6 = nn.Conv2d(in_channels=512, out_channels=512,
                                     kernel_size=1, stride=1, padding=0, groups=1)
        self.batchNorm6 = nn.BatchNorm2d(512)
        self.relu6 = nn.ReLU(inplace=True)

    def forward(self, input):
        # Stage 0: (B, C, 32, W)   -> (B, 64, 16, W/2)
        x = self.pool0(self.relu0(self.point_conv0(self.depth_conv0(input))))
        # Stage 1: -> (B, 128, 8, W/4)
        x = self.pool1(self.relu1(self.point_conv1(self.depth_conv1(x))))
        # Stage 2: -> (B, 256, 8, W/4)
        x = self.relu2(self.batchNorm2(self.point_conv2(self.depth_conv2(x))))
        # Stage 3: height halved, width roughly preserved (pad 1, stride 1).
        x = self.pool3(self.relu3(self.point_conv3(self.depth_conv3(x))))
        # Stage 4: channels up to 512.
        x = self.relu4(self.batchNorm4(self.point_conv4(self.depth_conv4(x))))
        # Stage 5: height halved again (down to 2 for a 32-pixel input).
        x = self.pool5(self.relu5(self.point_conv5(self.depth_conv5(x))))
        # Stage 6: 2x2 conv collapses height to 1.
        x = self.relu6(self.batchNorm6(self.point_conv6(self.depth_conv6(x))))
        return x


class CRNN(nn.Module):
    """CNN backbone + stacked bidirectional LSTM head.

    forward() returns (seq_len, batch, nClass) scores, the layout expected
    by nn.CTCLoss.
    """

    def __init__(self, imgHeight, nChannel, nClass, nHidden):
        super(CRNN, self).__init__()
        self.cnn = nn.Sequential(CNN(imgHeight, nChannel))
        self.lstm = nn.Sequential(
            BidirectionalLSTM(512, nHidden, nHidden),
            BidirectionalLSTM(nHidden, nHidden, nClass),
        )

    def forward(self, input):
        # PyTorch conv output layout is (B, C, H, W).
        conv = self.cnn(input)
        batch, channel, height, width = conv.size()
        assert height == 1, "the output height must be 1."
        # Drop the height axis: (B, C, 1, W) -> (B, C, W).
        conv = conv.squeeze(dim=2)
        # (B, C, W) -> (W, B, C) to match the LSTM's (seq, batch, input_size).
        conv = conv.permute(2, 0, 1)
        output = self.lstm(conv)
        return output


if __name__ == "__main__":
    # Smoke test: a 32x100 grayscale image yields (26, 1, 11) scores.
    x = torch.rand(1, 1, 32, 100)
    model = CRNN(imgHeight=32, nChannel=1, nClass=11, nHidden=256)
    y = model(x)
    print(y.shape)