4、picodet 小目标训练全流程( 九 )


需要注意的是,原先各个 json 中保存的文件名都是图片文件名本身,而合并后文件名变成了绝对路径,这应该是为了防止出现同名文件。如果先合并 json、最后再转换为 VOC,遇到同名文件同样不好处理,所以我们对每个 json 各自单独转换,这样即使文件名相同也不受影响。最后图片的文件名是“原文件名.jpg”。
# NOTE: argparse, json, cv2 and numpy are not used by the two functions below;
# they are retained in case other parts of the file rely on them.
import argparse
import json
import os
import shutil
import time

import cv2
import numpy as np
from lxml import etree, objectify
from PIL import Image
from pycocotools.coco import COCO
from tqdm import tqdm


def cover_copy(src, dst):
    """Copy file ``src`` to file ``dst``, overwriting ``dst`` if it exists.

    Both ``src`` and ``dst`` must be file paths (not directories).  If they
    refer to the same file nothing is done: removing ``dst`` first would
    otherwise delete the only copy of the data.
    """
    if os.path.exists(dst):
        if os.path.samefile(src, dst):
            return
        os.remove(dst)
    shutil.copy(src, dst)


def _copy_for_review(src, review_dir, name):
    """Copy ``src`` into ``review_dir`` as ``name``, creating the directory."""
    os.makedirs(review_dir, exist_ok=True)
    cover_copy(src, os.path.join(review_dir, name))


def _collect_voc_objects(anns, classes, img_id):
    """Turn COCO annotations into ``(name, xmin, ymin, xmax, ymax)`` tuples.

    COCO boxes are ``[x, y, width, height]``; VOC wants corner coordinates.
    Boxes that collapse to zero (or negative) width/height after integer
    truncation are dropped because they are not valid VOC boxes.
    """
    objs = []
    for ann in anns:
        name = classes[ann['category_id']]
        if 'bbox' not in ann:
            # An image can have several annotations, some of them without a box.
            print(f"{img_id} bbox在标注文件中不存在")
            continue
        x, y, w, h = ann['bbox'][:4]
        xmin = int(x)
        ymin = int(y)
        xmax = int(w + x)
        ymax = int(h + y)
        if xmax > xmin and ymax > ymin:
            objs.append((name, xmin, ymin, xmax, ymax))
    return objs


def _build_voc_tree(filename, width, height, depth, objs):
    """Build the VOC ``<annotation>`` element for one image."""
    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.annotation(
        E.folder('VOC'),
        E.filename(filename),
        E.source(
            E.database('COCO'),
            E.annotation('VOC'),
            E.image('COCO'),
        ),
        E.size(
            E.width(width),
            E.height(height),
            E.depth(depth),
        ),
        E.segmented(0),
    )
    for name, xmin, ymin, xmax, ymax in objs:
        anno_tree.append(
            E.object(
                E.name(name),
                E.pose(),
                E.truncated("0"),
                E.difficult(0),
                E.bndbox(
                    E.xmin(xmin),
                    E.ymin(ymin),
                    E.xmax(xmax),
                    E.ymax(ymax),
                ),
            )
        )
    return anno_tree


def coco2voc(imgpath='VOCdevkit/COCO_VOC', jsonpath=None, savepath='COCO'):
    """Convert one COCO annotation file into per-image VOC XML files.

    For every image listed in the annotation file:

    * images without any annotation are copied to ``<savepath>/noann_result``
      and skipped;
    * non-RGB images are copied to ``<savepath>/notrgb_result`` for review and
      an RGB-converted version is written to ``<savepath>/images``;
    * RGB images are copied unchanged to ``<savepath>/images``;
    * a VOC XML file is written to ``<savepath>/labels``.

    Output files are named after the base name of the COCO ``file_name``
    entry, so entries that are absolute paths or contain sub-directories are
    handled; entries sharing a base name overwrite each other.

    Args:
        imgpath: Directory that holds the original COCO images.
        jsonpath: Path of the COCO annotation JSON file (required).
        savepath: Output root.  Its ``images`` and ``labels`` sub-directories
            are deleted and recreated on every call.

    Raises:
        TypeError: If ``jsonpath`` is None.
    """
    if jsonpath is None:
        raise TypeError(
            'jsonpath must be the path of a COCO annotation file, got None'
        )

    img_savepath = os.path.join(savepath, 'images')
    ann_savepath = os.path.join(savepath, 'labels')
    for out_dir in (img_savepath, ann_savepath):
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)

    start = time.time()
    no_ann = []   # ids of images that have no annotations
    not_rgb = []  # ids of images that are not RGB (e.g. grayscale)

    # COCO() parses and validates the annotation file itself, so the file is
    # not loaded a second time here.
    coco = COCO(jsonpath)
    classes = {cat['id']: cat['name'] for cat in coco.dataset['categories']}

    imgIds = coco.getImgIds()
    for imgId in tqdm(imgIds):
        img = coco.loadImgs(imgId)[0]
        filename = img['file_name']
        filepath = os.path.join(imgpath, filename)
        # file_name may be an absolute path or contain sub-directories; only
        # its base name is used for everything written under savepath.
        outname = os.path.basename(filename)

        annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
        anns = coco.loadAnns(annIds)
        if not anns:
            no_ann.append(imgId)
            _copy_for_review(
                filepath, os.path.join(savepath, "noann_result"), outname
            )
            continue

        objs = _collect_voc_objects(anns, classes, imgId)

        # splitext keeps this correct for extensions such as ".jpeg".
        annopath = os.path.join(
            ann_savepath, os.path.splitext(outname)[0] + ".xml"
        )
        dst_path = os.path.join(img_savepath, outname)

        with Image.open(filepath) as im:
            width, height = im.size
            needs_conversion = im.mode != "RGB"
            if needs_conversion:
                with im.convert('RGB') as rgb:
                    rgb.save(dst_path, quality=95)

        if needs_conversion:
            not_rgb.append(imgId)
            # Keep an untouched copy of the original for manual inspection.
            _copy_for_review(
                filepath, os.path.join(savepath, "notrgb_result"), outname
            )
        else:
            cover_copy(filepath, dst_path)

        # Every image stored under images/ is RGB at this point, hence depth 3.
        anno_tree = _build_voc_tree(outname, width, height, 3, objs)
        etree.ElementTree(anno_tree).write(annopath, pretty_print=True)

    print(f"该数据集有{len(no_ann)}/{len(imgIds)}张图片没有instance标注信息 , 已经这些图片复制到{savepath}/noann_result中以使进行查看")
    print(f"该数据集有{len(not_rgb)}/{len(imgIds)}张图片是非RGB图像 , 已经这些图片复制到{savepath}/notrgb_result中以使进行查看")
    duration = time.time() - start
    print(f"数据集处理完成用时{round(duration/60,2)}分")