Cropping the Classes You Need out of Object Detection Datasets for Classification

2023-10-26


Besides ImageNet and the other classification datasets, the common object detection datasets can also be put to use when building a classification model: crop out the classes you need and train on the crops. The usual detection datasets are VOC, COCO, OpenImages, Object365, and so on, and their annotation formats are the familiar few: VOC uses XML with absolute top-left / bottom-right corner points, COCO uses JSON with absolute top-left corner plus width and height, YOLO uses txt files with normalized center x, y, width, height, OpenImages uses CSV with normalized left/right/top/bottom coordinates, and Object365 uses (to be added).
Below we extract the vehicle classes we need from each of these datasets.
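To make the formats concrete, here is a minimal sketch (not part of the original scripts; the helper names are only illustrative) of how each annotation style converts to the absolute (xmin, ymin, xmax, ymax) pixel box that PIL's Image.crop expects:

def voc_to_xyxy(xmin, ymin, xmax, ymax):
    # VOC xml: absolute corner points, already in crop order
    return xmin, ymin, xmax, ymax

def coco_to_xyxy(x, y, w, h):
    # COCO json bbox: absolute top-left corner plus width/height
    return x, y, x + w, y + h

def yolo_to_xyxy(cx, cy, w, h, img_w, img_h):
    # YOLO txt: normalized center x/y and width/height
    return ((cx - w / 2) * img_w, (cy - h / 2) * img_h,
            (cx + w / 2) * img_w, (cy + h / 2) * img_h)

def openimages_to_xyxy(xmin, xmax, ymin, ymax, img_w, img_h):
    # OpenImages csv: normalized XMin, XMax, YMin, YMax columns
    return xmin * img_w, ymin * img_h, xmax * img_w, ymax * img_h

The per-dataset scripts below apply the corresponding conversion inline.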

1、Extracting two-wheelers from the VOC dataset

import os
import tqdm
import shutil
import xml.etree.ElementTree as ET
from PIL import Image
# VOC has 20 classes: 'aeroplane', 'diningtable', 'sofa', 'bus', 'car', 'train', 'bicycle', 'horse', 'cow',
# 'bird', 'sheep', 'chair', 'motorbike', 'pottedplant', 'tvmonitor', 'bottle', 'dog', 'boat', 'person', 'cat'
need_names = ["bicycle","motorbike","bottle","chair","tvmonitor","pottedplant"]  # classes to extract
imgpath =  "collect/VOC/VOCdevkit/VOC2007/JPEGImages"   # image directory
annpath =  "collect/VOC/VOCdevkit/VOC2007/Annotations/" # xml annotation directory
savepath = "collect/VOC"  # output root; crops are saved into one subfolder per class
name2dir = {i:os.path.join(savepath,i) for i in need_names}
for need_name in need_names:
    savedir = name2dir[need_name]
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)
        
imgs = [os.path.join(imgpath,i) for i in os.listdir(imgpath)]
anns = [os.path.join(annpath,i) for i in os.listdir(annpath)]
imgnames = [os.path.splitext(os.path.basename(i))[0] for i in imgs]
imgs2name={ os.path.splitext(os.path.basename(i))[0]:os.path.basename(i) for i in imgs }
annnames = [os.path.splitext(os.path.basename(i))[0] for i in anns]

comname = list(set(imgnames) & set(annnames))

for name in tqdm.tqdm(comname):
    imgdir = os.path.join(imgpath,imgs2name[name])
    anndir = os.path.join(annpath,name+'.xml')
    tree = ET.parse(open(anndir))
    root = tree.getroot()
    size = root.find('size')
    w=  int(size.find('width').text)
    h=  int(size.find('height').text)
    if w<50 or h<50:
        continue
    i = 0
    for obj in root.iter('object'):
        cls_name = obj.find('name').text.lower()
        if cls_name in need_names:
            box = obj.find('bndbox')
            xmin = int(box.find('xmin').text)
            ymin = int(box.find('ymin').text)
            xmax = int(box.find('xmax').text)
            ymax = int(box.find('ymax').text)
            w = max(0,xmax-xmin)
            h = max(0,ymax-ymin)
            if w<30 or h<30:
                continue
            im = Image.open(imgdir)
            imcrop = im.crop((xmin,ymin,xmax,ymax))
            savename = name+'_'+str(i)+'.jpg'
            imcrop.save(os.path.join(name2dir[cls_name],savename),quality=100)
            i+=1
        else:
            continue
        
for name,namedir in name2dir.items():
    print(f"{name} img num:{len(os.listdir(namedir))}")
100%|██████████| 9963/9963 [00:16<00:00, 591.14it/s]

bicycle img num:742
motorbike img num:715
bottle img num:705
chair img num:2477
tvmonitor img num:669
pottedplant img num:1030

2、Next, extract classification data from the COCO dataset

import os
import shutil
import numpy as np
import tqdm
from PIL import Image
from pycocotools.coco import COCO
# the 80 COCO classes:
'''
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
'''
annpath = "collect/COCO/annotations/instances_val2017.json"
imgpath = "collect/COCO/val2017/"
savepath = "collect/COCO"
need_names=["bicycle",'motorcycle']
for need_name in need_names:
    savedir = os.path.join(savepath,need_name)
    if os.path.exists(savedir):
        shutil.rmtree(savedir)
    os.makedirs(savedir)

data_source = COCO(annotation_file=annpath)
catIds = data_source.getCatIds()   # get the category ids
categories = data_source.loadCats(catIds)
categories.sort(key=lambda x: x['id'])
classes2Id = {}
classes2catId = {}
catId2classes = {}
catId2Id={}  
Id2catId ={}
for i,cat in enumerate(categories):
    Id2catId[i]=cat['id']
    catId2Id[cat['id']]=i
    classes2Id[cat['name']]=i
    classes2catId[cat['name']]=cat['id']
    catId2classes[cat['id']]=cat['name']

img_ids = data_source.getImgIds()
print("num images:",len(img_ids))
for index, img_id in tqdm.tqdm(enumerate(img_ids), desc='crop needed classes'):
    img_info = data_source.loadImgs(img_id)[0]
    img_name = img_info['file_name']
    img_path = os.path.join(imgpath,img_name)
    file_name = os.path.splitext(img_name)[0]
    height = img_info['height']
    width = img_info['width']
    if width<80 or height<80:
        continue
    annotation_id = data_source.getAnnIds(img_id)
    if len(annotation_id) == 0:
        continue
    annotations = data_source.loadAnns(annotation_id)
    i = 0
    for annotation in annotations:
        catId = annotation['category_id']
        cls_name = catId2classes[catId]
        if cls_name in need_names:
            box = annotation['bbox']
            # some annotations have basically no width / height, skip them
            if box[2] < 1 or box[3] < 1:
                continue
            #top_x,top_y,width,height---->xmin,ymin,xmax,ymax
            xmin = max(0,box[0])
            ymin = max(0,box[1])
            xmax = min(box[0]+box[2],width)
            ymax = min(box[1]+box[3],height)
            w = xmax-xmin
            h = ymax-ymin
            if w<30 or h<30:
                continue
            im = Image.open(img_path)
            crop = im.crop((xmin,ymin,xmax,ymax))
            save_path = os.path.join(savepath,cls_name,file_name+'_'+str(i)+'.jpg')
            crop.save(save_path,quality=100)
            i+=1
        else:
            continue
for need_name in need_names:    
    print(f"{need_name} img num:{len(os.listdir(os.path.join(savepath,need_name)))}")
        
print('finish')


loading annotations into memory...
Done (t=0.38s)
creating index...
index created!
num images: 5000


crop needed classes: 5000it [00:02, 2112.55it/s]

bicycle img num:196
motorcycle img num:300
finish

3、YOLO format data

The code below is largely as it was used in practice, with few changes. It processes two YOLO folders at once, uses multiple processes, and builds a dict mapping image file names to their suffixes instead of using glob, because that is faster.

import os
import tqdm
from glob import glob
from multiprocessing import Process
import numpy as np
import shutil
import cv2

# imgpaths = ['ebike/electric_bicycle/images_all',"ebike/luping_buchong/images"]
# annpaths = ['ebike/electric_bicycle/labels_all',"ebike/luping_buchong/labels"]
imgpaths = ['ebike/electric_bicycle/images',"ebike/luping_buchong/images"]
annpaths = ['ebike/electric_bicycle/labels',"ebike/luping_buchong/labels"]

all_imgs=[]
all_labels=[]

for i in range(len(imgpaths)):
    imgpath=imgpaths[i]
    annpath=annpaths[i]
    imgfiles=[os.path.splitext(name)[0] for name in os.listdir(imgpath)]
    imgsuffixes=[os.path.splitext(name)[1] for name in os.listdir(imgpath)]
    name_add_fix =dict(zip(imgfiles,imgsuffixes))
    annfiles=[os.path.splitext(name)[0] for name in os.listdir(annpath)]
    comfiles = list(set(imgfiles) & set(annfiles))
    for comfile in tqdm.tqdm(comfiles):
        img = os.path.join(imgpath,comfile+name_add_fix[comfile])
        ann = os.path.join(annpath,comfile+'.txt')
        all_imgs.append(img)
        all_labels.append(ann)
print("img num",len(all_imgs))
print("ann num",len(all_labels))

with open('classes.txt','r') as f:
    classes = f.read().strip().splitlines()
classes=['ebike']  # override the list read from classes.txt and keep only the ebike class
cls_path = 'ebike/ebike_classification_need'
if os.path.exists(cls_path):
    shutil.rmtree(cls_path)
for class_name in classes:
    class_path = os.path.join(cls_path,class_name)
    os.makedirs(class_path,exist_ok=False)
id2class={}
for id,name in enumerate(classes):
    id2class[str(id)]=name
print(id2class)
need_id=[0,1]  # we only want the first two classes; every id listed here must also exist in id2class, otherwise the lookup below will fail
def crop_img(img_paths,label_paths,id2class,need_id,cls_path):
    for img_path,label_path in tqdm.tqdm(zip(img_paths,label_paths),total=len(img_paths)):
        img_name,img_suffix = os.path.splitext(os.path.split(img_path)[-1])
        with open(label_path,'r') as f:
            lines = f.read().strip().splitlines()
        for i,line in enumerate(lines):
            # if len(line.split())!=5:
            #     print(line,i,label_path)
            labelid,x,y,w,h=map(float,line.split()[:5])
            if not(int(labelid) in need_id):
                continue
            else:
                label = id2class[str(int(labelid))]
                # np.fromfile + cv2.imdecode is used so that paths containing non-ASCII characters still load
                img = np.fromfile(img_path,np.uint8)
                img = cv2.imdecode(img,1)
                if img is None:
                    continue
                imgh,imgw,_ = img.shape
                x=x*imgw
                y=y*imgh
                w=w*imgw
                h=h*imgh
                rate = w*1.0/h
                # skip boxes with extreme aspect ratios
                if 0.2<rate<5:
                    xmin = max(int(x-w/2),0)
                    ymin = max(int(y-h/2),0)
                    xmax = min(int(x+w/2),int(imgw))
                    ymax = min(int(y+h/2),int(imgh))
                    imgpart = img[ymin:ymax,xmin:xmax,:]
                    imgpath = os.path.join(cls_path,label,img_name+f"_{i}"+img_suffix)
                    try:
                        cv2.imencode('.jpg',imgpart)[1].tofile(imgpath)
                    except Exception:
                        print("non img",imgpath)

num = 40
parts = np.linspace(0,len(all_imgs),num+1).astype(np.int32)
processes=[]
for i in range(num):
    img_paths = all_imgs[parts[i]:parts[i+1]]
    label_paths = all_labels[parts[i]:parts[i+1]]
    p = Process(target=crop_img,args=(img_paths,label_paths,id2class,need_id,cls_path))
    p.start()
    processes.append(p)
for pp in processes:
    pp.join()

4、OpenImages data extraction

import pandas as pd
from PIL import Image
import os
import tqdm
import shutil
import numpy as np

We extract the classes we need from this dataset only to augment our own data. Some images in OpenImages have no labels, so the number of image_ids we count is smaller than the real number of images.

Getting the labels

classes = pd.read_csv('openimage/labels/metadate/class-descriptions-boxable.csv',names=['labelname','displayname'])
classes.head()
labelname displayname
0 /m/011k07 Tortoise
1 /m/011q46kg Container
2 /m/012074 Magpie
3 /m/0120dh Sea turtle
4 /m/01226z Football
names = classes['displayname'].tolist()
temp = [i+'\n' for i in names]
with open('names.txt','w') as f:
    f.writelines(temp)

Look up the labelname from the displayname and pick the classes we want

You can check names.txt for the classes we want, e.g. bicycle or any class containing that word.


need_names = [i for i in names if 'cycle' in i.lower()]
print(need_names)
['Bicycle', 'Bicycle wheel', 'Stationary bicycle', 'Bicycle helmet', 'Motorcycle', 'Unicycle']
need_labelnames = []
for need_name in need_names:
    a=classes['displayname']==need_name
    b=classes[a].iloc[0,:].tolist()
    need_labelnames.append(b)
print(need_labelnames)
[['/m/0199g', 'Bicycle'], ['/m/01bqk0', 'Bicycle wheel'], ['/m/03kt2w', 'Stationary bicycle'], ['/m/03p3bw', 'Bicycle helmet'], ['/m/04_sv', 'Motorcycle'], ['/m/0f6nr', 'Unicycle']]
labelname2displayname = {}
for i in need_labelnames:
    key = i[0]
    value=i[1].replace(" ","_")
    labelname2displayname[key]=value
print(labelname2displayname)
{'/m/0199g': 'Bicycle', '/m/01bqk0': 'Bicycle_wheel', '/m/03kt2w': 'Stationary_bicycle', '/m/03p3bw': 'Bicycle_helmet', '/m/04_sv': 'Motorcycle', '/m/0f6nr': 'Unicycle'}

Find the corresponding image names from the label names

Test set

Start with the smaller splits: the test set first, then validation, and finally the training set.


annfile = 'openimage/labels/detection/test-annotations-bbox.csv'

anns = pd.read_csv(annfile)
anns.head()
ImageID Source LabelName Confidence XMin XMax YMin YMax IsOccluded IsTruncated IsGroupOf IsDepiction IsInside
0 000026e7ee790996 xclick /m/07j7r 1 0.071875 0.145313 0.206250 0.391667 0 1 1 0 0
1 000026e7ee790996 xclick /m/07j7r 1 0.439063 0.571875 0.264583 0.435417 0 1 1 0 0
2 000026e7ee790996 xclick /m/07j7r 1 0.668750 1.000000 0.000000 0.552083 0 1 1 0 0
3 000062a39995e348 xclick /m/015p6 1 0.206208 0.849224 0.154639 1.000000 0 0 0 0 0
4 000062a39995e348 xclick /m/05s2s 1 0.137472 0.376940 0.000000 0.883652 1 1 0 0 0
total_num = anns.shape[0]
print("total ann num:",total_num)
total_numimg = len(set(anns['ImageID'].to_list()))
print("total img num:",total_numimg)

total ann num: 937327
total img num: 112194
new_need_labelnames = [i[0] for i in need_labelnames]
old_a = np.zeros((anns.shape[0],)).astype(bool)
for i in new_need_labelnames:
    a=anns['LabelName']==i
    a = a.to_numpy()

    old_a = a | old_a
print("total num :",np.sum(old_a))
need_anns = anns[old_a]
total num : 4757
need_anns.head()
ImageID Source LabelName Confidence XMin XMax YMin YMax IsOccluded IsTruncated IsGroupOf IsDepiction IsInside
323 0013a0927e6bbefc xclick /m/04_sv 1 0.192188 0.600000 0.306250 0.629167 0 0 0 0 0
522 001d1da154d00e83 xclick /m/0199g 1 0.199413 0.439883 0.681063 0.953488 1 0 0 0 0
523 001d1da154d00e83 xclick /m/0199g 1 0.425220 0.492669 0.807309 0.880399 1 0 0 0 0
524 001d1da154d00e83 xclick /m/0199g 1 0.548387 0.780059 0.677741 0.943522 1 0 0 0 0
1177 003c2b6816ba9d22 xclick /m/03p3bw 1 0.336780 0.497784 0.143488 0.348786 0 0 0 0 0
for i in need_names:
    save_path = os.path.join('openimage/test_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)
    
all_imageids = []
for i in tqdm.tqdm(range(need_anns.shape[0])):
    image_id = need_anns.iloc[i,0]
    image_dir = os.path.join('test',image_id)
    all_imageids.append(image_dir)
    bbox = need_anns.iloc[i,4:8].to_numpy().astype(np.float32)
    labelname = need_anns.iloc[i,2]
    displayname = labelname2displayname[labelname]
    try:
        img = Image.open(os.path.join('openimage',image_dir+'.jpg'))
        w,h = img.size 
        xmin = int(w*bbox[0])
        xmax = int(w*bbox[1])
        ymin = int(h*bbox[2])
        ymax = int(h*bbox[3])
        crop = img.crop((xmin,ymin,xmax,ymax))
        save_name = image_id+"_"+str(i)+'.jpg'
        save_path = os.path.join('openimage/test_imgs',displayname,save_name)
        crop.save(save_path,quality=100)
    except Exception as e:
        print("wrong image id:",e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/test_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")
new_all_imageids = [i+'\n' for i in set(all_imageids)]

img_names=os.path.join('openimage','test_images.txt')
if os.path.exists(img_names):
    os.remove(img_names)
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 4757/4757 [00:45<00:00, 103.85it/s]

Bicycle image num:1203
Bicycle_wheel image num:2314
Stationary_bicycle image num:47
Bicycle_helmet image num:492
Motorcycle image num:683
Unicycle image num:18

Validation set

That completes extraction from the OpenImages test split; the same logic applies to the val and train splits. Below is val:

annfile = 'openimage/labels/detection/validation-annotations-bbox.csv'

anns = pd.read_csv(annfile)
anns.head()
ImageID Source LabelName Confidence XMin XMax YMin YMax IsOccluded IsTruncated IsGroupOf IsDepiction IsInside
0 0001eeaf4aed83f9 xclick /m/0cmf2 1 0.022673 0.964201 0.071038 0.800546 0 0 0 0 0
1 000595fe6fee6369 xclick /m/02wbm 1 0.000000 1.000000 0.000000 1.000000 0 0 1 0 0
2 000595fe6fee6369 xclick /m/02xwb 1 0.141384 0.179676 0.676275 0.731707 0 0 0 0 0
3 000595fe6fee6369 xclick /m/02xwb 1 0.213549 0.253314 0.299335 0.354767 1 0 0 0 0
4 000595fe6fee6369 xclick /m/02xwb 1 0.232695 0.288660 0.490022 0.545455 1 0 0 0 0
total_num = anns.shape[0]
print("total ann num:",total_num)
total_numimg = len(set(anns['ImageID'].to_list()))
print("total img num:",total_numimg)
total ann num: 303980
total img num: 37306
new_need_labelnames = [i[0] for i in need_labelnames]
old_a = np.zeros((anns.shape[0],)).astype(bool)
for i in new_need_labelnames:
    a=anns['LabelName']==i
    a = a.to_numpy()

    old_a = a | old_a
print("total num :",np.sum(old_a))
need_anns = anns[old_a]

total num : 1629
total_num = need_anns.shape[0]
print("total ann num:",total_num)
total_numimg = len(set(need_anns['ImageID'].to_list()))
print("total img num:",total_numimg)
total ann num: 1629
total img num: 445
need_anns.head()
ImageID Source LabelName Confidence XMin XMax YMin YMax IsOccluded IsTruncated IsGroupOf IsDepiction IsInside
226 001a995c1e25d892 xclick /m/04_sv 1 0.101562 0.876563 0.114583 0.945833 0 0 0 0 0
433 00575b9132bb3746 xclick /m/03p3bw 1 0.370206 0.513274 0.044248 0.307522 1 0 0 0 0
434 00575b9132bb3746 xclick /m/0199g 1 0.000000 0.508850 0.000000 0.681416 0 1 0 0 0
457 00575b9132bb3746 xclick /m/01bqk0 1 0.000000 0.194690 0.183628 0.676991 1 1 0 0 0
458 00575b9132bb3746 xclick /m/01bqk0 1 0.129794 0.300885 0.539823 0.960177 1 0 0 0 0
for i in need_names:
    save_path = os.path.join('openimage/validation_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)
    
all_imageids = []
for i in tqdm.tqdm(range(need_anns.shape[0])):
    image_id = need_anns.iloc[i,0]
    image_dir = os.path.join('validation',image_id)
    all_imageids.append(image_dir)
    bbox = need_anns.iloc[i,4:8].to_numpy().astype(np.float32)
    labelname = need_anns.iloc[i,2]
    displayname = labelname2displayname[labelname]
    try:
        img = Image.open(os.path.join('openimage',image_dir+'.jpg'))
        w,h = img.size 
        xmin = int(w*bbox[0])
        xmax = int(w*bbox[1])
        ymin = int(h*bbox[2])
        ymax = int(h*bbox[3])
        crop = img.crop((xmin,ymin,xmax,ymax))
        save_name = image_id+"_"+str(i)+'.jpg'
        save_path = os.path.join('openimage/validation_imgs',displayname,save_name)
        crop.save(save_path,quality=100)
    except Exception as e:
        print("wrong image id:",e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/validation_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")
new_all_imageids = [i+'\n' for i in set(all_imageids)]

img_names=os.path.join('openimage','validation_images.txt')
if os.path.exists(img_names):
    os.remove(img_names)
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 1629/1629 [00:27<00:00, 59.82it/s] 

Bicycle image num:418
Bicycle_wheel image num:780
Stationary_bicycle image num:10
Bicycle_helmet image num:187
Motorcycle image num:232
Unicycle image num:2

As the two splits above show, only a small fraction of the huge number of images is what we need, so this approach is slow, and it assumes we have already downloaded all of the images, roughly 570 GB (which took several days) and a lot of effort. In fact we can download only the labels, generate the image ids we need, as in the test_images.txt and validation_images.txt files saved above, and then use the official downloader tool to fetch just those images. Because the training set is especially large, we take this approach for it, and in practice it is recommended for every split. For reference see https://www.jianshu.com/p/40b58833af22

Training set

# this takes quite a while, be patient
annfile = 'openimage/labels/detection/oidv6-train-annotations-bbox.csv'

anns = pd.read_csv(annfile)
anns.head()
ImageID Source LabelName Confidence XMin XMax YMin YMax IsOccluded IsTruncated ... IsDepiction IsInside XClick1X XClick2X XClick3X XClick4X XClick1Y XClick2Y XClick3Y XClick4Y
0 000002b66c9c498e xclick /m/01g317 1 0.012500 0.195312 0.148438 0.587500 0 1 ... 0 0 0.148438 0.012500 0.059375 0.195312 0.148438 0.357812 0.587500 0.325000
1 000002b66c9c498e xclick /m/01g317 1 0.025000 0.276563 0.714063 0.948438 0 1 ... 0 0 0.025000 0.248438 0.276563 0.214062 0.914062 0.714063 0.782813 0.948438
2 000002b66c9c498e xclick /m/01g317 1 0.151562 0.310937 0.198437 0.590625 1 0 ... 0 0 0.243750 0.151562 0.310937 0.262500 0.198437 0.434375 0.507812 0.590625
3 000002b66c9c498e xclick /m/01g317 1 0.256250 0.429688 0.651563 0.925000 1 0 ... 0 0 0.315625 0.429688 0.256250 0.423438 0.651563 0.921875 0.826562 0.925000
4 000002b66c9c498e xclick /m/01g317 1 0.257812 0.346875 0.235938 0.385938 1 0 ... 0 0 0.317188 0.257812 0.346875 0.307812 0.235938 0.289062 0.348438 0.385938

5 rows × 21 columns

total_num = anns.shape[0]
print("total ann num:",total_num)
total_numimg = len(set(anns['ImageID'].to_list()))
print("total img num:",total_numimg)
total ann num: 14610229
total img num: 1743042
new_need_labelnames = [i[0] for i in need_labelnames]
old_a = np.zeros((anns.shape[0],)).astype(bool)
for i in new_need_labelnames:
    a=anns['LabelName']==i
    a = a.to_numpy()

    old_a = a | old_a
print("total num :",np.sum(old_a))
need_anns = anns[old_a]
total num : 129548
total_num = need_anns.shape[0]
print("total ann num:",total_num)
total_numimg = len(set(need_anns['ImageID'].to_list()))
print("total img num:",total_numimg)
total ann num: 129548
total img num: 26694
need_anns.head()
ImageID Source LabelName Confidence XMin XMax YMin YMax IsOccluded IsTruncated ... IsDepiction IsInside XClick1X XClick2X XClick3X XClick4X XClick1Y XClick2Y XClick3Y XClick4Y
158 00002f4ff380c64c xclick /m/0199g 1 0.000000 0.155556 0.654867 0.876106 0 1 ... 0 0 0.106667 0.000000 0.001481 0.155556 0.876106 0.803097 0.654867 0.796460
159 00002f4ff380c64c xclick /m/0199g 1 0.168889 0.402963 0.648230 0.873894 0 0 ... 0 0 0.268148 0.168889 0.208889 0.402963 0.648230 0.803097 0.873894 0.809735
160 00002f4ff380c64c xclick /m/0199g 1 0.414815 0.659259 0.654867 0.887168 0 0 ... 0 0 0.608889 0.414815 0.494815 0.659259 0.887168 0.796460 0.654867 0.803097
161 00002f4ff380c64c xclick /m/01bqk0 1 0.060741 0.151111 0.736726 0.873894 1 0 ... 0 0 0.103704 0.060741 0.108148 0.151111 0.736726 0.809735 0.873894 0.805310
162 00002f4ff380c64c xclick /m/01bqk0 1 0.165926 0.260741 0.743363 0.878319 1 0 ... 0 0 0.210370 0.165926 0.216296 0.260741 0.743363 0.816372 0.878319 0.816372

5 rows × 21 columns

for i in need_names:
    save_path = os.path.join('openimage/train_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)
    
all_imageids = []
for i in tqdm.tqdm(range(need_anns.shape[0])):
    image_id = need_anns.iloc[i,0]
    image_dir = os.path.join('train',image_id)
    all_imageids.append(image_dir)
    # bbox = need_anns.iloc[i,4:8].to_numpy().astype(np.float32)
    # labelname = need_anns.iloc[i,2]
    # displayname = labelname2displayname[labelname]
    # try:
    #     img = Image.open(os.path.join('openimage',image_dir+'.jpg'))
    #     w,h = img.size 
    #     xmin = int(w*bbox[0])
    #     xmax = int(w*bbox[1])
    #     ymin = int(h*bbox[2])
    #     ymax = int(h*bbox[3])
    #     crop = img.crop((xmin,ymin,xmax,ymax))
    #     save_name = image_id+"_"+str(i)+'.jpg'
    #     save_path = os.path.join('openimage/validation_imgs',displayname,save_name)
    #     crop.save(save_path,quality=100)
    # except Exception as e:
    #     print("wrong image id:",e)
    #     continue
# for i in need_names:
#     name = i.replace(" ","_")
#     save_path = os.path.join('openimage/validation_imgs',name)
#     print(f"{name} image num:{len(os.listdir(save_path))}")
new_all_imageids = [i+'\n' for i in set(all_imageids)]

img_names=os.path.join('openimage','train_images.txt')
if os.path.exists(img_names):
    os.remove(img_names)
with open(img_names,'w') as f:
    f.writelines(new_all_imageids)
100%|██████████| 129548/129548 [00:03<00:00, 34663.79it/s]
len(new_all_imageids)
26694

The download command is:

python downloader.py train_images.txt --download_folder=train --num_processes=5

The downloaded images are saved under the folder specified by download_folder; the larger num_processes is, the less time the download takes.
Fetching downloader.py itself requires a proxy to reach the hosting site, so its content is included here (saved 20220920, corresponding to the v6 release).

# python3
# coding=utf-8
# Copyright 2020 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Open Images image downloader.

This script downloads a subset of Open Images images, given a list of image ids.
Typical uses of this tool might be downloading images:
- That contain a certain category.
- That have been annotated with certain types of annotations (e.g. Localized
Narratives, Exhaustively annotated people, etc.)

The input file IMAGE_LIST should be a text file containing one image per line
with the format <SPLIT>/<IMAGE_ID>, where <SPLIT> is either "train", "test",
"validation", or "challenge2018"; and <IMAGE_ID> is the image ID that uniquely
identifies the image in Open Images. A sample file could be:
  train/f9e0434389a1d4dd
  train/1a007563ebc18664
  test/ea8bfd4e765304db

"""

import argparse
from concurrent import futures
import os
import re
import sys

import boto3
import botocore
import tqdm

BUCKET_NAME = 'open-images-dataset'
REGEX = r'(test|train|validation|challenge2018)/([a-fA-F0-9]*)'


def check_and_homogenize_one_image(image):
  split, image_id = re.match(REGEX, image).groups()
  yield split, image_id


def check_and_homogenize_image_list(image_list):
  for line_number, image in enumerate(image_list):
    try:
      yield from check_and_homogenize_one_image(image)
    except (ValueError, AttributeError):
      raise ValueError(
          f'ERROR in line {line_number} of the image list. The following image '
          f'string is not recognized: "{image}".')


def read_image_list_file(image_list_file):
  with open(image_list_file, 'r') as f:
    for line in f:
      yield line.strip().replace('.jpg', '')


def download_one_image(bucket, split, image_id, download_folder):
  try:
    bucket.download_file(f'{split}/{image_id}.jpg',
                         os.path.join(download_folder, f'{image_id}.jpg'))
  except botocore.exceptions.ClientError as exception:
    sys.exit(
        f'ERROR when downloading image `{split}/{image_id}`: {str(exception)}')


def download_all_images(args):
  """Downloads all images specified in the input file."""
  bucket = boto3.resource(
      's3', config=botocore.config.Config(
          signature_version=botocore.UNSIGNED)).Bucket(BUCKET_NAME)

  download_folder = args['download_folder'] or os.getcwd()

  if not os.path.exists(download_folder):
    os.makedirs(download_folder)

  try:
    image_list = list(
        check_and_homogenize_image_list(
            read_image_list_file(args['image_list'])))
  except ValueError as exception:
    sys.exit(exception)

  progress_bar = tqdm.tqdm(
      total=len(image_list), desc='Downloading images', leave=True)
  with futures.ThreadPoolExecutor(
      max_workers=args['num_processes']) as executor:
    all_futures = [
        executor.submit(download_one_image, bucket, split, image_id,
                        download_folder) for (split, image_id) in image_list
    ]
    for future in futures.as_completed(all_futures):
      future.result()
      progress_bar.update(1)
  progress_bar.close()


if __name__ == '__main__':
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      'image_list',
      type=str,
      default=None,
      help=('Filename that contains the split + image IDs of the images to '
            'download. Check the document'))
  parser.add_argument(
      '--num_processes',
      type=int,
      default=5,
      help='Number of parallel processes to use (default is 5).')
  parser.add_argument(
      '--download_folder',
      type=str,
      default=None,
      help='Folder where to download the images.')
  download_all_images(vars(parser.parse_args()))

Once the download finishes, the images are saved in train and we can apply the same processing.

for i in need_names:
    save_path = os.path.join('openimage/train_imgs',i.replace(" ","_"))
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)
    
all_imageids = []
for i in tqdm.tqdm(range(need_anns.shape[0])):
    image_id = need_anns.iloc[i,0]
    image_dir = os.path.join('train',image_id)
    all_imageids.append(image_dir)
    bbox = need_anns.iloc[i,4:8].to_numpy().astype(np.float32)
    labelname = need_anns.iloc[i,2]
    displayname = labelname2displayname[labelname]
    try:
        img = Image.open(os.path.join('openimage',image_dir+'.jpg'))
        w,h = img.size 
        xmin = int(w*bbox[0])
        xmax = int(w*bbox[1])
        ymin = int(h*bbox[2])
        ymax = int(h*bbox[3])
        crop = img.crop((xmin,ymin,xmax,ymax))
        save_name = image_id+"_"+str(i)+'.jpg'
        save_path = os.path.join('openimage/train_imgs',displayname,save_name)
        crop.save(save_path,quality=100)
    except Exception as e:
        print("wrong image id:",e)
        continue
for i in need_names:
    name = i.replace(" ","_")
    save_path = os.path.join('openimage/train_imgs',name)
    print(f"{name} image num:{len(os.listdir(save_path))}")



 11%|█         | 14409/129548 [02:09<26:42, 71.85it/s] 

wrong image id: cannot write empty image as JPEG


 24%|██▍       | 31367/129548 [04:43<17:53, 91.42it/s]  

wrong image id: cannot write empty image as JPEG


100%|██████████| 129548/129548 [20:20<00:00, 106.15it/s]


Bicycle image num:40161
Bicycle_wheel image num:59520
Stationary_bicycle image num:338
Bicycle_helmet image num:15951
Motorcycle image num:13382
Unicycle image num:194
Besides the approach above there is a faster way. The loop above re-reads the image once for every box, but we can instead read each image once and crop all of its boxes in one pass, by grouping the annotations by ImageID as shown below (a sketch of the full crop loop follows the groupby example):
a= need_anns[['ImageID','LabelName','XMin','XMax','YMin','YMax']].groupby(["ImageID"])
for i,d in enumerate(a):
    if i<2:
        print(d[0]+":")
        print(d[1])
    else:
        break
00002f4ff380c64c:
              ImageID  LabelName      XMin      XMax      YMin      YMax
158  00002f4ff380c64c   /m/0199g  0.000000  0.155556  0.654867  0.876106
159  00002f4ff380c64c   /m/0199g  0.168889  0.402963  0.648230  0.873894
160  00002f4ff380c64c   /m/0199g  0.414815  0.659259  0.654867  0.887168
161  00002f4ff380c64c  /m/01bqk0  0.060741  0.151111  0.736726  0.873894
162  00002f4ff380c64c  /m/01bqk0  0.165926  0.260741  0.743363  0.878319
163  00002f4ff380c64c  /m/01bqk0  0.308148  0.400000  0.743363  0.878319
164  00002f4ff380c64c  /m/01bqk0  0.413333  0.505185  0.743363  0.884956
165  00002f4ff380c64c  /m/01bqk0  0.561481  0.656296  0.736726  0.880531
000091f4a275d0fb:
              ImageID LabelName      XMin      XMax      YMin      YMax
868  000091f4a275d0fb  /m/0199g  0.017143  0.998571  0.225806  0.997849
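As a rough sketch of that idea (reusing need_anns and labelname2displayname from above, and assuming the per-class folders under openimage/train_imgs already exist; the variable names are only illustrative), the crop loop can decode each image once and then cut out all of its boxes:

import os
import tqdm
from PIL import Image

grouped = need_anns[['ImageID','LabelName','XMin','XMax','YMin','YMax']].groupby('ImageID')
for image_id, rows in tqdm.tqdm(grouped, total=len(grouped)):
    img_path = os.path.join('openimage', 'train', image_id + '.jpg')
    try:
        img = Image.open(img_path)  # the image is opened once per ImageID instead of once per box
    except Exception as e:
        print("wrong image id:", e)
        continue
    w, h = img.size
    for i, row in enumerate(rows.itertuples(index=False)):
        displayname = labelname2displayname[row.LabelName]
        # normalized XMin/XMax/YMin/YMax -> absolute pixel box
        box = (int(w * row.XMin), int(h * row.YMin), int(w * row.XMax), int(h * row.YMax))
        save_name = image_id + '_' + str(i) + '.jpg'
        try:
            crop = img.crop(box)
            crop.save(os.path.join('openimage/train_imgs', displayname, save_name), quality=100)
        except Exception as e:
            print("wrong crop:", e)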

5、object365

To be added.
