引言
利用YOLOv8检测模型实现数据集的自动标注,针对VOC格式数据集(.xml标注文件,对应labelImg标注工具)
VOC格式的数据集自标注实现
yolov8模型的训练可以参考笔者的博客
【YOLO】YOLOv8实操:环境配置/自定义数据集准备/模型训练/预测
训练好自定义的模型,就可以执行下面的代码实现模型自标注数据集
修改下面三个参数即可:
weight_path = "/media/ll/L/llr/model/yolov8/weights/best.pt" # 模型路径
imgdir = r'/media/ll/L/llr/DATASET/subwayDatasets/bjdt/images' # 图片路径
xmldir = r'/media/ll/L/llr/DATASET/ZED_DATA/GZG/bjdt_daytime/xml' # 标注文件保存路径
完整代码如下:
"""
Function:使用预训练模型权重对图像集进行识别后自动标注
Author: Alian
Create_Date:2023.03.30
Finish_Date:2023.03.30
"""
import os
from os import getcwd
import glob
from xml.etree import ElementTree as ET
from utils.general import *
from utils import torch_utils
from ultralytics import YOLO
def create_object(root, xyxy, names, cls):
    """Append one VOC-style ``<object>`` element to ``root``.

    Args:
        root: Parent XML element that receives the new ``<object>`` child.
        xyxy: Indexable box corners in the order xmin, ymin, xmax, ymax;
            each value is truncated with ``int()`` before being written.
        names: Mapping (or sequence) from integer class index to label.
        cls: Class index of the detection; converted with ``int()``.

    Returns:
        None. ``root`` is modified in place.
    """
    obj_node = ET.SubElement(root, 'object')

    # Class label looked up from the index -> name table.
    label_node = ET.SubElement(obj_node, 'name')
    label_node.text = str(names[int(cls)])

    # Fields that always carry the same placeholder value.
    for tag, constant in (('pose', 'Unspecified'),
                          ('truncated', '0'),
                          ('difficult', '0')):
        ET.SubElement(obj_node, tag).text = constant

    # Bounding box corners, written as integer pixel coordinates.
    box_node = ET.SubElement(obj_node, 'bndbox')
    for position, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
        corner_node = ET.SubElement(box_node, tag)
        corner_node.text = '%s' % int(xyxy[position])
def create_tree(image_path, h, w, depth=3):
    """Build the root ``<annotation>`` element of a VOC-style XML file.

    The element contains the image location (``folder``, ``filename``,
    ``path``), a ``source/database`` entry set to ``Unknown``, the image
    ``size`` and ``segmented`` set to ``0``. Detected objects are expected
    to be appended to the returned element afterwards.

    Args:
        image_path: Path of the image the annotation describes.
        h: Image height in pixels.
        w: Image width in pixels.
        depth: Number of image channels written to ``size/depth``.
            Defaults to 3, the value that used to be hard-coded.

    Returns:
        The newly created ``annotation`` element.
    """
    annotation = ET.Element('annotation')

    # Image location fields.
    ET.SubElement(annotation, 'folder').text = os.path.dirname(image_path)
    ET.SubElement(annotation, 'filename').text = os.path.basename(image_path)
    ET.SubElement(annotation, 'path').text = image_path

    source = ET.SubElement(annotation, 'source')
    ET.SubElement(source, 'database').text = 'Unknown'

    # Image dimensions; element order is width, height, depth.
    size = ET.SubElement(annotation, 'size')
    ET.SubElement(size, 'width').text = str(w)
    ET.SubElement(size, 'height').text = str(h)
    ET.SubElement(size, 'depth').text = str(depth)

    ET.SubElement(annotation, 'segmented').text = '0'
    return annotation
def pretty_xml(element, indent, newline, level=0):
    """Indent an ElementTree element and its descendants in place.

    Only elements that have children are touched: their ``text`` is set so
    the first child starts on a new, indented line, and each child's
    ``tail`` is set so the next sibling (or the parent's closing tag) is
    aligned. Leaf elements keep their text unchanged.

    Args:
        element: Element to format; modified in place.
        indent: String used for one indentation level (e.g. ``'\\t'``).
        newline: String used as the line break (e.g. ``'\\n'``).
        level: Nesting depth of ``element``; 0 for the root.

    Returns:
        None.
    """
    # Materialise the children once. This avoids truth-testing the Element
    # itself (deprecated) and repeated list.index() scans per child.
    children = list(element)
    if not children:
        return

    child_pad = newline + indent * (level + 1)
    closing_pad = newline + indent * level

    text = element.text
    if text is None or not text.strip():
        # No meaningful text (None, empty or whitespace): just open the
        # line for the first child.
        element.text = child_pad
    else:
        # Keep existing text on its own indented line before the children.
        element.text = child_pad + text.strip() + child_pad

    last = len(children) - 1
    for position, child in enumerate(children):
        # The last child is followed by the parent's closing tag, which
        # sits one level shallower than the children.
        child.tail = child_pad if position < last else closing_pad
        pretty_xml(child, indent, newline, level=level + 1)
def Auto_label(weight, imgdir, xmldir):
    """Run a YOLO detector over a folder and write one VOC XML per image.

    For every image in ``imgdir`` the model is run once, each detected box
    is stored as an ``<object>`` entry, and the result is written to
    ``xmldir`` as ``<image stem>.xml``. Images that share a stem but differ
    in extension map to the same XML file.

    Args:
        weight: Path to the trained model weights passed to ``YOLO``.
        imgdir: Directory containing the images to label.
        xmldir: Directory that receives the XML files; created if missing.

    Returns:
        None.
    """
    # Only still-image files are sent to the model; anything else matched
    # by the glob (label files, thumbnails, sub-directories with a dot in
    # the name) is skipped instead of aborting the whole run.
    image_exts = {'.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.pfm',
                  '.png', '.tif', '.tiff', '.webp'}

    model = YOLO(weight)
    os.makedirs(xmldir, exist_ok=True)

    # glob.escape keeps directory names containing [, ] , * or ? literal.
    pattern = os.path.join(glob.escape(imgdir), '*.*')
    for img_path in sorted(glob.glob(pattern)):
        stem, ext = os.path.splitext(os.path.basename(img_path))
        if ext.lower() not in image_exts:
            continue

        results = model(img_path, show=False, save=False)[0]
        # orig_shape is indexed as (height, width).
        annotation = create_tree(img_path, results.orig_shape[0], results.orig_shape[1])

        boxes = results.boxes
        for box_xyxy, box_cls in zip(boxes.xyxy, boxes.cls):
            create_object(annotation, box_xyxy, results.names, box_cls)

        pretty_xml(annotation, '\t', '\n')
        # Derive the output name from the stem so that any image extension
        # (not just lowercase '.jpg') yields a '.xml' file inside xmldir,
        # and no other part of the path is rewritten.
        xml_path = os.path.join(xmldir, stem + '.xml')
        ET.ElementTree(annotation).write(xml_path, encoding='utf-8')
if __name__ == '__main__':
    # Adjust these three paths before running:
    #   weight_path - trained model weights
    #   imgdir      - folder with the images to label
    #   xmldir      - folder where the XML annotations are saved
    weight_path = "/media/ll/L/llr/model/yolov8/weights/best.pt"
    imgdir = r'/media/ll/L/llr/DATASET/subwayDatasets/bjdt/images'
    xmldir = r'/media/ll/L/llr/DATASET/ZED_DATA/GZG/bjdt_daytime/xml'
    Auto_label(weight=weight_path, imgdir=imgdir, xmldir=xmldir)
综上,就利用YOLOv8检测模型实现了数据集的自动标注。不过标注结果最好人工复查一下,即便如此也已经省下了很多标注时间。
后续会更新COCO数据格式(.json文件,对应labelme标注工具)的自动标注博客。
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)