深度学习之数据集转换脚本（voc的xml格式转yolo的txt格式篇）

在做目标检测时，有些代码库采用的是yolo的txt格式，有些则采用的是voc的xml格式，下面提供了一个脚本，可以自动将voc格式的数据集转化为yolo格式的数据集。

季月三吃蛋炒饭

2554人浏览 · 2022-09-23 21:13:34

季月三吃蛋炒饭 · 2022-09-23 21:13:34 发布

voc格式

voc格式的数据集为以下格式，其中Main文件夹里面的txt文件是按比例划分好的训练、验证和测试集，文件里面的每一行代表一张图片（若没有提前划分，可以使用voc数据集划分脚本来划分）：

yolo格式

yolo格式下一般分为images和labels文件夹，在这两个文件夹里面又包含有train，val(以及test)文件夹，如下面所示：

转换脚本

1. xml转txt

接下来采用脚本，自动将voc格式转化为yolo格式。设置好xml文件的路径和要保存的txt文件路径，并在运行目录下准备好类别文件labels.txt（每行一个类别名，行的顺序从0开始即为yolo的类别编号），执行后便可以转换。类别不在labels.txt中的目标以及difficult为1的目标会被跳过。（转换后原xml文件依旧存在）

import os
import glob
import xml.etree.ElementTree as ET
import tqdm

def get_classes(classes_path):
    """Load class names from a text file, one name per line.

    Args:
        classes_path: Path to a UTF-8 encoded text file.

    Returns:
        A ``(names, count)`` tuple: every line of the file with surrounding
        whitespace stripped (in file order, blank lines kept as empty
        strings), and the number of entries in that list.
    """
    names = []
    with open(classes_path, encoding='utf-8') as handle:
        for raw_line in handle:
            names.append(raw_line.strip())
    return names, len(names)


def convert(size, box):
    """Turn a pixel-space corner box into a size-normalised centre box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` -- note that both x values come
            first, followed by both y values.

    Returns:
        ``(x_center, y_center, box_width, box_height)`` where the x values
        are multiplied by ``1 / width`` and the y values by ``1 / height``.
    """
    scale_x = 1.0 / size[0]
    scale_y = 1.0 / size[1]
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]

    center_x = (x_lo + x_hi) / 2.0 * scale_x
    center_y = (y_lo + y_hi) / 2.0 * scale_y
    span_x = (x_hi - x_lo) * scale_x
    span_y = (y_hi - y_lo) * scale_y
    return (center_x, center_y, span_x, span_y)


if __name__ == '__main__':
    # Directory holding the VOC xml annotations and the directory that will
    # receive the generated YOLO txt label files.
    xml_root_path = r'D:\dataset\RTTS\VOC2007\Annotations'
    txt_save_path = r'D:\dataset\RTTS\VOC2007\labels'
    os.makedirs(txt_save_path, exist_ok=True)
    xml_paths = glob.glob(os.path.join(xml_root_path, '*.xml'))
    # Class list, one name per line; the line order defines the YOLO class id.
    classes_path = 'labels.txt'
    classes, _ = get_classes(classes_path)

    for xml_id in xml_paths:
        # Derive the output name with os.path so it works with any path
        # separator, not only the Windows backslash.
        stem = os.path.splitext(os.path.basename(xml_id))[0]
        txt_id = os.path.join(txt_save_path, stem + '.txt')

        # Parse first and close the handle right away; the txt file is only
        # created once the xml has been read successfully.
        with open(xml_id, encoding='utf-8') as xml:
            root = ET.parse(xml).getroot()

        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        label_lines = []
        for obj in root.iter('object'):
            difficult = 0
            difficult_node = obj.find('difficult')
            if difficult_node is not None:
                difficult = difficult_node.text
            cls = obj.find('name').text
            # Skip objects of unknown classes and objects flagged as difficult.
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('xmax').text)),
                 int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('ymax').text)))
            box = convert((w, h), b)
            label_lines.append(str(cls_id) + ' ' + ' '.join(str(a) for a in box) + '\n')

        # An xml without any kept object still yields an (empty) txt file.
        with open(txt_id, 'w') as txt:
            txt.writelines(label_lines)

2. 构建目录结构

输入voc数据集的根目录（VOC2007的上一级文件夹）和转化后的yolo格式数据存放的目录，就可以自动根据Main文件夹里面的txt文件，把图片和上一步生成的txt标签复制到对应的目录（原文件保留），生成train、val、test（如果有）。注意：脚本默认图片后缀为.png，如果数据集的图片是.jpg，需要相应修改脚本中的后缀。

import os
import tqdm
import shutil
voc_root = r'D:\dataset\RTTS'    # root directory of the VOC dataset (parent folder of VOC2007)
yolo_root = r'D:\dataset\RTTS'   # directory that receives the YOLO-format data

image_ext = '.png'               # suffix of the files inside JPEGImages
voc_dir = os.path.join(voc_root, 'VOC2007')

# (split name, message printed once that split has been copied)
splits = (
    ('train', '训练集转化完成'),
    ('val', '验证集转化完成'),
    ('test', '测试集转化完成'),
)

# Create images/<split> and labels/<split> for every split up front.
for split, _ in splits:
    os.makedirs(os.path.join(yolo_root, 'images', split), exist_ok=True)
    os.makedirs(os.path.join(yolo_root, 'labels', split), exist_ok=True)

# Read every split list before copying anything, closing each file promptly.
split_ids = {}
for split, _ in splits:
    list_path = os.path.join(voc_dir, 'ImageSets', 'Main', split + '.txt')
    # The test split is optional; train and val are still required.
    if split == 'test' and not os.path.isfile(list_path):
        continue
    with open(list_path, 'r') as list_file:
        # strip() instead of [:-1]: the last line may lack a trailing
        # newline, and blank lines must not turn into empty ids.
        split_ids[split] = [line.strip() for line in list_file if line.strip()]

for split, done_message in splits:
    if split not in split_ids:
        continue
    image_dst = os.path.join(yolo_root, 'images', split)
    label_dst = os.path.join(yolo_root, 'labels', split)
    for image_id in tqdm.tqdm(split_ids[split]):
        img_path = os.path.join(voc_dir, 'JPEGImages', image_id + image_ext)
        label_path = os.path.join(voc_dir, 'labels', image_id + '.txt')
        shutil.copy(img_path, image_dst)
        shutil.copy(label_path, label_dst)
    print(done_message)