FGVC-Aircraft 数据集划分 - python 代码
文件路径说明:src_dir 为数据集压缩包解压后未改动的 data 目录;运行代码会在 dst_dir 下生成 fgvc_family、fgvc_manufacturer、fgvc_variant 三个文件夹。完整代码见下文。
·
划分好的数据集下载
FGVC Aircraft dataset by classes folder
文件路径说明
src_dir 为数据集压缩包解压后未改动的 data 目录。
运行代码会在 dst_dir 下生成 fgvc_family、fgvc_manufacturer、fgvc_variant 三个文件夹。
注:fgvc_family、fgvc_variant 划分下的 F/A-18 将会被替换为 F_A-18,以免破坏文件路径。
注:解压数据集命令:tar -xzvf fgvc-aircraft-2013b.tar.gz
代码
import pandas as pd
import os, shutil, time
from tqdm import tqdm
def my_mkdir(path):
    """Create directory ``path`` (and any missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        OSError: if ``path`` exists but is not a directory, or the
            directory cannot be created.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and calling makedirs() afterwards.
    os.makedirs(path, exist_ok=True)
# ----- Paths: edit these to match your machine -----
# Output root; the fgvc_<method> folders are created directly under it.
dst_dir = "/home/gpc/disk_1/datasets"
# The `data` directory of the extracted dataset archive (left unmodified).
src_dir = "/home/gpc/disk_1/datasets/fgvc-aircraft-2013b/data"
# Sub-directory of src_dir from which the <image_id>.jpg files are copied.
img_dir = os.path.join(src_dir, "images")
def split_annotation(line):
    """Split one annotation line into ``(image_id, class_name)``.

    The line is split on its first space only, so a class name may itself
    contain spaces. Every ``/`` in the class name (e.g. ``F/A-18``) is
    replaced by ``_`` so the name can be used as a single directory name.

    Args:
        line: One line of an ``images_<method>_<split>.txt`` file, without
            its trailing newline.

    Returns:
        A ``(image_id, class_name)`` tuple of strings.

    Raises:
        ValueError: if ``line`` contains no space separator.
    """
    img, cls = line.split(' ', 1)
    # Only the class name becomes a directory component; leave the id as is.
    return img, cls.replace('/', '_')


def main():
    """Copy each listed image into ``dst_dir/fgvc_<method>/<split>/<class>/``.

    For every labelling method and every split, the matching
    ``images_<method>_<split>.txt`` file under ``src_dir`` is read and each
    listed ``<image_id>.jpg`` is copied from ``img_dir`` into a folder named
    after its class.
    """
    begin = time.time()
    for method in ['family', 'manufacturer', 'variant']:
        method_dir = os.path.join(dst_dir, 'fgvc_{}'.format(method))
        my_mkdir(method_dir)
        for dataset in ['train', 'val', 'trainval', 'test']:
            dataset_dir = os.path.join(method_dir, dataset)
            my_mkdir(dataset_dir)

            # Read the list as plain text: the lines are "id class name",
            # not CSV, so a CSV parser could mis-split on commas or quotes.
            list_path = os.path.join(
                src_dir, 'images_{}_{}.txt'.format(method, dataset))
            with open(list_path, encoding='utf-8') as list_file:
                lines = [raw.rstrip('\r\n') for raw in list_file]
            lines = [line for line in lines if line.strip()]

            # Remember which class folders exist so each is created once
            # instead of being re-checked for every image.
            created = set()
            for info in tqdm(lines, desc='Copying {} {}'.format(method, dataset)):
                img, cls = split_annotation(info)
                cls_dir = os.path.join(dataset_dir, cls)
                if cls not in created:
                    my_mkdir(cls_dir)
                    created.add(cls)
                shutil.copyfile(os.path.join(img_dir, '{}.jpg'.format(img)),
                                os.path.join(cls_dir, '{}.jpg'.format(img)))
    print('\nAll Done, {} s used.'.format(time.time() - begin))


if __name__ == '__main__':
    main()
更多推荐
所有评论(0)