import glob
import os
import random
import shutil
import time


def newdir(out_dir):
    """Create ``out_dir`` as an empty directory.

    Any existing directory tree at ``out_dir`` is deleted first.
    """
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)


def copy_files_to(files, dst, overwrite=False):
    """Copy every path in ``files`` into the directory ``dst`` (flat, by basename).

    Args:
        files: iterable of source file paths.
        dst: destination directory; created when missing.
        overwrite: when True an existing, different destination file is
            replaced; when False such a conflict raises.

    Raises:
        FileExistsError: a different file with the same basename already
            exists in ``dst`` and ``overwrite`` is False.
    """
    os.makedirs(dst, exist_ok=True)
    for src in files:
        target = os.path.join(dst, os.path.basename(src))
        if os.path.exists(target):
            # Source and destination are the very same file: nothing to do.
            if os.path.samefile(src, target):
                print("ignoring same file:", src, target)
                continue
            if not overwrite:
                raise FileExistsError("file %s already exists." % (target))
            print("overwriting %s to %s ..." % (src, target))
            os.remove(target)
        shutil.copy(src, target)


def split_train_val(data_dir, train_dir=None, val_dir=None, val_split=0.1, num_val=None, ext='.jpg', shuffle=True,
                    sort=False):
    """Copy the ``ext`` files of ``data_dir`` into a train and a validation directory.

    Both output directories are recreated from scratch (existing content is
    deleted).

    Args:
        data_dir: directory holding the files to split.
        train_dir: output directory for training files; defaults to
            ``data_dir + '_train'``.
        val_dir: output directory for validation files; defaults to
            ``data_dir + '_val'``.
        val_split: fraction of files used for validation when ``num_val`` is
            not given.
        num_val: explicit number of validation files. ``0`` is honoured as
            "no validation files"; only ``None`` falls back to ``val_split``.
        ext: file-name suffix selecting the files to split.
        shuffle: shuffle the files before splitting (ignored when ``sort``).
        sort: sort the files by path before splitting; takes precedence over
            ``shuffle``.
    """
    if not train_dir:
        train_dir = data_dir + '_train'
    if not val_dir:
        val_dir = data_dir + '_val'
    newdir(train_dir)
    newdir(val_dir)

    paths = glob.glob(os.path.join(data_dir, '*' + ext))
    if sort:
        paths.sort()
    elif shuffle:
        random.shuffle(paths)

    if num_val is None:
        num_val = int(len(paths) * val_split)

    # The first ``num_val`` files go to validation, the rest to training.
    copy_files_to(paths[num_val:], train_dir)
    copy_files_to(paths[:num_val], val_dir)


def split_list(lis, ratios, shuffle=True):
    """Partition ``lis`` into consecutive chunks proportional to ``ratios``.

    The caller's list is never modified: shuffling happens on a copy. Chunk
    boundaries come from rounding the *cumulative* ratios, so every element
    lands in exactly one chunk and no element is dropped by rounding.

    Args:
        lis: sequence of items to split.
        ratios: relative sizes of the chunks; they need not sum to 1.
        shuffle: shuffle (a copy of) the items before splitting.

    Returns:
        A list with one list per ratio, or ``None`` when ``lis`` is empty.

    Raises:
        ValueError: ``lis`` has fewer items than there are ratios, or the
            ratios do not sum to a positive number.
    """
    if not lis:
        return None
    if len(lis) < len(ratios):
        raise ValueError("List is not long enough to be split.")
    weights = [float(r) for r in ratios]
    if not weights:
        return []
    weight_sum = sum(weights)
    if weight_sum <= 0:
        raise ValueError("Ratios must sum to a positive number.")

    items = list(lis)
    if shuffle:
        random.shuffle(items)

    total = len(items)
    last_index = len(weights) - 1
    splits = []
    start = 0
    running = 0.0
    for index, weight in enumerate(weights):
        running += weight
        if index == last_index:
            # The final chunk always runs to the end so nothing is lost.
            end = total
        else:
            end = round(running / weight_sum * total)
            end = max(start, min(end, total))
        splits.append(items[start:end])
        start = end
    return splits


def _flatten_ratio_tree(tree, root=''):
    """Flatten a nested ``{name: subtree_or_ratio}`` dict into ``{path: ratio}``."""
    leaves = {}
    for name, value in tree.items():
        path = os.path.join(root, name)
        if isinstance(value, dict):
            leaves.update(_flatten_ratio_tree(value, path))
        else:
            # Anything that is not a dict is taken to be the ratio of this leaf.
            leaves[path] = value
    return leaves


def split_files(src_files, dst_dirs_and_ratios: dict, shuffle=True, remake_dirs=True):
    """Distribute ``src_files`` over several output directories by ratio.

    Args:
        src_files: list of file paths to distribute.
        dst_dirs_and_ratios: nested dict describing the output tree, with
            ratios at the leaves, e.g.
            ``{"output": {"train": 0.8, "val": 0.2}}`` copies 80% of the
            files to ``output/train`` and 20% to ``output/val``.
        shuffle: shuffle the files before splitting.
        remake_dirs: delete an existing output directory before copying.

    An empty ``src_files`` still (re)creates the output directories and
    simply copies nothing.
    """
    leaves = _flatten_ratio_tree(dst_dirs_and_ratios)
    out_dirs = list(leaves.keys())
    ratios = list(leaves.values())

    out_file_lists = split_list(src_files, ratios, shuffle=shuffle)
    if out_file_lists is None:
        # split_list signals "nothing to split" with None.
        out_file_lists = [[] for _ in out_dirs]

    for out_dir, files in zip(out_dirs, out_file_lists):
        if remake_dirs and os.path.exists(out_dir):
            shutil.rmtree(out_dir)
            # Short pause kept from the original implementation after the delete.
            time.sleep(0.01)
        print('Coping %s files to %s : %s' % (len(files), out_dir, files))
        copy_files_to(files, out_dir)


def split_dir(src_dir, dst_dirs_and_ratios: dict, shuffle=True, remake_dirs=True,
              glob_strings=('*.jpg', '*.png')):
    """Split the files of ``src_dir`` matching ``glob_strings`` via ``split_files``.

    Args:
        src_dir: directory whose direct children are split.
        dst_dirs_and_ratios: output tree with ratios at the leaves, e.g.
            ``{"output": {"train": 0.8, "val": 0.2}}``.
        shuffle: shuffle the files before splitting.
        remake_dirs: delete an existing output directory before copying.
        glob_strings: glob patterns (relative to ``src_dir``) selecting files.
    """
    paths = []
    for pattern in glob_strings:
        paths += glob.glob(os.path.join(src_dir, pattern))
    split_files(
        src_files=paths, dst_dirs_and_ratios=dst_dirs_and_ratios, shuffle=shuffle, remake_dirs=remake_dirs
    )
    print('Split dir %s finished.' % (src_dir))


def split_train_val_imagefolder(data_dir, train_dir, val_dir, val_split=0.1, num_val_cls=None, ext='.jpg', shuffle=True,
                                sort=False):
    """Split a one-sub-directory-per-class dataset into train and val trees.

    Every sub-directory of ``data_dir`` is split with ``split_train_val`` into
    ``train_dir/<class>`` and ``val_dir/<class>``. Entries of ``data_dir``
    that are not directories are skipped. ``train_dir`` and ``val_dir`` are
    recreated from scratch.

    Args:
        data_dir: dataset root containing one directory per class.
        train_dir: root of the training output tree.
        val_dir: root of the validation output tree.
        val_split: per-class validation fraction when ``num_val_cls`` is None.
        num_val_cls: explicit number of validation files per class.
        ext: file-name suffix selecting the files to split.
        shuffle: shuffle the files of each class before splitting.
        sort: sort the files of each class instead of shuffling.
    """
    newdir(train_dir)
    newdir(val_dir)
    # Sorted so the class order (and thus seeded shuffling) is reproducible.
    for cls in sorted(os.listdir(data_dir)):
        cls_dir = os.path.join(data_dir, cls)
        if not os.path.isdir(cls_dir):
            continue
        split_train_val(
            cls_dir,
            train_dir=os.path.join(train_dir, cls),
            val_dir=os.path.join(val_dir, cls),
            val_split=val_split,
            num_val=num_val_cls,
            ext=ext,
            shuffle=shuffle,
            sort=sort,
        )


def merge_dirs(src_dirs, dst_dir):
    """Copy the files directly inside each of ``src_dirs`` into a fresh ``dst_dir``.

    ``dst_dir`` is deleted and recreated first. Sub-directories of the
    sources are skipped (only regular files are copied).

    Raises:
        FileExistsError: two source directories contain different files with
            the same name.
    """
    newdir(dst_dir)
    for src_dir in src_dirs:
        entries = glob.glob(os.path.join(src_dir, '*'))
        copy_files_to([p for p in entries if os.path.isfile(p)], dst_dir)


if __name__ == '__main__':
    split_train_val_imagefolder(
        data_dir='/home/ars/disk/datasets/cifar-10-python/cofar10-imagenet-format',
        train_dir='/home/ars/disk/datasets/cifar-10-python/cofar10-trainval/train/raw',
        val_dir='/home/ars/disk/datasets/cifar-10-python/cofar10-trainval/val',
        # val_split=0.1
        num_val_cls=500
    )
    # NOTE(review): the visible source is truncated right after
    # ``num_val_cls=500`` in the call below; confirm that no further
    # arguments or statements followed in the original file.
    split_train_val_imagefolder(
        data_dir='/home/ars/disk/datasets/cifar-10-python/cofar10-trainval/train/raw',
        train_dir='/home/ars/disk/datasets/cifar-10-python/cofar10-trainval/train/unlabeled_unmerged',
        val_dir='/home/ars/disk/datasets/cifar-10-python/cofar10-trainval/train/labeled',
        # val_split=0.1
        num_val_cls=500
    )

# NOTE(review): the visible source starts mid-file (the text before
# ``import shutil`` is truncated). ``os`` and ``filecmp`` are used below, so
# they are imported here explicitly; a duplicate import is harmless if the
# truncated header already provides them.
import filecmp
import os
import shutil
import sys
from sys import argv


def check_valid_dir(directory):
    """Return True when ``directory`` exists and is a directory."""
    return os.path.exists(directory) and os.path.isdir(directory)


def copy_files_to(files, src, dest):
    """Copy each relative path in ``files`` from ``src`` to ``dest``.

    The relative directory structure is preserved; missing destination
    directories are created.
    """
    for current in files:
        os.makedirs(os.path.join(dest, os.path.dirname(current)), exist_ok=True)
        shutil.copy(os.path.join(src, current), os.path.join(dest, current))


def _list_relative_files(root):
    """Return the paths of all files under ``root``, relative to ``root``."""
    found = []
    for dirname, _dirnames, filenames in os.walk(root):
        rel_dir = os.path.relpath(dirname, root)
        found.extend(os.path.join(rel_dir, name) for name in filenames)
    return found


def create_patch(old, new, dst):
    """Write into ``dst`` everything needed to turn the ``old`` tree into ``new``.

    Files that exist in both trees with different content, and files that
    exist only in ``new``, are copied from ``new`` into ``dst`` (keeping their
    relative paths). Files that exist only in ``old`` are listed, one per
    line, in ``dst/to_delete.txt``; that file is only written when there is at
    least one such file.

    Args:
        old: root of the old directory tree.
        new: root of the new directory tree.
        dst: existing directory receiving the patch.

    Returns:
        A tuple ``(to_replace_files, to_add_files, to_delete)`` of relative
        path lists, in directory-walk order.
    """
    old_files = _list_relative_files(old)
    new_files = _list_relative_files(new)
    # Sets give O(1) membership tests; the lists keep the walk order.
    old_set = set(old_files)
    new_set = set(new_files)

    to_delete = [path for path in old_files if path not in new_set]
    to_add_files = [path for path in new_files if path not in old_set]
    # shallow=False forces a byte-by-byte content comparison.
    to_replace_files = [
        path for path in old_files
        if path in new_set
        and not filecmp.cmp(os.path.join(old, path), os.path.join(new, path), False)
    ]

    copy_files_to(to_replace_files, new, dst)
    copy_files_to(to_add_files, new, dst)

    if to_delete:
        to_delete_file = os.path.join(dst, "to_delete.txt")
        with open(to_delete_file, "w") as f_delete:
            f_delete.writelines(dfile + "\n" for dfile in to_delete)
        print(f"Files to be deleted were listed in {to_delete_file}")

    return to_replace_files, to_add_files, to_delete


def main(args=None):
    """Command-line entry point: ``<old_src> <new_src> <dest_dir>``.

    Args:
        args: full argument vector including the program name; defaults to
            ``sys.argv``.

    Exits with status 1 on a wrong argument count or when any of the three
    paths is not an existing directory.
    """
    args = argv if args is None else args
    if len(args) != 4:
        print("Invalid number of parameters")
        print("Usage:")
        print(args[0], "<old_src> <new_src> <dest_dir>")
        sys.exit(1)
    _, old, new, dst = args
    if not check_valid_dir(old) or not check_valid_dir(new) or not check_valid_dir(dst):
        print('All parameters must exist and be directories')
        sys.exit(1)
    create_patch(old, new, dst)


# NOTE(review): the visible source ends with a truncation marker after the
# final print; confirm nothing else followed in the original script.
if __name__ == '__main__':
    main()

