update load_data_CUHK-PEDES.py and ImageNet-logs
This commit is contained in:
@@ -16,10 +16,21 @@ def execute(cmds, idx, num):
|
||||
def command(prefix, cmd):
|
||||
#print ('{:}{:}'.format(prefix, cmd))
|
||||
#if execute: os.system(cmd)
|
||||
#xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
|
||||
#xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
|
||||
#xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s; pmap $$; echo \"\")'.format(prefix, cmd)
|
||||
xcmd = '(echo {:} $(date +\"%Y-%h-%d--%T\") \"PID:\"$$; {:}; sleep 0.1s)'.format(prefix, cmd)
|
||||
return xcmd
|
||||
|
||||
|
||||
def mkILSVRC2012(destination):
|
||||
destination = destination.resolve()
|
||||
destination.mkdir(parents=True, exist_ok=True)
|
||||
os.system('rm -rf {:}'.format(destination))
|
||||
destination.mkdir(parents=True, exist_ok=True)
|
||||
(destination/'train').mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def main(source, destination, xtype):
|
||||
assert source.exists(), '{:} does not exist'.format(source)
|
||||
assert (source/'train' ).exists(), '{:}/train does not exist'.format(source)
|
||||
@@ -28,25 +39,21 @@ def main(source, destination, xtype):
|
||||
else : raise ValueError('invalid unzip type : {:}'.format(xtype))
|
||||
#assert num_process > 0, 'invalid num_process : {:}'.format(num_process)
|
||||
source = source.resolve()
|
||||
destination = destination.resolve()
|
||||
destination.mkdir(parents=True, exist_ok=True)
|
||||
os.system('rm -rf {:}'.format(destination))
|
||||
destination.mkdir(parents=True, exist_ok=True)
|
||||
(destination/'train').mkdir(parents=True, exist_ok=True)
|
||||
mkILSVRC2012(destination)
|
||||
|
||||
subdirs = list( (source / 'train').glob('n*') )
|
||||
all_commands = []
|
||||
assert len(subdirs) == 1000, 'ILSVRC2012 should contain 1000 classes instead of {:}.'.format( len(subdirs) )
|
||||
if xtype == 'tar' : cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
|
||||
elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip'))
|
||||
else : raise ValueError('invalid unzip type : {:}'.format(xtype))
|
||||
all_commands.append( cmd )
|
||||
for idx, subdir in enumerate(subdirs):
|
||||
name = subdir.name
|
||||
if xtype == 'tar' : cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'tar -xf {:} -C {:}'.format(source/'train'/'{:}'.format(name), destination / 'train'))
|
||||
elif xtype == 'zip': cmd = command('{:03d}/{:03d}-th: '.format(idx, len(subdirs)), 'unzip -qd {:} {:}'.format(destination / 'train', source/'train'/'{:}'.format(name)))
|
||||
else : raise ValueError('invalid unzip type : {:}'.format(xtype))
|
||||
all_commands.append( cmd )
|
||||
if xtype == 'tar' : cmd = command('', 'tar -xf {:} -C {:}'.format(source/'val.tar', destination))
|
||||
elif xtype == 'zip': cmd = command('', 'unzip -qd {:} {:}'.format(destination, source/'val.zip'))
|
||||
else : raise ValueError('invalid unzip type : {:}'.format(xtype))
|
||||
all_commands.append( cmd )
|
||||
#print ('Collect all commands done : {:} lines'.format( len(all_commands) ))
|
||||
|
||||
for i, cmd in enumerate(all_commands):
|
||||
@@ -70,4 +77,18 @@ if __name__ == '__main__':
|
||||
assert len(sys.argv) == 4, 'invalid argv : {:}'.format(sys.argv)
|
||||
source, destination = Path(sys.argv[1]), Path(sys.argv[2])
|
||||
#num_process = int(sys.argv[3])
|
||||
main(source, destination, sys.argv[3])
|
||||
if sys.argv[3] == 'wget':
|
||||
with open(source) as f:
|
||||
content = f.readlines()
|
||||
content = [x.strip() for x in content]
|
||||
assert len(content) == 1000, 'invalid lines={:} from {:}'.format( len(content), source )
|
||||
mkILSVRC2012(destination)
|
||||
all_commands = []
|
||||
cmd = command('make-val', 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/val.tar --directory-prefix={:} ; tar -xf {:} -C {:} ; rm {:}'.format(destination, destination / 'val.tar', destination, destination / 'val.tar'))
|
||||
all_commands.append(cmd)
|
||||
for idx, name in enumerate(content):
|
||||
cmd = command('{:03d}/{:03d}-th: '.format(idx, len(content)), 'wget -q http://10.127.2.44:8000/ILSVRC2012-TAR/train/{:}.tar --directory-prefix={:} ; tar -xf {:}.tar -C {:} ; rm {:}.tar'.format(name, destination / 'train', destination / 'train' / name, destination / 'train', destination / 'train' / name))
|
||||
all_commands.append(cmd)
|
||||
for i, cmd in enumerate(all_commands): print(cmd)
|
||||
else:
|
||||
main(source, destination, sys.argv[3])
|
||||
|
15
data/load_data_CUHK-PEDES.py
Executable file
15
data/load_data_CUHK-PEDES.py
Executable file
@@ -0,0 +1,15 @@
|
||||
import json
|
||||
|
||||
def main():
|
||||
xpath = 'caption_all.json'
|
||||
with open(xpath, 'r') as cfile:
|
||||
cap_data = json.load(cfile)
|
||||
print ('There are {:} images'.format( len(cap_data) ))
|
||||
IDs = set()
|
||||
for idx, data in enumerate( cap_data ):
|
||||
IDs.add( data['id'] )
|
||||
assert len( data['captions'] ) > 0, 'invalid {:}-th caption length : {:} {:}'.format(idx, data['captions'], len(data['captions']))
|
||||
print ('IDs :: min={:}, max={:}, num={:}'.format(min(IDs), max(IDs), len(IDs)))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
15895
data/logs/GDAS_V1-imagenet-seed-3993.txt
Executable file
15895
data/logs/GDAS_V1-imagenet-seed-3993.txt
Executable file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user