行人重识别代码实战(一)

代码描述

代码来源: https://github.com/layumi/Person_reID_baseline_pytorch

详细信息可见README.md

prepare.py 文件简单讲解

文件代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
from shutil import copyfile

# You only need to change this line to your dataset download path
download_path = 'C:/Users/葛宇/学习/ReId/数据集/Market-1501-v15.09.15'

if not os.path.isdir(download_path):
print('please change the download_path')

save_path = download_path + '/pytorch'
if not os.path.isdir(save_path):
os.mkdir(save_path)
#-----------------------------------------
#query
query_path = download_path + '/query'
query_save_path = download_path + '/pytorch/query'
if not os.path.isdir(query_save_path):
os.mkdir(query_save_path)

for root, dirs, files in os.walk(query_path, topdown=True):
for name in files:
if not name[-3:]=='jpg':
continue
ID = name.split('_')
src_path = query_path + '/' + name
dst_path = query_save_path + '/' + ID[0]
if not os.path.isdir(dst_path):
os.mkdir(dst_path)
copyfile(src_path, dst_path + '/' + name)

#-----------------------------------------
#multi-query
query_path = download_path + '/gt_bbox'
# for dukemtmc-reid, we do not need multi-query
if os.path.isdir(query_path):
query_save_path = download_path + '/pytorch/multi-query'
if not os.path.isdir(query_save_path):
os.mkdir(query_save_path)

for root, dirs, files in os.walk(query_path, topdown=True):
for name in files:
if not name[-3:]=='jpg':
continue
ID = name.split('_')
src_path = query_path + '/' + name
dst_path = query_save_path + '/' + ID[0]
if not os.path.isdir(dst_path):
os.mkdir(dst_path)
copyfile(src_path, dst_path + '/' + name)

#-----------------------------------------
#gallery
gallery_path = download_path + '/bounding_box_test'
gallery_save_path = download_path + '/pytorch/gallery'
if not os.path.isdir(gallery_save_path):
os.mkdir(gallery_save_path)

for root, dirs, files in os.walk(gallery_path, topdown=True):
for name in files:
if not name[-3:]=='jpg':
continue
ID = name.split('_')
src_path = gallery_path + '/' + name
dst_path = gallery_save_path + '/' + ID[0]
if not os.path.isdir(dst_path):
os.mkdir(dst_path)
copyfile(src_path, dst_path + '/' + name)

#---------------------------------------
#train_all
train_path = download_path + '/bounding_box_train'
train_save_path = download_path + '/pytorch/train_all'
if not os.path.isdir(train_save_path):
os.mkdir(train_save_path)

for root, dirs, files in os.walk(train_path, topdown=True):
for name in files:
if not name[-3:]=='jpg':
continue
ID = name.split('_')
src_path = train_path + '/' + name
dst_path = train_save_path + '/' + ID[0]
if not os.path.isdir(dst_path):
os.mkdir(dst_path)
copyfile(src_path, dst_path + '/' + name)


#---------------------------------------
#train_val
train_path = download_path + '/bounding_box_train'
train_save_path = download_path + '/pytorch/train'
val_save_path = download_path + '/pytorch/val'
if not os.path.isdir(train_save_path):
os.mkdir(train_save_path)
os.mkdir(val_save_path)

for root, dirs, files in os.walk(train_path, topdown=True):
for name in files:
if not name[-3:]=='jpg':
continue
ID = name.split('_')
src_path = train_path + '/' + name
dst_path = train_save_path + '/' + ID[0]
if not os.path.isdir(dst_path):
os.mkdir(dst_path)
dst_path = val_save_path + '/' + ID[0] #first image is used as val image
os.mkdir(dst_path)
copyfile(src_path, dst_path + '/' + name)

代码比较冗长,我总结一下。
这个脚本的作用是将原始数据集重新按图片ID分类。主要操作是通过调用系统函数加之必要的循环判断来实现。可不必细究。运行之前先将第五行download_path = 'C:/Users/葛宇/学习/ReId/数据集/Market-1501-v15.09.15'改为你自己的地址。
按照原作者的使用教程,第一步要做的就是运行prepare.py文件。
运行之后会发现在原先的Market1501数据集中(详见之前的博客)多出了一个文件夹“pytorch”,打开看看:

里面是6个文件夹:

  1. gallery中包含752个文件夹项目,均按ID排序:
  2. multi中是1051个文件夹项目,从0001到1501
  3. query中包含750个文件项目,不含0000和-1
  4. train中包含751个文件项目,不含0000和-1
  5. train all中包含751个文件项目,不含0000和-1
  6. val中包含751个文件项目,不含0000和-1

具体情况可以自己尝试打开观察。

文章作者: GeYu
文章链接: https://nuistgy.github.io/2019/08/22/行人重识别代码实战(一)/
版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 Yu's Blog