1ccffcf9 by 乔峰昇

submit code

0 parents
Showing 129 changed files with 18829 additions and 0 deletions
1 __pycache__/
2 runs/
3 *.onnx
4 *.jpg
1 # Default ignored files
2 /shelf/
3 /workspace.xml
1 <component name="InspectionProjectProfileManager">
2 <profile version="1.0">
3 <option name="myName" value="Project Default" />
4 <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5 <option name="ignoredPackages">
6 <value>
7 <list size="2">
8 <item index="0" class="java.lang.String" itemvalue="psutil" />
9 <item index="1" class="java.lang.String" itemvalue="thop" />
10 </list>
11 </value>
12 </option>
13 </inspection_tool>
14 </profile>
15 </component>
1 <component name="InspectionProjectProfileManager">
2 <settings>
3 <option name="USE_PROJECT_PROFILE" value="false" />
4 <version value="1.0" />
5 </settings>
6 </component>
1 <?xml version="1.0" encoding="UTF-8"?>
2 <project version="4">
3 <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (workenv)" project-jdk-type="Python SDK" />
4 </project>
1 <?xml version="1.0" encoding="UTF-8"?>
2 <project version="4">
3 <component name="ProjectModuleManager">
4 <modules>
5 <module fileurl="file://$PROJECT_DIR$/.idea/tamper_det.iml" filepath="$PROJECT_DIR$/.idea/tamper_det.iml" />
6 </modules>
7 </component>
8 </project>
1 <?xml version="1.0" encoding="UTF-8"?>
2 <module type="PYTHON_MODULE" version="4">
3 <component name="NewModuleRootManager">
4 <content url="file://$MODULE_DIR$" />
5 <orderEntry type="jdk" jdkName="Python 3.6 (workenv)" jdkType="Python SDK" />
6 <orderEntry type="sourceFolder" forTests="false" />
7 </component>
8 <component name="PyDocumentationSettings">
9 <option name="format" value="GOOGLE" />
10 <option name="myDocStringFormat" value="Google" />
11 </component>
12 </module>
1 <?xml version="1.0" encoding="UTF-8"?>
2 <project version="4">
3 <component name="VcsDirectoryMappings">
4 <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 </component>
6 </project>
1 ## OCR + key-field extraction for five major banks
2
3 python bank_ocr_inference.py
4 extract_bank_info() is the top-level statement-field extraction function; its argument is results, the full set of OCR recognition results returned by bill_ocr() (see the usage sketch below)
5
6 ## YOLOv5 inference
7 python inference.py
8
9 ## End-to-end OCR + YOLOv5 pipeline
10 python pipeline.py
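
A minimal usage sketch for the statement extractor (assuming the OCR service configured in `bank_ocr_inference.py` is reachable; the image paths here are placeholders):

```python
import cv2

from bank_ocr_inference import bill_ocr, extract_bank_info

img = cv2.imread('statement.jpg')        # placeholder input path
ocr_results = bill_ocr(img)              # {id: [8-point box, text], ...}
fields = extract_bank_info(ocr_results)  # [[text, 8-point box], ...]
for text, box in fields:
    # (box[0], box[1]) is the top-left corner, (box[4], box[5]) the bottom-right
    cv2.rectangle(img, (box[0], box[1]), (box[4], box[5]), (0, 0, 255), 2)
    print(text)
cv2.imwrite('statement_annotated.jpg', img)  # placeholder output path
```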
1 import base64
2 import os
3 import time
4
5 import cv2
6 import numpy as np
7 import requests
8 import tqdm
9
10
11 def image_to_base64(image):
12     # NOTE: despite its name, this returns PNG-encoded bytes, not a base64 string
13     return cv2.imencode('.png', image)[1].tobytes()
14
15
16 def path_to_file(file_path):
17     f = open(file_path, 'rb')  # caller is responsible for closing the handle
18     return f
19
20
21 # Bank-statement OCR service call
22 def bill_ocr(image):
23 f = image_to_base64(image)
24 resp = requests.post(url=r'http://192.168.10.11:9001/gen_ocr', files={'file': f})
25 results = resp.json()
26 ocr_results = results['ocr_results']
27 return ocr_results
28
29
30 # Extract China Minsheng Bank fields
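# Shared heuristic for the extract_* helpers below: every OCR entry is
# [8-point box (x1, y1, ..., x4, y4), text]. When a box contains only the key
# prefix (e.g. '客户姓名:'), its value sits in a separate box, so we take the
# candidate whose top edge lies within half the key box's height of the key's
# top-right corner and whose top-left corner is horizontally closest to it,
# then concatenate the texts and merge the two boxes into one region.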
31 def extract_minsheng_info(ocr_results):
32 name_prefix = '客户姓名:'
33 account_prefix = '客户账号:'
34 results = []
35 for value in ocr_results.values():
36 if name_prefix in value[1]:
37 if name_prefix == value[1]:
38 tmp_value, max_dis = [], 999999
39 top_right_x = value[0][2]
40 top_right_y = value[0][3]
41 for tmp in ocr_results.values():
42 if tmp[1] != name_prefix:
43 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
44 tmp[0][0] - top_right_x) < max_dis:
45 tmp_value = tmp
46 max_dis = abs(tmp[0][0] - top_right_x)
47 else:
48 continue
49 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
50 tmp_value[0][5],
51 value[0][6], value[0][7]]
52 results.append([value[1] + tmp_value[1], new_position])
53 else:
54 results.append([value[1], value[0]])
55 if account_prefix in value[1]:
56 if account_prefix == value[1]:
57 tmp_value, max_dis = [], 999999
58 top_right_x = value[0][2]
59 top_right_y = value[0][3]
60 for tmp in ocr_results.values():
61 if tmp[1] != account_prefix:
62 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
63 tmp[0][0] - top_right_x) < max_dis:
64 tmp_value = tmp
65 max_dis = abs(tmp[0][0] - top_right_x)
66 else:
67 continue
68 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
69 tmp_value[0][5],
70 value[0][6], value[0][7]]
71 results.append([value[1] + tmp_value[1], new_position])
72 else:
73 results.append([value[1], value[0]])
74 return results
75
76
77 # Extract ICBC (Industrial and Commercial Bank of China) fields
78 def extract_gongshang_info(ocr_results):
79 name_prefix = '户名:'
80 account_prefix = '卡号:'
81 results = []
82 for value in ocr_results.values():
83 if name_prefix in value[1]:
84 if name_prefix == value[1]:
85 tmp_value, max_dis = [], 999999
86 top_right_x = value[0][2]
87 top_right_y = value[0][3]
88 for tmp in ocr_results.values():
89 if tmp[1] != name_prefix:
90 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
91 tmp[0][0] - top_right_x) < max_dis:
92 tmp_value = tmp
93 max_dis = abs(tmp[0][0] - top_right_x)
94 else:
95 continue
96 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
97 tmp_value[0][5],
98 value[0][6], value[0][7]]
99 results.append([value[1] + tmp_value[1], new_position])
100 else:
101 results.append([value[1], value[0]])
102 if account_prefix in value[1]:
103 if account_prefix == value[1]:
104 tmp_value, max_dis = [], 999999
105 top_right_x = value[0][2]
106 top_right_y = value[0][3]
107 for tmp in ocr_results.values():
108 if tmp[1] != account_prefix:
109 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
110 tmp[0][0] - top_right_x) < max_dis:
111 tmp_value = tmp
112 max_dis = abs(tmp[0][0] - top_right_x)
113 else:
114 continue
115 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
116 tmp_value[0][5],
117 value[0][6], value[0][7]]
118 results.append([value[1] + tmp_value[1], new_position])
119 else:
120 results.append([value[1], value[0]])
121 return results
122
123
124 # Extract Bank of China fields
125 def extract_zhongguo_info(ocr_results):
126 name_prefix = '客户姓名:'
127 account_prefix = '借记卡号:'
128 results = []
129 for value in ocr_results.values():
130 if name_prefix in value[1]:
131 if name_prefix == value[1]:
132 tmp_value, max_dis = [], 999999
133 top_right_x = value[0][2]
134 top_right_y = value[0][3]
135 for tmp in ocr_results.values():
136 if tmp[1] != name_prefix:
137 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
138 tmp[0][0] - top_right_x) < max_dis:
139 tmp_value = tmp
140 max_dis = abs(tmp[0][0] - top_right_x)
141 else:
142 continue
143 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
144 tmp_value[0][5],
145 value[0][6], value[0][7]]
146 results.append([value[1] + tmp_value[1], new_position])
147 else:
148 results.append([value[1], value[0]])
149 if account_prefix in value[1]:
150 if account_prefix == value[1]:
151 tmp_value, max_dis = [], 999999
152 top_right_x = value[0][2]
153 top_right_y = value[0][3]
154 for tmp in ocr_results.values():
155 if tmp[1] != account_prefix:
156 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
157 tmp[0][0] - top_right_x) < max_dis:
158 tmp_value = tmp
159 max_dis = abs(tmp[0][0] - top_right_x)
160 else:
161 continue
162 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
163 tmp_value[0][5],
164 value[0][6], value[0][7]]
165 results.append([value[1] + tmp_value[1], new_position])
166 else:
167 results.append([value[1], value[0]])
168 return results
169
170
171 # Extract China Construction Bank fields
172 def extract_jianshe_info(ocr_results):
173 name_prefixes = ['客户名称:', '户名:']
174 account_prefixes = ['卡号/账号:', '卡号:']
175 results = []
176 for value in ocr_results.values():
177 for name_prefix in name_prefixes:
178 if name_prefix in value[1]:
179 if name_prefix == value[1]:
180 tmp_value, max_dis = [], 999999
181 top_right_x = value[0][2]
182 top_right_y = value[0][3]
183 for tmp in ocr_results.values():
184 if tmp[1] != name_prefix:
185 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
186 tmp[0][0] - top_right_x) < max_dis:
187 tmp_value = tmp
188 max_dis = abs(tmp[0][0] - top_right_x)
189 else:
190 continue
191 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
192 tmp_value[0][5],
193 value[0][6], value[0][7]]
194 results.append([value[1] + tmp_value[1], new_position])
195 break
196 else:
197 results.append([value[1], value[0]])
198 break
199 for account_prefix in account_prefixes:
200 if account_prefix in value[1]:
201 if account_prefix == value[1]:
202 tmp_value, max_dis = [], 999999
203 top_right_x = value[0][2]
204 top_right_y = value[0][3]
205 for tmp in ocr_results.values():
206 if tmp[1] != account_prefix:
207 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
208 tmp[0][0] - top_right_x) < max_dis:
209 tmp_value = tmp
210 max_dis = abs(tmp[0][0] - top_right_x)
211 else:
212 continue
213 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
214 tmp_value[0][5],
215 value[0][6], value[0][7]]
216 results.append([value[1] + tmp_value[1], new_position])
217 break
218 else:
219 results.append([value[1], value[0]])
220 break
221 return results
222
223
224 # Extract Agricultural Bank of China fields (trickiest case: some layouts put the name and account keys in a single OCR box; all layouts seen in training are supported)
225 def extract_nongye_info(ocr_results):
226 name_prefixes = ['客户名:', '户名:']
227 account_prefixes = ['账号:']
228 results = []
229 is_account = True
230 for value in ocr_results.values():
231 for name_prefix in name_prefixes:
232 if name_prefix in value[1] and account_prefixes[0][:-1] not in value[1]:
233 if name_prefix == value[1]:
234 tmp_value, max_dis = [], 999999
235 top_right_x = value[0][2]
236 top_right_y = value[0][3]
237 for tmp in ocr_results.values():
238 if tmp[1] != name_prefix:
239 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
240 tmp[0][0] - top_right_x) < max_dis:
241 tmp_value = tmp
242 max_dis = abs(tmp[0][0] - top_right_x)
243 else:
244 continue
245 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
246 tmp_value[0][5],
247 value[0][6], value[0][7]]
248 results.append([value[1] + tmp_value[1], new_position])
249 break
250 else:
251 results.append([value[1], value[0]])
252 break
253 if name_prefix in value[1] and account_prefixes[0][:-1] in value[1] and len(value[1].split(":")[0]) <= 5:
254 is_account = False
255 if len(value[1]) == 5:
256 tmp_value, max_dis = [], 999999
257 top_right_x = value[0][2]
258 top_right_y = value[0][3]
259 tmp_info = {}
260 for tmp in ocr_results.values():
261 if tmp[1] != value[1]:
262 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2:
263 tmp_info[abs(tmp[0][0] - top_right_x)] = tmp
264 else:
265 continue
266 tmp_info_id = sorted(tmp_info.keys())
267 if not tmp_info[tmp_info_id[0]][1].isdigit() and len(tmp_info[tmp_info_id[0]][1]) > 19:
268 tmp_value = tmp_info[tmp_info_id[0]]
269 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
270 tmp_value[0][5],
271 value[0][6], value[0][7]]
272 results.append([value[1] + tmp_value[1], new_position])
273 if tmp_info[tmp_info_id[0]][1].isdigit():
274 tmp_value = tmp_info[tmp_info_id[1]]
275 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
276 tmp_value[0][5],
277 value[0][6], value[0][7]]
278 results.append([value[1] + tmp_value[1], new_position])
279 break
280 elif len(value[1]) < 25:
281 tmp_info = {}
282 top_right_x = value[0][2]
283 top_right_y = value[0][3]
284 for tmp in ocr_results.values():
285 if tmp[1] != value[1]:
286 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2:
287 tmp_info[abs(tmp[0][0] - top_right_x)] = tmp
288 else:
289 continue
290 tmp_info_id = sorted(tmp_info.keys())
291 tmp_value = tmp_info[tmp_info_id[0]]
292 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
293 tmp_value[0][5],
294 value[0][6], value[0][7]]
295 results.append([value[1] + tmp_value[1], new_position])
296 break
297 else:
298 results.append([value[1], value[0]])
299 break
300 if is_account:
301 for account_prefix in account_prefixes:
302 if account_prefix in value[1]:
303 if account_prefix == value[1]:
304 tmp_value, max_dis = [], 999999
305 top_right_x = value[0][2]
306 top_right_y = value[0][3]
307 for tmp in ocr_results.values():
308 if tmp[1] != account_prefix:
309 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
310 tmp[0][0] - top_right_x) < max_dis:
311 tmp_value = tmp
312 max_dis = abs(tmp[0][0] - top_right_x)
313 else:
314 continue
315 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
316 tmp_value[0][5],
317 value[0][6], value[0][7]]
318 results.append([value[1] + tmp_value[1], new_position])
319 break
320 else:
321 results.append([value[1], value[0]])
322 break
323 else:
324 break
325 return results
326
327
328 # Top-level dispatcher: route to the bank-specific extractor based on the OCR text
329 def extract_bank_info(ocr_results):
330     results = []
331     for value in ocr_results.values():
332         if '建设' in value[1]:
333             results = extract_jianshe_info(ocr_results)
334             break
335         elif '民生' in value[1]:
336             results = extract_minsheng_info(ocr_results)
337             break
338         elif '农业' in value[1]:
339             results = extract_nongye_info(ocr_results)
340             break
341         elif '中国银行' in value[1]:
342             results = extract_zhongguo_info(ocr_results)
343             break
344         elif '中国邮政储蓄' in value[1]:
345             results = extract_youchu_info(ocr_results)
346     if len(results) == 0:  # fall back to ICBC when no bank name matched
347         results = extract_gongshang_info(ocr_results)
348
349 return results
350
351
352 def extract_youchu_info(ocr_results):  # Extract Postal Savings Bank of China fields
353 name_prefixes = ['户名:']
354 account_prefixes = ['账号:', '卡号:']
355 results = []
356 for value in ocr_results.values():
357 for name_prefix in name_prefixes:
358 if name_prefix in value[1]:
359 if name_prefix == value[1]:
360 tmp_value, max_dis = [], 999999
361 top_right_x = value[0][2]
362 top_right_y = value[0][3]
363 for tmp in ocr_results.values():
364 if tmp[1] != name_prefix:
365 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
366 tmp[0][0] - top_right_x) < max_dis:
367 tmp_value = tmp
368 max_dis = abs(tmp[0][0] - top_right_x)
369 else:
370 continue
371 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
372 tmp_value[0][5],
373 value[0][6], value[0][7]]
374 results.append([value[1] + tmp_value[1], new_position])
375 break
376 else:
377 results.append([value[1], value[0]])
378 break
379 for account_prefix in account_prefixes:
380 if account_prefix in value[1]:
381 if account_prefix == value[1]:
382 tmp_value, max_dis = [], 999999
383 top_right_x = value[0][2]
384 top_right_y = value[0][3]
385 for tmp in ocr_results.values():
386 if tmp[1] != account_prefix:
387 if abs(tmp[0][1] - top_right_y) < abs(value[0][3] - value[0][5]) / 2 and abs(
388 tmp[0][0] - top_right_x) < max_dis:
389 tmp_value = tmp
390 max_dis = abs(tmp[0][0] - top_right_x)
391 else:
392 continue
393 new_position = [value[0][0], value[0][1], tmp_value[0][2], tmp_value[0][3], tmp_value[0][4],
394 tmp_value[0][5],
395 value[0][6], value[0][7]]
396 results.append([value[1] + tmp_value[1], new_position])
397 break
398 else:
399 results.append([value[1], value[0]])
400 break
401 return results
402
403
404 if __name__ == '__main__':
405 img = cv2.imread('/home/situ/下载/邮储对账单/飞书20221020-155202.jpg')
406 ocr_results = bill_ocr(img)
407 results = extract_youchu_info(ocr_results)
408 print(results)
409 # path = '/data/situ_invoice_bill_data/new_data/qfs_bank_bill_data/minsheng/authentic/images/val'
410 # save_path='/data/situ_invoice_bill_data/new_data/results'
411 # bank='minsheng'
412 # if not os.path.exists(os.path.join(save_path,bank)):
413 # os.makedirs(os.path.join(save_path,bank))
414 # save_path=os.path.join(save_path,bank)
415 # for j in tqdm.tqdm(os.listdir(path)):
416 # # if True:
417 # img=cv2.imread(os.path.join(path,j))
418 # # img = cv2.imread('/data/situ_invoice_bill_data/new_data/results/nongye/6/_1597382769.6449914page_23_img_0.jpg')
419 # st = time.time()
420 # ocr_result = bill_ocr(img)
421 # et1 = time.time()
422 # result = extract_bank_info(ocr_result)
423 # et2 = time.time()
424 # for i in range(len(result)):
425 # cv2.rectangle(img, (result[i][1][0], result[i][1][1]), (result[i][1][4], result[i][1][5]), (0, 0, 255), 2)
426 # # cv2.imshow('img',img)
427 # # cv2.waitKey(0)
428 # cv2.imwrite(os.path.join(save_path,j),img)
429 # print('spend:{} ocr:{} extract:{}'.format(et2 - st, et1 - st, et2 - et1))
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Run YOLOv5 benchmarks on all supported export formats
4
5 Format | `export.py --include` | Model
6 --- | --- | ---
7 PyTorch | - | yolov5s.pt
8 TorchScript | `torchscript` | yolov5s.torchscript
9 ONNX | `onnx` | yolov5s.onnx
10 OpenVINO | `openvino` | yolov5s_openvino_model/
11 TensorRT | `engine` | yolov5s.engine
12 CoreML | `coreml` | yolov5s.mlmodel
13 TensorFlow SavedModel | `saved_model` | yolov5s_saved_model/
14 TensorFlow GraphDef | `pb` | yolov5s.pb
15 TensorFlow Lite | `tflite` | yolov5s.tflite
16 TensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tflite
17 TensorFlow.js | `tfjs` | yolov5s_web_model/
18
19 Requirements:
20 $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU
21 $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU
22 $ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com # TensorRT
23
24 Usage:
25 $ python utils/benchmarks.py --weights yolov5s.pt --img 640
26 """
27
28 import argparse
29 import platform
30 import sys
31 import time
32 from pathlib import Path
33
34 import pandas as pd
35
36 FILE = Path(__file__).resolve()
37 ROOT = FILE.parents[0] # YOLOv5 root directory
38 if str(ROOT) not in sys.path:
39 sys.path.append(str(ROOT)) # add ROOT to PATH
40 # ROOT = ROOT.relative_to(Path.cwd()) # relative
41
42 import export
43 from models.experimental import attempt_load
44 from models.yolo import SegmentationModel
45 from segment.val import run as val_seg
46 from utils import notebook_init
47 from utils.general import LOGGER, check_yaml, file_size, print_args
48 from utils.torch_utils import select_device
49 from val import run as val_det
50
51
52 def run(
53 weights=ROOT / 'yolov5s.pt', # weights path
54 imgsz=640, # inference size (pixels)
55 batch_size=1, # batch size
56 data=ROOT / 'data/coco128.yaml', # dataset.yaml path
57 device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
58 half=False, # use FP16 half-precision inference
59 test=False, # test exports only
60 pt_only=False, # test PyTorch only
61 hard_fail=False, # throw error on benchmark failure
62 ):
63 y, t = [], time.time()
64 device = select_device(device)
65 model_type = type(attempt_load(weights, fuse=False)) # DetectionModel, SegmentationModel, etc.
66 for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, CPU, GPU)
67 try:
68 assert i not in (9, 10), 'inference not supported' # Edge TPU and TF.js are unsupported
69 assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13' # CoreML
70 if 'cpu' in device.type:
71 assert cpu, 'inference not supported on CPU'
72 if 'cuda' in device.type:
73 assert gpu, 'inference not supported on GPU'
74
75 # Export
76 if f == '-':
77 w = weights # PyTorch format
78 else:
79 w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # all others
80 assert suffix in str(w), 'export failed'
81
82 # Validate
83 if model_type == SegmentationModel:
84 result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
85 metric = result[0][7] # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls))
86 else: # DetectionModel:
87 result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
88 metric = result[0][3] # (p, r, map50, map, *loss(box, obj, cls))
89 speed = result[2][1] # times (preprocess, inference, postprocess)
90 y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)]) # MB, mAP, t_inference
91 except Exception as e:
92 if hard_fail:
93 assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}'
94 LOGGER.warning(f'WARNING ⚠️ Benchmark failure for {name}: {e}')
95             y.append([name, None, None, None])  # size, mAP and inference time unavailable on failure
96 if pt_only and i == 0:
97 break # break after PyTorch
98
99 # Print results
100 LOGGER.info('\n')
101 parse_opt()
102 notebook_init() # print system info
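    # NOTE: 'map' in the two expressions below is the Python builtin (always
    # truthy), so the mAP columns and the full table branch are always taken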
103 c = ['Format', 'Size (MB)', 'mAP50-95', 'Inference time (ms)'] if map else ['Format', 'Export', '', '']
104 py = pd.DataFrame(y, columns=c)
105 LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)')
106 LOGGER.info(str(py if map else py.iloc[:, :2]))
107 if hard_fail and isinstance(hard_fail, str):
108 metrics = py['mAP50-95'].array # values to compare to floor
109 floor = eval(hard_fail) # minimum metric floor to pass, i.e. = 0.29 mAP for YOLOv5n
110 assert all(x > floor for x in metrics if pd.notna(x)), f'HARD FAIL: mAP50-95 < floor {floor}'
111 return py
112
113
114 def test(
115 weights=ROOT / 'yolov5s.pt', # weights path
116 imgsz=640, # inference size (pixels)
117 batch_size=1, # batch size
118 data=ROOT / 'data/coco128.yaml', # dataset.yaml path
119 device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
120 half=False, # use FP16 half-precision inference
121 test=False, # test exports only
122 pt_only=False, # test PyTorch only
123 hard_fail=False, # throw error on benchmark failure
124 ):
125 y, t = [], time.time()
126 device = select_device(device)
127 for i, (name, f, suffix, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, gpu-capable)
128 try:
129 w = weights if f == '-' else \
130 export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # weights
131 assert suffix in str(w), 'export failed'
132 y.append([name, True])
133 except Exception:
134             y.append([name, False])  # export failed
135
136 # Print results
137 LOGGER.info('\n')
138 parse_opt()
139 notebook_init() # print system info
140 py = pd.DataFrame(y, columns=['Format', 'Export'])
141 LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)')
142 LOGGER.info(str(py))
143 return py
144
145
146 def parse_opt():
147 parser = argparse.ArgumentParser()
148 parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
149 parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
150 parser.add_argument('--batch-size', type=int, default=1, help='batch size')
151 parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
152 parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
153 parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
154 parser.add_argument('--test', action='store_true', help='test exports only')
155 parser.add_argument('--pt-only', action='store_true', help='test PyTorch only')
156 parser.add_argument('--hard-fail', nargs='?', const=True, default=False, help='Exception on error or < min metric')
157 opt = parser.parse_args()
158 opt.data = check_yaml(opt.data) # check YAML
159 print_args(vars(opt))
160 return opt
161
162
163 def main(opt):
164 test(**vars(opt)) if opt.test else run(**vars(opt))
165
166
167 if __name__ == "__main__":
168 opt = parse_opt()
169 main(opt)
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
3 # Example usage: python train.py --data Argoverse.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── Argoverse ← downloads here (31.3 GB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/Argoverse # dataset root dir
12 train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
13 val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
14 test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
15
16 # Classes
17 names:
18 0: person
19 1: bicycle
20 2: car
21 3: motorcycle
22 4: bus
23 5: truck
24 6: traffic_light
25 7: stop_sign
26
27
28 # Download script/URL (optional) ---------------------------------------------------------------------------------------
29 download: |
30 import json
31
32 from tqdm import tqdm
33 from utils.general import download, Path
34
35
36 def argoverse2yolo(set):
37 labels = {}
38 a = json.load(open(set, "rb"))
39 for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
40 img_id = annot['image_id']
41 img_name = a['images'][img_id]['name']
42 img_label_name = f'{img_name[:-3]}txt'
43
44 cls = annot['category_id'] # instance class id
45 x_center, y_center, width, height = annot['bbox']
46 x_center = (x_center + width / 2) / 1920.0 # offset and scale
47 y_center = (y_center + height / 2) / 1200.0 # offset and scale
48 width /= 1920.0 # scale
49 height /= 1200.0 # scale
50
51 img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
52 if not img_dir.exists():
53 img_dir.mkdir(parents=True, exist_ok=True)
54
55 k = str(img_dir / img_label_name)
56 if k not in labels:
57 labels[k] = []
58 labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
59
60 for k in labels:
61 with open(k, "w") as f:
62 f.writelines(labels[k])
63
64
65 # Download
66 dir = Path('../datasets/Argoverse') # dataset root dir
67 urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
68 download(urls, dir=dir, delete=False)
69
70 # Convert
71 annotations_dir = 'Argoverse-HD/annotations/'
72 (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
73 for d in "train.json", "val.json":
74     argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
3 # Example usage: python train.py --data GlobalWheat2020.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── GlobalWheat2020 ← downloads here (7.0 GB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/GlobalWheat2020 # dataset root dir
12 train: # train images (relative to 'path') 3422 images
13 - images/arvalis_1
14 - images/arvalis_2
15 - images/arvalis_3
16 - images/ethz_1
17 - images/rres_1
18 - images/inrae_1
19 - images/usask_1
20 val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
21 - images/ethz_1
22 test: # test images (optional) 1276 images
23 - images/utokyo_1
24 - images/utokyo_2
25 - images/nau_1
26 - images/uq_1
27
28 # Classes
29 names:
30 0: wheat_head
31
32
33 # Download script/URL (optional) ---------------------------------------------------------------------------------------
34 download: |
35 from utils.general import download, Path
36
37
38 # Download
39 dir = Path(yaml['path']) # dataset root dir
40 urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
41 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
42 download(urls, dir=dir)
43
44 # Make Directories
45 for p in 'annotations', 'images', 'labels':
46 (dir / p).mkdir(parents=True, exist_ok=True)
47
48 # Move
49 for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
50 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
51 (dir / p).rename(dir / 'images' / p) # move to /images
52 f = (dir / p).with_suffix('.json') # json file
53 if f.exists():
54 f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
3 # Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
4 # Example usage: python classify/train.py --data imagenet
5 # parent
6 # ├── yolov5
7 # └── datasets
8 # └── imagenet ← downloads here (144 GB)
9
10
11 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
12 path: ../datasets/imagenet # dataset root dir
13 train: train # train images (relative to 'path') 1281167 images
14 val: val # val images (relative to 'path') 50000 images
15 test: # test images (optional)
16
17 # Classes
18 names:
19 0: tench
20 1: goldfish
21 2: great white shark
22 3: tiger shark
23 4: hammerhead shark
24 5: electric ray
25 6: stingray
26 7: cock
27 8: hen
28 9: ostrich
29 10: brambling
30 11: goldfinch
31 12: house finch
32 13: junco
33 14: indigo bunting
34 15: American robin
35 16: bulbul
36 17: jay
37 18: magpie
38 19: chickadee
39 20: American dipper
40 21: kite
41 22: bald eagle
42 23: vulture
43 24: great grey owl
44 25: fire salamander
45 26: smooth newt
46 27: newt
47 28: spotted salamander
48 29: axolotl
49 30: American bullfrog
50 31: tree frog
51 32: tailed frog
52 33: loggerhead sea turtle
53 34: leatherback sea turtle
54 35: mud turtle
55 36: terrapin
56 37: box turtle
57 38: banded gecko
58 39: green iguana
59 40: Carolina anole
60 41: desert grassland whiptail lizard
61 42: agama
62 43: frilled-necked lizard
63 44: alligator lizard
64 45: Gila monster
65 46: European green lizard
66 47: chameleon
67 48: Komodo dragon
68 49: Nile crocodile
69 50: American alligator
70 51: triceratops
71 52: worm snake
72 53: ring-necked snake
73 54: eastern hog-nosed snake
74 55: smooth green snake
75 56: kingsnake
76 57: garter snake
77 58: water snake
78 59: vine snake
79 60: night snake
80 61: boa constrictor
81 62: African rock python
82 63: Indian cobra
83 64: green mamba
84 65: sea snake
85 66: Saharan horned viper
86 67: eastern diamondback rattlesnake
87 68: sidewinder
88 69: trilobite
89 70: harvestman
90 71: scorpion
91 72: yellow garden spider
92 73: barn spider
93 74: European garden spider
94 75: southern black widow
95 76: tarantula
96 77: wolf spider
97 78: tick
98 79: centipede
99 80: black grouse
100 81: ptarmigan
101 82: ruffed grouse
102 83: prairie grouse
103 84: peacock
104 85: quail
105 86: partridge
106 87: grey parrot
107 88: macaw
108 89: sulphur-crested cockatoo
109 90: lorikeet
110 91: coucal
111 92: bee eater
112 93: hornbill
113 94: hummingbird
114 95: jacamar
115 96: toucan
116 97: duck
117 98: red-breasted merganser
118 99: goose
119 100: black swan
120 101: tusker
121 102: echidna
122 103: platypus
123 104: wallaby
124 105: koala
125 106: wombat
126 107: jellyfish
127 108: sea anemone
128 109: brain coral
129 110: flatworm
130 111: nematode
131 112: conch
132 113: snail
133 114: slug
134 115: sea slug
135 116: chiton
136 117: chambered nautilus
137 118: Dungeness crab
138 119: rock crab
139 120: fiddler crab
140 121: red king crab
141 122: American lobster
142 123: spiny lobster
143 124: crayfish
144 125: hermit crab
145 126: isopod
146 127: white stork
147 128: black stork
148 129: spoonbill
149 130: flamingo
150 131: little blue heron
151 132: great egret
152 133: bittern
153 134: crane (bird)
154 135: limpkin
155 136: common gallinule
156 137: American coot
157 138: bustard
158 139: ruddy turnstone
159 140: dunlin
160 141: common redshank
161 142: dowitcher
162 143: oystercatcher
163 144: pelican
164 145: king penguin
165 146: albatross
166 147: grey whale
167 148: killer whale
168 149: dugong
169 150: sea lion
170 151: Chihuahua
171 152: Japanese Chin
172 153: Maltese
173 154: Pekingese
174 155: Shih Tzu
175 156: King Charles Spaniel
176 157: Papillon
177 158: toy terrier
178 159: Rhodesian Ridgeback
179 160: Afghan Hound
180 161: Basset Hound
181 162: Beagle
182 163: Bloodhound
183 164: Bluetick Coonhound
184 165: Black and Tan Coonhound
185 166: Treeing Walker Coonhound
186 167: English foxhound
187 168: Redbone Coonhound
188 169: borzoi
189 170: Irish Wolfhound
190 171: Italian Greyhound
191 172: Whippet
192 173: Ibizan Hound
193 174: Norwegian Elkhound
194 175: Otterhound
195 176: Saluki
196 177: Scottish Deerhound
197 178: Weimaraner
198 179: Staffordshire Bull Terrier
199 180: American Staffordshire Terrier
200 181: Bedlington Terrier
201 182: Border Terrier
202 183: Kerry Blue Terrier
203 184: Irish Terrier
204 185: Norfolk Terrier
205 186: Norwich Terrier
206 187: Yorkshire Terrier
207 188: Wire Fox Terrier
208 189: Lakeland Terrier
209 190: Sealyham Terrier
210 191: Airedale Terrier
211 192: Cairn Terrier
212 193: Australian Terrier
213 194: Dandie Dinmont Terrier
214 195: Boston Terrier
215 196: Miniature Schnauzer
216 197: Giant Schnauzer
217 198: Standard Schnauzer
218 199: Scottish Terrier
219 200: Tibetan Terrier
220 201: Australian Silky Terrier
221 202: Soft-coated Wheaten Terrier
222 203: West Highland White Terrier
223 204: Lhasa Apso
224 205: Flat-Coated Retriever
225 206: Curly-coated Retriever
226 207: Golden Retriever
227 208: Labrador Retriever
228 209: Chesapeake Bay Retriever
229 210: German Shorthaired Pointer
230 211: Vizsla
231 212: English Setter
232 213: Irish Setter
233 214: Gordon Setter
234 215: Brittany
235 216: Clumber Spaniel
236 217: English Springer Spaniel
237 218: Welsh Springer Spaniel
238 219: Cocker Spaniels
239 220: Sussex Spaniel
240 221: Irish Water Spaniel
241 222: Kuvasz
242 223: Schipperke
243 224: Groenendael
244 225: Malinois
245 226: Briard
246 227: Australian Kelpie
247 228: Komondor
248 229: Old English Sheepdog
249 230: Shetland Sheepdog
250 231: collie
251 232: Border Collie
252 233: Bouvier des Flandres
253 234: Rottweiler
254 235: German Shepherd Dog
255 236: Dobermann
256 237: Miniature Pinscher
257 238: Greater Swiss Mountain Dog
258 239: Bernese Mountain Dog
259 240: Appenzeller Sennenhund
260 241: Entlebucher Sennenhund
261 242: Boxer
262 243: Bullmastiff
263 244: Tibetan Mastiff
264 245: French Bulldog
265 246: Great Dane
266 247: St. Bernard
267 248: husky
268 249: Alaskan Malamute
269 250: Siberian Husky
270 251: Dalmatian
271 252: Affenpinscher
272 253: Basenji
273 254: pug
274 255: Leonberger
275 256: Newfoundland
276 257: Pyrenean Mountain Dog
277 258: Samoyed
278 259: Pomeranian
279 260: Chow Chow
280 261: Keeshond
281 262: Griffon Bruxellois
282 263: Pembroke Welsh Corgi
283 264: Cardigan Welsh Corgi
284 265: Toy Poodle
285 266: Miniature Poodle
286 267: Standard Poodle
287 268: Mexican hairless dog
288 269: grey wolf
289 270: Alaskan tundra wolf
290 271: red wolf
291 272: coyote
292 273: dingo
293 274: dhole
294 275: African wild dog
295 276: hyena
296 277: red fox
297 278: kit fox
298 279: Arctic fox
299 280: grey fox
300 281: tabby cat
301 282: tiger cat
302 283: Persian cat
303 284: Siamese cat
304 285: Egyptian Mau
305 286: cougar
306 287: lynx
307 288: leopard
308 289: snow leopard
309 290: jaguar
310 291: lion
311 292: tiger
312 293: cheetah
313 294: brown bear
314 295: American black bear
315 296: polar bear
316 297: sloth bear
317 298: mongoose
318 299: meerkat
319 300: tiger beetle
320 301: ladybug
321 302: ground beetle
322 303: longhorn beetle
323 304: leaf beetle
324 305: dung beetle
325 306: rhinoceros beetle
326 307: weevil
327 308: fly
328 309: bee
329 310: ant
330 311: grasshopper
331 312: cricket
332 313: stick insect
333 314: cockroach
334 315: mantis
335 316: cicada
336 317: leafhopper
337 318: lacewing
338 319: dragonfly
339 320: damselfly
340 321: red admiral
341 322: ringlet
342 323: monarch butterfly
343 324: small white
344 325: sulphur butterfly
345 326: gossamer-winged butterfly
346 327: starfish
347 328: sea urchin
348 329: sea cucumber
349 330: cottontail rabbit
350 331: hare
351 332: Angora rabbit
352 333: hamster
353 334: porcupine
354 335: fox squirrel
355 336: marmot
356 337: beaver
357 338: guinea pig
358 339: common sorrel
359 340: zebra
360 341: pig
361 342: wild boar
362 343: warthog
363 344: hippopotamus
364 345: ox
365 346: water buffalo
366 347: bison
367 348: ram
368 349: bighorn sheep
369 350: Alpine ibex
370 351: hartebeest
371 352: impala
372 353: gazelle
373 354: dromedary
374 355: llama
375 356: weasel
376 357: mink
377 358: European polecat
378 359: black-footed ferret
379 360: otter
380 361: skunk
381 362: badger
382 363: armadillo
383 364: three-toed sloth
384 365: orangutan
385 366: gorilla
386 367: chimpanzee
387 368: gibbon
388 369: siamang
389 370: guenon
390 371: patas monkey
391 372: baboon
392 373: macaque
393 374: langur
394 375: black-and-white colobus
395 376: proboscis monkey
396 377: marmoset
397 378: white-headed capuchin
398 379: howler monkey
399 380: titi
400 381: Geoffroy's spider monkey
401 382: common squirrel monkey
402 383: ring-tailed lemur
403 384: indri
404 385: Asian elephant
405 386: African bush elephant
406 387: red panda
407 388: giant panda
408 389: snoek
409 390: eel
410 391: coho salmon
411 392: rock beauty
412 393: clownfish
413 394: sturgeon
414 395: garfish
415 396: lionfish
416 397: pufferfish
417 398: abacus
418 399: abaya
419 400: academic gown
420 401: accordion
421 402: acoustic guitar
422 403: aircraft carrier
423 404: airliner
424 405: airship
425 406: altar
426 407: ambulance
427 408: amphibious vehicle
428 409: analog clock
429 410: apiary
430 411: apron
431 412: waste container
432 413: assault rifle
433 414: backpack
434 415: bakery
435 416: balance beam
436 417: balloon
437 418: ballpoint pen
438 419: Band-Aid
439 420: banjo
440 421: baluster
441 422: barbell
442 423: barber chair
443 424: barbershop
444 425: barn
445 426: barometer
446 427: barrel
447 428: wheelbarrow
448 429: baseball
449 430: basketball
450 431: bassinet
451 432: bassoon
452 433: swimming cap
453 434: bath towel
454 435: bathtub
455 436: station wagon
456 437: lighthouse
457 438: beaker
458 439: military cap
459 440: beer bottle
460 441: beer glass
461 442: bell-cot
462 443: bib
463 444: tandem bicycle
464 445: bikini
465 446: ring binder
466 447: binoculars
467 448: birdhouse
468 449: boathouse
469 450: bobsleigh
470 451: bolo tie
471 452: poke bonnet
472 453: bookcase
473 454: bookstore
474 455: bottle cap
475 456: bow
476 457: bow tie
477 458: brass
478 459: bra
479 460: breakwater
480 461: breastplate
481 462: broom
482 463: bucket
483 464: buckle
484 465: bulletproof vest
485 466: high-speed train
486 467: butcher shop
487 468: taxicab
488 469: cauldron
489 470: candle
490 471: cannon
491 472: canoe
492 473: can opener
493 474: cardigan
494 475: car mirror
495 476: carousel
496 477: tool kit
497 478: carton
498 479: car wheel
499 480: automated teller machine
500 481: cassette
501 482: cassette player
502 483: castle
503 484: catamaran
504 485: CD player
505 486: cello
506 487: mobile phone
507 488: chain
508 489: chain-link fence
509 490: chain mail
510 491: chainsaw
511 492: chest
512 493: chiffonier
513 494: chime
514 495: china cabinet
515 496: Christmas stocking
516 497: church
517 498: movie theater
518 499: cleaver
519 500: cliff dwelling
520 501: cloak
521 502: clogs
522 503: cocktail shaker
523 504: coffee mug
524 505: coffeemaker
525 506: coil
526 507: combination lock
527 508: computer keyboard
528 509: confectionery store
529 510: container ship
530 511: convertible
531 512: corkscrew
532 513: cornet
533 514: cowboy boot
534 515: cowboy hat
535 516: cradle
536 517: crane (machine)
537 518: crash helmet
538 519: crate
539 520: infant bed
540 521: Crock Pot
541 522: croquet ball
542 523: crutch
543 524: cuirass
544 525: dam
545 526: desk
546 527: desktop computer
547 528: rotary dial telephone
548 529: diaper
549 530: digital clock
550 531: digital watch
551 532: dining table
552 533: dishcloth
553 534: dishwasher
554 535: disc brake
555 536: dock
556 537: dog sled
557 538: dome
558 539: doormat
559 540: drilling rig
560 541: drum
561 542: drumstick
562 543: dumbbell
563 544: Dutch oven
564 545: electric fan
565 546: electric guitar
566 547: electric locomotive
567 548: entertainment center
568 549: envelope
569 550: espresso machine
570 551: face powder
571 552: feather boa
572 553: filing cabinet
573 554: fireboat
574 555: fire engine
575 556: fire screen sheet
576 557: flagpole
577 558: flute
578 559: folding chair
579 560: football helmet
580 561: forklift
581 562: fountain
582 563: fountain pen
583 564: four-poster bed
584 565: freight car
585 566: French horn
586 567: frying pan
587 568: fur coat
588 569: garbage truck
589 570: gas mask
590 571: gas pump
591 572: goblet
592 573: go-kart
593 574: golf ball
594 575: golf cart
595 576: gondola
596 577: gong
597 578: gown
598 579: grand piano
599 580: greenhouse
600 581: grille
601 582: grocery store
602 583: guillotine
603 584: barrette
604 585: hair spray
605 586: half-track
606 587: hammer
607 588: hamper
608 589: hair dryer
609 590: hand-held computer
610 591: handkerchief
611 592: hard disk drive
612 593: harmonica
613 594: harp
614 595: harvester
615 596: hatchet
616 597: holster
617 598: home theater
618 599: honeycomb
619 600: hook
620 601: hoop skirt
621 602: horizontal bar
622 603: horse-drawn vehicle
623 604: hourglass
624 605: iPod
625 606: clothes iron
626 607: jack-o'-lantern
627 608: jeans
628 609: jeep
629 610: T-shirt
630 611: jigsaw puzzle
631 612: pulled rickshaw
632 613: joystick
633 614: kimono
634 615: knee pad
635 616: knot
636 617: lab coat
637 618: ladle
638 619: lampshade
639 620: laptop computer
640 621: lawn mower
641 622: lens cap
642 623: paper knife
643 624: library
644 625: lifeboat
645 626: lighter
646 627: limousine
647 628: ocean liner
648 629: lipstick
649 630: slip-on shoe
650 631: lotion
651 632: speaker
652 633: loupe
653 634: sawmill
654 635: magnetic compass
655 636: mail bag
656 637: mailbox
657 638: tights
658 639: tank suit
659 640: manhole cover
660 641: maraca
661 642: marimba
662 643: mask
663 644: match
664 645: maypole
665 646: maze
666 647: measuring cup
667 648: medicine chest
668 649: megalith
669 650: microphone
670 651: microwave oven
671 652: military uniform
672 653: milk can
673 654: minibus
674 655: miniskirt
675 656: minivan
676 657: missile
677 658: mitten
678 659: mixing bowl
679 660: mobile home
680 661: Model T
681 662: modem
682 663: monastery
683 664: monitor
684 665: moped
685 666: mortar
686 667: square academic cap
687 668: mosque
688 669: mosquito net
689 670: scooter
690 671: mountain bike
691 672: tent
692 673: computer mouse
693 674: mousetrap
694 675: moving van
695 676: muzzle
696 677: nail
697 678: neck brace
698 679: necklace
699 680: nipple
700 681: notebook computer
701 682: obelisk
702 683: oboe
703 684: ocarina
704 685: odometer
705 686: oil filter
706 687: organ
707 688: oscilloscope
708 689: overskirt
709 690: bullock cart
710 691: oxygen mask
711 692: packet
712 693: paddle
713 694: paddle wheel
714 695: padlock
715 696: paintbrush
716 697: pajamas
717 698: palace
718 699: pan flute
719 700: paper towel
720 701: parachute
721 702: parallel bars
722 703: park bench
723 704: parking meter
724 705: passenger car
725 706: patio
726 707: payphone
727 708: pedestal
728 709: pencil case
729 710: pencil sharpener
730 711: perfume
731 712: Petri dish
732 713: photocopier
733 714: plectrum
734 715: Pickelhaube
735 716: picket fence
736 717: pickup truck
737 718: pier
738 719: piggy bank
739 720: pill bottle
740 721: pillow
741 722: ping-pong ball
742 723: pinwheel
743 724: pirate ship
744 725: pitcher
745 726: hand plane
746 727: planetarium
747 728: plastic bag
748 729: plate rack
749 730: plow
750 731: plunger
751 732: Polaroid camera
752 733: pole
753 734: police van
754 735: poncho
755 736: billiard table
756 737: soda bottle
757 738: pot
758 739: potter's wheel
759 740: power drill
760 741: prayer rug
761 742: printer
762 743: prison
763 744: projectile
764 745: projector
765 746: hockey puck
766 747: punching bag
767 748: purse
768 749: quill
769 750: quilt
770 751: race car
771 752: racket
772 753: radiator
773 754: radio
774 755: radio telescope
775 756: rain barrel
776 757: recreational vehicle
777 758: reel
778 759: reflex camera
779 760: refrigerator
780 761: remote control
781 762: restaurant
782 763: revolver
783 764: rifle
784 765: rocking chair
785 766: rotisserie
786 767: eraser
787 768: rugby ball
788 769: ruler
789 770: running shoe
790 771: safe
791 772: safety pin
792 773: salt shaker
793 774: sandal
794 775: sarong
795 776: saxophone
796 777: scabbard
797 778: weighing scale
798 779: school bus
799 780: schooner
800 781: scoreboard
801 782: CRT screen
802 783: screw
803 784: screwdriver
804 785: seat belt
805 786: sewing machine
806 787: shield
807 788: shoe store
808 789: shoji
809 790: shopping basket
810 791: shopping cart
811 792: shovel
812 793: shower cap
813 794: shower curtain
814 795: ski
815 796: ski mask
816 797: sleeping bag
817 798: slide rule
818 799: sliding door
819 800: slot machine
820 801: snorkel
821 802: snowmobile
822 803: snowplow
823 804: soap dispenser
824 805: soccer ball
825 806: sock
826 807: solar thermal collector
827 808: sombrero
828 809: soup bowl
829 810: space bar
830 811: space heater
831 812: space shuttle
832 813: spatula
833 814: motorboat
834 815: spider web
835 816: spindle
836 817: sports car
837 818: spotlight
838 819: stage
839 820: steam locomotive
840 821: through arch bridge
841 822: steel drum
842 823: stethoscope
843 824: scarf
844 825: stone wall
845 826: stopwatch
846 827: stove
847 828: strainer
848 829: tram
849 830: stretcher
850 831: couch
851 832: stupa
852 833: submarine
853 834: suit
854 835: sundial
855 836: sunglass
856 837: sunglasses
857 838: sunscreen
858 839: suspension bridge
859 840: mop
860 841: sweatshirt
861 842: swimsuit
862 843: swing
863 844: switch
864 845: syringe
865 846: table lamp
866 847: tank
867 848: tape player
868 849: teapot
869 850: teddy bear
870 851: television
871 852: tennis ball
872 853: thatched roof
873 854: front curtain
874 855: thimble
875 856: threshing machine
876 857: throne
877 858: tile roof
878 859: toaster
879 860: tobacco shop
880 861: toilet seat
881 862: torch
882 863: totem pole
883 864: tow truck
884 865: toy store
885 866: tractor
886 867: semi-trailer truck
887 868: tray
888 869: trench coat
889 870: tricycle
890 871: trimaran
891 872: tripod
892 873: triumphal arch
893 874: trolleybus
894 875: trombone
895 876: tub
896 877: turnstile
897 878: typewriter keyboard
898 879: umbrella
899 880: unicycle
900 881: upright piano
901 882: vacuum cleaner
902 883: vase
903 884: vault
904 885: velvet
905 886: vending machine
906 887: vestment
907 888: viaduct
908 889: violin
909 890: volleyball
910 891: waffle iron
911 892: wall clock
912 893: wallet
913 894: wardrobe
914 895: military aircraft
915 896: sink
916 897: washing machine
917 898: water bottle
918 899: water jug
919 900: water tower
920 901: whiskey jug
921 902: whistle
922 903: wig
923 904: window screen
924 905: window shade
925 906: Windsor tie
926 907: wine bottle
927 908: wing
928 909: wok
929 910: wooden spoon
930 911: wool
931 912: split-rail fence
932 913: shipwreck
933 914: yawl
934 915: yurt
935 916: website
936 917: comic book
937 918: crossword
938 919: traffic sign
939 920: traffic light
940 921: dust jacket
941 922: menu
942 923: plate
943 924: guacamole
944 925: consomme
945 926: hot pot
946 927: trifle
947 928: ice cream
948 929: ice pop
949 930: baguette
950 931: bagel
951 932: pretzel
952 933: cheeseburger
953 934: hot dog
954 935: mashed potato
955 936: cabbage
956 937: broccoli
957 938: cauliflower
958 939: zucchini
959 940: spaghetti squash
960 941: acorn squash
961 942: butternut squash
962 943: cucumber
963 944: artichoke
964 945: bell pepper
965 946: cardoon
966 947: mushroom
967 948: Granny Smith
968 949: strawberry
969 950: orange
970 951: lemon
971 952: fig
972 953: pineapple
973 954: banana
974 955: jackfruit
975 956: custard apple
976 957: pomegranate
977 958: hay
978 959: carbonara
979 960: chocolate syrup
980 961: dough
981 962: meatloaf
982 963: pizza
983 964: pot pie
984 965: burrito
985 966: red wine
986 967: espresso
987 968: cup
988 969: eggnog
989 970: alp
990 971: bubble
991 972: cliff
992 973: coral reef
993 974: geyser
994 975: lakeshore
995 976: promontory
996 977: shoal
997 978: seashore
998 979: valley
999 980: volcano
1000 981: baseball player
1001 982: bridegroom
1002 983: scuba diver
1003 984: rapeseed
1004 985: daisy
1005 986: yellow lady's slipper
1006 987: corn
1007 988: acorn
1008 989: rose hip
1009 990: horse chestnut seed
1010 991: coral fungus
1011 992: agaric
1012 993: gyromitra
1013 994: stinkhorn mushroom
1014 995: earth star
1015 996: hen-of-the-woods
1016 997: bolete
1017 998: ear
1018 999: toilet paper
1019
1020
1021 # Download script/URL (optional)
1022 download: data/scripts/get_imagenet.sh
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Objects365 dataset https://www.objects365.org/ by Megvii
3 # Example usage: python train.py --data Objects365.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/Objects365 # dataset root dir
12 train: images/train # train images (relative to 'path') 1742289 images
13 val: images/val # val images (relative to 'path') 80000 images
14 test: # test images (optional)
15
16 # Classes
17 names:
18 0: Person
19 1: Sneakers
20 2: Chair
21 3: Other Shoes
22 4: Hat
23 5: Car
24 6: Lamp
25 7: Glasses
26 8: Bottle
27 9: Desk
28 10: Cup
29 11: Street Lights
30 12: Cabinet/shelf
31 13: Handbag/Satchel
32 14: Bracelet
33 15: Plate
34 16: Picture/Frame
35 17: Helmet
36 18: Book
37 19: Gloves
38 20: Storage box
39 21: Boat
40 22: Leather Shoes
41 23: Flower
42 24: Bench
43 25: Potted Plant
44 26: Bowl/Basin
45 27: Flag
46 28: Pillow
47 29: Boots
48 30: Vase
49 31: Microphone
50 32: Necklace
51 33: Ring
52 34: SUV
53 35: Wine Glass
54 36: Belt
55 37: Monitor/TV
56 38: Backpack
57 39: Umbrella
58 40: Traffic Light
59 41: Speaker
60 42: Watch
61 43: Tie
62 44: Trash bin Can
63 45: Slippers
64 46: Bicycle
65 47: Stool
66 48: Barrel/bucket
67 49: Van
68 50: Couch
69 51: Sandals
70 52: Basket
71 53: Drum
72 54: Pen/Pencil
73 55: Bus
74 56: Wild Bird
75 57: High Heels
76 58: Motorcycle
77 59: Guitar
78 60: Carpet
79 61: Cell Phone
80 62: Bread
81 63: Camera
82 64: Canned
83 65: Truck
84 66: Traffic cone
85 67: Cymbal
86 68: Lifesaver
87 69: Towel
88 70: Stuffed Toy
89 71: Candle
90 72: Sailboat
91 73: Laptop
92 74: Awning
93 75: Bed
94 76: Faucet
95 77: Tent
96 78: Horse
97 79: Mirror
98 80: Power outlet
99 81: Sink
100 82: Apple
101 83: Air Conditioner
102 84: Knife
103 85: Hockey Stick
104 86: Paddle
105 87: Pickup Truck
106 88: Fork
107 89: Traffic Sign
108 90: Balloon
109 91: Tripod
110 92: Dog
111 93: Spoon
112 94: Clock
113 95: Pot
114 96: Cow
115 97: Cake
116 98: Dinning Table
117 99: Sheep
118 100: Hanger
119 101: Blackboard/Whiteboard
120 102: Napkin
121 103: Other Fish
122 104: Orange/Tangerine
123 105: Toiletry
124 106: Keyboard
125 107: Tomato
126 108: Lantern
127 109: Machinery Vehicle
128 110: Fan
129 111: Green Vegetables
130 112: Banana
131 113: Baseball Glove
132 114: Airplane
133 115: Mouse
134 116: Train
135 117: Pumpkin
136 118: Soccer
137 119: Skiboard
138 120: Luggage
139 121: Nightstand
140 122: Tea pot
141 123: Telephone
142 124: Trolley
143 125: Head Phone
144 126: Sports Car
145 127: Stop Sign
146 128: Dessert
147 129: Scooter
148 130: Stroller
149 131: Crane
150 132: Remote
151 133: Refrigerator
152 134: Oven
153 135: Lemon
154 136: Duck
155 137: Baseball Bat
156 138: Surveillance Camera
157 139: Cat
158 140: Jug
159 141: Broccoli
160 142: Piano
161 143: Pizza
162 144: Elephant
163 145: Skateboard
164 146: Surfboard
165 147: Gun
166 148: Skating and Skiing shoes
167 149: Gas stove
168 150: Donut
169 151: Bow Tie
170 152: Carrot
171 153: Toilet
172 154: Kite
173 155: Strawberry
174 156: Other Balls
175 157: Shovel
176 158: Pepper
177 159: Computer Box
178 160: Toilet Paper
179 161: Cleaning Products
180 162: Chopsticks
181 163: Microwave
182 164: Pigeon
183 165: Baseball
184 166: Cutting/chopping Board
185 167: Coffee Table
186 168: Side Table
187 169: Scissors
188 170: Marker
189 171: Pie
190 172: Ladder
191 173: Snowboard
192 174: Cookies
193 175: Radiator
194 176: Fire Hydrant
195 177: Basketball
196 178: Zebra
197 179: Grape
198 180: Giraffe
199 181: Potato
200 182: Sausage
201 183: Tricycle
202 184: Violin
203 185: Egg
204 186: Fire Extinguisher
205 187: Candy
206 188: Fire Truck
207 189: Billiards
208 190: Converter
209 191: Bathtub
210 192: Wheelchair
211 193: Golf Club
212 194: Briefcase
213 195: Cucumber
214 196: Cigar/Cigarette
215 197: Paint Brush
216 198: Pear
217 199: Heavy Truck
218 200: Hamburger
219 201: Extractor
220 202: Extension Cord
221 203: Tong
222 204: Tennis Racket
223 205: Folder
224 206: American Football
225 207: earphone
226 208: Mask
227 209: Kettle
228 210: Tennis
229 211: Ship
230 212: Swing
231 213: Coffee Machine
232 214: Slide
233 215: Carriage
234 216: Onion
235 217: Green beans
236 218: Projector
237 219: Frisbee
238 220: Washing Machine/Drying Machine
239 221: Chicken
240 222: Printer
241 223: Watermelon
242 224: Saxophone
243 225: Tissue
244 226: Toothbrush
245 227: Ice cream
246 228: Hot-air balloon
247 229: Cello
248 230: French Fries
249 231: Scale
250 232: Trophy
251 233: Cabbage
252 234: Hot dog
253 235: Blender
254 236: Peach
255 237: Rice
256 238: Wallet/Purse
257 239: Volleyball
258 240: Deer
259 241: Goose
260 242: Tape
261 243: Tablet
262 244: Cosmetics
263 245: Trumpet
264 246: Pineapple
265 247: Golf Ball
266 248: Ambulance
267 249: Parking meter
268 250: Mango
269 251: Key
270 252: Hurdle
271 253: Fishing Rod
272 254: Medal
273 255: Flute
274 256: Brush
275 257: Penguin
276 258: Megaphone
277 259: Corn
278 260: Lettuce
279 261: Garlic
280 262: Swan
281 263: Helicopter
282 264: Green Onion
283 265: Sandwich
284 266: Nuts
285 267: Speed Limit Sign
286 268: Induction Cooker
287 269: Broom
288 270: Trombone
289 271: Plum
290 272: Rickshaw
291 273: Goldfish
292 274: Kiwi fruit
293 275: Router/modem
294 276: Poker Card
295 277: Toaster
296 278: Shrimp
297 279: Sushi
298 280: Cheese
299 281: Notepaper
300 282: Cherry
301 283: Pliers
302 284: CD
303 285: Pasta
304 286: Hammer
305 287: Cue
306 288: Avocado
307 289: Hamimelon
308 290: Flask
309 291: Mushroom
310 292: Screwdriver
311 293: Soap
312 294: Recorder
313 295: Bear
314 296: Eggplant
315 297: Board Eraser
316 298: Coconut
317 299: Tape Measure/Ruler
318 300: Pig
319 301: Showerhead
320 302: Globe
321 303: Chips
322 304: Steak
323 305: Crosswalk Sign
324 306: Stapler
325 307: Camel
326 308: Formula 1
327 309: Pomegranate
328 310: Dishwasher
329 311: Crab
330 312: Hoverboard
331 313: Meat ball
332 314: Rice Cooker
333 315: Tuba
334 316: Calculator
335 317: Papaya
336 318: Antelope
337 319: Parrot
338 320: Seal
339 321: Butterfly
340 322: Dumbbell
341 323: Donkey
342 324: Lion
343 325: Urinal
344 326: Dolphin
345 327: Electric Drill
346 328: Hair Dryer
347 329: Egg tart
348 330: Jellyfish
349 331: Treadmill
350 332: Lighter
351 333: Grapefruit
352 334: Game board
353 335: Mop
354 336: Radish
355 337: Baozi
356 338: Target
357 339: French
358 340: Spring Rolls
359 341: Monkey
360 342: Rabbit
361 343: Pencil Case
362 344: Yak
363 345: Red Cabbage
364 346: Binoculars
365 347: Asparagus
366 348: Barbell
367 349: Scallop
368 350: Noddles
369 351: Comb
370 352: Dumpling
371 353: Oyster
372 354: Table Tennis paddle
373 355: Cosmetics Brush/Eyeliner Pencil
374 356: Chainsaw
375 357: Eraser
376 358: Lobster
377 359: Durian
378 360: Okra
379 361: Lipstick
380 362: Cosmetics Mirror
381 363: Curling
382 364: Table Tennis
383
384
385 # Download script/URL (optional) ---------------------------------------------------------------------------------------
386 download: |
387 from tqdm import tqdm
388
389 from utils.general import Path, check_requirements, download, np, xyxy2xywhn
390
391 check_requirements(('pycocotools>=2.0',))
392 from pycocotools.coco import COCO
393
394 # Make Directories
395 dir = Path(yaml['path']) # dataset root dir
396 for p in 'images', 'labels':
397 (dir / p).mkdir(parents=True, exist_ok=True)
398 for q in 'train', 'val':
399 (dir / p / q).mkdir(parents=True, exist_ok=True)
400
401 # Train, Val Splits
402 for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
403 print(f"Processing {split} in {patches} patches ...")
404 images, labels = dir / 'images' / split, dir / 'labels' / split
405
406 # Download
407 url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
408 if split == 'train':
409 download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json
410 download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8)
411 elif split == 'val':
412 download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json
413 download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8)
414 download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8)
415
416 # Move
417 for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
418 f.rename(images / f.name) # move to /images/{split}
419
420 # Labels
421 coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
422 names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
423 for cid, cat in enumerate(names):
424 catIds = coco.getCatIds(catNms=[cat])
425 imgIds = coco.getImgIds(catIds=catIds)
426 for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
427 width, height = im["width"], im["height"]
428 path = Path(im["file_name"]) # image filename
429 try:
430 with open(labels / path.with_suffix('.txt').name, 'a') as file:
431 annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
432 for a in coco.loadAnns(annIds):
433 x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
434 xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
435 x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
436 file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
437 except Exception as e:
438 print(e)
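
A note on the `download:` fields in this and the following dataset YAMLs: the multi-line value is ordinary Python that YOLOv5 executes on first use. A minimal sketch of that mechanism, assuming it mirrors `utils.general.check_dataset`, which exec()s the script with the parsed YAML dict bound to the name `yaml` (hence the `yaml['path']` references above):

import yaml as pyyaml  # PyYAML, aliased so the name `yaml` stays free for the dataset dict

def run_download_field(data_yaml_path):
    with open(data_yaml_path, errors='ignore') as f:
        data = pyyaml.safe_load(f)
    s = data.get('download', '')
    if s and not s.startswith(('http', 'bash ')):  # plain URLs / bash scripts are handled separately
        exec(s, {'yaml': data})  # the script sees the dataset dict as `yaml`
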
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
3 # Example usage: python train.py --data SKU-110K.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── SKU-110K ← downloads here (13.6 GB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/SKU-110K # dataset root dir
12 train: train.txt # train images (relative to 'path') 8219 images
13 val: val.txt # val images (relative to 'path') 588 images
14 test: test.txt # test images (optional) 2936 images
15
16 # Classes
17 names:
18 0: object
19
20
21 # Download script/URL (optional) ---------------------------------------------------------------------------------------
22 download: |
23 import shutil
24 from tqdm import tqdm
25 from utils.general import np, pd, Path, download, xyxy2xywh
26
27
28 # Download
29 dir = Path(yaml['path']) # dataset root dir
30 parent = Path(dir.parent) # download dir
31 urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
32 download(urls, dir=parent, delete=False)
33
34 # Rename directories
35 if dir.exists():
36 shutil.rmtree(dir)
37 (parent / 'SKU110K_fixed').rename(dir) # rename dir
38 (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
39
40 # Convert labels
41 names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
42 for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
43 x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
44 images, unique_images = x[:, 0], np.unique(x[:, 0])
45 with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
46 f.writelines(f'./images/{s}\n' for s in unique_images)
47 for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
48 cls = 0 # single-class dataset
49 with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
50 for r in x[images == im]:
51 w, h = r[6], r[7] # image width, height
52 xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
53 f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label
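
The conversion above divides the corner coordinates by image size before calling `xyxy2xywh`; the combined arithmetic is small enough to spell out. A self-contained sketch of the same math (not the repo helper itself):

def xyxy_to_norm_xywh(x1, y1, x2, y2, img_w, img_h):
    # corner coordinates -> normalized center-x, center-y, width, height in [0, 1]
    return ((x1 + x2) / 2 / img_w, (y1 + y2) / 2 / img_h,
            (x2 - x1) / img_w, (y2 - y1) / img_h)

# e.g. a 100x50 box at (10, 20) in a 1000x500 image:
assert xyxy_to_norm_xywh(10, 20, 110, 70, 1000, 500) == (0.06, 0.09, 0.1, 0.1)
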
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
3 # Example usage: python train.py --data VOC.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── VOC ← downloads here (2.8 GB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: /home/qfs/WorkSpace/ps_tamper/yolov5_ps/VOCdevkit
12 train: # train images (relative to 'path') 16551 images
13 - images/train
14 val: # val images (relative to 'path') 4952 images
15 - images/val
16 test: # test images (optional)
17 - images/val
18
19 # Classes
20 names:
21
22 0: tampered
23 #1: bicycle
24 #2: bird
25 #3: boat
26 #4: bottle
27 #5: bus
28 #6: car
29 #7: cat
30 #8: chair
31 #9: cow
32 #10: diningtable
33 #11: dog
34 #12: horse
35 #13: motorbike
36 #14: person
37 #15: pottedplant
38 #16: sheep
39 #17: sofa
40 #18: train
41 #19: tvmonitor
42
43
44 # Download script/URL (optional) ---------------------------------------------------------------------------------------
45 download: |
46 import xml.etree.ElementTree as ET
47
48 from tqdm import tqdm
49 from utils.general import download, Path
50
51
52 def convert_label(path, lb_path, year, image_id):
53 def convert_box(size, box):
54 dw, dh = 1. / size[0], 1. / size[1]
55 x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
56 return x * dw, y * dh, w * dw, h * dh
57
58 in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
59 out_file = open(lb_path, 'w')
60 tree = ET.parse(in_file)
61 root = tree.getroot()
62 size = root.find('size')
63 w = int(size.find('width').text)
64 h = int(size.find('height').text)
65
66 names = list(yaml['names'].values()) # names list
67 for obj in root.iter('object'):
68 cls = obj.find('name').text
69 if cls in names and int(obj.find('difficult').text) != 1:
70 xmlbox = obj.find('bndbox')
71 bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
72 cls_id = names.index(cls) # class id
73 out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
74
75
76 # Download
77 dir = Path(yaml['path']) # dataset root dir
78 url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
79 urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
80 f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
81 f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
82 download(urls, dir=dir / 'images', delete=False, curl=True, threads=3)
83
84 # Convert
85 path = dir / 'images/VOCdevkit'
86 for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
87 imgs_path = dir / 'images' / f'{image_set}{year}'
88 lbs_path = dir / 'labels' / f'{image_set}{year}'
89 imgs_path.mkdir(exist_ok=True, parents=True)
90 lbs_path.mkdir(exist_ok=True, parents=True)
91
92 with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
93 image_ids = f.read().strip().split()
94 for id in tqdm(image_ids, desc=f'{image_set}{year}'):
95 f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
96 lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
97 f.rename(imgs_path / f.name) # move image
98 convert_label(path, lb_path, year, id) # convert labels to YOLO format
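
Because this VOC.yaml has been cut down to the single `tampered` class, any surviving multi-class label file from an earlier conversion would silently corrupt training. A hedged sanity-check sketch; the root path below is simply the `path:` value above:

from pathlib import Path

root = Path('/home/qfs/WorkSpace/ps_tamper/yolov5_ps/VOCdevkit')  # `path:` from this YAML
for txt in (root / 'labels').rglob('*.txt'):
    for line in txt.read_text().splitlines():
        cls = line.split()[0]
        assert cls == '0', f'unexpected class {cls} in {txt}'
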
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
3 # Example usage: python train.py --data VisDrone.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── VisDrone ← downloads here (2.3 GB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/VisDrone # dataset root dir
12 train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
13 val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
14 test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
15
16 # Classes
17 names:
18 0: pedestrian
19 1: people
20 2: bicycle
21 3: car
22 4: van
23 5: truck
24 6: tricycle
25 7: awning-tricycle
26 8: bus
27 9: motor
28
29
30 # Download script/URL (optional) ---------------------------------------------------------------------------------------
31 download: |
32 from utils.general import download, os, Path
33
34 def visdrone2yolo(dir):
35 from PIL import Image
36 from tqdm import tqdm
37
38 def convert_box(size, box):
39 # Convert VisDrone box to YOLO xywh box
40 dw = 1. / size[0]
41 dh = 1. / size[1]
42 return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh
43
44 (dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory
45 pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
46 for f in pbar:
47 img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
48 lines = []
49 with open(f, 'r') as file: # read annotation.txt
50 for row in [x.split(',') for x in file.read().strip().splitlines()]:
51 if row[4] == '0': # VisDrone 'ignored regions' class 0
52 continue
53 cls = int(row[5]) - 1
54 box = convert_box(img_size, tuple(map(int, row[:4])))
55 lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
56 with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
57 fl.writelines(lines) # write label.txt
58
59
60 # Download
61 dir = Path(yaml['path']) # dataset root dir
62 urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
63 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
64 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
65 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
66 download(urls, dir=dir, curl=True, threads=4)
67
68 # Convert
69 for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
70 visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels
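
For reference, each annotation row the converter above parses is, assuming the standard VisDrone2019-DET format, `x,y,w,h,score,category,truncation,occlusion`; a score of 0 flags an 'ignored region', and categories 1-10 shift down to the YOLO ids 0-9 listed under `names:`:

row = '684,8,273,116,1,4,0,0'.split(',')  # hypothetical annotation row
if row[4] != '0':          # skip VisDrone 'ignored regions'
    cls = int(row[5]) - 1  # VisDrone category 4 ('car') -> YOLO id 3
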
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # COCO 2017 dataset http://cocodataset.org by Microsoft
3 # Example usage: python train.py --data coco.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── coco ← downloads here (20.1 GB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/coco # dataset root dir
12 train: train2017.txt # train images (relative to 'path') 118287 images
13 val: val2017.txt # val images (relative to 'path') 5000 images
14 test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
15
16 # Classes
17 names:
18 0: person
19 1: bicycle
20 2: car
21 3: motorcycle
22 4: airplane
23 5: bus
24 6: train
25 7: truck
26 8: boat
27 9: traffic light
28 10: fire hydrant
29 11: stop sign
30 12: parking meter
31 13: bench
32 14: bird
33 15: cat
34 16: dog
35 17: horse
36 18: sheep
37 19: cow
38 20: elephant
39 21: bear
40 22: zebra
41 23: giraffe
42 24: backpack
43 25: umbrella
44 26: handbag
45 27: tie
46 28: suitcase
47 29: frisbee
48 30: skis
49 31: snowboard
50 32: sports ball
51 33: kite
52 34: baseball bat
53 35: baseball glove
54 36: skateboard
55 37: surfboard
56 38: tennis racket
57 39: bottle
58 40: wine glass
59 41: cup
60 42: fork
61 43: knife
62 44: spoon
63 45: bowl
64 46: banana
65 47: apple
66 48: sandwich
67 49: orange
68 50: broccoli
69 51: carrot
70 52: hot dog
71 53: pizza
72 54: donut
73 55: cake
74 56: chair
75 57: couch
76 58: potted plant
77 59: bed
78 60: dining table
79 61: toilet
80 62: tv
81 63: laptop
82 64: mouse
83 65: remote
84 66: keyboard
85 67: cell phone
86 68: microwave
87 69: oven
88 70: toaster
89 71: sink
90 72: refrigerator
91 73: book
92 74: clock
93 75: vase
94 76: scissors
95 77: teddy bear
96 78: hair drier
97 79: toothbrush
98
99
100 # Download script/URL (optional)
101 download: |
102 from utils.general import download, Path
103
104
105 # Download labels
106 segments = False # segment or box labels
107 dir = Path(yaml['path']) # dataset root dir
108 url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
109 urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
110 download(urls, dir=dir.parent)
111
112 # Download data
113 urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
114 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
115 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
116 download(urls, dir=dir / 'images', threads=3)
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
3 # Example usage: python train.py --data coco128.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── coco128-seg ← downloads here (7 MB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/coco128-seg # dataset root dir
12 train: images/train2017 # train images (relative to 'path') 128 images
13 val: images/train2017 # val images (relative to 'path') 128 images
14 test: # test images (optional)
15
16 # Classes
17 names:
18 0: person
19 1: bicycle
20 2: car
21 3: motorcycle
22 4: airplane
23 5: bus
24 6: train
25 7: truck
26 8: boat
27 9: traffic light
28 10: fire hydrant
29 11: stop sign
30 12: parking meter
31 13: bench
32 14: bird
33 15: cat
34 16: dog
35 17: horse
36 18: sheep
37 19: cow
38 20: elephant
39 21: bear
40 22: zebra
41 23: giraffe
42 24: backpack
43 25: umbrella
44 26: handbag
45 27: tie
46 28: suitcase
47 29: frisbee
48 30: skis
49 31: snowboard
50 32: sports ball
51 33: kite
52 34: baseball bat
53 35: baseball glove
54 36: skateboard
55 37: surfboard
56 38: tennis racket
57 39: bottle
58 40: wine glass
59 41: cup
60 42: fork
61 43: knife
62 44: spoon
63 45: bowl
64 46: banana
65 47: apple
66 48: sandwich
67 49: orange
68 50: broccoli
69 51: carrot
70 52: hot dog
71 53: pizza
72 54: donut
73 55: cake
74 56: chair
75 57: couch
76 58: potted plant
77 59: bed
78 60: dining table
79 61: toilet
80 62: tv
81 63: laptop
82 64: mouse
83 65: remote
84 66: keyboard
85 67: cell phone
86 68: microwave
87 69: oven
88 70: toaster
89 71: sink
90 72: refrigerator
91 73: book
92 74: clock
93 75: vase
94 76: scissors
95 77: teddy bear
96 78: hair drier
97 79: toothbrush
98
99
100 # Download script/URL (optional)
101 download: https://ultralytics.com/assets/coco128-seg.zip
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
3 # Example usage: python train.py --data coco128.yaml
4 # parent
5 # ├── yolov5
6 # └── datasets
7 # └── coco128 ← downloads here (7 MB)
8
9
10 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11 path: ../datasets/coco128 # dataset root dir
12 train: images/train2017 # train images (relative to 'path') 128 images
13 val: images/train2017 # val images (relative to 'path') 128 images
14 test: # test images (optional)
15
16 # Classes
17 names:
18 0: person
19 1: bicycle
20 2: car
21 3: motorcycle
22 4: airplane
23 5: bus
24 6: train
25 7: truck
26 8: boat
27 9: traffic light
28 10: fire hydrant
29 11: stop sign
30 12: parking meter
31 13: bench
32 14: bird
33 15: cat
34 16: dog
35 17: horse
36 18: sheep
37 19: cow
38 20: elephant
39 21: bear
40 22: zebra
41 23: giraffe
42 24: backpack
43 25: umbrella
44 26: handbag
45 27: tie
46 28: suitcase
47 29: frisbee
48 30: skis
49 31: snowboard
50 32: sports ball
51 33: kite
52 34: baseball bat
53 35: baseball glove
54 36: skateboard
55 37: surfboard
56 38: tennis racket
57 39: bottle
58 40: wine glass
59 41: cup
60 42: fork
61 43: knife
62 44: spoon
63 45: bowl
64 46: banana
65 47: apple
66 48: sandwich
67 49: orange
68 50: broccoli
69 51: carrot
70 52: hot dog
71 53: pizza
72 54: donut
73 55: cake
74 56: chair
75 57: couch
76 58: potted plant
77 59: bed
78 60: dining table
79 61: toilet
80 62: tv
81 63: laptop
82 64: mouse
83 65: remote
84 66: keyboard
85 67: cell phone
86 68: microwave
87 69: oven
88 70: toaster
89 71: sink
90 72: refrigerator
91 73: book
92 74: clock
93 75: vase
94 76: scissors
95 77: teddy bear
96 78: hair drier
97 79: toothbrush
98
99
100 # Download script/URL (optional)
101 download: https://ultralytics.com/assets/coco128.zip
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Hyperparameters for Objects365 training
3 # python train.py --weights yolov5m.pt --data Objects365.yaml --evolve
4 # See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
5
6 lr0: 0.00258
7 lrf: 0.17
8 momentum: 0.779
9 weight_decay: 0.00058
10 warmup_epochs: 1.33
11 warmup_momentum: 0.86
12 warmup_bias_lr: 0.0711
13 box: 0.0539
14 cls: 0.299
15 cls_pw: 0.825
16 obj: 0.632
17 obj_pw: 1.0
18 iou_t: 0.2
19 anchor_t: 3.44
20 anchors: 3.2
21 fl_gamma: 0.0
22 hsv_h: 0.0188
23 hsv_s: 0.704
24 hsv_v: 0.36
25 degrees: 0.0
26 translate: 0.0902
27 scale: 0.491
28 shear: 0.0
29 perspective: 0.0
30 flipud: 0.0
31 fliplr: 0.5
32 mosaic: 1.0
33 mixup: 0.0
34 copy_paste: 0.0
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Hyperparameters for VOC training
3 # python train.py --batch 128 --weights yolov5m6.pt --data VOC.yaml --epochs 50 --img 512 --hyp hyp.scratch-med.yaml --evolve
4 # See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
5
6 # YOLOv5 Hyperparameter Evolution Results
7 # Best generation: 467
8 # Last generation: 996
9 # metrics/precision, metrics/recall, metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss
10 # 0.87729, 0.85125, 0.91286, 0.72664, 0.0076739, 0.0042529, 0.0013865
11
12 lr0: 0.00334
13 lrf: 0.15135
14 momentum: 0.74832
15 weight_decay: 0.00025
16 warmup_epochs: 3.3835
17 warmup_momentum: 0.59462
18 warmup_bias_lr: 0.18657
19 box: 0.02
20 cls: 0.21638
21 cls_pw: 0.5
22 obj: 0.51728
23 obj_pw: 0.67198
24 iou_t: 0.2
25 anchor_t: 3.3744
26 fl_gamma: 0.0
27 hsv_h: 0.01041
28 hsv_s: 0.54703
29 hsv_v: 0.27739
30 degrees: 0.0
31 translate: 0.04591
32 scale: 0.75544
33 shear: 0.0
34 perspective: 0.0
35 flipud: 0.0
36 fliplr: 0.5
37 mosaic: 0.85834
38 mixup: 0.04266
39 copy_paste: 0.0
40 anchors: 3.412
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Hyperparameters for high-augmentation COCO training from scratch
3 # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
4 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
5
6 lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
7 lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
8 momentum: 0.937 # SGD momentum/Adam beta1
9 weight_decay: 0.0005 # optimizer weight decay 5e-4
10 warmup_epochs: 3.0 # warmup epochs (fractions ok)
11 warmup_momentum: 0.8 # warmup initial momentum
12 warmup_bias_lr: 0.1 # warmup initial bias lr
13 box: 0.05 # box loss gain
14 cls: 0.3 # cls loss gain
15 cls_pw: 1.0 # cls BCELoss positive_weight
16 obj: 0.7 # obj loss gain (scale with pixels)
17 obj_pw: 1.0 # obj BCELoss positive_weight
18 iou_t: 0.20 # IoU training threshold
19 anchor_t: 4.0 # anchor-multiple threshold
20 # anchors: 3 # anchors per output layer (0 to ignore)
21 fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
22 hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
23 hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
24 hsv_v: 0.4 # image HSV-Value augmentation (fraction)
25 degrees: 0.0 # image rotation (+/- deg)
26 translate: 0.1 # image translation (+/- fraction)
27 scale: 0.9 # image scale (+/- gain)
28 shear: 0.0 # image shear (+/- deg)
29 perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
30 flipud: 0.0 # image flip up-down (probability)
31 fliplr: 0.5 # image flip left-right (probability)
32 mosaic: 1.0 # image mosaic (probability)
33 mixup: 0.1 # image mixup (probability)
34 copy_paste: 0.1 # segment copy-paste (probability)
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Hyperparameters for low-augmentation COCO training from scratch
3 # python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear
4 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
5
6 lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
7 lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
8 momentum: 0.937 # SGD momentum/Adam beta1
9 weight_decay: 0.0005 # optimizer weight decay 5e-4
10 warmup_epochs: 3.0 # warmup epochs (fractions ok)
11 warmup_momentum: 0.8 # warmup initial momentum
12 warmup_bias_lr: 0.1 # warmup initial bias lr
13 box: 0.05 # box loss gain
14 cls: 0.5 # cls loss gain
15 cls_pw: 1.0 # cls BCELoss positive_weight
16 obj: 1.0 # obj loss gain (scale with pixels)
17 obj_pw: 1.0 # obj BCELoss positive_weight
18 iou_t: 0.20 # IoU training threshold
19 anchor_t: 4.0 # anchor-multiple threshold
20 # anchors: 3 # anchors per output layer (0 to ignore)
21 fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
22 hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
23 hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
24 hsv_v: 0.4 # image HSV-Value augmentation (fraction)
25 degrees: 0.0 # image rotation (+/- deg)
26 translate: 0.1 # image translation (+/- fraction)
27 scale: 0.5 # image scale (+/- gain)
28 shear: 0.0 # image shear (+/- deg)
29 perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
30 flipud: 0.0 # image flip up-down (probability)
31 fliplr: 0.5 # image flip left-right (probability)
32 mosaic: 1.0 # image mosaic (probability)
33 mixup: 0.0 # image mixup (probability)
34 copy_paste: 0.0 # segment copy-paste (probability)
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Hyperparameters for medium-augmentation COCO training from scratch
3 # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
4 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
5
6 lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
7 lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
8 momentum: 0.937 # SGD momentum/Adam beta1
9 weight_decay: 0.0005 # optimizer weight decay 5e-4
10 warmup_epochs: 3.0 # warmup epochs (fractions ok)
11 warmup_momentum: 0.8 # warmup initial momentum
12 warmup_bias_lr: 0.1 # warmup initial bias lr
13 box: 0.05 # box loss gain
14 cls: 0.3 # cls loss gain
15 cls_pw: 1.0 # cls BCELoss positive_weight
16 obj: 0.7 # obj loss gain (scale with pixels)
17 obj_pw: 1.0 # obj BCELoss positive_weight
18 iou_t: 0.20 # IoU training threshold
19 anchor_t: 4.0 # anchor-multiple threshold
20 # anchors: 3 # anchors per output layer (0 to ignore)
21 fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
22 hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
23 hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
24 hsv_v: 0.4 # image HSV-Value augmentation (fraction)
25 degrees: 0.0 # image rotation (+/- deg)
26 translate: 0.1 # image translation (+/- fraction)
27 scale: 0.9 # image scale (+/- gain)
28 shear: 0.0 # image shear (+/- deg)
29 perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
30 flipud: 0.0 # image flip up-down (probability)
31 fliplr: 0.5 # image flip left-right (probability)
32 mosaic: 1.0 # image mosaic (probability)
33 mixup: 0.1 # image mixup (probability)
34 copy_paste: 0.0 # segment copy-paste (probability)
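
All of the hyp.*.yaml files above are flat key-value maps; train.py simply loads whichever file `--hyp` points at into a dict before building the optimizer and augmentation pipeline. A minimal sketch of consuming one (the path is just an example):

import yaml

with open('data/hyps/hyp.scratch-low.yaml', errors='ignore') as f:
    hyp = yaml.safe_load(f)
print(hyp['lr0'], hyp['fliplr'])  # 0.01 0.5 for the low-augmentation file above
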
1 #!/bin/bash
2 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
3 # Download latest models from https://github.com/ultralytics/yolov5/releases
4 # Example usage: bash data/scripts/download_weights.sh
5 # parent
6 # └── yolov5
7 # ├── yolov5s.pt ← downloads here
8 # ├── yolov5m.pt
9 # └── ...
10
11 python - <<EOF
12 from utils.downloads import attempt_download
13
14 p5 = ['n', 's', 'm', 'l', 'x'] # P5 models
15 p6 = [f'{x}6' for x in p5] # P6 models
16 cls = [f'{x}-cls' for x in p5] # classification models
17
18 for x in p5 + p6 + cls:
19 attempt_download(f'weights/yolov5{x}.pt')
20
21 EOF
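
The `attempt_download` helper used in the heredoc above can also be called for a single checkpoint rather than the full sweep, e.g.:

from utils.downloads import attempt_download

attempt_download('weights/yolov5s.pt')  # fetched from the GitHub releases only if missing locally
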
1 #!/bin/bash
2 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
3 # Download COCO 2017 dataset http://cocodataset.org
4 # Example usage: bash data/scripts/get_coco.sh
5 # parent
6 # ├── yolov5
7 # └── datasets
8 # └── coco ← downloads here
9
10 # Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments
11 if [ "$#" -gt 0 ]; then
12 for opt in "$@"; do
13 case "${opt}" in
14 --train) train=true ;;
15 --val) val=true ;;
16 --test) test=true ;;
17 --segments) segments=true ;;
18 esac
19 done
20 else
21 train=true
22 val=true
23 test=false
24 segments=false
25 fi
26
27 # Download/unzip labels
28 d='../datasets' # unzip directory
29 url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
30 if [ "$segments" == "true" ]; then
31 f='coco2017labels-segments.zip' # 168 MB
32 else
33 f='coco2017labels.zip' # 168 MB
34 fi
35 echo 'Downloading' $url$f '...'
36 curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
37
38 # Download/unzip images
39 d='../datasets/coco/images' # unzip directory
40 url=http://images.cocodataset.org/zips/
41 if [ "$train" == "true" ]; then
42 f='train2017.zip' # 19G, 118k images
43 echo 'Downloading' $url$f '...'
44 curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
45 fi
46 if [ "$val" == "true" ]; then
47 f='val2017.zip' # 1G, 5k images
48 echo 'Downloading' $url$f '...'
49 curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
50 fi
51 if [ "$test" == "true" ]; then
52 f='test2017.zip' # 7G, 41k images (optional)
53 echo 'Downloading' $url$f '...'
54 curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
55 fi
56 wait # finish background tasks
1 #!/bin/bash
2 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
3 # Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
4 # Example usage: bash data/scripts/get_coco128.sh
5 # parent
6 # ├── yolov5
7 # └── datasets
8 # └── coco128 ← downloads here
9
10 # Download/unzip images and labels
11 d='../datasets' # unzip directory
12 url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
13 f='coco128.zip' # or 'coco128-segments.zip', 68 MB
14 echo 'Downloading' $url$f '...'
15 curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
16
17 wait # finish background tasks
1 #!/bin/bash
2 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
3 # Download ILSVRC2012 ImageNet dataset https://image-net.org
4 # Example usage: bash data/scripts/get_imagenet.sh
5 # parent
6 # ├── yolov5
7 # └── datasets
8 # └── imagenet ← downloads here
9
10 # Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
11 if [ "$#" -gt 0 ]; then
12 for opt in "$@"; do
13 case "${opt}" in
14 --train) train=true ;;
15 --val) val=true ;;
16 esac
17 done
18 else
19 train=true
20 val=true
21 fi
22
23 # Make dir
24 d='../datasets/imagenet' # unzip directory
25 mkdir -p $d && cd $d
26
27 # Download/unzip train
28 if [ "$train" == "true" ]; then
29 wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images
30 mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
31 tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
32 find . -name "*.tar" | while read NAME; do
33 mkdir -p "${NAME%.tar}"
34 tar -xf "${NAME}" -C "${NAME%.tar}"
35 rm -f "${NAME}"
36 done
37 cd ..
38 fi
39
40 # Download/unzip val
41 if [ "$val" == "true" ]; then
42 wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images
43 mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar
44 wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs
45 fi
46
47 # Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail)
48 # rm train/n04266014/n04266014_10835.JPEG
49
50 # TFRecords (optional)
51 # wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
3 # -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
4 # Example usage: python train.py --data xView.yaml
5 # parent
6 # ├── yolov5
7 # └── datasets
8 # └── xView ← downloads here (20.7 GB)
9
10
11 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
12 path: ../datasets/xView # dataset root dir
13 train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
14 val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
15
16 # Classes
17 names:
18 0: Fixed-wing Aircraft
19 1: Small Aircraft
20 2: Cargo Plane
21 3: Helicopter
22 4: Passenger Vehicle
23 5: Small Car
24 6: Bus
25 7: Pickup Truck
26 8: Utility Truck
27 9: Truck
28 10: Cargo Truck
29 11: Truck w/Box
30 12: Truck Tractor
31 13: Trailer
32 14: Truck w/Flatbed
33 15: Truck w/Liquid
34 16: Crane Truck
35 17: Railway Vehicle
36 18: Passenger Car
37 19: Cargo Car
38 20: Flat Car
39 21: Tank car
40 22: Locomotive
41 23: Maritime Vessel
42 24: Motorboat
43 25: Sailboat
44 26: Tugboat
45 27: Barge
46 28: Fishing Vessel
47 29: Ferry
48 30: Yacht
49 31: Container Ship
50 32: Oil Tanker
51 33: Engineering Vehicle
52 34: Tower crane
53 35: Container Crane
54 36: Reach Stacker
55 37: Straddle Carrier
56 38: Mobile Crane
57 39: Dump Truck
58 40: Haul Truck
59 41: Scraper/Tractor
60 42: Front loader/Bulldozer
61 43: Excavator
62 44: Cement Mixer
63 45: Ground Grader
64 46: Hut/Tent
65 47: Shed
66 48: Building
67 49: Aircraft Hangar
68 50: Damaged Building
69 51: Facility
70 52: Construction Site
71 53: Vehicle Lot
72 54: Helipad
73 55: Storage Tank
74 56: Shipping container lot
75 57: Shipping Container
76 58: Pylon
77 59: Tower
78
79
80 # Download script/URL (optional) ---------------------------------------------------------------------------------------
81 download: |
82 import json
83 import os
84 from pathlib import Path
85
86 import numpy as np
87 from PIL import Image
88 from tqdm import tqdm
89
90 from utils.datasets import autosplit
91 from utils.general import download, xyxy2xywhn
92
93
94 def convert_labels(fname=Path('xView/xView_train.geojson')):
95 # Convert xView geoJSON labels to YOLO format
96 path = fname.parent
97 with open(fname) as f:
98 print(f'Loading {fname}...')
99 data = json.load(f)
100
101 # Make dirs
102 labels = Path(path / 'labels' / 'train')
103 os.system(f'rm -rf {labels}')
104 labels.mkdir(parents=True, exist_ok=True)
105
106 # xView classes 11-94 to 0-59
107 xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
108 12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
109 29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
110 47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
111
112 shapes = {}
113 for feature in tqdm(data['features'], desc=f'Converting {fname}'):
114 p = feature['properties']
115 if p['bounds_imcoords']:
116 id = p['image_id']
117 file = path / 'train_images' / id
118 if file.exists(): # 1395.tif missing
119 try:
120 box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
121 assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
122 cls = p['type_id']
123 cls = xview_class2index[int(cls)] # xView class 11-94 to 0-59
124 assert 59 >= cls >= 0, f'incorrect class index {cls}'
125
126 # Write YOLO label
127 if id not in shapes:
128 shapes[id] = Image.open(file).size
129 box = xyxy2xywhn(box[None].astype(float), w=shapes[id][0], h=shapes[id][1], clip=True) # np.float was removed in NumPy 1.24
130 with open((labels / id).with_suffix('.txt'), 'a') as f:
131 f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
132 except Exception as e:
133 print(f'WARNING: skipping one label for {file}: {e}')
134
135
136 # Download manually from https://challenge.xviewdataset.org
137 dir = Path(yaml['path']) # dataset root dir
138 # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
139 # 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
140 # 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
141 # download(urls, dir=dir, delete=False)
142
143 # Convert labels
144 convert_labels(dir / 'xView_train.geojson')
145
146 # Move images
147 images = Path(dir / 'images')
148 images.mkdir(parents=True, exist_ok=True)
149 Path(dir / 'train_images').rename(dir / 'images' / 'train')
150 Path(dir / 'val_images').rename(dir / 'images' / 'val')
151
152 # Split
153 autosplit(dir / 'images' / 'train')
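
A quick consistency check on the `xview_class2index` table above: its non-negative entries should cover exactly the 60 contiguous YOLO ids. A sketch, assuming the table is pasted in as-is:

used = sorted(i for i in xview_class2index if i != -1)
assert used == list(range(60)), 'remap table does not cover ids 0-59 exactly'
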
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
4
5 Usage - sources:
6 $ python detect.py --weights yolov5s.pt --source 0 # webcam
7 img.jpg # image
8 vid.mp4 # video
9 path/ # directory
10 'path/*.jpg' # glob
11 'https://youtu.be/Zgi9g1ksQHc' # YouTube
12 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
13
14 Usage - formats:
15 $ python detect.py --weights yolov5s.pt # PyTorch
16 yolov5s.torchscript # TorchScript
17 yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
18 yolov5s.xml # OpenVINO
19 yolov5s.engine # TensorRT
20 yolov5s.mlmodel # CoreML (macOS-only)
21 yolov5s_saved_model # TensorFlow SavedModel
22 yolov5s.pb # TensorFlow GraphDef
23 yolov5s.tflite # TensorFlow Lite
24 yolov5s_edgetpu.tflite # TensorFlow Edge TPU
25 yolov5s_paddle_model # PaddlePaddle
26 """
27
28 import argparse
29 import os
30 import platform
31 import sys
32 from pathlib import Path
33
34 import torch
35
36 FILE = Path(__file__).resolve()
37 ROOT = FILE.parents[0] # YOLOv5 root directory
38 if str(ROOT) not in sys.path:
39 sys.path.append(str(ROOT)) # add ROOT to PATH
40 ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
41
42 from models.common import DetectMultiBackend
43 from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
44 from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
45 increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
46 from utils.plots import Annotator, colors, save_one_box
47 from utils.torch_utils import select_device, smart_inference_mode
48
49
50 @smart_inference_mode()
51 def run(
52 weights=ROOT / 'yolov5s.pt', # model path or triton URL
53 source=ROOT / 'data/images', # file/dir/URL/glob/screen/0(webcam)
54 data=ROOT / 'data/coco128.yaml', # dataset.yaml path
55 imgsz=(640, 640), # inference size (height, width)
56 conf_thres=0.25, # confidence threshold
57 iou_thres=0.45, # NMS IOU threshold
58 max_det=1000, # maximum detections per image
59 device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
60 view_img=False, # show results
61 save_txt=False, # save results to *.txt
62 save_conf=False, # save confidences in --save-txt labels
63 save_crop=False, # save cropped prediction boxes
64 nosave=False, # do not save images/videos
65 classes=None, # filter by class: --class 0, or --class 0 2 3
66 agnostic_nms=False, # class-agnostic NMS
67 augment=False, # augmented inference
68 visualize=False, # visualize features
69 update=False, # update all models
70 project=ROOT / 'runs/detect', # save results to project/name
71 name='exp', # save results to project/name
72 exist_ok=False, # existing project/name ok, do not increment
73 line_thickness=3, # bounding box thickness (pixels)
74 hide_labels=False, # hide labels
75 hide_conf=False, # hide confidences
76 half=False, # use FP16 half-precision inference
77 dnn=False, # use OpenCV DNN for ONNX inference
78 vid_stride=1, # video frame-rate stride
79 ):
80 source = str(source)
81 save_img = not nosave and not source.endswith('.txt') # save inference images
82 is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
83 is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
84 webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
85 screenshot = source.lower().startswith('screen')
86 if is_url and is_file:
87 source = check_file(source) # download
88
89 # Directories
90 save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
91 (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
92
93 # Load model
94 device = select_device(device)
95 model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
96 stride, names, pt = model.stride, model.names, model.pt
97 imgsz = check_img_size(imgsz, s=stride) # check image size
98
99 # Dataloader
100 bs = 1 # batch_size
101 if webcam:
102 view_img = check_imshow()
103 dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
104 bs = len(dataset)
105 elif screenshot:
106 dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
107 else:
108 dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
109 vid_path, vid_writer = [None] * bs, [None] * bs
110
111 # Run inference
112 model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
113 seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
114 for path, im, im0s, vid_cap, s in dataset:
115 with dt[0]:
116 im = torch.from_numpy(im).to(model.device)
117 im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
118 im /= 255 # 0 - 255 to 0.0 - 1.0
119 if len(im.shape) == 3:
120 im = im[None] # expand for batch dim
121
122 # Inference
123 with dt[1]:
124 visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
125 pred = model(im, augment=augment, visualize=visualize)
126
127 # NMS
128 with dt[2]:
129 pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
130
131 # Second-stage classifier (optional)
132 # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
133
134 # Process predictions
135 for i, det in enumerate(pred): # per image
136 seen += 1
137 if webcam: # batch_size >= 1
138 p, im0, frame = path[i], im0s[i].copy(), dataset.count
139 s += f'{i}: '
140 else:
141 p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
142
143 p = Path(p) # to Path
144 save_path = str(save_dir / p.name) # im.jpg
145 txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
146 s += '%gx%g ' % im.shape[2:] # print string
147 gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
148 imc = im0.copy() if save_crop else im0 # for save_crop
149 annotator = Annotator(im0, line_width=line_thickness, example=str(names))
150 if len(det):
151 # Rescale boxes from img_size to im0 size
152 det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
153
154 # Print results
155 for c in det[:, 5].unique():
156 n = (det[:, 5] == c).sum() # detections per class
157 s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
158
159 # Write results
160 for *xyxy, conf, cls in reversed(det):
161 if save_txt: # Write to file
162 xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
163 line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
164 with open(f'{txt_path}.txt', 'a') as f:
165 f.write(('%g ' * len(line)).rstrip() % line + '\n')
166
167 if save_img or save_crop or view_img: # Add bbox to image
168 c = int(cls) # integer class
169 label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
170 annotator.box_label(xyxy, label, color=colors(c, True))
171 if save_crop:
172 save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
173
174 # Stream results
175 im0 = annotator.result()
176 if view_img:
177 if platform.system() == 'Linux' and p not in windows:
178 windows.append(p)
179 cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
180 cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
181 cv2.imshow(str(p), im0)
182 cv2.waitKey(1) # 1 millisecond
183
184 # Save results (image with detections)
185 if save_img:
186 if dataset.mode == 'image':
187 cv2.imwrite(save_path, im0)
188 else: # 'video' or 'stream'
189 if vid_path[i] != save_path: # new video
190 vid_path[i] = save_path
191 if isinstance(vid_writer[i], cv2.VideoWriter):
192 vid_writer[i].release() # release previous video writer
193 if vid_cap: # video
194 fps = vid_cap.get(cv2.CAP_PROP_FPS)
195 w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
196 h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
197 else: # stream
198 fps, w, h = 30, im0.shape[1], im0.shape[0]
199 save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
200 vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
201 vid_writer[i].write(im0)
202
203 # Print time (inference-only)
204 LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
205
206 # Print results
207 t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image
208 LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
209 if save_txt or save_img:
210 s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
211 LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
212 if update:
213 strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
214
215
216 def parse_opt():
217 parser = argparse.ArgumentParser()
218 parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp/weights/best.pt', help='model path or triton URL')
219 parser.add_argument('--source', type=str, default=ROOT / 'data/images/crop_img', help='file/dir/URL/glob/screen/0(webcam)')
220 parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
221 parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
222 parser.add_argument('--conf-thres', type=float, default=0.3, help='confidence threshold')
223 parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
224 parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
225 parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
226 parser.add_argument('--view-img', action='store_true', help='show results')
227 parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
228 parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
229 parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
230 parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
231 parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
232 parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
233 parser.add_argument('--augment', action='store_true', help='augmented inference')
234 parser.add_argument('--visualize', action='store_true', help='visualize features')
235 parser.add_argument('--update', action='store_true', help='update all models')
236 parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
237 parser.add_argument('--name', default='exp', help='save results to project/name')
238 parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
239 parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
240 parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
241 parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
242 parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
243 parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
244 parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
245 opt = parser.parse_args()
246 opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
247 print_args(vars(opt))
248 return opt
249
250
251 def main(opt):
252 check_requirements(exclude=('tensorboard', 'thop'))
253 run(**vars(opt))
254
255
256 if __name__ == "__main__":
257 opt = parse_opt()
258 main(opt)
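
Besides the CLI, `run()` can be imported and called directly; a sketch that mirrors the defaults this repo patched into parse_opt (the weights/source paths are those defaults, so they presume a finished training run):

from detect import run

run(weights='runs/train/exp/weights/best.pt',  # default --weights above
    source='data/images/crop_img',             # default --source above
    conf_thres=0.3)                            # default --conf-thres above
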
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
4
5 Format | `export.py --include` | Model
6 --- | --- | ---
7 PyTorch | - | yolov5s.pt
8 TorchScript | `torchscript` | yolov5s.torchscript
9 ONNX | `onnx` | yolov5s.onnx
10 OpenVINO | `openvino` | yolov5s_openvino_model/
11 TensorRT | `engine` | yolov5s.engine
12 CoreML | `coreml` | yolov5s.mlmodel
13 TensorFlow SavedModel | `saved_model` | yolov5s_saved_model/
14 TensorFlow GraphDef | `pb` | yolov5s.pb
15 TensorFlow Lite | `tflite` | yolov5s.tflite
16 TensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tflite
17 TensorFlow.js | `tfjs` | yolov5s_web_model/
18 PaddlePaddle | `paddle` | yolov5s_paddle_model/
19
20 Requirements:
21 $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU
22 $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU
23
24 Usage:
25 $ python export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...
26
27 Inference:
28 $ python detect.py --weights yolov5s.pt # PyTorch
29 yolov5s.torchscript # TorchScript
30 yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
31 yolov5s.xml # OpenVINO
32 yolov5s.engine # TensorRT
33 yolov5s.mlmodel # CoreML (macOS-only)
34 yolov5s_saved_model # TensorFlow SavedModel
35 yolov5s.pb # TensorFlow GraphDef
36 yolov5s.tflite # TensorFlow Lite
37 yolov5s_edgetpu.tflite # TensorFlow Edge TPU
38 yolov5s_paddle_model # PaddlePaddle
39
40 TensorFlow.js:
41 $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
42 $ npm install
43 $ ln -s ../../yolov5/yolov5s_web_model public/yolov5s_web_model
44 $ npm start
45 """
46
47 import argparse
48 import json
49 import os
50 import platform
51 import re
52 import subprocess
53 import sys
54 import time
55 import warnings
56 from pathlib import Path
57
58 import pandas as pd
59 import torch
60 from torch.utils.mobile_optimizer import optimize_for_mobile
61
62 FILE = Path(__file__).resolve()
63 ROOT = FILE.parents[0] # YOLOv5 root directory
64 if str(ROOT) not in sys.path:
65 sys.path.append(str(ROOT)) # add ROOT to PATH
66 if platform.system() != 'Windows':
67 ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
68
69 from models.experimental import attempt_load
70 from models.yolo import ClassificationModel, Detect, DetectionModel, SegmentationModel
71 from utils.dataloaders import LoadImages
72 from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_version,
73 check_yaml, colorstr, file_size, get_default_args, print_args, url2file, yaml_save)
74 from utils.torch_utils import select_device, smart_inference_mode
75
76 MACOS = platform.system() == 'Darwin' # macOS environment
77
78
79 def export_formats():
80 # YOLOv5 export formats
81 x = [
82 ['PyTorch', '-', '.pt', True, True],
83 ['TorchScript', 'torchscript', '.torchscript', True, True],
84 ['ONNX', 'onnx', '.onnx', True, True],
85 ['OpenVINO', 'openvino', '_openvino_model', True, False],
86 ['TensorRT', 'engine', '.engine', False, True],
87 ['CoreML', 'coreml', '.mlmodel', True, False],
88 ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
89 ['TensorFlow GraphDef', 'pb', '.pb', True, True],
90 ['TensorFlow Lite', 'tflite', '.tflite', True, False],
91 ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
92 ['TensorFlow.js', 'tfjs', '_web_model', False, False],
93 ['PaddlePaddle', 'paddle', '_paddle_model', True, True],]
94 return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
95
96
97 def try_export(inner_func):
98 # YOLOv5 export decorator, i.e. @try_export
99 inner_args = get_default_args(inner_func)
100
101 def outer_func(*args, **kwargs):
102 prefix = inner_args['prefix']
103 try:
104 with Profile() as dt:
105 f, model = inner_func(*args, **kwargs)
106 LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)')
107 return f, model
108 except Exception as e:
109 LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}')
110 return None, None
111
112 return outer_func
113
114
115 @try_export
116 def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
117 # YOLOv5 TorchScript model export
118 LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
119 f = file.with_suffix('.torchscript')
120
121 ts = torch.jit.trace(model, im, strict=False)
122 d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
123 extra_files = {'config.txt': json.dumps(d)} # torch._C.ExtraFilesMap()
124 if optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
125 optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
126 else:
127 ts.save(str(f), _extra_files=extra_files)
128 return f, None
129
130
131 @try_export
132 def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
133 # YOLOv5 ONNX export
134 check_requirements('onnx')
135 import onnx
136
137 LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
138 f = file.with_suffix('.onnx')
139
140 output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0']
141 if dynamic:
142 dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}} # shape(1,3,640,640)
143 if isinstance(model, SegmentationModel):
144 dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85)
145 dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'} # shape(1,32,160,160)
146 elif isinstance(model, DetectionModel):
147 dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85)
148
149 torch.onnx.export(
150 model.cpu() if dynamic else model, # --dynamic only compatible with cpu
151 im.cpu() if dynamic else im,
152 f,
153 verbose=False,
154 opset_version=opset,
155 do_constant_folding=True,
156 input_names=['images'],
157 output_names=output_names,
158 dynamic_axes=dynamic or None)
159
160 # Checks
161 model_onnx = onnx.load(f) # load onnx model
162 onnx.checker.check_model(model_onnx) # check onnx model
163
164 # Metadata
165 d = {'stride': int(max(model.stride)), 'names': model.names}
166 for k, v in d.items():
167 meta = model_onnx.metadata_props.add()
168 meta.key, meta.value = k, str(v)
169 onnx.save(model_onnx, f)
170
171 # Simplify
172 if simplify:
173 try:
174 cuda = torch.cuda.is_available()
175 check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
176 import onnxsim
177
178 LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
179 model_onnx, check = onnxsim.simplify(model_onnx)
180 assert check, 'assert check failed'
181 onnx.save(model_onnx, f)
182 except Exception as e:
183 LOGGER.info(f'{prefix} simplifier failure: {e}')
184 return f, model_onnx
185
186
187 @try_export
188 def export_openvino(file, metadata, half, prefix=colorstr('OpenVINO:')):
189 # YOLOv5 OpenVINO export
190 check_requirements('openvino-dev') # requires openvino-dev: https://pypi.org/project/openvino-dev/
191 import openvino.inference_engine as ie
192
193 LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
194 f = str(file).replace('.pt', f'_openvino_model{os.sep}')
195
196 cmd = f"mo --input_model {file.with_suffix('.onnx')} --output_dir {f} --data_type {'FP16' if half else 'FP32'}"
197 subprocess.run(cmd.split(), check=True, env=os.environ) # export
198 yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata) # add metadata.yaml
199 return f, None
200
201
202 @try_export
203 def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')):
204 # YOLOv5 Paddle export
205 check_requirements(('paddlepaddle', 'x2paddle'))
206 import x2paddle
207 from x2paddle.convert import pytorch2paddle
208
209 LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...')
210 f = str(file).replace('.pt', f'_paddle_model{os.sep}')
211
212 pytorch2paddle(module=model, save_dir=f, jit_type='trace', input_examples=[im]) # export
213 yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata) # add metadata.yaml
214 return f, None
215
216
217 @try_export
218 def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
219 # YOLOv5 CoreML export
220 check_requirements('coremltools')
221 import coremltools as ct
222
223 LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
224 f = file.with_suffix('.mlmodel')
225
226 ts = torch.jit.trace(model, im, strict=False) # TorchScript model
227 ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
228 bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None)
229 if bits < 32:
230 if MACOS: # quantization only supported on macOS
231 with warnings.catch_warnings():
232 warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress numpy==1.20 float warning
233 ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
234 else:
235 print(f'{prefix} quantization only supported on macOS, skipping...')
236 ct_model.save(f)
237 return f, ct_model
238
239
240 @try_export
241 def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
242 # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
243 assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
244 try:
245 import tensorrt as trt
246 except Exception:
247 if platform.system() == 'Linux':
248 check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com')
249 import tensorrt as trt
250
251 if trt.__version__[0] == '7': # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
252 grid = model.model[-1].anchor_grid
253 model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
254 export_onnx(model, im, file, 12, dynamic, simplify) # opset 12
255 model.model[-1].anchor_grid = grid
256 else: # TensorRT >= 8
257 check_version(trt.__version__, '8.0.0', hard=True) # require tensorrt>=8.0.0
258 export_onnx(model, im, file, 12, dynamic, simplify) # opset 12
259 onnx = file.with_suffix('.onnx')
260
261 LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
262 assert onnx.exists(), f'failed to export ONNX file: {onnx}'
263 f = file.with_suffix('.engine') # TensorRT engine file
264 logger = trt.Logger(trt.Logger.INFO)
265 if verbose:
266 logger.min_severity = trt.Logger.Severity.VERBOSE
267
268 builder = trt.Builder(logger)
269 config = builder.create_builder_config()
270 config.max_workspace_size = workspace * 1 << 30
271 # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice
272
273 flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
274 network = builder.create_network(flag)
275 parser = trt.OnnxParser(network, logger)
276 if not parser.parse_from_file(str(onnx)):
277 raise RuntimeError(f'failed to load ONNX file: {onnx}')
278
279 inputs = [network.get_input(i) for i in range(network.num_inputs)]
280 outputs = [network.get_output(i) for i in range(network.num_outputs)]
281 for inp in inputs:
282 LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
283 for out in outputs:
284 LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')
285
286 if dynamic:
287 if im.shape[0] <= 1:
288 LOGGER.warning(f"{prefix} WARNING ⚠️ --dynamic model requires maximum --batch-size argument")
289 profile = builder.create_optimization_profile()
290 for inp in inputs:
291 profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)
292 config.add_optimization_profile(profile)
293
294 LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine as {f}')
295 if builder.platform_has_fast_fp16 and half:
296 config.set_flag(trt.BuilderFlag.FP16)
297 with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
298 t.write(engine.serialize())
299 return f, None
300
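# A hedged sketch (illustrative values only) of the (min, opt, max) shapes the
# dynamic optimization profile above registers, for a hypothetical
# --batch-size 8, 640x640 export:
def _trt_profile_shapes_example():
    im_shape = (8, 3, 640, 640)                            # traced input shape
    min_shape = (1, *im_shape[1:])                         # smallest batch the engine accepts
    opt_shape = (max(1, im_shape[0] // 2), *im_shape[1:])  # batch TensorRT tunes kernels for
    max_shape = im_shape                                   # largest batch the engine accepts
    return min_shape, opt_shape, max_shape                 # (1,3,640,640) (4,3,640,640) (8,3,640,640)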
301
302 @try_export
303 def export_saved_model(model,
304 im,
305 file,
306 dynamic,
307 tf_nms=False,
308 agnostic_nms=False,
309 topk_per_class=100,
310 topk_all=100,
311 iou_thres=0.45,
312 conf_thres=0.25,
313 keras=False,
314 prefix=colorstr('TensorFlow SavedModel:')):
315 # YOLOv5 TensorFlow SavedModel export
316 try:
317 import tensorflow as tf
318 except Exception:
319 check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}")
320 import tensorflow as tf
321 from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
322
323 from models.tf import TFModel
324
325 LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
326 f = str(file).replace('.pt', '_saved_model')
327 batch_size, ch, *imgsz = list(im.shape) # BCHW
328
329 tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
330 im = tf.zeros((batch_size, *imgsz, ch)) # BHWC order for TensorFlow
331 _ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
332 inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
333 outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
334 keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
335 keras_model.trainable = False
336 keras_model.summary()
337 if keras:
338 keras_model.save(f, save_format='tf')
339 else:
340 spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)
341 m = tf.function(lambda x: keras_model(x)) # full model
342 m = m.get_concrete_function(spec)
343 frozen_func = convert_variables_to_constants_v2(m)
344 tfm = tf.Module()
345 tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x), [spec])
346 tfm.__call__(im)
347 tf.saved_model.save(tfm,
348 f,
349 options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version(
350 tf.__version__, '2.6') else tf.saved_model.SaveOptions())
351 return f, keras_model
352
353
354 @try_export
355 def export_pb(keras_model, file, prefix=colorstr('TensorFlow GraphDef:')):
356 # YOLOv5 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow
357 import tensorflow as tf
358 from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
359
360 LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
361 f = file.with_suffix('.pb')
362
363 m = tf.function(lambda x: keras_model(x)) # full model
364 m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype))
365 frozen_func = convert_variables_to_constants_v2(m)
366 frozen_func.graph.as_graph_def()
367 tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
368 return f, None
369
370
371 @try_export
372 def export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
373 # YOLOv5 TensorFlow Lite export
374 import tensorflow as tf
375
376 LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
377 batch_size, ch, *imgsz = list(im.shape) # BCHW
378 f = str(file).replace('.pt', '-fp16.tflite')
379
380 converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
381 converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
382 converter.target_spec.supported_types = [tf.float16]
383 converter.optimizations = [tf.lite.Optimize.DEFAULT]
384 if int8:
385 from models.tf import representative_dataset_gen
386 dataset = LoadImages(check_dataset(check_yaml(data))['train'], img_size=imgsz, auto=False)
387 converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
388 converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
389 converter.target_spec.supported_types = []
390 converter.inference_input_type = tf.uint8 # or tf.int8
391 converter.inference_output_type = tf.uint8 # or tf.int8
392 converter.experimental_new_quantizer = True
393 f = str(file).replace('.pt', '-int8.tflite')
394 if nms or agnostic_nms:
395 converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS)
396
397 tflite_model = converter.convert()
398 open(f, "wb").write(tflite_model)
399 return f, None
400
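# A hedged sketch of consuming the exported *.tflite file with the stock TFLite
# Interpreter; the model path is hypothetical and mirrors the '-fp16.tflite'
# suffix produced above.
def _tflite_inference_example():
    import numpy as np
    import tensorflow as tf
    interpreter = tf.lite.Interpreter(model_path='best-fp16.tflite')  # hypothetical path
    interpreter.allocate_tensors()
    inp = interpreter.get_input_details()[0]
    interpreter.set_tensor(inp['index'], np.zeros(inp['shape'], dtype=inp['dtype']))
    interpreter.invoke()
    return interpreter.get_tensor(interpreter.get_output_details()[0]['index'])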
401
402 @try_export
403 def export_edgetpu(file, prefix=colorstr('Edge TPU:')):
404 # YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
405 cmd = 'edgetpu_compiler --version'
406 help_url = 'https://coral.ai/docs/edgetpu/compiler/'
407 assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
408 if subprocess.run(f'{cmd} >/dev/null', shell=True).returncode != 0:
409 LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
410 sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0 # sudo installed on system
411 for c in (
412 'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
413 'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
414 'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'):
415 subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True)
416 ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]
417
418 LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
419 f = str(file).replace('.pt', '-int8_edgetpu.tflite') # Edge TPU model
420 f_tfl = str(file).replace('.pt', '-int8.tflite') # TFLite model
421
422 cmd = f"edgetpu_compiler -s -d -k 10 --out_dir {file.parent} {f_tfl}"
423 subprocess.run(cmd.split(), check=True)
424 return f, None
425
426
427 @try_export
428 def export_tfjs(file, prefix=colorstr('TensorFlow.js:')):
429 # YOLOv5 TensorFlow.js export
430 check_requirements('tensorflowjs')
431 import tensorflowjs as tfjs
432
433 LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
434 f = str(file).replace('.pt', '_web_model') # js dir
435 f_pb = file.with_suffix('.pb') # *.pb path
436 f_json = f'{f}/model.json' # *.json path
437
438 cmd = f'tensorflowjs_converter --input_format=tf_frozen_model ' \
439 f'--output_node_names=Identity,Identity_1,Identity_2,Identity_3 {f_pb} {f}'
440 subprocess.run(cmd.split())
441
442 json = Path(f_json).read_text()
443 with open(f_json, 'w') as j: # sort JSON Identity_* in ascending order
444 subst = re.sub(
445 r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
446 r'"Identity.?.?": {"name": "Identity.?.?"}, '
447 r'"Identity.?.?": {"name": "Identity.?.?"}, '
448 r'"Identity.?.?": {"name": "Identity.?.?"}}}', r'{"outputs": {"Identity": {"name": "Identity"}, '
449 r'"Identity_1": {"name": "Identity_1"}, '
450 r'"Identity_2": {"name": "Identity_2"}, '
451 r'"Identity_3": {"name": "Identity_3"}}}', json)
452 j.write(subst)
453 return f, None
454
455
456 @smart_inference_mode()
457 def run(
458 data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
459 weights=ROOT / 'yolov5s.pt', # weights path
460 imgsz=(640, 640), # image (height, width)
461 batch_size=1, # batch size
462 device='cpu', # cuda device, i.e. 0 or 0,1,2,3 or cpu
463 include=('torchscript', 'onnx'), # include formats
464 half=False, # FP16 half-precision export
465 inplace=False, # set YOLOv5 Detect() inplace=True
466 keras=False, # use Keras
467 optimize=False, # TorchScript: optimize for mobile
468 int8=False, # CoreML/TF INT8 quantization
469 dynamic=False, # ONNX/TF/TensorRT: dynamic axes
470 simplify=False, # ONNX: simplify model
471 opset=12, # ONNX: opset version
472 verbose=False, # TensorRT: verbose log
473 workspace=4, # TensorRT: workspace size (GB)
474 nms=False, # TF: add NMS to model
475 agnostic_nms=False, # TF: add agnostic NMS to model
476 topk_per_class=100, # TF.js NMS: topk per class to keep
477 topk_all=100, # TF.js NMS: topk for all classes to keep
478 iou_thres=0.45, # TF.js NMS: IoU threshold
479 conf_thres=0.25, # TF.js NMS: confidence threshold
480 ):
481 t = time.time()
482 include = [x.lower() for x in include] # to lowercase
483 fmts = tuple(export_formats()['Argument'][1:]) # --include arguments
484 flags = [x in include for x in fmts]
485 assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {fmts}'
486 jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle = flags # export booleans
487 file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights) # PyTorch weights
488
489 # Load PyTorch model
490 device = select_device(device)
491 if half:
492 assert device.type != 'cpu' or coreml, '--half only compatible with GPU export, i.e. use --device 0'
493 assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
494 model = attempt_load(weights, device=device, inplace=True, fuse=True) # load FP32 model
495
496 # Checks
497 imgsz *= 2 if len(imgsz) == 1 else 1 # expand
498 if optimize:
499 assert device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu'
500
501 # Input
502 gs = int(max(model.stride)) # grid size (max stride)
503 imgsz = [check_img_size(x, gs) for x in imgsz] # verify img_size are gs-multiples
504 im = torch.zeros(batch_size, 3, *imgsz).to(device) # image size(1,3,320,192) BCHW iDetection
505
506 # Update model
507 model.eval()
508 for k, m in model.named_modules():
509 if isinstance(m, Detect):
510 m.inplace = inplace
511 m.dynamic = dynamic
512 m.export = True
513
514 for _ in range(2):
515 y = model(im) # dry runs
516 if half and not coreml:
517 im, model = im.half(), model.half() # to FP16
518 shape = tuple((y[0] if isinstance(y, tuple) else y).shape) # model output shape
519 metadata = {'stride': int(max(model.stride)), 'names': model.names} # model metadata
520 LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)")
521
522 # Exports
523 f = [''] * len(fmts) # exported filenames
524 warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) # suppress TracerWarning
525 if jit: # TorchScript
526 f[0], _ = export_torchscript(model, im, file, optimize)
527 if engine: # TensorRT required before ONNX
528 f[1], _ = export_engine(model, im, file, half, dynamic, simplify, workspace, verbose)
529 if onnx or xml: # OpenVINO requires ONNX
530 f[2], _ = export_onnx(model, im, file, opset, dynamic, simplify)
531 if xml: # OpenVINO
532 f[3], _ = export_openvino(file, metadata, half)
533 if coreml: # CoreML
534 f[4], _ = export_coreml(model, im, file, int8, half)
535 if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats
536 assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.'
537 assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.'
538 f[5], s_model = export_saved_model(model.cpu(),
539 im,
540 file,
541 dynamic,
542 tf_nms=nms or agnostic_nms or tfjs,
543 agnostic_nms=agnostic_nms or tfjs,
544 topk_per_class=topk_per_class,
545 topk_all=topk_all,
546 iou_thres=iou_thres,
547 conf_thres=conf_thres,
548 keras=keras)
549 if pb or tfjs: # pb prerequisite to tfjs
550 f[6], _ = export_pb(s_model, file)
551 if tflite or edgetpu:
552 f[7], _ = export_tflite(s_model, im, file, int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms)
553 if edgetpu:
554 f[8], _ = export_edgetpu(file)
555 if tfjs:
556 f[9], _ = export_tfjs(file)
557 if paddle: # PaddlePaddle
558 f[10], _ = export_paddle(model, im, file, metadata)
559
560 # Finish
561 f = [str(x) for x in f if x] # filter out '' and None
562 if any(f):
563 cls, det, seg = (isinstance(model, x) for x in (ClassificationModel, DetectionModel, SegmentationModel)) # type
564 dir = Path('segment' if seg else 'classify' if cls else '')
565 h = '--half' if half else '' # --half FP16 inference arg
566 s = "# WARNING ⚠️ ClassificationModel not yet supported for PyTorch Hub AutoShape inference" if cls else \
567 "# WARNING ⚠️ SegmentationModel not yet supported for PyTorch Hub AutoShape inference" if seg else ''
568 LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)'
569 f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
570 f"\nDetect: python {dir / ('detect.py' if det else 'predict.py')} --weights {f[-1]} {h}"
571 f"\nValidate: python {dir / 'val.py'} --weights {f[-1]} {h}"
572 f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}') {s}"
573 f"\nVisualize: https://netron.app")
574 return f # return list of exported files/dirs
575
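# A hedged sketch of calling run() programmatically rather than via the CLI;
# the weights path mirrors this repo's parse_opt() default and may need adjusting.
def _run_example():
    return run(weights='runs/train/exp/weights/best.pt', include=('onnx',), imgsz=(640, 640), device='cpu')
    # -> list of exported files, e.g. ['runs/train/exp/weights/best.onnx']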
576
577 def parse_opt():
578 parser = argparse.ArgumentParser()
579 parser.add_argument('--data', type=str, default=ROOT / 'data/VOC.yaml', help='dataset.yaml path')
580 parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp/weights/best.pt', help='model.pt path(s)')
581 parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
582 parser.add_argument('--batch-size', type=int, default=1, help='batch size')
583 parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
584 parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
585 parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True')
586 parser.add_argument('--keras', action='store_true', help='TF: use Keras')
587 parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile')
588 parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
589 parser.add_argument('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes')
590 parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
591 parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version')
592 parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
593 parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
594 parser.add_argument('--nms', action='store_true', help='TF: add NMS to model')
595 parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model')
596 parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
597 parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
598 parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
599 parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
600 parser.add_argument(
601 '--include',
602 nargs='+',
603 default=['torchscript'],
604 help='torchscript, onnx, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle')
605 opt = parser.parse_args()
606 print_args(vars(opt))
607 return opt
608
609
610 def main(opt):
611 for opt.weights in (opt.weights if isinstance(opt.weights, list) else [opt.weights]):
612 run(**vars(opt))
613
614
615 if __name__ == "__main__":
616 opt = parse_opt()
617 main(opt)
1 import os
2
3 import cv2
4 import numpy as np
5 from sklearn.metrics import precision_score, recall_score, confusion_matrix
6
7
8 def iou(box, boxes):
9 x1, y1, x2, y2 = box
10 x1s, y1s, x2s, y2s = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
11 area1 = abs(x2 - x1) * abs(y2 - y1)
12 areas = (x2s - x1s) * (y2s - y1s)
13 xx1 = np.maximum(x1, x1s)
14 yy1 = np.maximum(y1, y1s)
15 xx2 = np.minimum(x2, x2s)
16 yy2 = np.minimum(y2, y2s)
17 inner = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)  # clamp each side separately so disjoint boxes give zero overlap
18 return inner / (area1 + areas - inner)
19
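# A hedged worked example of iou() with illustrative boxes (not called anywhere):
# two 10x10 boxes overlapping on a 5x5 patch give IoU = 25 / (100 + 100 - 25).
def _iou_example():
    box = np.array([0, 0, 10, 10])
    boxes = np.array([[5, 5, 15, 15]])
    return iou(box, boxes)  # -> array([0.14285714])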
20
21 def get_evaluate_score(true_image_path, true_label_path, predict_label_path, threshold):
22 true_labels = os.listdir(true_label_path)
23 predict_labels = os.listdir(predict_label_path)
24 targets, predicts = [], []
25 for label in true_labels:
26 true_label = open(os.path.join(true_label_path, label)).readlines()
27 img = cv2.imread(os.path.join(true_image_path, label.replace('.txt', '.jpg')))
28 h, w, c = img.shape
29 if len(true_label) == 0:
30 targets.append(0)
31 if label in predict_labels:
32 predicts.append(1)
33 else:
34 predicts.append(0)
35
36 else:
37 targets.append(1)
38 if label not in predict_labels:
39 predicts.append(0)
40 else:
41 tmp = 0
42 predict_label = open(os.path.join(predict_label_path, label)).readlines()
43 boxes = []
44 for pl in predict_label:
45 cls, x1, y1, w1, h1 = [float(i) for i in pl.strip().split(' ')]
46 x1, y1, w1, h1 = int(x1 * w), int(y1 * h), int(w1 * w), int(h1 * h)
47 xx1, yy1, xx2, yy2 = x1 - w1 // 2, y1 - h1 // 2, x1 + w1 // 2, y1 + h1 // 2
48 boxes.append([xx1, yy1, xx2, yy2])
49 for tl in true_label:
50 cls, x1, y1, w1, h1 = [float(i) for i in tl.strip().split(' ')]
51 x1, y1, w1, h1 = int(x1 * w), int(y1 * h), int(w1 * w), int(h1 * h)
52 xx1, yy1, xx2, yy2 = x1 - w1 // 2, y1 - h1 // 2, x1 + w1 // 2, y1 + h1 // 2
53 box1 = [xx1, yy1, xx2, yy2]
54 inner_score = iou(np.array(box1), np.array(boxes))
55 if max(inner_score) > threshold:
56 tmp = 1
57 predicts.append(1)
58 break
59 if tmp == 0:
60 predicts.append(0)
61 p = precision_score(targets, predicts)
62 r = recall_score(targets, predicts)
63 conf = confusion_matrix(targets, predicts)
64 print('precision:', p)
65 print('recall:', r)
66 print(conf)
67 print(' predicted ')
68 print(' authentic tampered ')
69 print(f'actual authentic \t\t{conf[0, 0]} \t\t{conf[0, 1]}')
70 print(f'       tampered \t\t{conf[1, 0]} \t\t\t{conf[1, 1]}')
71 print(f'authentic precision:{conf[0, 0] / (conf[0, 0] + conf[1, 0])}\trecall:{conf[0, 0] / (conf[0, 0] + conf[0, 1])}')
72 print(f'tampered precision:{conf[1, 1] / (conf[0, 1] + conf[1, 1])}\trecall:{conf[1, 1] / (conf[1, 0] + conf[1, 1])}')
73 if __name__ == '__main__':
74 true_image_path = '/data/situ_invoice_bill_data/qfs_train_val_data/gongshang/images/val'
75 true_label_path = '/data/situ_invoice_bill_data/qfs_train_val_data/gongshang/labels/val'
76 predict_label_path = '/home/situ/qfs/invoice_tamper/09_project/project/tamper_det/runs/detect/exp4/labels'
77 threshold = 0.1
78 get_evaluate_score(true_image_path, true_label_path, predict_label_path, threshold)
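
# A hedged worked example of the image-level metrics printed above, using a
# hypothetical confusion matrix (rows = actual, cols = predicted):
def _metrics_example():
    conf = np.array([[90, 10], [5, 95]])
    tampered_precision = conf[1, 1] / (conf[0, 1] + conf[1, 1])  # 95 / 105 ≈ 0.905
    tampered_recall = conf[1, 1] / (conf[1, 0] + conf[1, 1])     # 95 / 100 = 0.95
    return tampered_precision, tampered_recall
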
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5
4
5 Usage:
6 import torch
7 model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
8 model = torch.hub.load('ultralytics/yolov5:master', 'custom', 'path/to/yolov5s.onnx') # custom model from branch
9 """
10
11 import torch
12
13
14 def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
15 """Creates or loads a YOLOv5 model
16
17 Arguments:
18 name (str): model name 'yolov5s' or path 'path/to/best.pt'
19 pretrained (bool): load pretrained weights into the model
20 channels (int): number of input channels
21 classes (int): number of model classes
22 autoshape (bool): apply YOLOv5 .autoshape() wrapper to model
23 verbose (bool): print all information to screen
24 device (str, torch.device, None): device to use for model parameters
25
26 Returns:
27 YOLOv5 model
28 """
29 from pathlib import Path
30
31 from models.common import AutoShape, DetectMultiBackend
32 from models.experimental import attempt_load
33 from models.yolo import ClassificationModel, DetectionModel, SegmentationModel
34 from utils.downloads import attempt_download
35 from utils.general import LOGGER, check_requirements, intersect_dicts, logging
36 from utils.torch_utils import select_device
37
38 if not verbose:
39 LOGGER.setLevel(logging.WARNING)
40 check_requirements(exclude=('ipython', 'opencv-python', 'tensorboard', 'thop'))
41 name = Path(name)
42 path = name.with_suffix('.pt') if name.suffix == '' and not name.is_dir() else name # checkpoint path
43 try:
44 device = select_device(device)
45 if pretrained and channels == 3 and classes == 80:
46 try:
47 model = DetectMultiBackend(path, device=device, fuse=autoshape) # detection model
48 if autoshape:
49 if model.pt and isinstance(model.model, ClassificationModel):
50 LOGGER.warning('WARNING ⚠️ YOLOv5 ClassificationModel is not yet AutoShape compatible. '
51 'You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224).')
52 elif model.pt and isinstance(model.model, SegmentationModel):
53 LOGGER.warning('WARNING ⚠️ YOLOv5 SegmentationModel is not yet AutoShape compatible. '
54 'You will not be able to run inference with this model.')
55 else:
56 model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS
57 except Exception:
58 model = attempt_load(path, device=device, fuse=False) # arbitrary model
59 else:
60 cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path
61 model = DetectionModel(cfg, channels, classes) # create model
62 if pretrained:
63 ckpt = torch.load(attempt_download(path), map_location=device) # load
64 csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
65 csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect
66 model.load_state_dict(csd, strict=False) # load
67 if len(ckpt['model'].names) == classes:
68 model.names = ckpt['model'].names # set class names attribute
69 if not verbose:
70 LOGGER.setLevel(logging.INFO) # reset to default
71 return model.to(device)
72
73 except Exception as e:
74 help_url = 'https://github.com/ultralytics/yolov5/issues/36'
75 s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.'
76 raise Exception(s) from e
77
78
79 def custom(path='path/to/model.pt', autoshape=True, _verbose=True, device=None):
80 # YOLOv5 custom or local model
81 return _create(path, autoshape=autoshape, verbose=_verbose, device=device)
82
83
84 def yolov5n(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
85 # YOLOv5-nano model https://github.com/ultralytics/yolov5
86 return _create('yolov5n', pretrained, channels, classes, autoshape, _verbose, device)
87
88
89 def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
90 # YOLOv5-small model https://github.com/ultralytics/yolov5
91 return _create('yolov5s', pretrained, channels, classes, autoshape, _verbose, device)
92
93
94 def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
95 # YOLOv5-medium model https://github.com/ultralytics/yolov5
96 return _create('yolov5m', pretrained, channels, classes, autoshape, _verbose, device)
97
98
99 def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
100 # YOLOv5-large model https://github.com/ultralytics/yolov5
101 return _create('yolov5l', pretrained, channels, classes, autoshape, _verbose, device)
102
103
104 def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
105 # YOLOv5-xlarge model https://github.com/ultralytics/yolov5
106 return _create('yolov5x', pretrained, channels, classes, autoshape, _verbose, device)
107
108
109 def yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
110 # YOLOv5-nano-P6 model https://github.com/ultralytics/yolov5
111 return _create('yolov5n6', pretrained, channels, classes, autoshape, _verbose, device)
112
113
114 def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
115 # YOLOv5-small-P6 model https://github.com/ultralytics/yolov5
116 return _create('yolov5s6', pretrained, channels, classes, autoshape, _verbose, device)
117
118
119 def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
120 # YOLOv5-medium-P6 model https://github.com/ultralytics/yolov5
121 return _create('yolov5m6', pretrained, channels, classes, autoshape, _verbose, device)
122
123
124 def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
125 # YOLOv5-large-P6 model https://github.com/ultralytics/yolov5
126 return _create('yolov5l6', pretrained, channels, classes, autoshape, _verbose, device)
127
128
129 def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
130 # YOLOv5-xlarge-P6 model https://github.com/ultralytics/yolov5
131 return _create('yolov5x6', pretrained, channels, classes, autoshape, _verbose, device)
132
133
134 if __name__ == '__main__':
135 import argparse
136 from pathlib import Path
137
138 import numpy as np
139 from PIL import Image
140
141 from utils.general import cv2, print_args
142
143 # Argparser
144 parser = argparse.ArgumentParser()
145 parser.add_argument('--model', type=str, default='yolov5s', help='model name')
146 opt = parser.parse_args()
147 print_args(vars(opt))
148
149 # Model
150 model = _create(name=opt.model, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)
151 # model = custom(path='path/to/model.pt') # custom
152
153 # Images
154 imgs = [
155 'data/images/zidane.jpg', # filename
156 Path('data/images/zidane.jpg'), # Path
157 'https://ultralytics.com/images/zidane.jpg', # URI
158 cv2.imread('data/images/bus.jpg')[:, :, ::-1], # OpenCV
159 Image.open('data/images/bus.jpg'), # PIL
160 np.zeros((320, 640, 3))] # numpy
161
162 # Inference
163 results = model(imgs, size=320) # batched inference
164
165 # Results
166 results.print()
167 results.save()
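
# A hedged follow-up, assuming the AutoShape Detections API: per-image results
# as a pandas DataFrame with xmin, ymin, xmax, ymax, confidence, class, name.
det = results.pandas().xyxy[0]
print(det[det['confidence'] > 0.5])
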
1 import copy
2 import os
3 import sys
4 from pathlib import Path
5 import numpy as np
6 import torch
7
9
10 FILE = Path(__file__).resolve()
11 ROOT = FILE.parents[0] # YOLOv5 root directory
12 if str(ROOT) not in sys.path:
13 sys.path.append(str(ROOT)) # add ROOT to PATH
14 ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
15 from models.common import DetectMultiBackend
16 from utils.general import (check_img_size, cv2, non_max_suppression, scale_boxes)
17 from utils.torch_utils import select_device, smart_inference_mode
18 from models.yolov5_config import config
19
20 classes = ['tampered']
21
22
23 def gen_result_dict(boxes, label_list=[], std=False):
24 result = {
25 "error_code": 1,
26 "result": []
27 }
28 rs_box = {
29 "class": '',
30 "score": 0,
31 "left": 0,
32 "top": 0,
33 "width": 0,
34 "height": 0
35 }
36
37 if not label_list:
38 label_list = classes
39
40 for box in boxes:
41 result['error_code'] = 0
42 box_dict = copy.deepcopy(rs_box)
43 if std:
44 box_dict['class'] = str(int(box[-1]))
45 else:
46 box_dict['class'] = label_list[int(box[-1])]
47
48 box_dict['left'] = int(round(box[0], 0))
49 box_dict['top'] = int(round(box[1], 0))
50 box_dict['width'] = int(round(box[2], 0) - round(box[0], 0))
51 box_dict['height'] = int(round(box[3], 0) - (round(box[1], 0)))
52 box_dict['score'] = box[-2]
53 result['result'].append(box_dict)
54 return result
55
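# A hedged example of the dict gen_result_dict() builds for one detection,
# given an illustrative xyxy+score+class box (not called anywhere):
def _gen_result_dict_example():
    boxes = [[10, 20, 110, 220, 0.9, 0]]  # x1, y1, x2, y2, score, class index
    return gen_result_dict(boxes)
    # -> {'error_code': 0, 'result': [{'class': 'tampered', 'score': 0.9,
    #     'left': 10, 'top': 20, 'width': 100, 'height': 200}]}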
56
57 def keep_resize_padding(image):
58 h, w, c = image.shape
59 if h >= w:
60 pad1 = (h - w) // 2
61 pad2 = h - w - pad1
62 p1 = np.ones((h, pad1, 3)) * 114.0
63 p2 = np.ones((h, pad2, 3)) * 114.0
64 p1, p2 = p1.astype(np.uint8), p2.astype(np.uint8)
65 new_image = np.hstack((p1, image, p2))
66 else:
67 pad1 = (w - h) // 2
68 pad2 = w - h - pad1
69 p1 = np.ones((pad1, w, 3)) * 114.0
70 p2 = np.ones((pad2, w, 3)) * 114.0
71 p1, p2 = p1.astype(np.uint8), p2.astype(np.uint8)
72 new_image = np.vstack((p1, image, p2))
73 new_image = cv2.resize(new_image, (640, 640))
74 return new_image
75
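# A hedged shape check for keep_resize_padding(): a tall 100x50 input is padded
# to a 100x100 square with gray (114) borders, then resized to 640x640.
def _keep_resize_padding_example():
    img = np.zeros((100, 50, 3), dtype=np.uint8)
    out = keep_resize_padding(img)
    assert out.shape == (640, 640, 3)
    return out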
76
77 class Yolov5:
78 def __init__(self, cfg=None):
79 self.cfg = cfg
80 self.device = select_device(self.cfg.device)
81 self.model = DetectMultiBackend(self.cfg.weights, device=self.device, dnn=False, data=self.cfg.data, fp16=False)
82
83 def detect(self, image):
84 image0 = image.copy()
85 stride, names, pt = self.model.stride, self.model.names, self.model.pt
86 imgsz = check_img_size(self.cfg.imgsz, s=stride) # check image size
87 # Dataloader
88 bs = 1 # batch_size
96 im = keep_resize_padding(image)
97
99 im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
100 im = np.ascontiguousarray(im) # contiguous
101 # Run inference
102 self.model.warmup(imgsz=(1 if pt or self.model.triton else bs, 3, *imgsz)) # warmup
103 im = torch.from_numpy(im).to(self.model.device)
104 im = im.half() if self.model.fp16 else im.float() # uint8 to fp16/32
105 im /= 255 # 0 - 255 to 0.0 - 1.0
106
107 if len(im.shape) == 3:
108 im = im[None] # expand for batch dim
109 # Inference
110 pred = self.model(im, augment=False, visualize=False)
113 # NMS
114 pred = non_max_suppression(pred, self.cfg.conf_thres, self.cfg.iou_thres, None, False, max_det=self.cfg.max_det)
115 det = pred[0]
117 det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], image0.shape).round()  # rescale boxes to original image size
118 result = gen_result_dict(det.cpu().numpy().tolist())
119 return result
120
121 def plot(self, image, boxes):
122 for box in boxes:
123 cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)  # pt1, pt2 from xyxy box
124 return image
125
126
127 if __name__ == "__main__":
128 img = cv2.imread(
129 '/data/situ_invoice_bill_data/qfs_train_val_data/train_data/authentic/gongshang/images/val/_1594890232.0110397page_11_img_0_name_au_gongshang.jpg')
130 detector = Yolov5(config)
131 result = detector.detect(img)
132 for i in result['result']:
133 position = list(i.values())[2:]
134 print(position)
135 cv2.rectangle(img, (position[0], position[1]), (position[0] + position[2], position[1] + position[3]),
136 (0, 0, 255))
137 cv2.imshow('w', img)
138 cv2.waitKey(0)
139 print(result)
File mode changed
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Common modules
4 """
5
6 import json
7 import math
8 import platform
9 import warnings
10 from collections import OrderedDict, namedtuple
11 from copy import copy
12 from pathlib import Path
13 from urllib.parse import urlparse
14
15 import cv2
16 import numpy as np
17 import pandas as pd
18 import requests
19 import torch
20 import torch.nn as nn
21 from PIL import Image
22 from torch.cuda import amp
23
24 from utils.dataloaders import exif_transpose, letterbox
25 from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
26 increment_path, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy, xyxy2xywh,
27 yaml_load)
28 from utils.plots import Annotator, colors, save_one_box
29 from utils.torch_utils import copy_attr, smart_inference_mode
30
31
32 def autopad(k, p=None, d=1): # kernel, padding, dilation
33 # Pad to 'same' shape outputs
34 if d > 1:
35 k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
36 if p is None:
37 p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
38 return p
39
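# Hedged sanity checks for autopad() with illustrative kernels (not called anywhere):
def _autopad_example():
    assert autopad(3) == 1       # 3x3 kernel, stride 1 -> pad 1 keeps spatial size
    assert autopad(5) == 2
    assert autopad(3, d=2) == 2  # dilation 2 makes the effective kernel 5x5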
40
41 class Conv(nn.Module):
42 # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
43 default_act = nn.SiLU() # default activation
44
45 def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
46 super().__init__()
47 self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
48 self.bn = nn.BatchNorm2d(c2)
49 self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
50
51 def forward(self, x):
52 return self.act(self.bn(self.conv(x)))
53
54 def forward_fuse(self, x):
55 return self.act(self.conv(x))
56
57
58 class DWConv(Conv):
59 # Depth-wise convolution
60 def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
61 super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
62
63
64 class DWConvTranspose2d(nn.ConvTranspose2d):
65 # Depth-wise transpose convolution
66 def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
67 super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
68
69
70 class TransformerLayer(nn.Module):
71 # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
72 def __init__(self, c, num_heads):
73 super().__init__()
74 self.q = nn.Linear(c, c, bias=False)
75 self.k = nn.Linear(c, c, bias=False)
76 self.v = nn.Linear(c, c, bias=False)
77 self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
78 self.fc1 = nn.Linear(c, c, bias=False)
79 self.fc2 = nn.Linear(c, c, bias=False)
80
81 def forward(self, x):
82 x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
83 x = self.fc2(self.fc1(x)) + x
84 return x
85
86
87 class TransformerBlock(nn.Module):
88 # Vision Transformer https://arxiv.org/abs/2010.11929
89 def __init__(self, c1, c2, num_heads, num_layers):
90 super().__init__()
91 self.conv = None
92 if c1 != c2:
93 self.conv = Conv(c1, c2)
94 self.linear = nn.Linear(c2, c2) # learnable position embedding
95 self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
96 self.c2 = c2
97
98 def forward(self, x):
99 if self.conv is not None:
100 x = self.conv(x)
101 b, _, w, h = x.shape
102 p = x.flatten(2).permute(2, 0, 1)
103 return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
104
105
106 class Bottleneck(nn.Module):
107 # Standard bottleneck
108 def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
109 super().__init__()
110 c_ = int(c2 * e) # hidden channels
111 self.cv1 = Conv(c1, c_, 1, 1)
112 self.cv2 = Conv(c_, c2, 3, 1, g=g)
113 self.add = shortcut and c1 == c2
114
115 def forward(self, x):
116 return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
117
118
119 class BottleneckCSP(nn.Module):
120 # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
121 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
122 super().__init__()
123 c_ = int(c2 * e) # hidden channels
124 self.cv1 = Conv(c1, c_, 1, 1)
125 self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
126 self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
127 self.cv4 = Conv(2 * c_, c2, 1, 1)
128 self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
129 self.act = nn.SiLU()
130 self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
131
132 def forward(self, x):
133 y1 = self.cv3(self.m(self.cv1(x)))
134 y2 = self.cv2(x)
135 return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
136
137
138 class CrossConv(nn.Module):
139 # Cross Convolution Downsample
140 def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
141 # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
142 super().__init__()
143 c_ = int(c2 * e) # hidden channels
144 self.cv1 = Conv(c1, c_, (1, k), (1, s))
145 self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
146 self.add = shortcut and c1 == c2
147
148 def forward(self, x):
149 return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
150
151
152 class C3(nn.Module):
153 # CSP Bottleneck with 3 convolutions
154 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
155 super().__init__()
156 c_ = int(c2 * e) # hidden channels
157 self.cv1 = Conv(c1, c_, 1, 1)
158 self.cv2 = Conv(c1, c_, 1, 1)
159 self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
160 self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
161
162 def forward(self, x):
163 return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
164
165
166 class C3x(C3):
167 # C3 module with cross-convolutions
168 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
169 super().__init__(c1, c2, n, shortcut, g, e)
170 c_ = int(c2 * e)
171 self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
172
173
174 class C3TR(C3):
175 # C3 module with TransformerBlock()
176 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
177 super().__init__(c1, c2, n, shortcut, g, e)
178 c_ = int(c2 * e)
179 self.m = TransformerBlock(c_, c_, 4, n)
180
181
182 class C3SPP(C3):
183 # C3 module with SPP()
184 def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
185 super().__init__(c1, c2, n, shortcut, g, e)
186 c_ = int(c2 * e)
187 self.m = SPP(c_, c_, k)
188
189
190 class C3Ghost(C3):
191 # C3 module with GhostBottleneck()
192 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
193 super().__init__(c1, c2, n, shortcut, g, e)
194 c_ = int(c2 * e) # hidden channels
195 self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
196
197
198 class SPP(nn.Module):
199 # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
200 def __init__(self, c1, c2, k=(5, 9, 13)):
201 super().__init__()
202 c_ = c1 // 2 # hidden channels
203 self.cv1 = Conv(c1, c_, 1, 1)
204 self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
205 self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
206
207 def forward(self, x):
208 x = self.cv1(x)
209 with warnings.catch_warnings():
210 warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
211 return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
212
213
214 class SPPF(nn.Module):
215 # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
216 def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
217 super().__init__()
218 c_ = c1 // 2 # hidden channels
219 self.cv1 = Conv(c1, c_, 1, 1)
220 self.cv2 = Conv(c_ * 4, c2, 1, 1)
221 self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
222
223 def forward(self, x):
224 x = self.cv1(x)
225 with warnings.catch_warnings():
226 warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
227 y1 = self.m(x)
228 y2 = self.m(y1)
229 return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
230
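# A hedged check of the SPPF design claim: two chained 5x5 max-pools equal one
# 9x9 pool, and three equal one 13x13, so SPPF reproduces SPP(k=(5, 9, 13))
# outputs with fewer operations.
def _sppf_equivalence_example():
    x = torch.randn(1, 8, 16, 16)
    m5, m9 = nn.MaxPool2d(5, 1, 2), nn.MaxPool2d(9, 1, 4)
    assert torch.equal(m5(m5(x)), m9(x))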
231
232 class Focus(nn.Module):
233 # Focus wh information into c-space
234 def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
235 super().__init__()
236 self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
237 # self.contract = Contract(gain=2)
238
239 def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
240 return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
241 # return self.conv(self.contract(x))
242
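# A hedged shape check for the Focus slicing above: (b, c, h, w) becomes
# (b, 4c, h/2, w/2) by stacking the four pixel-parity sub-grids.
def _focus_slice_example():
    x = torch.randn(1, 3, 8, 8)
    y = torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)
    assert y.shape == (1, 12, 4, 4)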
243
244 class GhostConv(nn.Module):
245 # Ghost Convolution https://github.com/huawei-noah/ghostnet
246 def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
247 super().__init__()
248 c_ = c2 // 2 # hidden channels
249 self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
250 self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
251
252 def forward(self, x):
253 y = self.cv1(x)
254 return torch.cat((y, self.cv2(y)), 1)
255
256
257 class GhostBottleneck(nn.Module):
258 # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
259 def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
260 super().__init__()
261 c_ = c2 // 2
262 self.conv = nn.Sequential(
263 GhostConv(c1, c_, 1, 1), # pw
264 DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
265 GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
266 self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
267 act=False)) if s == 2 else nn.Identity()
268
269 def forward(self, x):
270 return self.conv(x) + self.shortcut(x)
271
272
273 class Contract(nn.Module):
274 # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
275 def __init__(self, gain=2):
276 super().__init__()
277 self.gain = gain
278
279 def forward(self, x):
280 b, c, h, w = x.size() # assert h % s == 0 and w % s == 0, 'indivisible gain'
281 s = self.gain
282 x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
283 x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
284 return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
285
286
287 class Expand(nn.Module):
288 # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
289 def __init__(self, gain=2):
290 super().__init__()
291 self.gain = gain
292
293 def forward(self, x):
294 b, c, h, w = x.size() # assert c % s ** 2 == 0, 'indivisible gain'
295 s = self.gain
296 x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
297 x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
298 return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
299
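# Hedged shape checks for Contract/Expand at gain 2, which invert each other's shapes:
def _contract_expand_example():
    x = torch.randn(1, 64, 80, 80)
    y = Contract(gain=2)(x)
    assert y.shape == (1, 256, 40, 40)
    assert Expand(gain=2)(y).shape == x.shape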
300
301 class Concat(nn.Module):
302 # Concatenate a list of tensors along dimension
303 def __init__(self, dimension=1):
304 super().__init__()
305 self.d = dimension
306
307 def forward(self, x):
308 return torch.cat(x, self.d)
309
310
311 class DetectMultiBackend(nn.Module):
312 # YOLOv5 MultiBackend class for python inference on various backends
313 def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
314 # Usage:
315 # PyTorch: weights = *.pt
316 # TorchScript: *.torchscript
317 # ONNX Runtime: *.onnx
318 # ONNX OpenCV DNN: *.onnx --dnn
319 # OpenVINO: *.xml
320 # CoreML: *.mlmodel
321 # TensorRT: *.engine
322 # TensorFlow SavedModel: *_saved_model
323 # TensorFlow GraphDef: *.pb
324 # TensorFlow Lite: *.tflite
325 # TensorFlow Edge TPU: *_edgetpu.tflite
326 # PaddlePaddle: *_paddle_model
327 from models.experimental import attempt_download, attempt_load # scoped to avoid circular import
328
329 super().__init__()
330 w = str(weights[0] if isinstance(weights, list) else weights)
331 pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
332 fp16 &= pt or jit or onnx or engine # FP16
333 nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCHW)
334 stride = 32 # default stride
335 cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA
336 if not (pt or triton):
337 w = attempt_download(w) # download if not local
338
339 if pt: # PyTorch
340 model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
341 stride = max(int(model.stride.max()), 32) # model stride
342 names = model.module.names if hasattr(model, 'module') else model.names # get class names
343 model.half() if fp16 else model.float()
344 self.model = model # explicitly assign for to(), cpu(), cuda(), half()
345 elif jit: # TorchScript
346 LOGGER.info(f'Loading {w} for TorchScript inference...')
347 extra_files = {'config.txt': ''} # model metadata
348 model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
349 model.half() if fp16 else model.float()
350 if extra_files['config.txt']: # load metadata dict
351 d = json.loads(extra_files['config.txt'],
352 object_hook=lambda d: {int(k) if k.isdigit() else k: v
353 for k, v in d.items()})
354 stride, names = int(d['stride']), d['names']
355 elif dnn: # ONNX OpenCV DNN
356 LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
357 check_requirements('opencv-python>=4.5.4')
358 net = cv2.dnn.readNetFromONNX(w)
359 elif onnx: # ONNX Runtime
360 LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
361 check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
362 import onnxruntime
363 providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
364 session = onnxruntime.InferenceSession(w, providers=providers)
365 output_names = [x.name for x in session.get_outputs()]
366 meta = session.get_modelmeta().custom_metadata_map # metadata
367 if 'stride' in meta:
368 stride, names = int(meta['stride']), eval(meta['names'])
369 elif xml: # OpenVINO
370 LOGGER.info(f'Loading {w} for OpenVINO inference...')
371 check_requirements('openvino') # requires openvino-dev: https://pypi.org/project/openvino-dev/
372 from openvino.runtime import Core, Layout, get_batch
373 ie = Core()
374 if not Path(w).is_file(): # if not *.xml
375 w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
376 network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
377 if network.get_parameters()[0].get_layout().empty:
378 network.get_parameters()[0].set_layout(Layout("NCHW"))
379 batch_dim = get_batch(network)
380 if batch_dim.is_static:
381 batch_size = batch_dim.get_length()
382 executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2
383 stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata
384 elif engine: # TensorRT
385 LOGGER.info(f'Loading {w} for TensorRT inference...')
386 import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
387 check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
388 if device.type == 'cpu':
389 device = torch.device('cuda:0')
390 Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
391 logger = trt.Logger(trt.Logger.INFO)
392 with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
393 model = runtime.deserialize_cuda_engine(f.read())
394 context = model.create_execution_context()
395 bindings = OrderedDict()
396 output_names = []
397 fp16 = False # default updated below
398 dynamic = False
399 for i in range(model.num_bindings):
400 name = model.get_binding_name(i)
401 dtype = trt.nptype(model.get_binding_dtype(i))
402 if model.binding_is_input(i):
403 if -1 in tuple(model.get_binding_shape(i)): # dynamic
404 dynamic = True
405 context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
406 if dtype == np.float16:
407 fp16 = True
408 else: # output
409 output_names.append(name)
410 shape = tuple(context.get_binding_shape(i))
411 im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
412 bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
413 binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
414 batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
415 elif coreml: # CoreML
416 LOGGER.info(f'Loading {w} for CoreML inference...')
417 import coremltools as ct
418 model = ct.models.MLModel(w)
419 elif saved_model: # TF SavedModel
420 LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
421 import tensorflow as tf
422 keras = False # assume TF1 saved_model
423 model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
424 elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
425 LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
426 import tensorflow as tf
427
428 def wrap_frozen_graph(gd, inputs, outputs):
429 x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
430 ge = x.graph.as_graph_element
431 return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
432
433 def gd_outputs(gd):
434 name_list, input_list = [], []
435 for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
436 name_list.append(node.name)
437 input_list.extend(node.input)
438 return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
439
440 gd = tf.Graph().as_graph_def() # TF GraphDef
441 with open(w, 'rb') as f:
442 gd.ParseFromString(f.read())
443 frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
444 elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
445 try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
446 from tflite_runtime.interpreter import Interpreter, load_delegate
447 except ImportError:
448 import tensorflow as tf
449 Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
450 if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
451 LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
452 delegate = {
453 'Linux': 'libedgetpu.so.1',
454 'Darwin': 'libedgetpu.1.dylib',
455 'Windows': 'edgetpu.dll'}[platform.system()]
456 interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
457 else: # TFLite
458 LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
459 interpreter = Interpreter(model_path=w) # load TFLite model
460 interpreter.allocate_tensors() # allocate
461 input_details = interpreter.get_input_details() # inputs
462 output_details = interpreter.get_output_details() # outputs
463 elif tfjs: # TF.js
464 raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
465 elif paddle: # PaddlePaddle
466 LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
467 check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
468 import paddle.inference as pdi
469 if not Path(w).is_file(): # if not *.pdmodel
470 w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir
471 weights = Path(w).with_suffix('.pdiparams')
472 config = pdi.Config(str(w), str(weights))
473 if cuda:
474 config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
475 predictor = pdi.create_predictor(config)
476 input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
477 output_names = predictor.get_output_names()
478 elif triton: # NVIDIA Triton Inference Server
479 LOGGER.info(f'Using {w} as Triton Inference Server...')
480 check_requirements('tritonclient[all]')
481 from utils.triton import TritonRemoteModel
482 model = TritonRemoteModel(url=w)
483 nhwc = model.runtime.startswith("tensorflow")
484 else:
485 raise NotImplementedError(f'ERROR: {w} is not a supported format')
486
487 # class names
488 if 'names' not in locals():
489 names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
490 if names[0] == 'n01440764' and len(names) == 1000: # ImageNet
491 names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names
492
493 self.__dict__.update(locals()) # assign all variables to self
494
495 def forward(self, im, augment=False, visualize=False):
496 # YOLOv5 MultiBackend inference
497 b, ch, h, w = im.shape # batch, channel, height, width
498 if self.fp16 and im.dtype != torch.float16:
499 im = im.half() # to FP16
500 if self.nhwc:
501 im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
502
503 if self.pt: # PyTorch
504 y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
505 elif self.jit: # TorchScript
506 y = self.model(im)
507 elif self.dnn: # ONNX OpenCV DNN
508 im = im.cpu().numpy() # torch to numpy
509 self.net.setInput(im)
510 y = self.net.forward()
511 elif self.onnx: # ONNX Runtime
512 im = im.cpu().numpy() # torch to numpy
513 y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
514 elif self.xml: # OpenVINO
515 im = im.cpu().numpy() # FP32
516 y = list(self.executable_network([im]).values())
517 elif self.engine: # TensorRT
518 if self.dynamic and im.shape != self.bindings['images'].shape:
519 i = self.model.get_binding_index('images')
520 self.context.set_binding_shape(i, im.shape) # reshape if dynamic
521 self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
522 for name in self.output_names:
523 i = self.model.get_binding_index(name)
524 self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
525 s = self.bindings['images'].shape
526 assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
527 self.binding_addrs['images'] = int(im.data_ptr())
528 self.context.execute_v2(list(self.binding_addrs.values()))
529 y = [self.bindings[x].data for x in sorted(self.output_names)]
530 elif self.coreml: # CoreML
531 im = im.cpu().numpy()
532 im = Image.fromarray((im[0] * 255).astype('uint8'))
533 # im = im.resize((192, 320), Image.ANTIALIAS)
534 y = self.model.predict({'image': im}) # coordinates are xywh normalized
535 if 'confidence' in y:
536 box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
537 conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)  # np.float was removed in NumPy >= 1.24
538 y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
539 else:
540 y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
541 elif self.paddle: # PaddlePaddle
542 im = im.cpu().numpy().astype(np.float32)
543 self.input_handle.copy_from_cpu(im)
544 self.predictor.run()
545 y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
546 elif self.triton: # NVIDIA Triton Inference Server
547 y = self.model(im)
548 else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
549 im = im.cpu().numpy()
550 if self.saved_model: # SavedModel
551 y = self.model(im, training=False) if self.keras else self.model(im)
552 elif self.pb: # GraphDef
553 y = self.frozen_func(x=self.tf.constant(im))
554 else: # Lite or Edge TPU
555 input = self.input_details[0]
556 int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
557 if int8:
558 scale, zero_point = input['quantization']
559 im = (im / scale + zero_point).astype(np.uint8) # de-scale
560 self.interpreter.set_tensor(input['index'], im)
561 self.interpreter.invoke()
562 y = []
563 for output in self.output_details:
564 x = self.interpreter.get_tensor(output['index'])
565 if int8:
566 scale, zero_point = output['quantization']
567 x = (x.astype(np.float32) - zero_point) * scale # re-scale
568 y.append(x)
569 y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
570 y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
571
572 if isinstance(y, (list, tuple)):
573 return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
574 else:
575 return self.from_numpy(y)
576
577 def from_numpy(self, x):
578 return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
579
580 def warmup(self, imgsz=(1, 3, 640, 640)):
581 # Warmup model by running inference once
582 warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
583 if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
584 im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
585 for _ in range(2 if self.jit else 1): #
586 self.forward(im) # warmup
587
588 @staticmethod
589 def _model_type(p='path/to/model.pt'):
590 # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
591 # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
592 from export import export_formats
593 from utils.downloads import is_url
594 sf = list(export_formats().Suffix) # export suffixes
595 if not is_url(p, check=False):
596 check_suffix(p, sf) # checks
597 url = urlparse(p) # if url may be Triton inference server
598 types = [s in Path(p).name for s in sf]
599 types[8] &= not types[9] # tflite &= not edgetpu
600 triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
601 return types + [triton]
602
603 @staticmethod
604 def _load_metadata(f=Path('path/to/meta.yaml')):
605 # Load metadata from meta.yaml if it exists
606 if f.exists():
607 d = yaml_load(f)
608 return d['stride'], d['names'] # assign stride, names
609 return None, None
610
611
612 class AutoShape(nn.Module):
613 # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
614 conf = 0.25 # NMS confidence threshold
615 iou = 0.45 # NMS IoU threshold
616 agnostic = False # NMS class-agnostic
617 multi_label = False # NMS multiple labels per box
618 classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
619 max_det = 1000 # maximum number of detections per image
620 amp = False # Automatic Mixed Precision (AMP) inference
621
622 def __init__(self, model, verbose=True):
623 super().__init__()
624 if verbose:
625 LOGGER.info('Adding AutoShape... ')
626 copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
627 self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
628 self.pt = not self.dmb or model.pt # PyTorch model
629 self.model = model.eval()
630 if self.pt:
631 m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
632 m.inplace = False # Detect.inplace=False for safe multithread inference
633 m.export = True # do not output loss values
634
635 def _apply(self, fn):
636 # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
637 self = super()._apply(fn)
638 if self.pt:
639 m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
640 m.stride = fn(m.stride)
641 m.grid = list(map(fn, m.grid))
642 if isinstance(m.anchor_grid, list):
643 m.anchor_grid = list(map(fn, m.anchor_grid))
644 return self
645
646 @smart_inference_mode()
647 def forward(self, ims, size=640, augment=False, profile=False):
648 # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
649 # file: ims = 'data/images/zidane.jpg' # str or PosixPath
650 # URI: = 'https://ultralytics.com/images/zidane.jpg'
651 # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
652 # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
653 # numpy: = np.zeros((640,1280,3)) # HWC
654 # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
655 # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
656
657 dt = (Profile(), Profile(), Profile())
658 with dt[0]:
659 if isinstance(size, int): # expand
660 size = (size, size)
661 p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param
662 autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
663 if isinstance(ims, torch.Tensor): # torch
664 with amp.autocast(autocast):
665 return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
666
667 # Pre-process
668 n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
669 shape0, shape1, files = [], [], [] # image and inference shapes, filenames
670 for i, im in enumerate(ims):
671 f = f'image{i}' # filename
672 if isinstance(im, (str, Path)): # filename or uri
673 im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
674 im = np.asarray(exif_transpose(im))
675 elif isinstance(im, Image.Image): # PIL Image
676 im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
677 files.append(Path(f).with_suffix('.jpg').name)
678 if im.shape[0] < 5: # image in CHW
679 im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
680 im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input
681 s = im.shape[:2] # HWC
682 shape0.append(s) # image shape
683 g = max(size) / max(s) # gain
684 shape1.append([y * g for y in s])
685 ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
686 shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size # inf shape
687 x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad
688 x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
689 x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
690
691 with amp.autocast(autocast):
692 # Inference
693 with dt[1]:
694 y = self.model(x, augment=augment) # forward
695
696 # Post-process
697 with dt[2]:
698 y = non_max_suppression(y if self.dmb else y[0],
699 self.conf,
700 self.iou,
701 self.classes,
702 self.agnostic,
703 self.multi_label,
704 max_det=self.max_det) # NMS
705 for i in range(n):
706 scale_boxes(shape1, y[i][:, :4], shape0[i])
707
708 return Detections(ims, y, files, dt, self.names, x.shape)
709
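# Usage sketch for AutoShape (illustrative only; 'yolov5s.pt' is an assumed weights path):
#   model = AutoShape(DetectMultiBackend('yolov5s.pt'))
#   results = model(['data/images/zidane.jpg'], size=640)  # paths, PIL, numpy or torch inputs
#   results.print()  # or results.save(), results.pandas().xyxy[0]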
710
711 class Detections:
712 # YOLOv5 detections class for inference results
713 def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
714 super().__init__()
715 d = pred[0].device # device
716 gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
717 self.ims = ims # list of images as numpy arrays
718 self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
719 self.names = names # class names
720 self.files = files # image filenames
721 self.times = times # profiling times
722 self.xyxy = pred # xyxy pixels
723 self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
724 self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
725 self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
726 self.n = len(self.pred) # number of images (batch size)
727 self.t = tuple(x.t / self.n * 1E3 for x in times) # per-image times (ms)
728 self.s = tuple(shape) # inference BCHW shape
729
730 def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
731 s, crops = '', []
732 for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
733 s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
734 if pred.shape[0]:
735 for c in pred[:, -1].unique():
736 n = (pred[:, -1] == c).sum() # detections per class
737 s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
738 s = s.rstrip(', ')
739 if show or save or render or crop:
740 annotator = Annotator(im, example=str(self.names))
741 for *box, conf, cls in reversed(pred): # xyxy, confidence, class
742 label = f'{self.names[int(cls)]} {conf:.2f}'
743 if crop:
744 file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
745 crops.append({
746 'box': box,
747 'conf': conf,
748 'cls': cls,
749 'label': label,
750 'im': save_one_box(box, im, file=file, save=save)})
751 else: # all others
752 annotator.box_label(box, label if labels else '', color=colors(cls))
753 im = annotator.im
754 else:
755 s += '(no detections)'
756
757 im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
758 if show:
759 im.show(self.files[i]) # show
760 if save:
761 f = self.files[i]
762 im.save(save_dir / f) # save
763 if i == self.n - 1:
764 LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
765 if render:
766 self.ims[i] = np.asarray(im)
767 if pprint:
768 s = s.lstrip('\n')
769 return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
770 if crop:
771 if save:
772 LOGGER.info(f'Saved results to {save_dir}\n')
773 return crops
774
775 def show(self, labels=True):
776 self._run(show=True, labels=labels) # show results
777
778 def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
779 save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
780 self._run(save=True, labels=labels, save_dir=save_dir) # save results
781
782 def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
783 save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
784 return self._run(crop=True, save=save, save_dir=save_dir) # crop results
785
786 def render(self, labels=True):
787 self._run(render=True, labels=labels) # render results
788 return self.ims
789
790 def pandas(self):
791 # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
792 new = copy(self) # return copy
793 ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
794 cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
795 for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
796 a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
797 setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
798 return new
799
800 def tolist(self):
801 # return a list of Detections objects, i.e. 'for result in results.tolist():'
802 r = range(self.n) # iterable
803 x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
804 # for d in x:
805 # for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
806 # setattr(d, k, getattr(d, k)[0]) # pop out of list
807 return x
808
809 def print(self):
810 LOGGER.info(self.__str__())
811
812 def __len__(self): # override len(results)
813 return self.n
814
815 def __str__(self): # override print(results)
816 return self._run(pprint=True) # print results
817
818 def __repr__(self):
819 return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
820
821
822 class Proto(nn.Module):
823 # YOLOv5 mask Proto module for segmentation models
824 def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
825 super().__init__()
826 self.cv1 = Conv(c1, c_, k=3)
827 self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
828 self.cv2 = Conv(c_, c_, k=3)
829 self.cv3 = Conv(c_, c2)
830
831 def forward(self, x):
832 return self.cv3(self.cv2(self.upsample(self.cv1(x))))
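    # Shape sketch (illustrative, defaults c_=256, c2=32):
    # x(b,c1,h,w) -> cv1 -> (b,256,h,w) -> upsample -> (b,256,2h,2w) -> cv2 -> cv3 -> (b,32,2h,2w)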
833
834
835 class Classify(nn.Module):
836 # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
837 def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
838 super().__init__()
839 c_ = 1280 # efficientnet_b0 size
840 self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
841 self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
842 self.drop = nn.Dropout(p=0.0, inplace=True)
843 self.linear = nn.Linear(c_, c2) # to x(b,c2)
844
845 def forward(self, x):
846 if isinstance(x, list):
847 x = torch.cat(x, 1)
848 return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
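    # Shape sketch (illustrative, e.g. c1=512, c2=10):
    # x(b,512,20,20) -> conv -> (b,1280,20,20) -> pool -> (b,1280,1,1) -> flatten -> (b,1280) -> linear -> (b,10)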
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Experimental modules
4 """
5 import math
6
7 import numpy as np
8 import torch
9 import torch.nn as nn
10
11 from utils.downloads import attempt_download
12
13
14 class Sum(nn.Module):
15 # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
16 def __init__(self, n, weight=False): # n: number of inputs
17 super().__init__()
18 self.weight = weight # apply weights boolean
19 self.iter = range(n - 1) # iter object
20 if weight:
21 self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
22
23 def forward(self, x):
24 y = x[0] # no weight
25 if self.weight:
26 w = torch.sigmoid(self.w) * 2
27 for i in self.iter:
28 y = y + x[i + 1] * w[i]
29 else:
30 for i in self.iter:
31 y = y + x[i + 1]
32 return y
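        # Note: torch.sigmoid(self.w) * 2 constrains each learned weight to (0, 2),
        # so a weight near 1 recovers the plain unweighted sum.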
33
34
35 class MixConv2d(nn.Module):
36 # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
37 def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
38 super().__init__()
39 n = len(k) # number of convolutions
40 if equal_ch: # equal c_ per group
41 i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
42 c_ = [(i == g).sum() for g in range(n)] # intermediate channels
43 else: # equal weight.numel() per group
44 b = [c2] + [0] * n
45 a = np.eye(n + 1, n, k=-1)
46 a -= np.roll(a, 1, axis=1)
47 a *= np.array(k) ** 2
48 a[0] = 1
49 c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
50
51 self.m = nn.ModuleList([
52 nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
53 self.bn = nn.BatchNorm2d(c2)
54 self.act = nn.SiLU()
55
56 def forward(self, x):
57 return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
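    # e.g. MixConv2d(64, 128, k=(1, 3)) runs parallel 1x1 and 3x3 grouped convolutions
    # over the input, concatenates their outputs channel-wise, then applies BN and SiLU.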
58
59
60 class Ensemble(nn.ModuleList):
61 # Ensemble of models
62 def __init__(self):
63 super().__init__()
64
65 def forward(self, x, augment=False, profile=False, visualize=False):
66 y = [module(x, augment, profile, visualize)[0] for module in self]
67 # y = torch.stack(y).max(0)[0] # max ensemble
68 # y = torch.stack(y).mean(0) # mean ensemble
69 y = torch.cat(y, 1) # nms ensemble
70 return y, None # inference, train output
71
72
73 def attempt_load(weights, device=None, inplace=True, fuse=True):
74 # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
75 from models.yolo import Detect, Model
76
77 model = Ensemble()
78 for w in weights if isinstance(weights, list) else [weights]:
79 ckpt = torch.load(attempt_download(w), map_location='cpu') # load
80 ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
81
82 # Model compatibility updates
83 if not hasattr(ckpt, 'stride'):
84 ckpt.stride = torch.tensor([32.])
85 if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
86 ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
87
88 model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
89
90 # Module compatibility updates
91 for m in model.modules():
92 t = type(m)
93 if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
94 m.inplace = inplace # torch 1.7.0 compatibility
95 if t is Detect and not isinstance(m.anchor_grid, list):
96 delattr(m, 'anchor_grid')
97 setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
98 elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
99 m.recompute_scale_factor = None # torch 1.11.0 compatibility
100
101 # Return model
102 if len(model) == 1:
103 return model[-1]
104
105 # Return detection ensemble
106 print(f'Ensemble created with {weights}\n')
107 for k in 'names', 'nc', 'yaml':
108 setattr(model, k, getattr(model[0], k))
109 model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
110 assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
111 return model
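# Usage sketch (illustrative; weight filenames are assumptions):
#   model = attempt_load('yolov5s.pt', device='cpu')        # single model
#   ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'])   # Ensemble of several models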
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Default anchors for COCO data
3
4
5 # P5 -------------------------------------------------------------------------------------------------------------------
6 # P5-640:
7 anchors_p5_640:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12
13 # P6 -------------------------------------------------------------------------------------------------------------------
14 # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
15 anchors_p6_640:
16 - [9,11, 21,19, 17,41] # P3/8
17 - [43,32, 39,70, 86,64] # P4/16
18 - [65,131, 134,130, 120,265] # P5/32
19 - [282,180, 247,354, 512,387] # P6/64
20
21 # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
22 anchors_p6_1280:
23 - [19,27, 44,40, 38,94] # P3/8
24 - [96,68, 86,152, 180,137] # P4/16
25 - [140,301, 303,264, 238,542] # P5/32
26 - [436,615, 739,380, 925,792] # P6/64
27
28 # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
29 anchors_p6_1920:
30 - [28,41, 67,59, 57,141] # P3/8
31 - [144,103, 129,227, 270,205] # P4/16
32 - [209,452, 455,396, 358,812] # P5/32
33 - [653,922, 1109,570, 1387,1187] # P6/64
34
35
36 # P7 -------------------------------------------------------------------------------------------------------------------
37 # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
38 anchors_p7_640:
39 - [11,11, 13,30, 29,20] # P3/8
40 - [30,46, 61,38, 39,92] # P4/16
41 - [78,80, 146,66, 79,163] # P5/32
42 - [149,150, 321,143, 157,303] # P6/64
43 - [257,402, 359,290, 524,372] # P7/128
44
45 # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
46 anchors_p7_1280:
47 - [19,22, 54,36, 32,77] # P3/8
48 - [70,83, 138,71, 75,173] # P4/16
49 - [165,159, 148,334, 375,151] # P5/32
50 - [334,317, 251,626, 499,474] # P6/64
51 - [750,326, 534,814, 1079,818] # P7/128
52
53 # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
54 anchors_p7_1920:
55 - [29,34, 81,55, 47,115] # P3/8
56 - [105,124, 207,107, 113,259] # P4/16
57 - [247,238, 222,500, 563,227] # P5/32
58 - [501,476, 376,939, 749,711] # P6/64
59 - [1126,489, 801,1222, 1618,1227] # P7/128
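
# Note: each row above lists three (width, height) anchor pairs in input-image pixels
# for the output level named in its trailing comment (P3/8 ... P7/128).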
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # darknet53 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [32, 3, 1]], # 0
16 [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 [-1, 1, Bottleneck, [64]],
18 [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 [-1, 2, Bottleneck, [128]],
20 [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 [-1, 8, Bottleneck, [256]],
22 [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 [-1, 8, Bottleneck, [512]],
24 [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 [-1, 4, Bottleneck, [1024]], # 10
26 ]
27
28 # YOLOv3-SPP head
29 head:
30 [[-1, 1, Bottleneck, [1024, False]],
31 [-1, 1, SPP, [512, [5, 9, 13]]],
32 [-1, 1, Conv, [1024, 3, 1]],
33 [-1, 1, Conv, [512, 1, 1]],
34 [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
36 [-2, 1, Conv, [256, 1, 1]],
37 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 [-1, 1, Bottleneck, [512, False]],
40 [-1, 1, Bottleneck, [512, False]],
41 [-1, 1, Conv, [256, 1, 1]],
42 [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
44 [-2, 1, Conv, [128, 1, 1]],
45 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 [-1, 1, Bottleneck, [256, False]],
48 [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
50 [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,14, 23,27, 37,58] # P4/16
9 - [81,82, 135,169, 344,319] # P5/32
10
11 # YOLOv3-tiny backbone
12 backbone:
13 # [from, number, module, args]
14 [[-1, 1, Conv, [16, 3, 1]], # 0
15 [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
16 [-1, 1, Conv, [32, 3, 1]],
17 [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
18 [-1, 1, Conv, [64, 3, 1]],
19 [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
20 [-1, 1, Conv, [128, 3, 1]],
21 [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
22 [-1, 1, Conv, [256, 3, 1]],
23 [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
24 [-1, 1, Conv, [512, 3, 1]],
25 [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
26 [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
27 ]
28
29 # YOLOv3-tiny head
30 head:
31 [[-1, 1, Conv, [1024, 3, 1]],
32 [-1, 1, Conv, [256, 1, 1]],
33 [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
34
35 [-2, 1, Conv, [128, 1, 1]],
36 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 [[-1, 8], 1, Concat, [1]], # cat backbone P4
38 [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
39
40 [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
41 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # darknet53 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [32, 3, 1]], # 0
16 [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 [-1, 1, Bottleneck, [64]],
18 [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 [-1, 2, Bottleneck, [128]],
20 [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 [-1, 8, Bottleneck, [256]],
22 [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 [-1, 8, Bottleneck, [512]],
24 [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 [-1, 4, Bottleneck, [1024]], # 10
26 ]
27
28 # YOLOv3 head
29 head:
30 [[-1, 1, Bottleneck, [1024, False]],
31 [-1, 1, Conv, [512, 1, 1]],
32 [-1, 1, Conv, [1024, 3, 1]],
33 [-1, 1, Conv, [512, 1, 1]],
34 [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
36 [-2, 1, Conv, [256, 1, 1]],
37 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 [-1, 1, Bottleneck, [512, False]],
40 [-1, 1, Bottleneck, [512, False]],
41 [-1, 1, Conv, [256, 1, 1]],
42 [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
44 [-2, 1, Conv, [128, 1, 1]],
45 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 [-1, 1, Bottleneck, [256, False]],
48 [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
50 [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 BiFPN head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 FPN head
28 head:
29 [[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
30
31 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 [-1, 1, Conv, [512, 1, 1]],
34 [-1, 3, C3, [512, False]], # 14 (P4/16-medium)
35
36 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 [-1, 1, Conv, [256, 1, 1]],
39 [-1, 3, C3, [256, False]], # 18 (P3/8-small)
40
41 [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
42 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
9 # YOLOv5 v6.0 backbone
10 backbone:
11 # [from, number, module, args]
12 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14 [-1, 3, C3, [128]],
15 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16 [-1, 6, C3, [256]],
17 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18 [-1, 9, C3, [512]],
19 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
20 [-1, 3, C3, [1024]],
21 [-1, 1, SPPF, [1024, 5]], # 9
22 ]
23
24 # YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
25 head:
26 [[-1, 1, Conv, [512, 1, 1]],
27 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
28 [[-1, 6], 1, Concat, [1]], # cat backbone P4
29 [-1, 3, C3, [512, False]], # 13
30
31 [-1, 1, Conv, [256, 1, 1]],
32 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33 [[-1, 4], 1, Concat, [1]], # cat backbone P3
34 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
35
36 [-1, 1, Conv, [128, 1, 1]],
37 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 [[-1, 2], 1, Concat, [1]], # cat backbone P2
39 [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
40
41 [-1, 1, Conv, [128, 3, 2]],
42 [[-1, 18], 1, Concat, [1]], # cat head P3
43 [-1, 3, C3, [256, False]], # 24 (P3/8-small)
44
45 [-1, 1, Conv, [256, 3, 2]],
46 [[-1, 14], 1, Concat, [1]], # cat head P4
47 [-1, 3, C3, [512, False]], # 27 (P4/16-medium)
48
49 [-1, 1, Conv, [512, 3, 2]],
50 [[-1, 10], 1, Concat, [1]], # cat head P5
51 [-1, 3, C3, [1024, False]], # 30 (P5/32-large)
52
53 [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
54 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.50 # layer channel multiple
7 anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
9 # YOLOv5 v6.0 backbone
10 backbone:
11 # [from, number, module, args]
12 [ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
13 [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
14 [ -1, 3, C3, [ 128 ] ],
15 [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
16 [ -1, 6, C3, [ 256 ] ],
17 [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
18 [ -1, 9, C3, [ 512 ] ],
19 [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
20 [ -1, 3, C3, [ 1024 ] ],
21 [ -1, 1, SPPF, [ 1024, 5 ] ], # 9
22 ]
23
24 # YOLOv5 v6.0 head with (P3, P4) outputs
25 head:
26 [ [ -1, 1, Conv, [ 512, 1, 1 ] ],
27 [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
28 [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
29 [ -1, 3, C3, [ 512, False ] ], # 13
30
31 [ -1, 1, Conv, [ 256, 1, 1 ] ],
32 [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
33 [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
34 [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
35
36 [ -1, 1, Conv, [ 256, 3, 2 ] ],
37 [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
38 [ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
39
40 [ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
41 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
9 # YOLOv5 v6.0 backbone
10 backbone:
11 # [from, number, module, args]
12 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14 [-1, 3, C3, [128]],
15 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16 [-1, 6, C3, [256]],
17 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18 [-1, 9, C3, [512]],
19 [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
20 [-1, 3, C3, [768]],
21 [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
22 [-1, 3, C3, [1024]],
23 [-1, 1, SPPF, [1024, 5]], # 11
24 ]
25
26 # YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
27 head:
28 [[-1, 1, Conv, [768, 1, 1]],
29 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30 [[-1, 8], 1, Concat, [1]], # cat backbone P5
31 [-1, 3, C3, [768, False]], # 15
32
33 [-1, 1, Conv, [512, 1, 1]],
34 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35 [[-1, 6], 1, Concat, [1]], # cat backbone P4
36 [-1, 3, C3, [512, False]], # 19
37
38 [-1, 1, Conv, [256, 1, 1]],
39 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40 [[-1, 4], 1, Concat, [1]], # cat backbone P3
41 [-1, 3, C3, [256, False]], # 23 (P3/8-small)
42
43 [-1, 1, Conv, [256, 3, 2]],
44 [[-1, 20], 1, Concat, [1]], # cat head P4
45 [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
46
47 [-1, 1, Conv, [512, 3, 2]],
48 [[-1, 16], 1, Concat, [1]], # cat head P5
49 [-1, 3, C3, [768, False]], # 29 (P5/32-large)
50
51 [-1, 1, Conv, [768, 3, 2]],
52 [[-1, 12], 1, Concat, [1]], # cat head P6
53 [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
54
55 [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
56 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
9 # YOLOv5 v6.0 backbone
10 backbone:
11 # [from, number, module, args]
12 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14 [-1, 3, C3, [128]],
15 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16 [-1, 6, C3, [256]],
17 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18 [-1, 9, C3, [512]],
19 [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
20 [-1, 3, C3, [768]],
21 [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
22 [-1, 3, C3, [1024]],
23 [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
24 [-1, 3, C3, [1280]],
25 [-1, 1, SPPF, [1280, 5]], # 13
26 ]
27
28 # YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
29 head:
30 [[-1, 1, Conv, [1024, 1, 1]],
31 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 [[-1, 10], 1, Concat, [1]], # cat backbone P6
33 [-1, 3, C3, [1024, False]], # 17
34
35 [-1, 1, Conv, [768, 1, 1]],
36 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 [[-1, 8], 1, Concat, [1]], # cat backbone P5
38 [-1, 3, C3, [768, False]], # 21
39
40 [-1, 1, Conv, [512, 1, 1]],
41 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
42 [[-1, 6], 1, Concat, [1]], # cat backbone P4
43 [-1, 3, C3, [512, False]], # 25
44
45 [-1, 1, Conv, [256, 1, 1]],
46 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
47 [[-1, 4], 1, Concat, [1]], # cat backbone P3
48 [-1, 3, C3, [256, False]], # 29 (P3/8-small)
49
50 [-1, 1, Conv, [256, 3, 2]],
51 [[-1, 26], 1, Concat, [1]], # cat head P4
52 [-1, 3, C3, [512, False]], # 32 (P4/16-medium)
53
54 [-1, 1, Conv, [512, 3, 2]],
55 [[-1, 22], 1, Concat, [1]], # cat head P5
56 [-1, 3, C3, [768, False]], # 35 (P5/32-large)
57
58 [-1, 1, Conv, [768, 3, 2]],
59 [[-1, 18], 1, Concat, [1]], # cat head P6
60 [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
61
62 [-1, 1, Conv, [1024, 3, 2]],
63 [[-1, 14], 1, Concat, [1]], # cat head P7
64 [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
65
66 [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
67 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 PANet head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [19,27, 44,40, 38,94] # P3/8
9 - [96,68, 86,152, 180,137] # P4/16
10 - [140,301, 303,264, 238,542] # P5/32
11 - [436,615, 739,380, 925,792] # P6/64
12
13 # YOLOv5 v6.0 backbone
14 backbone:
15 # [from, number, module, args]
16 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 [-1, 3, C3, [128]],
19 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 [-1, 6, C3, [256]],
21 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 [-1, 9, C3, [512]],
23 [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 [-1, 3, C3, [768]],
25 [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 [-1, 3, C3, [1024]],
27 [-1, 1, SPPF, [1024, 5]], # 11
28 ]
29
30 # YOLOv5 v6.0 head
31 head:
32 [[-1, 1, Conv, [768, 1, 1]],
33 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 [-1, 3, C3, [768, False]], # 15
36
37 [-1, 1, Conv, [512, 1, 1]],
38 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 [-1, 3, C3, [512, False]], # 19
41
42 [-1, 1, Conv, [256, 1, 1]],
43 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
47 [-1, 1, Conv, [256, 3, 2]],
48 [[-1, 20], 1, Concat, [1]], # cat head P4
49 [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
51 [-1, 1, Conv, [512, 3, 2]],
52 [[-1, 16], 1, Concat, [1]], # cat head P5
53 [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
55 [-1, 1, Conv, [768, 3, 2]],
56 [[-1, 12], 1, Concat, [1]], # cat head P6
57 [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
59 [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.67 # model depth multiple
6 width_multiple: 0.75 # layer channel multiple
7 anchors:
8 - [19,27, 44,40, 38,94] # P3/8
9 - [96,68, 86,152, 180,137] # P4/16
10 - [140,301, 303,264, 238,542] # P5/32
11 - [436,615, 739,380, 925,792] # P6/64
12
13 # YOLOv5 v6.0 backbone
14 backbone:
15 # [from, number, module, args]
16 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 [-1, 3, C3, [128]],
19 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 [-1, 6, C3, [256]],
21 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 [-1, 9, C3, [512]],
23 [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 [-1, 3, C3, [768]],
25 [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 [-1, 3, C3, [1024]],
27 [-1, 1, SPPF, [1024, 5]], # 11
28 ]
29
30 # YOLOv5 v6.0 head
31 head:
32 [[-1, 1, Conv, [768, 1, 1]],
33 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 [-1, 3, C3, [768, False]], # 15
36
37 [-1, 1, Conv, [512, 1, 1]],
38 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 [-1, 3, C3, [512, False]], # 19
41
42 [-1, 1, Conv, [256, 1, 1]],
43 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
47 [-1, 1, Conv, [256, 3, 2]],
48 [[-1, 20], 1, Concat, [1]], # cat head P4
49 [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
51 [-1, 1, Conv, [512, 3, 2]],
52 [[-1, 16], 1, Concat, [1]], # cat head P5
53 [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
55 [-1, 1, Conv, [768, 3, 2]],
56 [[-1, 12], 1, Concat, [1]], # cat head P6
57 [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
59 [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.25 # layer channel multiple
7 anchors:
8 - [19,27, 44,40, 38,94] # P3/8
9 - [96,68, 86,152, 180,137] # P4/16
10 - [140,301, 303,264, 238,542] # P5/32
11 - [436,615, 739,380, 925,792] # P6/64
12
13 # YOLOv5 v6.0 backbone
14 backbone:
15 # [from, number, module, args]
16 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 [-1, 3, C3, [128]],
19 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 [-1, 6, C3, [256]],
21 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 [-1, 9, C3, [512]],
23 [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 [-1, 3, C3, [768]],
25 [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 [-1, 3, C3, [1024]],
27 [-1, 1, SPPF, [1024, 5]], # 11
28 ]
29
30 # YOLOv5 v6.0 head
31 head:
32 [[-1, 1, Conv, [768, 1, 1]],
33 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 [-1, 3, C3, [768, False]], # 15
36
37 [-1, 1, Conv, [512, 1, 1]],
38 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 [-1, 3, C3, [512, False]], # 19
41
42 [-1, 1, Conv, [256, 1, 1]],
43 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
47 [-1, 1, Conv, [256, 3, 2]],
48 [[-1, 20], 1, Concat, [1]], # cat head P4
49 [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
51 [-1, 1, Conv, [512, 3, 2]],
52 [[-1, 16], 1, Concat, [1]], # cat head P5
53 [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
55 [-1, 1, Conv, [768, 3, 2]],
56 [[-1, 12], 1, Concat, [1]], # cat head P6
57 [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
59 [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
6 depth_multiple: 0.33 # model depth multiple
7 width_multiple: 0.50 # layer channel multiple
8 anchors:
9 - [10,13, 16,30, 33,23] # P3/8
10 - [30,61, 62,45, 59,119] # P4/16
11 - [116,90, 156,198, 373,326] # P5/32
12
13 # YOLOv5 v6.0 backbone
14 backbone:
15 # [from, number, module, args]
16 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 [-1, 3, C3, [128]],
19 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 [-1, 6, C3, [256]],
21 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 [-1, 9, C3, [512]],
23 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24 [-1, 3, C3, [1024]],
25 [-1, 1, SPPF, [1024, 5]], # 9
26 ]
27
28 # YOLOv5 v6.0 head
29 head:
30 [[-1, 1, Conv, [512, 1, 1]],
31 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 [-1, 3, C3, [512, False]], # 13
34
35 [-1, 1, Conv, [256, 1, 1]],
36 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
40 [-1, 1, Conv, [256, 3, 2]],
41 [[-1, 14], 1, Concat, [1]], # cat head P4
42 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
44 [-1, 1, Conv, [512, 3, 2]],
45 [[-1, 10], 1, Concat, [1]], # cat head P5
46 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
48 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.50 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3Ghost, [128]],
18 [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3Ghost, [256]],
20 [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3Ghost, [512]],
22 [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3Ghost, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, GhostConv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3Ghost, [512, False]], # 13
33
34 [-1, 1, GhostConv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, GhostConv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, GhostConv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.50 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.50 # layer channel multiple
7 anchors:
8 - [19,27, 44,40, 38,94] # P3/8
9 - [96,68, 86,152, 180,137] # P4/16
10 - [140,301, 303,264, 238,542] # P5/32
11 - [436,615, 739,380, 925,792] # P6/64
12
13 # YOLOv5 v6.0 backbone
14 backbone:
15 # [from, number, module, args]
16 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 [-1, 3, C3, [128]],
19 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 [-1, 6, C3, [256]],
21 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 [-1, 9, C3, [512]],
23 [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 [-1, 3, C3, [768]],
25 [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 [-1, 3, C3, [1024]],
27 [-1, 1, SPPF, [1024, 5]], # 11
28 ]
29
30 # YOLOv5 v6.0 head
31 head:
32 [[-1, 1, Conv, [768, 1, 1]],
33 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 [-1, 3, C3, [768, False]], # 15
36
37 [-1, 1, Conv, [512, 1, 1]],
38 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 [-1, 3, C3, [512, False]], # 19
41
42 [-1, 1, Conv, [256, 1, 1]],
43 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
47 [-1, 1, Conv, [256, 3, 2]],
48 [[-1, 20], 1, Concat, [1]], # cat head P4
49 [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
51 [-1, 1, Conv, [512, 3, 2]],
52 [[-1, 16], 1, Concat, [1]], # cat head P5
53 [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
55 [-1, 1, Conv, [768, 3, 2]],
56 [[-1, 12], 1, Concat, [1]], # cat head P6
57 [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
59 [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.33 # model depth multiple
6 width_multiple: 1.25 # layer channel multiple
7 anchors:
8 - [19,27, 44,40, 38,94] # P3/8
9 - [96,68, 86,152, 180,137] # P4/16
10 - [140,301, 303,264, 238,542] # P5/32
11 - [436,615, 739,380, 925,792] # P6/64
12
13 # YOLOv5 v6.0 backbone
14 backbone:
15 # [from, number, module, args]
16 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 [-1, 3, C3, [128]],
19 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 [-1, 6, C3, [256]],
21 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 [-1, 9, C3, [512]],
23 [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 [-1, 3, C3, [768]],
25 [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 [-1, 3, C3, [1024]],
27 [-1, 1, SPPF, [1024, 5]], # 11
28 ]
29
30 # YOLOv5 v6.0 head
31 head:
32 [[-1, 1, Conv, [768, 1, 1]],
33 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 [-1, 3, C3, [768, False]], # 15
36
37 [-1, 1, Conv, [512, 1, 1]],
38 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 [-1, 3, C3, [512, False]], # 19
41
42 [-1, 1, Conv, [256, 1, 1]],
43 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
47 [-1, 1, Conv, [256, 3, 2]],
48 [[-1, 20], 1, Concat, [1]], # cat head P4
49 [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
51 [-1, 1, Conv, [512, 3, 2]],
52 [[-1, 16], 1, Concat, [1]], # cat head P5
53 [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
55 [-1, 1, Conv, [768, 3, 2]],
56 [[-1, 12], 1, Concat, [1]], # cat head P6
57 [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
59 [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.67 # model depth multiple
6 width_multiple: 0.75 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 ]
\ No newline at end of file
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.25 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.5 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 ]
\ No newline at end of file
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.33 # model depth multiple
6 width_multiple: 1.25 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 TensorFlow, Keras and TFLite versions of YOLOv5
4 Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
5
6 Usage:
7 $ python models/tf.py --weights yolov5s.pt
8
9 Export:
10 $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
11 """
12
13 import argparse
14 import sys
15 from copy import deepcopy
16 from pathlib import Path
17
18 FILE = Path(__file__).resolve()
19 ROOT = FILE.parents[1] # YOLOv5 root directory
20 if str(ROOT) not in sys.path:
21 sys.path.append(str(ROOT)) # add ROOT to PATH
22 # ROOT = ROOT.relative_to(Path.cwd()) # relative
23
24 import numpy as np
25 import tensorflow as tf
26 import torch
27 import torch.nn as nn
28 from tensorflow import keras
29
30 from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
31 DWConvTranspose2d, Focus, autopad)
32 from models.experimental import MixConv2d, attempt_load
33 from models.yolo import Detect, Segment
34 from utils.activations import SiLU
35 from utils.general import LOGGER, make_divisible, print_args
36
37
38 class TFBN(keras.layers.Layer):
39 # TensorFlow BatchNormalization wrapper
40 def __init__(self, w=None):
41 super().__init__()
42 self.bn = keras.layers.BatchNormalization(
43 beta_initializer=keras.initializers.Constant(w.bias.numpy()),
44 gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
45 moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
46 moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
47 epsilon=w.eps)
48
49 def call(self, inputs):
50 return self.bn(inputs)
51
52
53 class TFPad(keras.layers.Layer):
54 # Pad inputs in spatial dimensions 1 and 2
55 def __init__(self, pad):
56 super().__init__()
57 if isinstance(pad, int):
58 self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
59 else: # tuple/list
60 self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
61
62 def call(self, inputs):
63 return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
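    # Example (illustrative): TFPad(1) zero-pads an NHWC tensor by 1 on each side of
    # the H and W dimensions only, mirroring PyTorch's spatial padding.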
64
65
66 class TFConv(keras.layers.Layer):
67 # Standard convolution
68 def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
69 # ch_in, ch_out, weights, kernel, stride, padding, groups
70 super().__init__()
71 assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
72 # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
73 # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
74 conv = keras.layers.Conv2D(
75 filters=c2,
76 kernel_size=k,
77 strides=s,
78 padding='SAME' if s == 1 else 'VALID',
79 use_bias=not hasattr(w, 'bn'),
80 kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
81 bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
82 self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
83 self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
84 self.act = activations(w.act) if act else tf.identity
85
86 def call(self, inputs):
87 return self.act(self.bn(self.conv(inputs)))
88
89
90 class TFDWConv(keras.layers.Layer):
91 # Depthwise convolution
92 def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
93 # ch_in, ch_out, weights, kernel, stride, padding, groups
94 super().__init__()
95 assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels'
96 conv = keras.layers.DepthwiseConv2D(
97 kernel_size=k,
98 depth_multiplier=c2 // c1,
99 strides=s,
100 padding='SAME' if s == 1 else 'VALID',
101 use_bias=not hasattr(w, 'bn'),
102 depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
103 bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
104 self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
105 self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
106 self.act = activations(w.act) if act else tf.identity
107
108 def call(self, inputs):
109 return self.act(self.bn(self.conv(inputs)))
110
111
112 class TFDWConvTranspose2d(keras.layers.Layer):
113 # Depthwise ConvTranspose2d
114 def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
115 # ch_in, ch_out, weights, kernel, stride, padding, groups
116 super().__init__()
117 assert c1 == c2, f'TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels'
118 assert k == 4 and p1 == 1, 'TFDWConvTranspose2d() only valid for k=4 and p1=1'
119 weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
120 self.c1 = c1
121 self.conv = [
122 keras.layers.Conv2DTranspose(filters=1,
123 kernel_size=k,
124 strides=s,
125 padding='VALID',
126 output_padding=p2,
127 use_bias=True,
128 kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]),
129 bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)]
130
131 def call(self, inputs):
132 return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
133
134
135 class TFFocus(keras.layers.Layer):
136 # Focus wh information into c-space
137 def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
138 # ch_in, ch_out, kernel, stride, padding, groups
139 super().__init__()
140 self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
141
142 def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
143 # inputs = inputs / 255 # normalize 0-255 to 0-1
144 inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
145 return self.conv(tf.concat(inputs, 3))
146
147
148 class TFBottleneck(keras.layers.Layer):
149 # Standard bottleneck
150 def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
151 super().__init__()
152 c_ = int(c2 * e) # hidden channels
153 self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
154 self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
155 self.add = shortcut and c1 == c2
156
157 def call(self, inputs):
158 return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
159
160
161 class TFCrossConv(keras.layers.Layer):
162 # Cross Convolution
163 def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
164 super().__init__()
165 c_ = int(c2 * e) # hidden channels
166 self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
167 self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
168 self.add = shortcut and c1 == c2
169
170 def call(self, inputs):
171 return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
172
173
174 class TFConv2d(keras.layers.Layer):
175 # Substitution for PyTorch nn.Conv2D
176 def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
177 super().__init__()
178 assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
179 self.conv = keras.layers.Conv2D(filters=c2,
180 kernel_size=k,
181 strides=s,
182 padding='VALID',
183 use_bias=bias,
184 kernel_initializer=keras.initializers.Constant(
185 w.weight.permute(2, 3, 1, 0).numpy()),
186 bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None)
187
188 def call(self, inputs):
189 return self.conv(inputs)
190
191
192 class TFBottleneckCSP(keras.layers.Layer):
193 # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
194 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
195 # ch_in, ch_out, number, shortcut, groups, expansion
196 super().__init__()
197 c_ = int(c2 * e) # hidden channels
198 self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
199 self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
200 self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
201 self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
202 self.bn = TFBN(w.bn)
203 self.act = lambda x: keras.activations.swish(x)
204 self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
205
206 def call(self, inputs):
207 y1 = self.cv3(self.m(self.cv1(inputs)))
208 y2 = self.cv2(inputs)
209 return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
210
211
212 class TFC3(keras.layers.Layer):
213 # CSP Bottleneck with 3 convolutions
214 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
215 # ch_in, ch_out, number, shortcut, groups, expansion
216 super().__init__()
217 c_ = int(c2 * e) # hidden channels
218 self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
219 self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
220 self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
221 self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
222
223 def call(self, inputs):
224 return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
225
226
227 class TFC3x(keras.layers.Layer):
228 # 3 module with cross-convolutions
229 def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
230 # ch_in, ch_out, number, shortcut, groups, expansion
231 super().__init__()
232 c_ = int(c2 * e) # hidden channels
233 self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
234 self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
235 self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
236 self.m = keras.Sequential([
237 TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
238
239 def call(self, inputs):
240 return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
241
242
243 class TFSPP(keras.layers.Layer):
244 # Spatial pyramid pooling layer used in YOLOv3-SPP
245 def __init__(self, c1, c2, k=(5, 9, 13), w=None):
246 super().__init__()
247 c_ = c1 // 2 # hidden channels
248 self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
249 self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
250 self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
251
252 def call(self, inputs):
253 x = self.cv1(inputs)
254 return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
255
256
257 class TFSPPF(keras.layers.Layer):
258 # Spatial pyramid pooling-Fast layer
259 def __init__(self, c1, c2, k=5, w=None):
260 super().__init__()
261 c_ = c1 // 2 # hidden channels
262 self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
263 self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
264 self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
265
266 def call(self, inputs):
267 x = self.cv1(inputs)
268 y1 = self.m(x)
269 y2 = self.m(y1)
270 return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
271
272
273 class TFDetect(keras.layers.Layer):
274 # TF YOLOv5 Detect layer
275 def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
276 super().__init__()
277 self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
278 self.nc = nc # number of classes
279 self.no = nc + 5 # number of outputs per anchor
280 self.nl = len(anchors) # number of detection layers
281 self.na = len(anchors[0]) // 2 # number of anchors
282 self.grid = [tf.zeros(1)] * self.nl # init grid
283 self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
284 self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
285 self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
286 self.training = False # set to False after building model
287 self.imgsz = imgsz
288 for i in range(self.nl):
289 ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
290 self.grid[i] = self._make_grid(nx, ny)
291
292 def call(self, inputs):
293 z = [] # inference output
294 x = []
295 for i in range(self.nl):
296 x.append(self.m[i](inputs[i]))
297 # x(bs,20,20,255) to x(bs,3,20,20,85)
298 ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
299 x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
300
301 if not self.training: # inference
302 y = x[i]
303 grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
304 anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
305 xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
306 wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
307 # Normalize xywh to 0-1 to reduce calibration error
308 xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
309 wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
310 y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
311 z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
312
313 return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
314
315 @staticmethod
316 def _make_grid(nx=20, ny=20):
317 # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
318 # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
319 xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
320 return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
321
322
323 class TFSegment(TFDetect):
324 # YOLOv5 Segment head for segmentation models
325 def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
326 super().__init__(nc, anchors, ch, imgsz, w)
327 self.nm = nm # number of masks
328 self.npr = npr # number of protos
329 self.no = 5 + nc + self.nm # number of outputs per anchor
330 self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
331 self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
332 self.detect = TFDetect.call
333
334 def call(self, x):
335 p = self.proto(x[0])
336 p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
337 x = self.detect(self, x)
338 return (x, p) if self.training else (x[0], p)
339
340
341 class TFProto(keras.layers.Layer):
342
343 def __init__(self, c1, c_=256, c2=32, w=None):
344 super().__init__()
345 self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
346 self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
347 self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
348 self.cv3 = TFConv(c_, c2, w=w.cv3)
349
350 def call(self, inputs):
351 return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
352
353
354 class TFUpsample(keras.layers.Layer):
355 # TF version of torch.nn.Upsample()
356 def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
357 super().__init__()
358 assert scale_factor == 2, "scale_factor must be 2"
359 self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
360 # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
361 # with default arguments: align_corners=False, half_pixel_centers=False
362 # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
363 # size=(x.shape[1] * 2, x.shape[2] * 2))
364
365 def call(self, inputs):
366 return self.upsample(inputs)
367
368
369 class TFConcat(keras.layers.Layer):
370 # TF version of torch.concat()
371 def __init__(self, dimension=1, w=None):
372 super().__init__()
373 assert dimension == 1, "convert only NCHW to NHWC concat"
374 self.d = 3
375
376 def call(self, inputs):
377 return tf.concat(inputs, self.d)
378
379
380 def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
381 LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
382 anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
383 na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
384 no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
385
386 layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
387 for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
388 m_str = m
389 m = eval(m) if isinstance(m, str) else m # eval strings
390 for j, a in enumerate(args):
391 try:
392 args[j] = eval(a) if isinstance(a, str) else a # eval strings
393 except NameError:
394 pass
395
396 n = max(round(n * gd), 1) if n > 1 else n # depth gain
397 if m in [
398 nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv,
399 BottleneckCSP, C3, C3x]:
400 c1, c2 = ch[f], args[0]
401 c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
402
403 args = [c1, c2, *args[1:]]
404 if m in [BottleneckCSP, C3, C3x]:
405 args.insert(2, n)
406 n = 1
407 elif m is nn.BatchNorm2d:
408 args = [ch[f]]
409 elif m is Concat:
410 c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
411 elif m in [Detect, Segment]:
412 args.append([ch[x + 1] for x in f])
413 if isinstance(args[1], int): # number of anchors
414 args[1] = [list(range(args[1] * 2))] * len(f)
415 if m is Segment:
416 args[3] = make_divisible(args[3] * gw, 8)
417 args.append(imgsz)
418 else:
419 c2 = ch[f]
420
421 tf_m = eval('TF' + m_str.replace('nn.', ''))
422 m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
423 else tf_m(*args, w=model.model[i]) # module
424
425 torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
426 t = str(m)[8:-2].replace('__main__.', '') # module type
427 np = sum(x.numel() for x in torch_m_.parameters()) # number params
428 m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
429 LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
430 save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
431 layers.append(m_)
432 ch.append(c2)
433 return keras.Sequential(layers), sorted(save)
434
435
436 class TFModel:
437 # TF YOLOv5 model
438 def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
439 super().__init__()
440 if isinstance(cfg, dict):
441 self.yaml = cfg # model dict
442 else: # is *.yaml
443 import yaml # for torch hub
444 self.yaml_file = Path(cfg).name
445 with open(cfg) as f:
446 self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
447
448 # Define model
449 if nc and nc != self.yaml['nc']:
450 LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
451 self.yaml['nc'] = nc # override yaml value
452 self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
453
454 def predict(self,
455 inputs,
456 tf_nms=False,
457 agnostic_nms=False,
458 topk_per_class=100,
459 topk_all=100,
460 iou_thres=0.45,
461 conf_thres=0.25):
462 y = [] # outputs
463 x = inputs
464 for m in self.model.layers:
465 if m.f != -1: # if not from previous layer
466 x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
467
468 x = m(x) # run
469 y.append(x if m.i in self.savelist else None) # save output
470
471 # Add TensorFlow NMS
472 if tf_nms:
473 boxes = self._xywh2xyxy(x[0][..., :4])
474 probs = x[0][:, :, 4:5]
475 classes = x[0][:, :, 5:]
476 scores = probs * classes
477 if agnostic_nms:
478 nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
479 else:
480 boxes = tf.expand_dims(boxes, 2)
481 nms = tf.image.combined_non_max_suppression(boxes,
482 scores,
483 topk_per_class,
484 topk_all,
485 iou_thres,
486 conf_thres,
487 clip_boxes=False)
488 return (nms,)
489 return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
490 # x = x[0] # [x(1,6300,85), ...] to x(6300,85)
491 # xywh = x[..., :4] # x(6300,4) boxes
492 # conf = x[..., 4:5] # x(6300,1) confidences
493 # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
494 # return tf.concat([conf, cls, xywh], 1)
495
496 @staticmethod
497 def _xywh2xyxy(xywh):
498 # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
499 x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
500 return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
501
502
503 class AgnosticNMS(keras.layers.Layer):
504 # TF Agnostic NMS
505 def call(self, input, topk_all, iou_thres, conf_thres):
506 # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
507 return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
508 input,
509 fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
510 name='agnostic_nms')
511
512 @staticmethod
513 def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
514 boxes, classes, scores = x
515 class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
516 scores_inp = tf.reduce_max(scores, -1)
517 selected_inds = tf.image.non_max_suppression(boxes,
518 scores_inp,
519 max_output_size=topk_all,
520 iou_threshold=iou_thres,
521 score_threshold=conf_thres)
522 selected_boxes = tf.gather(boxes, selected_inds)
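# Pad each per-image result below to a fixed length topk_all so tf.map_fn in
# call() can stack results across the batch: absent boxes become zeros, absent
# scores/classes become -1, and valid_detections counts the real entries.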
523 padded_boxes = tf.pad(selected_boxes,
524 paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
525 mode="CONSTANT",
526 constant_values=0.0)
527 selected_scores = tf.gather(scores_inp, selected_inds)
528 padded_scores = tf.pad(selected_scores,
529 paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
530 mode="CONSTANT",
531 constant_values=-1.0)
532 selected_classes = tf.gather(class_inds, selected_inds)
533 padded_classes = tf.pad(selected_classes,
534 paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
535 mode="CONSTANT",
536 constant_values=-1.0)
537 valid_detections = tf.shape(selected_inds)[0]
538 return padded_boxes, padded_scores, padded_classes, valid_detections
539
540
541 def activations(act=nn.SiLU):
542 # Returns TF activation from input PyTorch activation
543 if isinstance(act, nn.LeakyReLU):
544 return lambda x: keras.activations.relu(x, alpha=0.1)
545 elif isinstance(act, nn.Hardswish):
546 return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
547 elif isinstance(act, (nn.SiLU, SiLU)):
548 return lambda x: keras.activations.swish(x)
549 else:
550 raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}')
551
552
553 def representative_dataset_gen(dataset, ncalib=100):
554 # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
555 for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
556 im = np.transpose(img, [1, 2, 0])
557 im = np.expand_dims(im, axis=0).astype(np.float32)
558 im /= 255
559 yield [im]
560 if n >= ncalib:
561 break
562
563
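# A minimal sketch (not part of the original file) of wiring the generator
# above into full-integer TFLite export: the converter calls it to calibrate
# quantization ranges. `saved_model_dir` and the function name are
# illustrative; `dataset` is a LoadImages-style iterable as consumed by
# representative_dataset_gen().
def tflite_int8_sketch(saved_model_dir, dataset, ncalib=100):
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]  # enable quantization
    converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib)
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8  # quantized input tensor
    converter.inference_output_type = tf.uint8  # quantized output tensor
    return converter.convert()  # serialized .tflite flatbuffer (bytes)
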
564 def run(
565 weights=ROOT / 'yolov5s.pt', # weights path
566 imgsz=(640, 640), # inference size h,w
567 batch_size=1, # batch size
568 dynamic=False, # dynamic batch size
569 ):
570 # PyTorch model
571 im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
572 model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
573 _ = model(im) # inference
574 model.info()
575
576 # TensorFlow model
577 im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
578 tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
579 _ = tf_model.predict(im) # inference
580
581 # Keras model
582 im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
583 keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
584 keras_model.summary()
585
586 LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
587
588
589 def parse_opt():
590 parser = argparse.ArgumentParser()
591 parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
592 parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
593 parser.add_argument('--batch-size', type=int, default=1, help='batch size')
594 parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
595 opt = parser.parse_args()
596 opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
597 print_args(vars(opt))
598 return opt
599
600
601 def main(opt):
602 run(**vars(opt))
603
604
605 if __name__ == "__main__":
606 opt = parse_opt()
607 main(opt)
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 YOLO-specific modules
4
5 Usage:
6 $ python models/yolo.py --cfg yolov5s.yaml
7 """
8
9 import argparse
10 import contextlib
11 import os
12 import platform
13 import sys
14 from copy import deepcopy
15 from pathlib import Path
16
17 FILE = Path(__file__).resolve()
18 ROOT = FILE.parents[1] # YOLOv5 root directory
19 if str(ROOT) not in sys.path:
20 sys.path.append(str(ROOT)) # add ROOT to PATH
21 if platform.system() != 'Windows':
22 ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
23
24 from models.common import *
25 from models.experimental import *
26 from utils.autoanchor import check_anchor_order
27 from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
28 from utils.plots import feature_visualization
29 from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
30 time_sync)
31
32 try:
33 import thop # for FLOPs computation
34 except ImportError:
35 thop = None
36
37
38 class Detect(nn.Module):
39 # YOLOv5 Detect head for detection models
40 stride = None # strides computed during build
41 dynamic = False # force grid reconstruction
42 export = False # export mode
43
44 def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
45 super().__init__()
46 self.nc = nc # number of classes
47 self.no = nc + 5 # number of outputs per anchor
48 self.nl = len(anchors) # number of detection layers
49 self.na = len(anchors[0]) // 2 # number of anchors
50 self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
51 self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
52 self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
53 self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
54 self.inplace = inplace # use inplace ops (e.g. slice assignment)
55
56 def forward(self, x):
57 z = [] # inference output
58 for i in range(self.nl):
59 x[i] = self.m[i](x[i]) # conv
60 bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
61 x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
62
63 if not self.training: # inference
64 if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
65 self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
66
67 if isinstance(self, Segment): # (boxes + masks)
68 xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
69 xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
70 wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
71 y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
72 else: # Detect (boxes only)
73 xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
74 xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
75 wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
76 y = torch.cat((xy, wh, conf), 4)
77 z.append(y.view(bs, self.na * nx * ny, self.no))
78
79 return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
80
81 def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
82 d = self.anchors[i].device
83 t = self.anchors[i].dtype
84 shape = 1, self.na, ny, nx, 2 # grid shape
85 y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
86 yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x) # torch>=0.7 compatibility
87 grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
88 anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
89 return grid, anchor_grid
90
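# Note on the -0.5 offset above: forward() decodes xy = (2 * sigmoid(t) + grid) * stride,
# so with grid = cell_index - 0.5 each predicted center can land up to half a
# cell outside its own cell, avoiding sigmoid saturation at grid boundaries.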
91
92 class Segment(Detect):
93 # YOLOv5 Segment head for segmentation models
94 def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
95 super().__init__(nc, anchors, ch, inplace)
96 self.nm = nm # number of masks
97 self.npr = npr # number of protos
98 self.no = 5 + nc + self.nm # number of outputs per anchor
99 self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
100 self.proto = Proto(ch[0], self.npr, self.nm) # protos
101 self.detect = Detect.forward
102
103 def forward(self, x):
104 p = self.proto(x[0])
105 x = self.detect(self, x)
106 return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
107
108
109 class BaseModel(nn.Module):
110 # YOLOv5 base model
111 def forward(self, x, profile=False, visualize=False):
112 return self._forward_once(x, profile, visualize) # single-scale inference, train
113
114 def _forward_once(self, x, profile=False, visualize=False):
115 y, dt = [], [] # outputs
116 for m in self.model:
117 if m.f != -1: # if not from previous layer
118 x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
119 if profile:
120 self._profile_one_layer(m, x, dt)
121 x = m(x) # run
122 y.append(x if m.i in self.save else None) # save output
123 if visualize:
124 feature_visualization(x, m.type, m.i, save_dir=visualize)
125 return x
126
127 def _profile_one_layer(self, m, x, dt):
128 c = m == self.model[-1] # is final layer, copy input as inplace fix
129 o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
130 t = time_sync()
131 for _ in range(10):
132 m(x.copy() if c else x)
133 dt.append((time_sync() - t) * 100)
134 if m == self.model[0]:
135 LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
136 LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
137 if c:
138 LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
139
140 def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
141 LOGGER.info('Fusing layers... ')
142 for m in self.model.modules():
143 if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
144 m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
145 delattr(m, 'bn') # remove batchnorm
146 m.forward = m.forward_fuse # update forward
147 self.info()
148 return self
149
150 def info(self, verbose=False, img_size=640): # print model information
151 model_info(self, verbose, img_size)
152
153 def _apply(self, fn):
154 # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
155 self = super()._apply(fn)
156 m = self.model[-1] # Detect()
157 if isinstance(m, (Detect, Segment)):
158 m.stride = fn(m.stride)
159 m.grid = list(map(fn, m.grid))
160 if isinstance(m.anchor_grid, list):
161 m.anchor_grid = list(map(fn, m.anchor_grid))
162 return self
163
164
165 class DetectionModel(BaseModel):
166 # YOLOv5 detection model
167 def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
168 super().__init__()
169 if isinstance(cfg, dict):
170 self.yaml = cfg # model dict
171 else: # is *.yaml
172 import yaml # for torch hub
173 self.yaml_file = Path(cfg).name
174 with open(cfg, encoding='ascii', errors='ignore') as f:
175 self.yaml = yaml.safe_load(f) # model dict
176
177 # Define model
178 ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
179 if nc and nc != self.yaml['nc']:
180 LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
181 self.yaml['nc'] = nc # override yaml value
182 if anchors:
183 LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
184 self.yaml['anchors'] = round(anchors) # override yaml value
185 self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
186 self.names = [str(i) for i in range(self.yaml['nc'])] # default names
187 self.inplace = self.yaml.get('inplace', True)
188
189 # Build strides, anchors
190 m = self.model[-1] # Detect()
191 if isinstance(m, (Detect, Segment)):
192 s = 256 # 2x min stride
193 m.inplace = self.inplace
194 forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
195 m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
196 check_anchor_order(m)
197 m.anchors /= m.stride.view(-1, 1, 1)
198 self.stride = m.stride
199 self._initialize_biases() # only run once
200
201 # Init weights, biases
202 initialize_weights(self)
203 self.info()
204 LOGGER.info('')
205
206 def forward(self, x, augment=False, profile=False, visualize=False):
207 if augment:
208 return self._forward_augment(x) # augmented inference, None
209 return self._forward_once(x, profile, visualize) # single-scale inference, train
210
211 def _forward_augment(self, x):
212 img_size = x.shape[-2:] # height, width
213 s = [1, 0.83, 0.67] # scales
214 f = [None, 3, None] # flips (2-ud, 3-lr)
215 y = [] # outputs
216 for si, fi in zip(s, f):
217 xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
218 yi = self._forward_once(xi)[0] # forward
219 # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
220 yi = self._descale_pred(yi, fi, si, img_size)
221 y.append(yi)
222 y = self._clip_augmented(y) # clip augmented tails
223 return torch.cat(y, 1), None # augmented inference, train
224
225 def _descale_pred(self, p, flips, scale, img_size):
226 # de-scale predictions following augmented inference (inverse operation)
227 if self.inplace:
228 p[..., :4] /= scale # de-scale
229 if flips == 2:
230 p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
231 elif flips == 3:
232 p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
233 else:
234 x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
235 if flips == 2:
236 y = img_size[0] - y # de-flip ud
237 elif flips == 3:
238 x = img_size[1] - x # de-flip lr
239 p = torch.cat((x, y, wh, p[..., 4:]), -1)
240 return p
241
242 def _clip_augmented(self, y):
243 # Clip YOLOv5 augmented inference tails
244 nl = self.model[-1].nl # number of detection layers (P3-P5)
245 g = sum(4 ** x for x in range(nl)) # grid points
246 e = 1 # exclude layer count
247 i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
248 y[0] = y[0][:, :-i] # large
249 i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
250 y[-1] = y[-1][:, i:] # small
251 return y
252
253 def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
254 # https://arxiv.org/abs/1708.02002 section 3.3
255 # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
256 m = self.model[-1] # Detect() module
257 for mi, s in zip(m.m, m.stride): # from
258 b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
259 b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
260 b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls
261 mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
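# Here (640 / s) ** 2 is the number of grid cells at stride s, so the obj bias
# starts at log(8 / cells), i.e. a prior of roughly 8 objects per 640x640 image;
# the cls bias starts near log(0.6 / (nc - 1)) unless class frequencies cf are given.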
262
263
264 Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
265
266
267 class SegmentationModel(DetectionModel):
268 # YOLOv5 segmentation model
269 def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
270 super().__init__(cfg, ch, nc, anchors)
271
272
273 class ClassificationModel(BaseModel):
274 # YOLOv5 classification model
275 def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
276 super().__init__()
277 self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
278
279 def _from_detection_model(self, model, nc=1000, cutoff=10):
280 # Create a YOLOv5 classification model from a YOLOv5 detection model
281 if isinstance(model, DetectMultiBackend):
282 model = model.model # unwrap DetectMultiBackend
283 model.model = model.model[:cutoff] # backbone
284 m = model.model[-1] # last layer
285 ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module
286 c = Classify(ch, nc) # Classify()
287 c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type
288 model.model[-1] = c # replace
289 self.model = model.model
290 self.stride = model.stride
291 self.save = []
292 self.nc = nc
293
294 def _from_yaml(self, cfg):
295 # Create a YOLOv5 classification model from a *.yaml file
296 self.model = None
297
298
299 def parse_model(d, ch): # model_dict, input_channels(3)
300 # Parse a YOLOv5 model.yaml dictionary
301 LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
302 anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
303 if act:
304 Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
305 LOGGER.info(f"{colorstr('activation:')} {act}") # print
306 na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
307 no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
308
309 layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
310 for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
311 m = eval(m) if isinstance(m, str) else m # eval strings
312 for j, a in enumerate(args):
313 with contextlib.suppress(NameError):
314 args[j] = eval(a) if isinstance(a, str) else a # eval strings
315
316 n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
317 if m in {
318 Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
319 BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
320 c1, c2 = ch[f], args[0]
321 if c2 != no: # if not output
322 c2 = make_divisible(c2 * gw, 8)
323
324 args = [c1, c2, *args[1:]]
325 if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
326 args.insert(2, n) # number of repeats
327 n = 1
328 elif m is nn.BatchNorm2d:
329 args = [ch[f]]
330 elif m is Concat:
331 c2 = sum(ch[x] for x in f)
332 # TODO: channel, gw, gd
333 elif m in {Detect, Segment}:
334 args.append([ch[x] for x in f])
335 if isinstance(args[1], int): # number of anchors
336 args[1] = [list(range(args[1] * 2))] * len(f)
337 if m is Segment:
338 args[3] = make_divisible(args[3] * gw, 8)
339 elif m is Contract:
340 c2 = ch[f] * args[0] ** 2
341 elif m is Expand:
342 c2 = ch[f] // args[0] ** 2
343 else:
344 c2 = ch[f]
345
346 m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
347 t = str(m)[8:-2].replace('__main__.', '') # module type
348 np = sum(x.numel() for x in m_.parameters()) # number params
349 m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
350 LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
351 save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
352 layers.append(m_)
353 if i == 0:
354 ch = []
355 ch.append(c2)
356 return nn.Sequential(*layers), sorted(save)
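# Bookkeeping note: ch is reset to [] at layer 0, so ch[x] indexes layer
# outputs directly; the TF port of parse_model in models/tf.py keeps the
# initial input-channel entry instead, which is why it indexes ch[x + 1]
# for the same 'from' values.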
357
358
359 if __name__ == '__main__':
360 parser = argparse.ArgumentParser()
361 parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
362 parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
363 parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
364 parser.add_argument('--profile', action='store_true', help='profile model speed')
365 parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
366 parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
367 opt = parser.parse_args()
368 opt.cfg = check_yaml(opt.cfg) # check YAML
369 print_args(vars(opt))
370 device = select_device(opt.device)
371
372 # Create model
373 im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
374 model = Model(opt.cfg).to(device)
375
376 # Options
377 if opt.line_profile: # profile layer by layer
378 model(im, profile=True)
379
380 elif opt.profile: # profile forward-backward
381 results = profile(input=im, ops=[model], n=3)
382
383 elif opt.test: # test all models
384 for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
385 try:
386 _ = Model(cfg)
387 except Exception as e:
388 print(f'Error in {cfg}: {e}')
389
390 else: # report fused model summary
391 model.fuse()
1 from easydict import EasyDict as edict
2
3 config = edict(
4 # weights='/home/situ/qfs/invoice_tamper/09_project/project/yolov5_inference/runs/exp2/weights/best.pt', # model path or triton URL
5 weights='runs/train/exp/weights/best.onnx', # model path or triton URL
6 data='data/VOC.yaml', # dataset.yaml path
7 imgsz=(640, 640), # inference size (height, width)
8 conf_thres=0.2, # confidence threshold
9 iou_thres=0.45, # NMS IOU threshold
10 max_det=1000, # maximum detections per image
11 device='' # cuda device, i.e. 0 or 0,1,2,3 or cpu
12 )
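# Usage sketch, mirroring pipeline.py:
#   from models.yolov5_config import config
#   from inference import Yolov5
#   detector = Yolov5(config)
#   results = detector.detect(image)  # image: BGR ndarray from cv2.imread()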
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.0 # model depth multiple
6 width_multiple: 1.0 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.67 # model depth multiple
6 width_multiple: 0.75 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.25 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 0.33 # model depth multiple
6 width_multiple: 0.50 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
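# Note: the n/s/m/l/x variants in this repo share this exact layer layout and
# differ only in the two multiples above. At this scale (depth 0.33, width 0.50,
# i.e. the "s" model), a C3 stage listed with number=9 repeats
# max(round(9 * 0.33), 1) = 3 times, and a 1024-channel layer is reduced to
# make_divisible(1024 * 0.50, 8) = 512 channels by parse_model().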
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
3 # Parameters
4 nc: 80 # number of classes
5 depth_multiple: 1.33 # model depth multiple
6 width_multiple: 1.25 # layer channel multiple
7 anchors:
8 - [10,13, 16,30, 33,23] # P3/8
9 - [30,61, 62,45, 59,119] # P4/16
10 - [116,90, 156,198, 373,326] # P5/32
11
12 # YOLOv5 v6.0 backbone
13 backbone:
14 # [from, number, module, args]
15 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 [-1, 3, C3, [128]],
18 [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 [-1, 6, C3, [256]],
20 [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 [-1, 9, C3, [512]],
22 [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 [-1, 3, C3, [1024]],
24 [-1, 1, SPPF, [1024, 5]], # 9
25 ]
26
27 # YOLOv5 v6.0 head
28 head:
29 [[-1, 1, Conv, [512, 1, 1]],
30 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 [-1, 3, C3, [512, False]], # 13
33
34 [-1, 1, Conv, [256, 1, 1]],
35 [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
39 [-1, 1, Conv, [256, 3, 2]],
40 [[-1, 14], 1, Concat, [1]], # cat head P4
41 [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
43 [-1, 1, Conv, [512, 3, 2]],
44 [[-1, 10], 1, Concat, [1]], # cat head P5
45 [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
47 [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 ]
1 import time
2 import cv2
3 from bank_ocr_inference import bill_ocr, extract_bank_info
4 from inference import Yolov5
5 from models.yolov5_config import config
6
7
8 def enlarge_position(box):
9 x1, y1, x2, y2 = box
10 w, h = abs(x2 - x1), abs(y2 - y1)
11 y1, y2 = max(y1 - h // 3, 0), y2 + h // 3
12 x1, x2 = max(x1 - w // 8, 0), x2 + w // 8
13 return [x1, y1, x2, y2]
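# Example: box [100, 50, 200, 80] (w=100, h=30) is enlarged to [88, 40, 212, 90],
# so the crop handed to the YOLOv5 detector keeps some context around the
# OCR-located field: a third of the height above/below, an eighth of the width
# left/right, clamped at the image origin.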
14
15
16 def tamper_detect(image):
17 st = time.time()
18 ocr_results = bill_ocr(image)
19 et1 = time.time()
20 info_results = extract_bank_info(ocr_results)
21 et2 = time.time()
22 tamper_results = []
23 if len(info_results) != 0:
24 for info_result in info_results:
25 box = [info_result[1][0], info_result[1][1], info_result[1][4], info_result[1][5]]
26 x1, y1, x2, y2 = enlarge_position(box)
27 # x1, y1, x2, y2 = box
28 info_image = image[y1:y2, x1:x2, :]
29 # cv2.imshow('info_image', info_image)
30 results = detector.detect(info_image)
31 print(results)
32 if len(results['result']) != 0:
33 for res in results['result']:
34 left = int(res['left'])
35 top = int(res['top'])
36 width = int(res['width'])
37 height = int(res['height'])
38 absolute_position = [x1 + left, y1 + top, x1 + left + width, y1 + top + height]
39 tamper_results.append(absolute_position)
40 print(tamper_results)
41 et3 = time.time()
42
43 print(f'all:{et3 - st} ocr:{et1 - st} extract:{et2 - et1} yolo:{et3 - et2}')
44 for i in tamper_results:
45 cv2.rectangle(image, tuple(i[:2]), tuple(i[2:]), (0, 0, 255), 2)
46 cv2.imshow('info', image)
47 cv2.waitKey(0)
48
49
50 if __name__ == '__main__':
51 detector = Yolov5(config)
52 image = cv2.imread(
53 "/home/situ/下载/农行/_1594626795.5104141page_18_img_0-.jpg")
54 tamper_detect(image)
1 import os
2
3 import cv2
4 import numpy as np
5 import pandas as pd
6 import tqdm
7
8
9 def get_source_image_det(crop_position, predict_positions):
10 result = []
11 x1, y1, x2, y2 = crop_position
12 for p in predict_positions:
13 px1, py1, px2, py2, score = p
14 w, h = px2 - px1, py2 - py1
15 result.append([x1 + px1, y1 + py1, x1 + px1 + w, y1 + py1 + h, score])
16 return result
17
18
19 def decode_label(image, label_path):
20 data = open(label_path).readlines()
21 h, w, c = image.shape
22 result = []
23 for d in data:
24 d = [float(i) for i in d.strip().split(' ')]
25 cls, cx, cy, cw, ch, score = d
26 cx, cy, cw, ch = cx * w, cy * h, cw * w, ch * h
27 result.append([int(cx - cw // 2), int(cy - ch // 2), int(cx + cw // 2), int(cy + ch // 2), score])
28 return result
29
30
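# Usage sketch (paths illustrative): decode_label() turns one YOLO-format
# prediction file ("cls cx cy cw ch score" per line, normalized to the crop)
# into pixel [x1, y1, x2, y2, score] boxes, and get_source_image_det() shifts
# them back into full-image coordinates:
#   crop = img[y1:y2, x1:x2]
#   boxes = decode_label(crop, 'runs/detect/exp/labels/example.txt')
#   full_boxes = get_source_image_det((x1, y1, x2, y2), boxes)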
31 if __name__ == '__main__':
32 source_image_path = '/data/situ_invoice_bill_data/new_data/qfs_bank_bill_data/gongshang/authentic/images/val'
33 val_image_path = '/home/situ/qfs/invoice_tamper/09_project/project/tamper_det/data/images/crop_img'
34 predict_label_path = '/home/situ/qfs/invoice_tamper/09_project/project/tamper_det/runs/detect/exp/labels'
35 crop_csv_path = '/data/situ_invoice_bill_data/new_data/qfs_bank_bill_data/gongshang/croped_merge.csv'
36 predict_labels = os.listdir(predict_label_path)
37 source_images = os.listdir(source_image_path)
38 data = pd.read_csv(crop_csv_path)
39 img_name = data.loc[:, 'img_name'].tolist()
40 crop_position1 = data.loc[:, 'name_crop_coord'].tolist()
41 crop_position2 = data.loc[:, 'number_crop_coord'].tolist()
42 cc = '/data/situ_invoice_bill_data/new_data/qfs_bank_bill_data/gongshang/tampered/images/val/ps3'
43 for im in os.listdir(cc):
44 print(im)
45 img = cv2.imread(os.path.join(cc, im))
46 img_ = img.copy()
47 id = img_name.index(im)
48 name_crop_position = [int(i) for i in crop_position1[id].split(',')]
49 number_crop_position = [int(i) for i in crop_position2[id].split(',')]
50 nx1, ny1, nx2, ny2 = name_crop_position
51 nux1, nuy1, nux2, nuy2 = number_crop_position
52 if im[:-4] + '_hname.txt' in predict_labels:
53
54 h, w, c = img[ny1:ny2, nx1:nx2, :].shape
55 data = open(os.path.join(predict_label_path, im[:-4] + '_hname.txt')).readlines()
56 for d in data:
57 cls, cx, cy, cw, ch, score = [float(i) for i in d.strip().split(' ')]
58 cx, cy, cw, ch = int(cx * w), int(cy * h), int(cw * w), int(ch * h)
59 cx1, cy1 = cx - cw // 2, cy - ch // 2
60 x1, y1, x2, y2 = nx1 + cx1, ny1 + cy1, nx1 + cx1 + cw, ny1 + cy1 + ch
61 cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
62 cv2.putText(img, f'tampered:{score}', (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
63 if im[:-4] + '_hnumber.txt' in predict_labels:
64 h, w, c = img[nuy1:nuy2, nux1:nux2, :].shape
65 data = open(os.path.join(predict_label_path, im[:-4] + '_hnumber.txt')).readlines()
66 for d in data:
67 cls, cx, cy, cw, ch, score = [float(i) for i in d.strip().split(' ')]
68 cx, cy, cw, ch = int(cx * w), int(cy * h), int(cw * w), int(ch * h)
69 cx1, cy1 = cx - cw // 2, cy - ch // 2
70 x1, y1, x2, y2 = nux1 + cx1, nuy1 + cy1, nux1 + cx1 + cw, nuy1 + cy1 + ch
71 cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
72 cv2.putText(img, f'tampered:{score}', (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
73 result = np.vstack((img_, img))
74 cv2.imwrite(f'z/{im}', result)
1 # YOLOv5 requirements
2 # Usage: pip install -r requirements.txt
3
4 # Base ----------------------------------------
5 matplotlib>=3.2.2
6 numpy>=1.18.5
7 opencv-python>=4.1.1
8 Pillow>=7.1.2
9 PyYAML>=5.3.1
10 requests>=2.23.0
11 scipy>=1.4.1
12 torch>=1.7.0 # see https://pytorch.org/get-started/locally/ (recommended)
13 torchvision>=0.8.1
14 tqdm>=4.64.0
15 # protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012
16
17 # Logging -------------------------------------
18 tensorboard>=2.4.1
19 # wandb
20 # clearml
21
22 # Plotting ------------------------------------
23 pandas>=1.1.4
24 seaborn>=0.11.0
25
26 # Export --------------------------------------
27 # coremltools>=6.0 # CoreML export
28 # onnx>=1.9.0 # ONNX export
29 # onnx-simplifier>=0.4.1 # ONNX simplifier
30 # nvidia-pyindex # TensorRT export
31 # nvidia-tensorrt # TensorRT export
32 # scikit-learn<=1.1.2 # CoreML quantization
33 # tensorflow>=2.4.1 # TF exports (-cpu, -aarch64, -macos)
34 # tensorflowjs>=3.9.0 # TF.js export
35 # openvino-dev # OpenVINO export
36
37 # Deploy --------------------------------------
38 # tritonclient[all]~=2.24.0
39
40 # Extras --------------------------------------
41 ipython # interactive notebook
42 psutil # system utilization
43 thop>=0.1.1 # FLOPs computation
44 # mss # screenshots
45 # albumentations>=1.0.3
46 # pycocotools>=2.0 # COCO mAP
47 # roboflow
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Train a YOLOv5 model on a custom dataset.
4 Models and datasets download automatically from the latest YOLOv5 release.
5
6 Usage - Single-GPU training:
7 $ python train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (recommended)
8 $ python train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch
9
10 Usage - Multi-GPU DDP training:
11 $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 train.py --data coco128.yaml --weights yolov5s.pt --img 640 --device 0,1,2,3
12
13 Models: https://github.com/ultralytics/yolov5/tree/master/models
14 Datasets: https://github.com/ultralytics/yolov5/tree/master/data
15 Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
16 """
17
18 import argparse
19 import math
20 import os
21 import random
22 import sys
23 import time
24 from copy import deepcopy
25 from datetime import datetime
26 from pathlib import Path
27
28 import numpy as np
29 import torch
30 import torch.distributed as dist
31 import torch.nn as nn
32 import yaml
33 from torch.optim import lr_scheduler
34 from tqdm import tqdm
35
36 FILE = Path(__file__).resolve()
37 ROOT = FILE.parents[0] # YOLOv5 root directory
38 if str(ROOT) not in sys.path:
39 sys.path.append(str(ROOT)) # add ROOT to PATH
40 ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
41
42 import val as validate # for end-of-epoch mAP
43 from models.experimental import attempt_load
44 from models.yolo import Model
45 from utils.autoanchor import check_anchors
46 from utils.autobatch import check_train_batch_size
47 from utils.callbacks import Callbacks
48 from utils.dataloaders import create_dataloader
49 from utils.downloads import attempt_download, is_url
50 from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size,
51 check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path,
52 init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods,
53 one_cycle, print_args, print_mutation, strip_optimizer, yaml_save)
54 from utils.loggers import Loggers
55 from utils.loggers.comet.comet_utils import check_comet_resume
56 from utils.loggers.wandb.wandb_utils import check_wandb_resume
57 from utils.loss import ComputeLoss
58 from utils.metrics import fitness
59 from utils.plots import plot_evolve
60 from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer,
61 smart_resume, torch_distributed_zero_first)
62
63 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
64 RANK = int(os.getenv('RANK', -1))
65 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
66
67
68 def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary
69 save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
70 Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
71 opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
72 callbacks.run('on_pretrain_routine_start')
73
74 # Directories
75 w = save_dir / 'weights' # weights dir
76 (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
77 last, best = w / 'last.pt', w / 'best.pt'
78
79 # Hyperparameters
80 if isinstance(hyp, str):
81 with open(hyp, errors='ignore') as f:
82 hyp = yaml.safe_load(f) # load hyps dict
83 LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
84 opt.hyp = hyp.copy() # for saving hyps to checkpoints
85
86 # Save run settings
87 if not evolve:
88 yaml_save(save_dir / 'hyp.yaml', hyp)
89 yaml_save(save_dir / 'opt.yaml', vars(opt))
90
91 # Loggers
92 data_dict = None
93 if RANK in {-1, 0}:
94 loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance
95
96 # Register actions
97 for k in methods(loggers):
98 callbacks.register_action(k, callback=getattr(loggers, k))
99
100 # Process custom dataset artifact link
101 data_dict = loggers.remote_dataset
102 if resume: # If resuming runs from remote artifact
103 weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
104
105 # Config
106 plots = not evolve and not opt.noplots # create plots
107 cuda = device.type != 'cpu'
108 init_seeds(opt.seed + 1 + RANK, deterministic=True)
109 with torch_distributed_zero_first(LOCAL_RANK):
110 data_dict = data_dict or check_dataset(data) # check if None
111 train_path, val_path = data_dict['train'], data_dict['val']
112 nc = 1 if single_cls else int(data_dict['nc']) # number of classes
113 names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
114 is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset
115
116 # Model
117 check_suffix(weights, '.pt') # check weights
118 pretrained = weights.endswith('.pt')
119 if pretrained:
120 with torch_distributed_zero_first(LOCAL_RANK):
121 weights = attempt_download(weights) # download if not found locally
122 ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak
123 model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
124 exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys
125 csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
126 csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
127 model.load_state_dict(csd, strict=False) # load
128 LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report
129 else:
130 model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
131 amp = check_amp(model) # check AMP
132
133 # Freeze
134 freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
135 for k, v in model.named_parameters():
136 v.requires_grad = True # train all layers
137 # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
138 if any(x in k for x in freeze):
139 LOGGER.info(f'freezing {k}')
140 v.requires_grad = False
141
142 # Image size
143 gs = max(int(model.stride.max()), 32) # grid size (max stride)
144 imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
145
146 # Batch size
147 if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size
148 batch_size = check_train_batch_size(model, imgsz, amp)
149 loggers.on_params_update({"batch_size": batch_size})
150
151 # Optimizer
152 nbs = 64 # nominal batch size
153 accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
154 hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
155 optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])
156
157 # Scheduler
158 if opt.cos_lr:
159 lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf']
160 else:
161 lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear
162 scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs)
163
164 # EMA
165 ema = ModelEMA(model) if RANK in {-1, 0} else None
166
167 # Resume
168 best_fitness, start_epoch = 0.0, 0
169 if pretrained:
170 if resume:
171 best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
172 del ckpt, csd
173
174 # DP mode
175 if cuda and RANK == -1 and torch.cuda.device_count() > 1:
176 LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
177 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
178 model = torch.nn.DataParallel(model)
179
180 # SyncBatchNorm
181 if opt.sync_bn and cuda and RANK != -1:
182 model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
183 LOGGER.info('Using SyncBatchNorm()')
184
185 # Trainloader
186 train_loader, dataset = create_dataloader(train_path,
187 imgsz,
188 batch_size // WORLD_SIZE,
189 gs,
190 single_cls,
191 hyp=hyp,
192 augment=True,
193 cache=None if opt.cache == 'val' else opt.cache,
194 rect=opt.rect,
195 rank=LOCAL_RANK,
196 workers=workers,
197 image_weights=opt.image_weights,
198 quad=opt.quad,
199 prefix=colorstr('train: '),
200 shuffle=True)
201 labels = np.concatenate(dataset.labels, 0)
202 mlc = int(labels[:, 0].max()) # max label class
203 assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
204
205 # Process 0
206 if RANK in {-1, 0}:
207 val_loader = create_dataloader(val_path,
208 imgsz,
209 batch_size // WORLD_SIZE * 2,
210 gs,
211 single_cls,
212 hyp=hyp,
213 cache=None if noval else opt.cache,
214 rect=True,
215 rank=-1,
216 workers=workers * 2,
217 pad=0.5,
218 prefix=colorstr('val: '))[0]
219
220 if not resume:
221 if not opt.noautoanchor:
222 check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # run AutoAnchor
223 model.half().float() # pre-reduce anchor precision
224
225 callbacks.run('on_pretrain_routine_end', labels, names)
226
227 # DDP mode
228 if cuda and RANK != -1:
229 model = smart_DDP(model)
230
231 # Model attributes
232 nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps)
233 hyp['box'] *= 3 / nl # scale to layers
234 hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers
235 hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
236 hyp['label_smoothing'] = opt.label_smoothing
237 model.nc = nc # attach number of classes to model
238 model.hyp = hyp # attach hyperparameters to model
239 model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
240 model.names = names
241
242 # Start training
243 t0 = time.time()
244 nb = len(train_loader) # number of batches
245 nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations)
246 # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
247 last_opt_step = -1
248 maps = np.zeros(nc) # mAP per class
249 results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
250 scheduler.last_epoch = start_epoch - 1 # do not move
251 scaler = torch.cuda.amp.GradScaler(enabled=amp)
252 stopper, stop = EarlyStopping(patience=opt.patience), False
253 compute_loss = ComputeLoss(model) # init loss class
254 callbacks.run('on_train_start')
255 LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
256 f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
257 f"Logging results to {colorstr('bold', save_dir)}\n"
258 f'Starting training for {epochs} epochs...')
259 for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
260 callbacks.run('on_train_epoch_start')
261 model.train()
262
263 # Update image weights (optional, single-GPU only)
264 if opt.image_weights:
265 cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
266 iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
267 dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
268
269 # Update mosaic border (optional)
270 # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
271 # dataset.mosaic_border = [b - imgsz, -b] # height, width borders
272
273 mloss = torch.zeros(3, device=device) # mean losses
274 if RANK != -1:
275 train_loader.sampler.set_epoch(epoch)
276 pbar = enumerate(train_loader)
277 LOGGER.info(('\n' + '%11s' * 7) % ('Epoch', 'GPU_mem', 'box_loss', 'obj_loss', 'cls_loss', 'Instances', 'Size'))
278 if RANK in {-1, 0}:
279 pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
280 optimizer.zero_grad()
281 for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
282 callbacks.run('on_train_batch_start')
283 ni = i + nb * epoch # number integrated batches (since train start)
284 imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0
285
286 # Warmup
287 if ni <= nw:
288 xi = [0, nw] # x interp
289 # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
290 accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
291 for j, x in enumerate(optimizer.param_groups):
292 # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
293 x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)])
294 if 'momentum' in x:
295 x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
296
297 # Multi-scale
298 if opt.multi_scale:
299 sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + gs)) // gs * gs # size (randrange requires ints)
300 sf = sz / max(imgs.shape[2:]) # scale factor
301 if sf != 1:
302 ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
303 imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
304
305 # Forward
306 with torch.cuda.amp.autocast(amp):
307 pred = model(imgs) # forward
308 loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size
309 if RANK != -1:
310 loss *= WORLD_SIZE # gradient averaged between devices in DDP mode
311 if opt.quad:
312 loss *= 4.
313
314 # Backward
315 scaler.scale(loss).backward()
316
317 # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
318 if ni - last_opt_step >= accumulate:
319 scaler.unscale_(optimizer) # unscale gradients
320 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients
321 scaler.step(optimizer) # optimizer.step
322 scaler.update()
323 optimizer.zero_grad()
324 if ema:
325 ema.update(model)
326 last_opt_step = ni
327
328 # Log
329 if RANK in {-1, 0}:
330 mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
331 mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB)
332 pbar.set_description(('%11s' * 2 + '%11.4g' * 5) %
333 (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
334 callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths, list(mloss))
335 if callbacks.stop_training:
336 return
337 # end batch ------------------------------------------------------------------------------------------------
338
339 # Scheduler
340 lr = [x['lr'] for x in optimizer.param_groups] # for loggers
341 scheduler.step()
342
343 if RANK in {-1, 0}:
344 # mAP
345 callbacks.run('on_train_epoch_end', epoch=epoch)
346 ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
347 final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
348 if not noval or final_epoch: # Calculate mAP
349 results, maps, _ = validate.run(data_dict,
350 batch_size=batch_size // WORLD_SIZE * 2,
351 imgsz=imgsz,
352 half=amp,
353 model=ema.ema,
354 single_cls=single_cls,
355 dataloader=val_loader,
356 save_dir=save_dir,
357 plots=False,
358 callbacks=callbacks,
359 compute_loss=compute_loss)
360
361 # Update best mAP
362 fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
363 stop = stopper(epoch=epoch, fitness=fi) # early stop check
364 if fi > best_fitness:
365 best_fitness = fi
366 log_vals = list(mloss) + list(results) + lr
367 callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
368
369 # Save model
370 if (not nosave) or (final_epoch and not evolve): # if save
371 ckpt = {
372 'epoch': epoch,
373 'best_fitness': best_fitness,
374 'model': deepcopy(de_parallel(model)).half(),
375 'ema': deepcopy(ema.ema).half(),
376 'updates': ema.updates,
377 'optimizer': optimizer.state_dict(),
378 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
379 'opt': vars(opt),
380 'date': datetime.now().isoformat()}
381
382 # Save last, best and delete
383 torch.save(ckpt, last)
384 if best_fitness == fi:
385 torch.save(ckpt, best)
386 if opt.save_period > 0 and epoch % opt.save_period == 0:
387 torch.save(ckpt, w / f'epoch{epoch}.pt')
388 del ckpt
389 callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
390
391 # EarlyStopping
392 if RANK != -1: # if DDP training
393 broadcast_list = [stop if RANK == 0 else None]
394 dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks
395 if RANK != 0:
396 stop = broadcast_list[0]
397 if stop:
398 break # must break all DDP ranks
399
400 # end epoch ----------------------------------------------------------------------------------------------------
401 # end training -----------------------------------------------------------------------------------------------------
402 if RANK in {-1, 0}:
403 LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
404 for f in last, best:
405 if f.exists():
406 strip_optimizer(f) # strip optimizers
407 if f is best:
408 LOGGER.info(f'\nValidating {f}...')
409 results, _, _ = validate.run(
410 data_dict,
411 batch_size=batch_size // WORLD_SIZE * 2,
412 imgsz=imgsz,
413 model=attempt_load(f, device).half(),
414 iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65
415 single_cls=single_cls,
416 dataloader=val_loader,
417 save_dir=save_dir,
418 save_json=is_coco,
419 verbose=True,
420 plots=plots,
421 callbacks=callbacks,
422 compute_loss=compute_loss) # val best model with plots
423 if is_coco:
424 callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
425
426 callbacks.run('on_train_end', last, best, epoch, results)
427
428 torch.cuda.empty_cache()
429 return results
430
431
432 def parse_opt(known=False):
433 parser = argparse.ArgumentParser()
434 parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
435 parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
436 parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
437 parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
438 parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
439 parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
440 parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
441 parser.add_argument('--rect', action='store_true', help='rectangular training')
442 parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
443 parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
444 parser.add_argument('--noval', action='store_true', help='only validate final epoch')
445 parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
446 parser.add_argument('--noplots', action='store_true', help='save no plot files')
447 parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
448 parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
449 parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
450 parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
451 parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
452 parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
453 parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
454 parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
455 parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
456 parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
457 parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
458 parser.add_argument('--name', default='exp', help='save to project/name')
459 parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
460 parser.add_argument('--quad', action='store_true', help='quad dataloader')
461 parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
462 parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
463 parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
464 parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
465 parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
466 parser.add_argument('--seed', type=int, default=0, help='Global training seed')
467 parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
468
469 # Logger arguments
470 parser.add_argument('--entity', default=None, help='Entity')
471 parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='Upload data, "val" option')
472 parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval')
473 parser.add_argument('--artifact_alias', type=str, default='latest', help='Version of dataset artifact to use')
474
475 return parser.parse_known_args()[0] if known else parser.parse_args()
476
477
478 def main(opt, callbacks=Callbacks()):
479 # Checks
480 if RANK in {-1, 0}:
481 print_args(vars(opt))
482 check_git_status()
483 check_requirements()
484
485 # Resume (from specified or most recent last.pt)
486 if opt.resume and not check_wandb_resume(opt) and not check_comet_resume(opt) and not opt.evolve:
487 last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
488 opt_yaml = last.parent.parent / 'opt.yaml' # train options yaml
489 opt_data = opt.data # original dataset
490 if opt_yaml.is_file():
491 with open(opt_yaml, errors='ignore') as f:
492 d = yaml.safe_load(f)
493 else:
494 d = torch.load(last, map_location='cpu')['opt']
495 opt = argparse.Namespace(**d) # replace
496 opt.cfg, opt.weights, opt.resume = '', str(last), True # reinstate
497 if is_url(opt_data):
498 opt.data = check_file(opt_data) # avoid HUB resume auth timeout
499 else:
500 opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
501 check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks
502 assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
503 if opt.evolve:
504 if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve
505 opt.project = str(ROOT / 'runs/evolve')
506 opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume
507 if opt.name == 'cfg':
508 opt.name = Path(opt.cfg).stem # use model.yaml as name
509 opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
510
511 # DDP mode
512 device = select_device(opt.device, batch_size=opt.batch_size)
513 if LOCAL_RANK != -1:
514 msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
515 assert not opt.image_weights, f'--image-weights {msg}'
516 assert not opt.evolve, f'--evolve {msg}'
517 assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
518 assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
519 assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
520 torch.cuda.set_device(LOCAL_RANK)
521 device = torch.device('cuda', LOCAL_RANK)
522 dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
523
524 # Train
525 if not opt.evolve:
526 train(opt.hyp, opt, device, callbacks)
527
528 # Evolve hyperparameters (optional)
529 else:
530 # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
531 meta = {
532 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
533 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
534 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
535 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
536 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
537 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum
538 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr
539 'box': (1, 0.02, 0.2), # box loss gain
540 'cls': (1, 0.2, 4.0), # cls loss gain
541 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
542 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
543 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
544 'iou_t': (0, 0.1, 0.7), # IoU training threshold
545 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
546 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
547 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
548 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
549 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
550 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
551 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
552 'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
553 'scale': (1, 0.0, 0.9), # image scale (+/- gain)
554 'shear': (1, 0.0, 10.0), # image shear (+/- deg)
555 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
556 'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
557 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
558 'mosaic': (1, 0.0, 1.0), # image mosaic (probability)
559 'mixup': (1, 0.0, 1.0), # image mixup (probability)
560 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability)
561
562 with open(opt.hyp, errors='ignore') as f:
563 hyp = yaml.safe_load(f) # load hyps dict
564 if 'anchors' not in hyp: # anchors commented in hyp.yaml
565 hyp['anchors'] = 3
566 if opt.noautoanchor:
567 del hyp['anchors'], meta['anchors']
568 opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch
569 # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
570 evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
571 if opt.bucket:
572 os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists
573
574 for _ in range(opt.evolve): # generations to evolve
575 if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate
576 # Select parent(s)
577 parent = 'single' # parent selection method: 'single' or 'weighted'
578 x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
579 n = min(5, len(x)) # number of previous results to consider
580 x = x[np.argsort(-fitness(x))][:n] # top n mutations
581 w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0)
582 if parent == 'single' or len(x) == 1:
583 # x = x[random.randint(0, n - 1)] # random selection
584 x = x[random.choices(range(n), weights=w)[0]] # weighted selection
585 elif parent == 'weighted':
586 x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
587
588 # Mutate
589 mp, s = 0.8, 0.2 # mutation probability, sigma
590 npr = np.random
591 npr.seed(int(time.time()))
592 g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1
593 ng = len(meta)
594 v = np.ones(ng)
595 while all(v == 1): # mutate until a change occurs (prevent duplicates)
596 v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
597 for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
598 hyp[k] = float(x[i + 7] * v[i]) # mutate
599
600 # Constrain to limits
601 for k, v in meta.items():
602 hyp[k] = max(hyp[k], v[1]) # lower limit
603 hyp[k] = min(hyp[k], v[2]) # upper limit
604 hyp[k] = round(hyp[k], 5) # significant digits
605
606 # Train mutation
607 results = train(hyp.copy(), opt, device, callbacks)
608 callbacks = Callbacks()
609 # Write mutation results
610 keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss',
611 'val/obj_loss', 'val/cls_loss')
612 print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)
613
614 # Plot results
615 plot_evolve(evolve_csv)
616 LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
617 f"Results saved to {colorstr('bold', save_dir)}\n"
618 f'Usage example: $ python train.py --hyp {evolve_yaml}')
619
620
621 def run(**kwargs):
622 # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
623 opt = parse_opt(True)
624 for k, v in kwargs.items():
625 setattr(opt, k, v)
626 main(opt)
627 return opt
628
629
630 if __name__ == "__main__":
631 opt = parse_opt()
632 main(opt)
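Beyond the CLI entry point above, run() allows programmatic use. A minimal sketch, assuming a dataset YAML reachable from the working directory (keyword names mirror the parse_opt() destinations, with hyphens replaced by underscores):

import train

# Any parse_opt() argument can be overridden as a keyword.
opt = train.run(data='coco128.yaml', weights='yolov5s.pt', imgsz=640, epochs=3, batch_size=16)
print(opt.save_dir)  # results directory, e.g. runs/train/exp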
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 utils/initialization
4 """
5
6 import contextlib
7 import platform
8 import threading
9
10
11 def emojis(str=''):
12 # Return platform-dependent emoji-safe version of string
13 return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
14
15
16 class TryExcept(contextlib.ContextDecorator):
17 # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
18 def __init__(self, msg=''):
19 self.msg = msg
20
21 def __enter__(self):
22 pass
23
24 def __exit__(self, exc_type, value, traceback):
25 if value:
26 print(emojis(f'{self.msg}{value}'))
27 return True
28
29
30 def threaded(func):
31 # Multi-threads a target function and returns thread. Usage: @threaded decorator
32 def wrapper(*args, **kwargs):
33 thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
34 thread.start()
35 return thread
36
37 return wrapper
38
39
40 def notebook_init(verbose=True):
41 # Check system software and hardware
42 print('Checking setup...')
43
44 import os
45 import shutil
46
47 from utils.general import check_font, check_requirements, is_colab
48 from utils.torch_utils import select_device # imports
49
50 check_requirements(('psutil', 'IPython'))
51 check_font()
52
53 import psutil
54 from IPython import display # to display images and clear console output
55
56 if is_colab():
57 shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory
58
59 # System info
60 if verbose:
61 gb = 1 << 30 # bytes to GiB (1024 ** 3)
62 ram = psutil.virtual_memory().total
63 total, used, free = shutil.disk_usage("/")
64 display.clear_output()
65 s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)'
66 else:
67 s = ''
68
69 select_device(newline=False)
70 print(emojis(f'Setup complete ✅ {s}'))
71 return display
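A short usage sketch for the TryExcept and threaded helpers above; the function names below are hypothetical, for illustration only:

from utils import TryExcept, threaded

@TryExcept('demo failed: ')  # the exception is printed with this prefix instead of propagating
def may_fail():
    raise ValueError('boom')

@threaded  # runs in a daemon thread and returns the Thread object immediately
def slow_sum(n):
    print(sum(range(n)))

may_fail()               # prints 'demo failed: boom' and returns normally
slow_sum(10_000).join()  # wait for the background thread to finish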
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Activation functions
4 """
5
6 import torch
7 import torch.nn as nn
8 import torch.nn.functional as F
9
10
11 class SiLU(nn.Module):
12 # SiLU activation https://arxiv.org/pdf/1606.08415.pdf
13 @staticmethod
14 def forward(x):
15 return x * torch.sigmoid(x)
16
17
18 class Hardswish(nn.Module):
19 # Hard-SiLU activation
20 @staticmethod
21 def forward(x):
22 # return x * F.hardsigmoid(x) # for TorchScript and CoreML
23 return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX
24
25
26 class Mish(nn.Module):
27 # Mish activation https://github.com/digantamisra98/Mish
28 @staticmethod
29 def forward(x):
30 return x * F.softplus(x).tanh()
31
32
33 class MemoryEfficientMish(nn.Module):
34 # Mish activation memory-efficient
35 class F(torch.autograd.Function):
36
37 @staticmethod
38 def forward(ctx, x):
39 ctx.save_for_backward(x)
40 return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
41
42 @staticmethod
43 def backward(ctx, grad_output):
44 x = ctx.saved_tensors[0]
45 sx = torch.sigmoid(x)
46 fx = F.softplus(x).tanh()
47 return grad_output * (fx + x * sx * (1 - fx * fx))
48
49 def forward(self, x):
50 return self.F.apply(x)
51
52
53 class FReLU(nn.Module):
54 # FReLU activation https://arxiv.org/abs/2007.11824
55 def __init__(self, c1, k=3): # ch_in, kernel
56 super().__init__()
57 self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
58 self.bn = nn.BatchNorm2d(c1)
59
60 def forward(self, x):
61 return torch.max(x, self.bn(self.conv(x)))
62
63
64 class AconC(nn.Module):
65 r""" ACON activation (activate or not)
66 AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
67 according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
68 """
69
70 def __init__(self, c1):
71 super().__init__()
72 self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
73 self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
74 self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))
75
76 def forward(self, x):
77 dpx = (self.p1 - self.p2) * x
78 return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x
79
80
81 class MetaAconC(nn.Module):
82 r""" ACON activation (activate or not)
83 MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network
84 according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
85 """
86
87 def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r
88 super().__init__()
89 c2 = max(r, c1 // r)
90 self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
91 self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
92 self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
93 self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
94 # self.bn1 = nn.BatchNorm2d(c2)
95 # self.bn2 = nn.BatchNorm2d(c1)
96
97 def forward(self, x):
98 y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
99 # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
100 # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable
101 beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed
102 dpx = (self.p1 - self.p2) * x
103 return dpx * torch.sigmoid(beta * dpx) + self.p2 * x
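A quick equivalence check for the activations above (a sketch; tolerances are illustrative). SiLU, the hardtanh-based Hardswish and Mish should match their torch.nn.functional counterparts:

import torch
import torch.nn.functional as F
from utils.activations import SiLU, Hardswish, Mish

x = torch.randn(8)
assert torch.allclose(SiLU.forward(x), F.silu(x), atol=1e-6)
assert torch.allclose(Hardswish.forward(x), F.hardswish(x), atol=1e-6)
assert torch.allclose(Mish.forward(x), F.mish(x), atol=1e-6)  # F.mish requires torch>=1.9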
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Image augmentation functions
4 """
5
6 import math
7 import random
8
9 import cv2
10 import numpy as np
11 import torch
12 import torchvision.transforms as T
13 import torchvision.transforms.functional as TF
14
15 from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
16 from utils.metrics import bbox_ioa
17
18 IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean
19 IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
20
21
22 class Albumentations:
23 # YOLOv5 Albumentations class (optional, only used if package is installed)
24 def __init__(self, size=640):
25 self.transform = None
26 prefix = colorstr('albumentations: ')
27 try:
28 import albumentations as A
29 check_version(A.__version__, '1.0.3', hard=True) # version requirement
30
31 T = [
32 A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
33 A.Blur(p=0.01),
34 A.MedianBlur(p=0.01),
35 A.ToGray(p=0.01),
36 A.CLAHE(p=0.01),
37 A.RandomBrightnessContrast(p=0.0),
38 A.RandomGamma(p=0.0),
39 A.ImageCompression(quality_lower=75, p=0.0)] # transforms
40 self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
41
42 LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
43 except ImportError: # package not installed, skip
44 pass
45 except Exception as e:
46 LOGGER.info(f'{prefix}{e}')
47
48 def __call__(self, im, labels, p=1.0):
49 if self.transform and random.random() < p:
50 new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
51 im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
52 return im, labels
53
54
55 def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
56 # Normalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std
57 return TF.normalize(x, mean, std, inplace=inplace)
58
59
60 def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
61 # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean
62 for i in range(3):
63 x[:, i] = x[:, i] * std[i] + mean[i]
64 return x
65
66
67 def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
68 # HSV color-space augmentation
69 if hgain or sgain or vgain:
70 r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
71 hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
72 dtype = im.dtype # uint8
73
74 x = np.arange(0, 256, dtype=r.dtype)
75 lut_hue = ((x * r[0]) % 180).astype(dtype)
76 lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
77 lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
78
79 im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
80 cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
81
82
83 def hist_equalize(im, clahe=True, bgr=False):
84 # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
85 yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
86 if clahe:
87 c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
88 yuv[:, :, 0] = c.apply(yuv[:, :, 0])
89 else:
90 yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
91 return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB
92
93
94 def replicate(im, labels):
95 # Replicate labels
96 h, w = im.shape[:2]
97 boxes = labels[:, 1:].astype(int)
98 x1, y1, x2, y2 = boxes.T
99 s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
100 for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
101 x1b, y1b, x2b, y2b = boxes[i]
102 bh, bw = y2b - y1b, x2b - x1b
103 yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
104 x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
105 im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax]
106 labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
107
108 return im, labels
109
110
111 def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
112 # Resize and pad image while meeting stride-multiple constraints
113 shape = im.shape[:2] # current shape [height, width]
114 if isinstance(new_shape, int):
115 new_shape = (new_shape, new_shape)
116
117 # Scale ratio (new / old)
118 r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
119 if not scaleup: # only scale down, do not scale up (for better val mAP)
120 r = min(r, 1.0)
121
122 # Compute padding
123 ratio = r, r # width, height ratios
124 new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
125 dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
126 if auto: # minimum rectangle
127 dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
128 elif scaleFill: # stretch
129 dw, dh = 0.0, 0.0
130 new_unpad = (new_shape[1], new_shape[0])
131 ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
132
133 dw /= 2 # divide padding into 2 sides
134 dh /= 2
135
136 if shape[::-1] != new_unpad: # resize
137 im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
138 top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
139 left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
140 im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
141 return im, ratio, (dw, dh)
142
143
144 def random_perspective(im,
145 targets=(),
146 segments=(),
147 degrees=10,
148 translate=.1,
149 scale=.1,
150 shear=10,
151 perspective=0.0,
152 border=(0, 0)):
153 # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
154 # targets = [cls, xyxy]
155
156 height = im.shape[0] + border[0] * 2 # shape(h,w,c)
157 width = im.shape[1] + border[1] * 2
158
159 # Center
160 C = np.eye(3)
161 C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
162 C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
163
164 # Perspective
165 P = np.eye(3)
166 P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
167 P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
168
169 # Rotation and Scale
170 R = np.eye(3)
171 a = random.uniform(-degrees, degrees)
172 # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
173 s = random.uniform(1 - scale, 1 + scale)
174 # s = 2 ** random.uniform(-scale, scale)
175 R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
176
177 # Shear
178 S = np.eye(3)
179 S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
180 S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
181
182 # Translation
183 T = np.eye(3)
184 T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
185 T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
186
187 # Combined rotation matrix
188 M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
189 if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
190 if perspective:
191 im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
192 else: # affine
193 im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
194
195 # Visualize
196 # import matplotlib.pyplot as plt
197 # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
198 # ax[0].imshow(im[:, :, ::-1]) # base
199 # ax[1].imshow(im2[:, :, ::-1]) # warped
200
201 # Transform label coordinates
202 n = len(targets)
203 if n:
204 use_segments = any(x.any() for x in segments)
205 new = np.zeros((n, 4))
206 if use_segments: # warp segments
207 segments = resample_segments(segments) # upsample
208 for i, segment in enumerate(segments):
209 xy = np.ones((len(segment), 3))
210 xy[:, :2] = segment
211 xy = xy @ M.T # transform
212 xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
213
214 # clip
215 new[i] = segment2box(xy, width, height)
216
217 else: # warp boxes
218 xy = np.ones((n * 4, 3))
219 xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
220 xy = xy @ M.T # transform
221 xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
222
223 # create new boxes
224 x = xy[:, [0, 2, 4, 6]]
225 y = xy[:, [1, 3, 5, 7]]
226 new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
227
228 # clip
229 new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
230 new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
231
232 # filter candidates
233 i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
234 targets = targets[i]
235 targets[:, 1:5] = new[i]
236
237 return im, targets
238
239
240 def copy_paste(im, labels, segments, p=0.5):
241 # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
242 n = len(segments)
243 if p and n:
244 h, w, c = im.shape # height, width, channels
245 im_new = np.zeros(im.shape, np.uint8)
246 for j in random.sample(range(n), k=round(p * n)):
247 l, s = labels[j], segments[j]
248 box = w - l[3], l[2], w - l[1], l[4]
249 ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
250 if (ioa < 0.30).all(): # allow 30% obscuration of existing labels
251 labels = np.concatenate((labels, [[l[0], *box]]), 0)
252 segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
253 cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
254
255 result = cv2.bitwise_and(src1=im, src2=im_new)
256 result = cv2.flip(result, 1) # augment segments (flip left-right)
257 i = result > 0 # pixels to replace
258 # i[:, :] = result.max(2).reshape(h, w, 1) # act over ch
259 im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug
260
261 return im, labels, segments
262
263
264 def cutout(im, labels, p=0.5):
265 # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
266 if random.random() < p:
267 h, w = im.shape[:2]
268 scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
269 for s in scales:
270 mask_h = random.randint(1, int(h * s)) # create random masks
271 mask_w = random.randint(1, int(w * s))
272
273 # box
274 xmin = max(0, random.randint(0, w) - mask_w // 2)
275 ymin = max(0, random.randint(0, h) - mask_h // 2)
276 xmax = min(w, xmin + mask_w)
277 ymax = min(h, ymin + mask_h)
278
279 # apply random color mask
280 im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
281
282 # return unobscured labels
283 if len(labels) and s > 0.03:
284 box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
285 ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h)) # intersection over area
286 labels = labels[ioa < 0.60] # remove >60% obscured labels
287
288 return labels
289
290
291 def mixup(im, labels, im2, labels2):
292 # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
293 r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
294 im = (im * r + im2 * (1 - r)).astype(np.uint8)
295 labels = np.concatenate((labels, labels2), 0)
296 return im, labels
297
298
299 def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
300 # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
301 w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
302 w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
303 ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
304 return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
305
306
307 def classify_albumentations(
308 augment=True,
309 size=224,
310 scale=(0.08, 1.0),
311 ratio=(0.75, 1.0 / 0.75), # 0.75, 1.33
312 hflip=0.5,
313 vflip=0.0,
314 jitter=0.4,
315 mean=IMAGENET_MEAN,
316 std=IMAGENET_STD,
317 auto_aug=False):
318 # YOLOv5 classification Albumentations (optional, only used if package is installed)
319 prefix = colorstr('albumentations: ')
320 try:
321 import albumentations as A
322 from albumentations.pytorch import ToTensorV2
323 check_version(A.__version__, '1.0.3', hard=True) # version requirement
324 if augment: # Resize and crop
325 T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
326 if auto_aug:
327 # TODO: implement AugMix, AutoAug & RandAug in albumentation
328 LOGGER.info(f'{prefix}auto augmentations are currently not supported')
329 else:
330 if hflip > 0:
331 T += [A.HorizontalFlip(p=hflip)]
332 if vflip > 0:
333 T += [A.VerticalFlip(p=vflip)]
334 if jitter > 0:
335 color_jitter = (float(jitter),) * 3 # repeat value for brightness, contrast, saturation, 0 hue
336 T += [A.ColorJitter(*color_jitter, 0)]
337 else: # Use fixed crop for eval set (reproducibility)
338 T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
339 T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor
340 LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
341 return A.Compose(T)
342
343 except ImportError: # package not installed, skip
344 LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
345 except Exception as e:
346 LOGGER.info(f'{prefix}{e}')
347
348
349 def classify_transforms(size=224):
350 # Transforms to apply if albumentations not installed
351 assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
352 # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
353 return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
354
355
356 class LetterBox:
357 # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
358 def __init__(self, size=(640, 640), auto=False, stride=32):
359 super().__init__()
360 self.h, self.w = (size, size) if isinstance(size, int) else size
361 self.auto = auto # pass max size integer, automatically solve for short side using stride
362 self.stride = stride # used with auto
363
364 def __call__(self, im): # im = np.array HWC
365 imh, imw = im.shape[:2]
366 r = min(self.h / imh, self.w / imw) # ratio of new/old
367 h, w = round(imh * r), round(imw * r) # resized image
368 hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w) # parenthesize the else tuple so both values fall back together
369 top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
370 im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype)
371 im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
372 return im_out
373
374
375 class CenterCrop:
376 # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
377 def __init__(self, size=640):
378 super().__init__()
379 self.h, self.w = (size, size) if isinstance(size, int) else size
380
381 def __call__(self, im): # im = np.array HWC
382 imh, imw = im.shape[:2]
383 m = min(imh, imw) # min dimension
384 top, left = (imh - m) // 2, (imw - m) // 2
385 return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
386
387
388 class ToTensor:
389 # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
390 def __init__(self, half=False):
391 super().__init__()
392 self.half = half
393
394 def __call__(self, im): # im = np.array HWC in BGR order
395 im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
396 im = torch.from_numpy(im) # to torch
397 im = im.half() if self.half else im.float() # uint8 to fp16/32
398 im /= 255.0 # 0-255 to 0.0-1.0
399 return im
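For reference, a minimal sketch of the plain letterbox function above on a dummy frame (shapes are illustrative):

import numpy as np
from utils.augmentations import letterbox

im = np.zeros((480, 640, 3), dtype=np.uint8)       # HWC BGR frame
im_lb, ratio, (dw, dh) = letterbox(im, new_shape=640, auto=False)
print(im_lb.shape, ratio, (dw, dh))                # (640, 640, 3) (1.0, 1.0) (0.0, 80.0)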
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 AutoAnchor utils
4 """
5
6 import random
7
8 import numpy as np
9 import torch
10 import yaml
11 from tqdm import tqdm
12
13 from utils import TryExcept
14 from utils.general import LOGGER, colorstr
15
16 PREFIX = colorstr('AutoAnchor: ')
17
18
19 def check_anchor_order(m):
20 # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
21 a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer
22 da = a[-1] - a[0] # delta a
23 ds = m.stride[-1] - m.stride[0] # delta s
24 if da and (da.sign() != ds.sign()): # anchor order and stride order disagree
25 LOGGER.info(f'{PREFIX}Reversing anchor order')
26 m.anchors[:] = m.anchors.flip(0)
27
28
29 @TryExcept(f'{PREFIX}ERROR: ')
30 def check_anchors(dataset, model, thr=4.0, imgsz=640):
31 # Check anchor fit to data, recompute if necessary
32 m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect()
33 shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
34 scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
35 wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh
36
37 def metric(k): # compute metric
38 r = wh[:, None] / k[None]
39 x = torch.min(r, 1 / r).min(2)[0] # ratio metric
40 best = x.max(1)[0] # best_x
41 aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold
42 bpr = (best > 1 / thr).float().mean() # best possible recall
43 return bpr, aat
44
45 stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides
46 anchors = m.anchors.clone() * stride # current anchors
47 bpr, aat = metric(anchors.cpu().view(-1, 2))
48 s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). '
49 if bpr > 0.98: # threshold to recompute
50 LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅')
51 else:
52 LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...')
53 na = m.anchors.numel() // 2 # number of anchors
54 anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
55 new_bpr = metric(anchors)[0]
56 if new_bpr > bpr: # replace anchors
57 anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
58 m.anchors[:] = anchors.clone().view_as(m.anchors)
59 check_anchor_order(m) # must be in pixel-space (not grid-space)
60 m.anchors /= stride
61 s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)'
62 else:
63 s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)'
64 LOGGER.info(s)
65
66
67 def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
68 """ Creates kmeans-evolved anchors from training dataset
69
70 Arguments:
71 dataset: path to data.yaml, or a loaded dataset
72 n: number of anchors
73 img_size: image size used for training
74 thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
75 gen: generations to evolve anchors using genetic algorithm
76 verbose: print all results
77
78 Return:
79 k: kmeans evolved anchors
80
81 Usage:
82 from utils.autoanchor import *; _ = kmean_anchors()
83 """
84 from scipy.cluster.vq import kmeans
85
86 npr = np.random
87 thr = 1 / thr
88
89 def metric(k, wh): # compute metrics
90 r = wh[:, None] / k[None]
91 x = torch.min(r, 1 / r).min(2)[0] # ratio metric
92 # x = wh_iou(wh, torch.tensor(k)) # iou metric
93 return x, x.max(1)[0] # x, best_x
94
95 def anchor_fitness(k): # mutation fitness
96 _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
97 return (best * (best > thr).float()).mean() # fitness
98
99 def print_results(k, verbose=True):
100 k = k[np.argsort(k.prod(1))] # sort small to large
101 x, best = metric(k, wh0)
102 bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
103 s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \
104 f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
105 f'past_thr={x[x > thr].mean():.3f}-mean: '
106 for x in k:
107 s += '%i,%i, ' % (round(x[0]), round(x[1]))
108 if verbose:
109 LOGGER.info(s[:-2])
110 return k
111
112 if isinstance(dataset, str): # *.yaml file
113 with open(dataset, errors='ignore') as f:
114 data_dict = yaml.safe_load(f) # model dict
115 from utils.dataloaders import LoadImagesAndLabels
116 dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
117
118 # Get label wh
119 shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
120 wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
121
122 # Filter
123 i = (wh0 < 3.0).any(1).sum()
124 if i:
125 LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
126 wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels
127 # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
128
129 # Kmeans init
130 try:
131 LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
132 assert n <= len(wh) # apply overdetermined constraint
133 s = wh.std(0) # sigmas for whitening
134 k = kmeans(wh / s, n, iter=30)[0] * s # points
135 assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar
136 except Exception:
137 LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
138 k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
139 wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
140 k = print_results(k, verbose=False)
141
142 # Plot
143 # k, d = [None] * 20, [None] * 20
144 # for i in tqdm(range(1, 21)):
145 # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
146 # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
147 # ax = ax.ravel()
148 # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
149 # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
150 # ax[0].hist(wh[wh[:, 0]<100, 0],400)
151 # ax[1].hist(wh[wh[:, 1]<100, 1],400)
152 # fig.savefig('wh.png', dpi=200)
153
154 # Evolve
155 f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, anchor shape, mutation prob, sigma
156 pbar = tqdm(range(gen), bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
157 for _ in pbar:
158 v = np.ones(sh)
159 while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
160 v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
161 kg = (k.copy() * v).clip(min=2.0)
162 fg = anchor_fitness(kg)
163 if fg > f:
164 f, k = fg, kg.copy()
165 pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
166 if verbose:
167 print_results(k, verbose)
168
169 return print_results(k).astype(np.float32)
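Per the docstring above, anchors can also be recomputed offline. A sketch, assuming a dataset YAML with a 'train' key (the COCO128 path is illustrative):

from utils.autoanchor import kmean_anchors

k = kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000)
print(k.round(1))  # (9, 2) array of evolved anchor widths/heights in pixels, sorted small to large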
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Auto-batch utils
4 """
5
6 from copy import deepcopy
7
8 import numpy as np
9 import torch
10
11 from utils.general import LOGGER, colorstr
12 from utils.torch_utils import profile
13
14
15 def check_train_batch_size(model, imgsz=640, amp=True):
16 # Check YOLOv5 training batch size
17 with torch.cuda.amp.autocast(amp):
18 return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
19
20
21 def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
22 # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory
23 # Usage:
24 # import torch
25 # from utils.autobatch import autobatch
26 # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
27 # print(autobatch(model))
28
29 # Check device
30 prefix = colorstr('AutoBatch: ')
31 LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
32 device = next(model.parameters()).device # get model device
33 if device.type == 'cpu':
34 LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
35 return batch_size
36 if torch.backends.cudnn.benchmark:
37 LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
38 return batch_size
39
40 # Inspect CUDA memory
41 gb = 1 << 30 # bytes to GiB (1024 ** 3)
42 d = str(device).upper() # 'CUDA:0'
43 properties = torch.cuda.get_device_properties(device) # device properties
44 t = properties.total_memory / gb # GiB total
45 r = torch.cuda.memory_reserved(device) / gb # GiB reserved
46 a = torch.cuda.memory_allocated(device) / gb # GiB allocated
47 f = t - (r + a) # GiB free
48 LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
49
50 # Profile batch sizes
51 batch_sizes = [1, 2, 4, 8, 16]
52 try:
53 img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
54 results = profile(img, model, n=3, device=device)
55 except Exception as e:
56 LOGGER.warning(f'{prefix}{e}')
57 return batch_size # profiling failed; fall back to the default batch size (results would be undefined below)
58 # Fit a solution
59 y = [x[2] for x in results if x] # memory [2]
60 p = np.polyfit(batch_sizes[:len(y)], y, deg=1) # first degree polynomial fit
61 b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size)
62 if None in results: # some sizes failed
63 i = results.index(None) # first fail index
64 if b >= batch_sizes[i]: # y intercept above failure point
65 b = batch_sizes[max(i - 1, 0)] # select prior safe point
66 if b < 1 or b > 1024: # b outside of safe range
67 b = batch_size
68 LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
69
70 fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted
71 LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
72 return b
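The usage comment at the top of autobatch in runnable form (a sketch; with no CUDA device available it logs a notice and returns the default batch size of 16):

import torch
from utils.autobatch import autobatch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
print(autobatch(model, imgsz=640))  # estimated batch size targeting ~80% of free CUDA memory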
File mode changed
1 # AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
2 # This script will run on every instance restart, not only on first start
3 # --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
4
5 Content-Type: multipart/mixed; boundary="//"
6 MIME-Version: 1.0
7
8 --//
9 Content-Type: text/cloud-config; charset="us-ascii"
10 MIME-Version: 1.0
11 Content-Transfer-Encoding: 7bit
12 Content-Disposition: attachment; filename="cloud-config.txt"
13
14 #cloud-config
15 cloud_final_modules:
16 - [scripts-user, always]
17
18 --//
19 Content-Type: text/x-shellscript; charset="us-ascii"
20 MIME-Version: 1.0
21 Content-Transfer-Encoding: 7bit
22 Content-Disposition: attachment; filename="userdata.txt"
23
24 #!/bin/bash
25 # --- paste contents of userdata.sh here ---
26 --//
1 # Resume all interrupted trainings in yolov5/ dir including DDP trainings
2 # Usage: $ python utils/aws/resume.py
3
4 import os
5 import sys
6 from pathlib import Path
7
8 import torch
9 import yaml
10
11 FILE = Path(__file__).resolve()
12 ROOT = FILE.parents[2] # YOLOv5 root directory
13 if str(ROOT) not in sys.path:
14 sys.path.append(str(ROOT)) # add ROOT to PATH
15
16 port = 0 # --master_port
17 path = Path('').resolve()
18 for last in path.rglob('*/**/last.pt'):
19 ckpt = torch.load(last)
20 if ckpt['optimizer'] is None:
21 continue
22
23 # Load opt.yaml
24 with open(last.parent.parent / 'opt.yaml', errors='ignore') as f:
25 opt = yaml.safe_load(f)
26
27 # Get device count
28 d = opt['device'].split(',') # devices
29 nd = len(d) # number of devices
30 ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel
31
32 if ddp: # multi-GPU
33 port += 1
34 cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
35 else: # single-GPU
36 cmd = f'python train.py --resume {last}'
37
38 cmd += ' > /dev/null 2>&1 &' # redirect output to /dev/null and run in the background
39 print(cmd)
40 os.system(cmd)
1 #!/bin/bash
2 # AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
3 # This script will run only once on first instance start (for a re-start script see mime.sh)
4 # /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
5 # Use >300 GB SSD
6
7 cd /home/ubuntu # absolute path; the script's initial working directory is not guaranteed
8 if [ ! -d yolov5 ]; then
9 echo "Running first-time script." # install dependencies, download COCO, pull Docker
10 git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
11 cd yolov5
12 bash data/scripts/get_coco.sh && echo "COCO done." &
13 sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
14 python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
15 wait && echo "All tasks done." # finish background tasks
16 else
17 echo "Running re-start script." # resume interrupted runs
18 i=0
19 list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
20 while IFS= read -r id; do
21 ((i++))
22 echo "restarting container $i: $id"
23 sudo docker start $id
24 # sudo docker exec -it $id python train.py --resume # single-GPU
25 sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
26 done <<<"$list"
27 fi
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Callback utils
4 """
5
6 import threading
7
8
9 class Callbacks:
10 """"
11 Handles all registered callbacks for YOLOv5 Hooks
12 """
13
14 def __init__(self):
15 # Define the available callbacks
16 self._callbacks = {
17 'on_pretrain_routine_start': [],
18 'on_pretrain_routine_end': [],
19 'on_train_start': [],
20 'on_train_epoch_start': [],
21 'on_train_batch_start': [],
22 'optimizer_step': [],
23 'on_before_zero_grad': [],
24 'on_train_batch_end': [],
25 'on_train_epoch_end': [],
26 'on_val_start': [],
27 'on_val_batch_start': [],
28 'on_val_image_end': [],
29 'on_val_batch_end': [],
30 'on_val_end': [],
31 'on_fit_epoch_end': [], # fit = train + val
32 'on_model_save': [],
33 'on_train_end': [],
34 'on_params_update': [],
35 'teardown': [],}
36 self.stop_training = False # set True to interrupt training
37
38 def register_action(self, hook, name='', callback=None):
39 """
40 Register a new action to a callback hook
41
42 Args:
43 hook: The callback hook name to register the action to
44 name: The name of the action for later reference
45 callback: The callback to fire
46 """
47 assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
48 assert callable(callback), f"callback '{callback}' is not callable"
49 self._callbacks[hook].append({'name': name, 'callback': callback})
50
51 def get_registered_actions(self, hook=None):
52 """"
53 Returns all the registered actions by callback hook
54
55 Args:
56 hook: The name of the hook to check, defaults to all
57 """
58 return self._callbacks[hook] if hook else self._callbacks
59
60 def run(self, hook, *args, thread=False, **kwargs):
61 """
62 Loop through the registered actions and fire all callbacks, on the main thread by default
63
64 Args:
65 hook: The name of the hook whose actions should be fired
66 args: Arguments to receive from YOLOv5
67 thread: (boolean) Run callbacks in a daemon thread instead of the main thread
68 kwargs: Keyword Arguments to receive from YOLOv5
69 """
70
71 assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
72 for logger in self._callbacks[hook]:
73 if thread:
74 threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start()
75 else:
76 logger['callback'](*args, **kwargs)
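# A minimal usage sketch for the Callbacks class above; the action and its arguments are
# illustrative, not part of YOLOv5:
callbacks = Callbacks()

def log_epoch(epoch, fitness):  # hypothetical action
    print(f'epoch {epoch}: fitness {fitness:.4f}')

callbacks.register_action('on_fit_epoch_end', name='log_epoch', callback=log_epoch)
callbacks.run('on_fit_epoch_end', 5, 0.123)               # fires on the main thread
callbacks.run('on_fit_epoch_end', 6, 0.456, thread=True)  # fires in a daemon thread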
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Dataloaders and dataset utils
4 """
5
6 import contextlib
7 import glob
8 import hashlib
9 import json
10 import math
11 import os
12 import random
13 import shutil
14 import time
15 from itertools import repeat
16 from multiprocessing.pool import Pool, ThreadPool
17 from pathlib import Path
18 from threading import Thread
19 from urllib.parse import urlparse
20 from zipfile import ZipFile
21
22 import numpy as np
23 import torch
24 import torch.nn.functional as F
25 import torchvision
26 import yaml
27 from PIL import ExifTags, Image, ImageOps
28 from torch.utils.data import DataLoader, Dataset, dataloader, distributed
29 from tqdm import tqdm
30
31 from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
32 cutout, letterbox, mixup, random_perspective)
33 from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
34 cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
35 from utils.torch_utils import torch_distributed_zero_first
36
37 # Parameters
38 HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
39 IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # include image suffixes
40 VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes
41 BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format
42 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
43 RANK = int(os.getenv('RANK', -1))
44 PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders
45
46 # Get orientation exif tag
47 for orientation in ExifTags.TAGS.keys():
48 if ExifTags.TAGS[orientation] == 'Orientation':
49 break
50
51
52 def get_hash(paths):
53 # Returns a single hash value of a list of paths (files or dirs)
54 size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
55 h = hashlib.md5(str(size).encode()) # hash sizes
56 h.update(''.join(paths).encode()) # hash paths
57 return h.hexdigest() # return hash
58
59
60 def exif_size(img):
61 # Returns exif-corrected PIL size
62 s = img.size # (width, height)
63 with contextlib.suppress(Exception):
64 rotation = dict(img._getexif().items())[orientation]
65 if rotation in [6, 8]: # rotation 270 or 90
66 s = (s[1], s[0])
67 return s
68
69
70 def exif_transpose(image):
71 """
72 Transpose a PIL image accordingly if it has an EXIF Orientation tag.
73 Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose()
74
75 :param image: The image to transpose.
76 :return: An image.
77 """
78 exif = image.getexif()
79 orientation = exif.get(0x0112, 1) # default 1
80 if orientation > 1:
81 method = {
82 2: Image.FLIP_LEFT_RIGHT,
83 3: Image.ROTATE_180,
84 4: Image.FLIP_TOP_BOTTOM,
85 5: Image.TRANSPOSE,
86 6: Image.ROTATE_270,
87 7: Image.TRANSVERSE,
88 8: Image.ROTATE_90}.get(orientation)
89 if method is not None:
90 image = image.transpose(method)
91 del exif[0x0112]
92 image.info["exif"] = exif.tobytes()
93 return image
94
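# A minimal sketch of exif_transpose() in use (the file names are hypothetical). Phone cameras
# often store pixels unrotated plus an EXIF Orientation tag, so the pixels are rotated upright
# and the tag dropped before the image is consumed downstream:
im = exif_transpose(Image.open('phone_photo.jpg'))  # upright pixels, Orientation tag removed
im.save('phone_photo_upright.jpg')                  # remaining EXIF kept via image.info['exif']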
95
96 def seed_worker(worker_id):
97 # Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader
98 worker_seed = torch.initial_seed() % 2 ** 32
99 np.random.seed(worker_seed)
100 random.seed(worker_seed)
101
102
103 def create_dataloader(path,
104 imgsz,
105 batch_size,
106 stride,
107 single_cls=False,
108 hyp=None,
109 augment=False,
110 cache=False,
111 pad=0.0,
112 rect=False,
113 rank=-1,
114 workers=8,
115 image_weights=False,
116 quad=False,
117 prefix='',
118 shuffle=False):
119 if rect and shuffle:
120 LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
121 shuffle = False
122 with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
123 dataset = LoadImagesAndLabels(
124 path,
125 imgsz,
126 batch_size,
127 augment=augment, # augmentation
128 hyp=hyp, # hyperparameters
129 rect=rect, # rectangular batches
130 cache_images=cache,
131 single_cls=single_cls,
132 stride=int(stride),
133 pad=pad,
134 image_weights=image_weights,
135 prefix=prefix)
136
137 batch_size = min(batch_size, len(dataset))
138 nd = torch.cuda.device_count() # number of CUDA devices
139 nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
140 sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
141 loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
142 generator = torch.Generator()
143 generator.manual_seed(6148914691236517205 + RANK)
144 return loader(dataset,
145 batch_size=batch_size,
146 shuffle=shuffle and sampler is None,
147 num_workers=nw,
148 sampler=sampler,
149 pin_memory=PIN_MEMORY,
150 collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn,
151 worker_init_fn=seed_worker,
152 generator=generator), dataset
153
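# A minimal usage sketch for create_dataloader(); the dataset path and hyperparameter file are
# illustrative and assume a local COCO128-style layout:
with open('data/hyps/hyp.scratch-low.yaml') as f:
    hyp = yaml.safe_load(f)
train_loader, dataset = create_dataloader('../datasets/coco128/images/train2017',
                                          imgsz=640, batch_size=16, stride=32,
                                          hyp=hyp, augment=True, shuffle=True)
for imgs, targets, paths, shapes in train_loader:
    imgs = imgs.float() / 255  # uint8 BCHW RGB -> float in [0, 1]
    break  # targets: (n, 6) rows of (image index in batch, class, x, y, w, h), normalized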
154
155 class InfiniteDataLoader(dataloader.DataLoader):
156 """ Dataloader that reuses workers
157
158 Uses same syntax as vanilla DataLoader
159 """
160
161 def __init__(self, *args, **kwargs):
162 super().__init__(*args, **kwargs)
163 object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
164 self.iterator = super().__iter__()
165
166 def __len__(self):
167 return len(self.batch_sampler.sampler)
168
169 def __iter__(self):
170 for _ in range(len(self)):
171 yield next(self.iterator)
172
173
174 class _RepeatSampler:
175 """ Sampler that repeats forever
176
177 Args:
178 sampler (Sampler)
179 """
180
181 def __init__(self, sampler):
182 self.sampler = sampler
183
184 def __iter__(self):
185 while True:
186 yield from iter(self.sampler)
187
188
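# Why the pair above exists: a vanilla DataLoader tears down its worker processes at the end of
# every epoch, while _RepeatSampler never exhausts, so InfiniteDataLoader keeps the same workers
# alive across epochs. A toy sketch (the Squares dataset is illustrative, not part of YOLOv5):
class Squares(Dataset):

    def __len__(self):
        return 8

    def __getitem__(self, i):
        return torch.tensor(i * i)

dl = InfiniteDataLoader(Squares(), batch_size=4, num_workers=2)
for epoch in range(3):  # the same two workers serve all three epochs
    for batch in dl:
        pass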
189 class LoadScreenshots:
190 # YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
191 def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
192 # source = [screen_number left top width height] (pixels)
193 check_requirements('mss')
194 import mss
195
196 source, *params = source.split()
197 self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
198 if len(params) == 1:
199 self.screen = int(params[0])
200 elif len(params) == 4:
201 left, top, width, height = (int(x) for x in params)
202 elif len(params) == 5:
203 self.screen, left, top, width, height = (int(x) for x in params)
204 self.img_size = img_size
205 self.stride = stride
206 self.transforms = transforms
207 self.auto = auto
208 self.mode = 'stream'
209 self.frame = 0
210 self.sct = mss.mss()
211
212 # Parse monitor shape
213 monitor = self.sct.monitors[self.screen]
214 self.top = monitor["top"] if top is None else (monitor["top"] + top)
215 self.left = monitor["left"] if left is None else (monitor["left"] + left)
216 self.width = width or monitor["width"]
217 self.height = height or monitor["height"]
218 self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
219
220 def __iter__(self):
221 return self
222
223 def __next__(self):
224 # mss screen capture: get raw pixels from the screen as np array
225 im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
226 s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
227
228 if self.transforms:
229 im = self.transforms(im0) # transforms
230 else:
231 im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize
232 im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
233 im = np.ascontiguousarray(im) # contiguous
234 self.frame += 1
235 return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s
236
237
238 class LoadImages:
239 # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
240 def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
241 files = []
242 for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
243 p = str(Path(p).resolve())
244 if '*' in p:
245 files.extend(sorted(glob.glob(p, recursive=True))) # glob
246 elif os.path.isdir(p):
247 files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir
248 elif os.path.isfile(p):
249 files.append(p) # files
250 else:
251 raise FileNotFoundError(f'{p} does not exist')
252
253 images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
254 videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
255 ni, nv = len(images), len(videos)
256
257 self.img_size = img_size
258 self.stride = stride
259 self.files = images + videos
260 self.nf = ni + nv # number of files
261 self.video_flag = [False] * ni + [True] * nv
262 self.mode = 'image'
263 self.auto = auto
264 self.transforms = transforms # optional
265 self.vid_stride = vid_stride # video frame-rate stride
266 if any(videos):
267 self._new_video(videos[0]) # new video
268 else:
269 self.cap = None
270 assert self.nf > 0, f'No images or videos found in {p}. ' \
271 f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
272
273 def __iter__(self):
274 self.count = 0
275 return self
276
277 def __next__(self):
278 if self.count == self.nf:
279 raise StopIteration
280 path = self.files[self.count]
281
282 if self.video_flag[self.count]:
283 # Read video
284 self.mode = 'video'
285 for _ in range(self.vid_stride):
286 self.cap.grab()
287 ret_val, im0 = self.cap.retrieve()
288 while not ret_val:
289 self.count += 1
290 self.cap.release()
291 if self.count == self.nf: # last video
292 raise StopIteration
293 path = self.files[self.count]
294 self._new_video(path)
295 ret_val, im0 = self.cap.read()
296
297 self.frame += 1
298 # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False
299 s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
300
301 else:
302 # Read image
303 self.count += 1
304 im0 = cv2.imread(path) # BGR
305 assert im0 is not None, f'Image Not Found {path}'
306 s = f'image {self.count}/{self.nf} {path}: '
307
308 if self.transforms:
309 im = self.transforms(im0) # transforms
310 else:
311 im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize
312 im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
313 im = np.ascontiguousarray(im) # contiguous
314
315 return path, im, im0, self.cap, s
316
317 def _new_video(self, path):
318 # Create a new video capture object
319 self.frame = 0
320 self.cap = cv2.VideoCapture(path)
321 self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
322 self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees
323 # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493
324
325 def _cv2_rotate(self, im):
326 # Rotate a cv2 video manually
327 if self.orientation == 0:
328 return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
329 elif self.orientation == 180:
330 return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE)
331 elif self.orientation == 90:
332 return cv2.rotate(im, cv2.ROTATE_180)
333 return im
334
335 def __len__(self):
336 return self.nf # number of files
337
338
339 class LoadStreams:
340 # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
341 def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
342 torch.backends.cudnn.benchmark = True # faster for fixed-size inference
343 self.mode = 'stream'
344 self.img_size = img_size
345 self.stride = stride
346 self.vid_stride = vid_stride # video frame-rate stride
347 sources = Path(sources).read_text().rsplit() if Path(sources).is_file() else [sources]
348 n = len(sources)
349 self.sources = [clean_str(x) for x in sources] # clean source names for later
350 self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
351 for i, s in enumerate(sources): # index, source
352 # Start thread to read frames from video stream
353 st = f'{i + 1}/{n}: {s}... '
354 if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video
355 check_requirements(('pafy', 'youtube_dl==2020.12.2'))
356 import pafy
357 s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL
358 s = int(s) if s.isnumeric() else s # i.e. s = '0' local webcam (int() is safer than eval() here)
359 if s == 0:
360 assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.'
361 assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.'
362 cap = cv2.VideoCapture(s)
363 assert cap.isOpened(), f'{st}Failed to open {s}'
364 w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
365 h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
366 fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan
367 self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback
368 self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback
369
370 _, self.imgs[i] = cap.read() # guarantee first frame
371 self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
372 LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
373 self.threads[i].start()
374 LOGGER.info('') # newline
375
376 # check for common shapes
377 s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs])
378 self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
379 self.auto = auto and self.rect
380 self.transforms = transforms # optional
381 if not self.rect:
382 LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
383
384 def update(self, i, cap, stream):
385 # Read stream `i` frames in daemon thread
386 n, f = 0, self.frames[i] # frame number, frame array
387 while cap.isOpened() and n < f:
388 n += 1
389 cap.grab() # .read() = .grab() followed by .retrieve()
390 if n % self.vid_stride == 0:
391 success, im = cap.retrieve()
392 if success:
393 self.imgs[i] = im
394 else:
395 LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.')
396 self.imgs[i] = np.zeros_like(self.imgs[i])
397 cap.open(stream) # re-open stream if signal was lost
398 time.sleep(0.0) # yield to other threads; no real wait
399
400 def __iter__(self):
401 self.count = -1
402 return self
403
404 def __next__(self):
405 self.count += 1
406 if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
407 cv2.destroyAllWindows()
408 raise StopIteration
409
410 im0 = self.imgs.copy()
411 if self.transforms:
412 im = np.stack([self.transforms(x) for x in im0]) # transforms
413 else:
414 im = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0] for x in im0]) # resize
415 im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW
416 im = np.ascontiguousarray(im) # contiguous
417
418 return self.sources, im, im0, None, ''
419
420 def __len__(self):
421 return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
422
423
424 def img2label_paths(img_paths):
425 # Define label paths as a function of image paths
426 sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings
427 return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
428
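# e.g. img2label_paths(['/data/coco128/images/train2017/0001.jpg'])
#      -> ['/data/coco128/labels/train2017/0001.txt']
# (illustrative paths: only the last /images/ component is swapped for /labels/, and the file
# suffix is replaced with .txt)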
429
430 class LoadImagesAndLabels(Dataset):
431 # YOLOv5 train_loader/val_loader, loads images and labels for training and validation
432 cache_version = 0.6 # dataset labels *.cache version
433 rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
434
435 def __init__(self,
436 path,
437 img_size=640,
438 batch_size=16,
439 augment=False,
440 hyp=None,
441 rect=False,
442 image_weights=False,
443 cache_images=False,
444 single_cls=False,
445 stride=32,
446 pad=0.0,
447 prefix=''):
448 self.img_size = img_size
449 self.augment = augment
450 self.hyp = hyp
451 self.image_weights = image_weights
452 self.rect = False if image_weights else rect
453 self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
454 self.mosaic_border = [-img_size // 2, -img_size // 2]
455 self.stride = stride
456 self.path = path
457 self.albumentations = Albumentations(size=img_size) if augment else None
458
459 try:
460 f = [] # image files
461 for p in path if isinstance(path, list) else [path]:
462 p = Path(p) # os-agnostic
463 if p.is_dir(): # dir
464 f += glob.glob(str(p / '**' / '*.*'), recursive=True)
465 # f = list(p.rglob('*.*')) # pathlib
466 elif p.is_file(): # file
467 with open(p) as t:
468 t = t.read().strip().splitlines()
469 parent = str(p.parent) + os.sep
470 f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
471 # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
472 else:
473 raise FileNotFoundError(f'{prefix}{p} does not exist')
474 self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
475 # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib
476 assert self.im_files, f'{prefix}No images found'
477 except Exception as e:
478 raise Exception(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}')
479
480 # Check cache
481 self.label_files = img2label_paths(self.im_files) # labels
482 cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
483 try:
484 cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict
485 assert cache['version'] == self.cache_version # matches current version
486 assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash
487 except Exception:
488 cache, exists = self.cache_labels(cache_path, prefix), False # run cache ops
489
490 # Display cache
491 nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total
492 if exists and LOCAL_RANK in {-1, 0}:
493 d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupt"
494 tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=BAR_FORMAT) # display cache results
495 if cache['msgs']:
496 LOGGER.info('\n'.join(cache['msgs'])) # display warnings
497 assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}'
498
499 # Read cache
500 [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items
501 labels, shapes, self.segments = zip(*cache.values())
502 nl = len(np.concatenate(labels, 0)) # number of labels
503 assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}'
504 self.labels = list(labels)
505 self.shapes = np.array(shapes)
506 self.im_files = list(cache.keys()) # update
507 self.label_files = img2label_paths(cache.keys()) # update
508 n = len(shapes) # number of images
509 bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index
510 nb = bi[-1] + 1 # number of batches
511 self.batch = bi # batch index of image
512 self.n = n
513 self.indices = range(n)
514
515 # Update labels
516 include_class = [] # filter labels to include only these classes (optional)
517 include_class_array = np.array(include_class).reshape(1, -1)
518 for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
519 if include_class:
520 j = (label[:, 0:1] == include_class_array).any(1)
521 self.labels[i] = label[j]
522 if segment:
523 self.segments[i] = segment[j]
524 if single_cls: # single-class training, merge all classes into 0
525 self.labels[i][:, 0] = 0
526 if segment:
527 self.segments[i][:, 0] = 0
528
529 # Rectangular Training
530 if self.rect:
531 # Sort by aspect ratio
532 s = self.shapes # wh
533 ar = s[:, 1] / s[:, 0] # aspect ratio
534 irect = ar.argsort()
535 self.im_files = [self.im_files[i] for i in irect]
536 self.label_files = [self.label_files[i] for i in irect]
537 self.labels = [self.labels[i] for i in irect]
538 self.segments = [self.segments[i] for i in irect]
539 self.shapes = s[irect] # wh
540 ar = ar[irect]
541
542 # Set training image shapes
543 shapes = [[1, 1]] * nb
544 for i in range(nb):
545 ari = ar[bi == i]
546 mini, maxi = ari.min(), ari.max()
547 if maxi < 1:
548 shapes[i] = [maxi, 1]
549 elif mini > 1:
550 shapes[i] = [1, 1 / mini]
551
552 self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
553
554 # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
555 self.ims = [None] * n
556 self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
557 if cache_images:
558 gb = 0 # Gigabytes of cached images
559 self.im_hw0, self.im_hw = [None] * n, [None] * n
560 fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image
561 results = ThreadPool(NUM_THREADS).imap(fcn, range(n))
562 pbar = tqdm(enumerate(results), total=n, bar_format=BAR_FORMAT, disable=LOCAL_RANK > 0)
563 for i, x in pbar:
564 if cache_images == 'disk':
565 gb += self.npy_files[i].stat().st_size
566 else: # 'ram'
567 self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i)
568 gb += self.ims[i].nbytes
569 pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
570 pbar.close()
571
572 def cache_labels(self, path=Path('./labels.cache'), prefix=''):
573 # Cache dataset labels, check images and read shapes
574 x = {} # dict
575 nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
576 desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
577 with Pool(NUM_THREADS) as pool:
578 pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))),
579 desc=desc,
580 total=len(self.im_files),
581 bar_format=BAR_FORMAT)
582 for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
583 nm += nm_f
584 nf += nf_f
585 ne += ne_f
586 nc += nc_f
587 if im_file:
588 x[im_file] = [lb, shape, segments]
589 if msg:
590 msgs.append(msg)
591 pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupt"
592
593 pbar.close()
594 if msgs:
595 LOGGER.info('\n'.join(msgs))
596 if nf == 0:
597 LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}')
598 x['hash'] = get_hash(self.label_files + self.im_files)
599 x['results'] = nf, nm, ne, nc, len(self.im_files)
600 x['msgs'] = msgs # warnings
601 x['version'] = self.cache_version # cache version
602 try:
603 np.save(path, x) # save cache for next time
604 path.with_suffix('.cache.npy').rename(path) # remove .npy suffix
605 LOGGER.info(f'{prefix}New cache created: {path}')
606 except Exception as e:
607 LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable
608 return x
609
610 def __len__(self):
611 return len(self.im_files)
612
613 # def __iter__(self):
614 # self.count = -1
615 # print('ran dataset iter')
616 # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
617 # return self
618
619 def __getitem__(self, index):
620 index = self.indices[index] # linear, shuffled, or image_weights
621
622 hyp = self.hyp
623 mosaic = self.mosaic and random.random() < hyp['mosaic']
624 if mosaic:
625 # Load mosaic
626 img, labels = self.load_mosaic(index)
627 shapes = None
628
629 # MixUp augmentation
630 if random.random() < hyp['mixup']:
631 img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1)))
632
633 else:
634 # Load image
635 img, (h0, w0), (h, w) = self.load_image(index)
636
637 # Letterbox
638 shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
639 img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
640 shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
641
642 labels = self.labels[index].copy()
643 if labels.size: # normalized xywh to pixel xyxy format
644 labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
645
646 if self.augment:
647 img, labels = random_perspective(img,
648 labels,
649 degrees=hyp['degrees'],
650 translate=hyp['translate'],
651 scale=hyp['scale'],
652 shear=hyp['shear'],
653 perspective=hyp['perspective'])
654
655 nl = len(labels) # number of labels
656 if nl:
657 labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)
658
659 if self.augment:
660 # Albumentations
661 img, labels = self.albumentations(img, labels)
662 nl = len(labels) # update after albumentations
663
664 # HSV color-space
665 augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
666
667 # Flip up-down
668 if random.random() < hyp['flipud']:
669 img = np.flipud(img)
670 if nl:
671 labels[:, 2] = 1 - labels[:, 2]
672
673 # Flip left-right
674 if random.random() < hyp['fliplr']:
675 img = np.fliplr(img)
676 if nl:
677 labels[:, 1] = 1 - labels[:, 1]
678
679 # Cutouts
680 # labels = cutout(img, labels, p=0.5)
681 # nl = len(labels) # update after cutout
682
683 labels_out = torch.zeros((nl, 6))
684 if nl:
685 labels_out[:, 1:] = torch.from_numpy(labels)
686
687 # Convert
688 img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
689 img = np.ascontiguousarray(img)
690
691 return torch.from_numpy(img), labels_out, self.im_files[index], shapes
692
693 def load_image(self, i):
694 # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
695 im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
696 if im is None: # not cached in RAM
697 if fn.exists(): # load npy
698 im = np.load(fn)
699 else: # read image
700 im = cv2.imread(f) # BGR
701 assert im is not None, f'Image Not Found {f}'
702 h0, w0 = im.shape[:2] # orig hw
703 r = self.img_size / max(h0, w0) # ratio
704 if r != 1: # if sizes are not equal
705 interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
706 im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp)
707 return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized
708 return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized
709
710 def cache_images_to_disk(self, i):
711 # Saves an image as an *.npy file for faster loading
712 f = self.npy_files[i]
713 if not f.exists():
714 np.save(f.as_posix(), cv2.imread(self.im_files[i]))
715
716 def load_mosaic(self, index):
717 # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
718 labels4, segments4 = [], []
719 s = self.img_size
720 yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
721 indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
722 random.shuffle(indices)
723 for i, index in enumerate(indices):
724 # Load image
725 img, _, (h, w) = self.load_image(index)
726
727 # place img in img4
728 if i == 0: # top left
729 img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
730 x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
731 x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
732 elif i == 1: # top right
733 x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
734 x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
735 elif i == 2: # bottom left
736 x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
737 x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
738 elif i == 3: # bottom right
739 x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
740 x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
741
742 img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
743 padw = x1a - x1b
744 padh = y1a - y1b
745
746 # Labels
747 labels, segments = self.labels[index].copy(), self.segments[index].copy()
748 if labels.size:
749 labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
750 segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
751 labels4.append(labels)
752 segments4.extend(segments)
753
754 # Concat/clip labels
755 labels4 = np.concatenate(labels4, 0)
756 for x in (labels4[:, 1:], *segments4):
757 np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
758 # img4, labels4 = replicate(img4, labels4) # replicate
759
760 # Augment
761 img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
762 img4, labels4 = random_perspective(img4,
763 labels4,
764 segments4,
765 degrees=self.hyp['degrees'],
766 translate=self.hyp['translate'],
767 scale=self.hyp['scale'],
768 shear=self.hyp['shear'],
769 perspective=self.hyp['perspective'],
770 border=self.mosaic_border) # border to remove
771
772 return img4, labels4
773
774 def load_mosaic9(self, index):
775 # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic
776 labels9, segments9 = [], []
777 s = self.img_size
778 indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
779 random.shuffle(indices)
780 hp, wp = -1, -1 # height, width previous
781 for i, index in enumerate(indices):
782 # Load image
783 img, _, (h, w) = self.load_image(index)
784
785 # place img in img9
786 if i == 0: # center
787 img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 9 tiles
788 h0, w0 = h, w
789 c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
790 elif i == 1: # top
791 c = s, s - h, s + w, s
792 elif i == 2: # top right
793 c = s + wp, s - h, s + wp + w, s
794 elif i == 3: # right
795 c = s + w0, s, s + w0 + w, s + h
796 elif i == 4: # bottom right
797 c = s + w0, s + hp, s + w0 + w, s + hp + h
798 elif i == 5: # bottom
799 c = s + w0 - w, s + h0, s + w0, s + h0 + h
800 elif i == 6: # bottom left
801 c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
802 elif i == 7: # left
803 c = s - w, s + h0 - h, s, s + h0
804 elif i == 8: # top left
805 c = s - w, s + h0 - hp - h, s, s + h0 - hp
806
807 padx, pady = c[:2]
808 x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
809
810 # Labels
811 labels, segments = self.labels[index].copy(), self.segments[index].copy()
812 if labels.size:
813 labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format
814 segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
815 labels9.append(labels)
816 segments9.extend(segments)
817
818 # Image
819 img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
820 hp, wp = h, w # height, width previous
821
822 # Offset
823 yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y
824 img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
825
826 # Concat/clip labels
827 labels9 = np.concatenate(labels9, 0)
828 labels9[:, [1, 3]] -= xc
829 labels9[:, [2, 4]] -= yc
830 c = np.array([xc, yc]) # centers
831 segments9 = [x - c for x in segments9]
832
833 for x in (labels9[:, 1:], *segments9):
834 np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
835 # img9, labels9 = replicate(img9, labels9) # replicate
836
837 # Augment
838 img9, labels9 = random_perspective(img9,
839 labels9,
840 segments9,
841 degrees=self.hyp['degrees'],
842 translate=self.hyp['translate'],
843 scale=self.hyp['scale'],
844 shear=self.hyp['shear'],
845 perspective=self.hyp['perspective'],
846 border=self.mosaic_border) # border to remove
847
848 return img9, labels9
849
850 @staticmethod
851 def collate_fn(batch):
852 im, label, path, shapes = zip(*batch) # transposed
853 for i, lb in enumerate(label):
854 lb[:, 0] = i # add target image index for build_targets()
855 return torch.stack(im, 0), torch.cat(label, 0), path, shapes
856
857 @staticmethod
858 def collate_fn4(batch):
859 im, label, path, shapes = zip(*batch) # transposed
860 n = len(shapes) // 4
861 im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
862
863 ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
864 wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
865 s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale
866 for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW
867 i *= 4
868 if random.random() < 0.5:
869 im1 = F.interpolate(im[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear',
870 align_corners=False)[0].type(im[i].type())
871 lb = label[i]
872 else:
873 im1 = torch.cat((torch.cat((im[i], im[i + 1]), 1), torch.cat((im[i + 2], im[i + 3]), 1)), 2)
874 lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
875 im4.append(im1)
876 label4.append(lb)
877
878 for i, lb in enumerate(label4):
879 lb[:, 0] = i # add target image index for build_targets()
880
881 return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4
882
883
884 # Ancillary functions --------------------------------------------------------------------------------------------------
885 def flatten_recursive(path=DATASETS_DIR / 'coco128'):
886 # Flatten a recursive directory by bringing all files to top level
887 new_path = Path(f'{str(path)}_flat')
888 if os.path.exists(new_path):
889 shutil.rmtree(new_path) # delete output folder
890 os.makedirs(new_path) # make new output folder
891 for file in tqdm(glob.glob(f'{str(Path(path))}/**/*.*', recursive=True)):
892 shutil.copyfile(file, new_path / Path(file).name)
893
894
895 def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders import *; extract_boxes()
896 # Convert detection dataset into classification dataset, with one directory per class
897 path = Path(path) # images dir
898 shutil.rmtree(path / 'classification') if (path / 'classification').is_dir() else None # remove existing
899 files = list(path.rglob('*.*'))
900 n = len(files) # number of files
901 for im_file in tqdm(files, total=n):
902 if im_file.suffix[1:] in IMG_FORMATS:
903 # image
904 im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB
905 h, w = im.shape[:2]
906
907 # labels
908 lb_file = Path(img2label_paths([str(im_file)])[0])
909 if Path(lb_file).exists():
910 with open(lb_file) as f:
911 lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels
912
913 for j, x in enumerate(lb):
914 c = int(x[0]) # class
915 f = (path / 'classification') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename, under the directory removed above
916 if not f.parent.is_dir():
917 f.parent.mkdir(parents=True)
918
919 b = x[1:] * [w, h, w, h] # box
920 # b[2:] = b[2:].max() # rectangle to square
921 b[2:] = b[2:] * 1.2 + 3 # pad
922 b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
923
924 b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
925 b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
926 assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
927
928
929 def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
930 """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
931 Usage: from utils.dataloaders import *; autosplit()
932 Arguments
933 path: Path to images directory
934 weights: Train, val, test weights (list, tuple)
935 annotated_only: Only use images with an annotated txt file
936 """
937 path = Path(path) # images dir
938 files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only
939 n = len(files) # number of files
940 random.seed(0) # for reproducibility
941 indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
942
943 txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files
944 for x in txt:
945 if (path.parent / x).exists():
946 (path.parent / x).unlink() # remove existing
947
948 print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
949 for i, img in tqdm(zip(indices, files), total=n):
950 if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label
951 with open(path.parent / txt[i], 'a') as f:
952 f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n') # add image to txt file
953
954
955 def verify_image_label(args):
956 # Verify one image-label pair
957 im_file, lb_file, prefix = args
958 nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments
959 try:
960 # verify images
961 im = Image.open(im_file)
962 im.verify() # PIL verify
963 shape = exif_size(im) # image size
964 assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
965 assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
966 if im.format.lower() in ('jpg', 'jpeg'):
967 with open(im_file, 'rb') as f:
968 f.seek(-2, 2)
969 if f.read() != b'\xff\xd9': # corrupt JPEG
970 ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
971 msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
972
973 # verify labels
974 if os.path.isfile(lb_file):
975 nf = 1 # label found
976 with open(lb_file) as f:
977 lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
978 if any(len(x) > 6 for x in lb): # is segment
979 classes = np.array([x[0] for x in lb], dtype=np.float32)
980 segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...)
981 lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
982 lb = np.array(lb, dtype=np.float32)
983 nl = len(lb)
984 if nl:
985 assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
986 assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
987 assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
988 _, i = np.unique(lb, axis=0, return_index=True)
989 if len(i) < nl: # duplicate row check
990 lb = lb[i] # remove duplicates
991 if segments:
992 segments = [segments[x] for x in i]
993 msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
994 else:
995 ne = 1 # label empty
996 lb = np.zeros((0, 5), dtype=np.float32)
997 else:
998 nm = 1 # label missing
999 lb = np.zeros((0, 5), dtype=np.float32)
1000 return im_file, lb, shape, segments, nm, nf, ne, nc, msg
1001 except Exception as e:
1002 nc = 1
1003 msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}'
1004 return [None, None, None, None, nm, nf, ne, nc, msg]
1005
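# For reference, a label file that passes verify_image_label() holds one
# 'class x_center y_center width height' row per object, all coordinates normalized to [0, 1],
# e.g. (values illustrative):
#   0 0.481719 0.634028 0.690625 0.713278
#   45 0.339438 0.418896 0.478687 0.562297
# Segment rows are longer (class x1 y1 x2 y2 ...) and are converted to boxes by segments2boxes().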
1006
1007 class HUBDatasetStats:
1008 """ Return dataset statistics dictionary with images and instances counts per split per class
1009 To run in parent directory: export PYTHONPATH="$PWD/yolov5"
1010 Usage1: from utils.dataloaders import *; HUBDatasetStats('coco128.yaml', autodownload=True)
1011 Usage2: from utils.dataloaders import *; HUBDatasetStats('path/to/coco128_with_yaml.zip')
1012 Arguments
1013 path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
1014 autodownload: Attempt to download dataset if not found locally
1015 """
1016
1017 def __init__(self, path='coco128.yaml', autodownload=False):
1018 # Initialize class
1019 zipped, data_dir, yaml_path = self._unzip(Path(path))
1020 try:
1021 with open(check_yaml(yaml_path), errors='ignore') as f:
1022 data = yaml.safe_load(f) # data dict
1023 if zipped:
1024 data['path'] = data_dir
1025 except Exception as e:
1026 raise Exception("error/HUB/dataset_stats/yaml_load") from e
1027
1028 check_dataset(data, autodownload) # download dataset if missing
1029 self.hub_dir = Path(data['path'] + '-hub')
1030 self.im_dir = self.hub_dir / 'images'
1031 self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images
1032 self.stats = {'nc': data['nc'], 'names': list(data['names'].values())} # statistics dictionary
1033 self.data = data
1034
1035 @staticmethod
1036 def _find_yaml(dir):
1037 # Return data.yaml file
1038 files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive
1039 assert files, f'No *.yaml file found in {dir}'
1040 if len(files) > 1:
1041 files = [f for f in files if f.stem == dir.stem] # prefer *.yaml files that match dir name
1042 assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed'
1043 assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
1044 return files[0]
1045
1046 def _unzip(self, path):
1047 # Unzip data.zip
1048 if not str(path).endswith('.zip'): # path is data.yaml
1049 return False, None, path
1050 assert Path(path).is_file(), f'Error unzipping {path}, file not found'
1051 ZipFile(path).extractall(path=path.parent) # unzip
1052 dir = path.with_suffix('') # dataset directory == zip name
1053 assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
1054 return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
1055
1056 def _hub_ops(self, f, max_dim=1920):
1057 # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
1058 f_new = self.im_dir / Path(f).name # dataset-hub image filename
1059 try: # use PIL
1060 im = Image.open(f)
1061 r = max_dim / max(im.height, im.width) # ratio
1062 if r < 1.0: # image too large
1063 im = im.resize((int(im.width * r), int(im.height * r)))
1064 im.save(f_new, 'JPEG', quality=50, optimize=True) # save
1065 except Exception as e: # use OpenCV
1066 LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
1067 im = cv2.imread(f)
1068 im_height, im_width = im.shape[:2]
1069 r = max_dim / max(im_height, im_width) # ratio
1070 if r < 1.0: # image too large
1071 im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
1072 cv2.imwrite(str(f_new), im)
1073
1074 def get_json(self, save=False, verbose=False):
1075 # Return dataset JSON for Ultralytics HUB
1076 def _round(labels):
1077 # Update labels to integer class and 4 decimal place floats
1078 return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]
1079
1080 for split in 'train', 'val', 'test':
1081 if self.data.get(split) is None:
1082 self.stats[split] = None # i.e. no test set
1083 continue
1084 dataset = LoadImagesAndLabels(self.data[split]) # load dataset
1085 x = np.array([
1086 np.bincount(label[:, 0].astype(int), minlength=self.data['nc'])
1087 for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')]) # shape(128x80)
1088 self.stats[split] = {
1089 'instance_stats': {
1090 'total': int(x.sum()),
1091 'per_class': x.sum(0).tolist()},
1092 'image_stats': {
1093 'total': dataset.n,
1094 'unlabelled': int(np.all(x == 0, 1).sum()),
1095 'per_class': (x > 0).sum(0).tolist()},
1096 'labels': [{
1097 str(Path(k).name): _round(v.tolist())} for k, v in zip(dataset.im_files, dataset.labels)]}
1098
1099 # Save, print and return
1100 if save:
1101 stats_path = self.hub_dir / 'stats.json'
1102 print(f'Saving {stats_path.resolve()}...')
1103 with open(stats_path, 'w') as f:
1104 json.dump(self.stats, f) # save stats.json
1105 if verbose:
1106 print(json.dumps(self.stats, indent=2, sort_keys=False))
1107 return self.stats
1108
1109 def process_images(self):
1110 # Compress images for Ultralytics HUB
1111 for split in 'train', 'val', 'test':
1112 if self.data.get(split) is None:
1113 continue
1114 dataset = LoadImagesAndLabels(self.data[split]) # load dataset
1115 desc = f'{split} images'
1116 for _ in tqdm(ThreadPool(NUM_THREADS).imap(self._hub_ops, dataset.im_files), total=dataset.n, desc=desc):
1117 pass
1118 print(f'Done. All images saved to {self.im_dir}')
1119 return self.im_dir
1120
1121
1122 # Classification dataloaders -------------------------------------------------------------------------------------------
1123 class ClassificationDataset(torchvision.datasets.ImageFolder):
1124 """
1125 YOLOv5 Classification Dataset.
1126 Arguments
1127 root: Dataset path
1128 transform: torchvision transforms, used by default
1129 album_transform: Albumentations transforms, used if installed
1130 """
1131
1132 def __init__(self, root, augment, imgsz, cache=False):
1133 super().__init__(root=root)
1134 self.torch_transforms = classify_transforms(imgsz)
1135 self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
1136 self.cache_ram = cache is True or cache == 'ram'
1137 self.cache_disk = cache == 'disk'
1138 self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im
1139
1140 def __getitem__(self, i):
1141 f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image
1142 if self.cache_ram: # RAM cache: read once, then reuse the cached array on later accesses
1143 im = self.samples[i][3] = im if im is not None else cv2.imread(f)
1144 elif self.cache_disk:
1145 if not fn.exists(): # write the *.npy cache file on first use
1146 np.save(fn.as_posix(), cv2.imread(f))
1147 im = np.load(fn)
1148 else: # read image
1149 im = cv2.imread(f) # BGR
1150 if self.album_transforms:
1151 sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))["image"]
1152 else:
1153 sample = self.torch_transforms(im)
1154 return sample, j
1155
1156
1157 def create_classification_dataloader(path,
1158 imgsz=224,
1159 batch_size=16,
1160 augment=True,
1161 cache=False,
1162 rank=-1,
1163 workers=8,
1164 shuffle=True):
1165 # Returns Dataloader object to be used with YOLOv5 Classifier
1166 with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
1167 dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
1168 batch_size = min(batch_size, len(dataset))
1169 nd = torch.cuda.device_count()
1170 nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
1171 sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
1172 generator = torch.Generator()
1173 generator.manual_seed(6148914691236517205 + RANK)
1174 return InfiniteDataLoader(dataset,
1175 batch_size=batch_size,
1176 shuffle=shuffle and sampler is None,
1177 num_workers=nw,
1178 sampler=sampler,
1179 pin_memory=PIN_MEMORY,
1180 worker_init_fn=seed_worker,
1181 generator=generator) # or DataLoader(persistent_workers=True)
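# A minimal usage sketch for the classification loader above; the dataset path is illustrative
# and assumes the standard torchvision ImageFolder layout (one sub-directory per class):
loader = create_classification_dataloader('../datasets/imagenette160/train',
                                          imgsz=224, batch_size=32, augment=True)
for ims, labels in loader:  # ims: batch of transformed images, labels: integer class indices
    break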
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Builds ultralytics/yolov5:latest image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
3 # Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference
4
5 # Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
6 FROM nvcr.io/nvidia/pytorch:22.09-py3
7 RUN rm -rf /opt/pytorch # remove 1.2GB dir
8
9 # Downloads to user config dir
10 ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
11
12 # Install linux packages
13 RUN apt update && apt install --no-install-recommends -y zip htop screen libgl1-mesa-glx
14
15 # Install pip packages
16 COPY requirements.txt .
17 RUN python -m pip install --upgrade pip wheel
18 RUN pip uninstall -y Pillow torchtext torch torchvision
19 RUN pip install --no-cache -r requirements.txt albumentations wandb gsutil notebook 'Pillow>=9.1.0' \
20 'opencv-python<4.6.0.66' \
21 --extra-index-url https://download.pytorch.org/whl/cu113
22
23 # Create working directory
24 RUN mkdir -p /usr/src/app
25 WORKDIR /usr/src/app
26
27 # Copy contents
28 # COPY . /usr/src/app (issues as not a .git directory)
29 RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
30
31 # Set environment variables
32 ENV OMP_NUM_THREADS=8
33
34
35 # Usage Examples -------------------------------------------------------------------------------------------------------
36
37 # Build and Push
38 # t=ultralytics/yolov5:latest && sudo docker build -f utils/docker/Dockerfile -t $t . && sudo docker push $t
39
40 # Pull and Run
41 # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t
42
43 # Pull and Run with local directory access
44 # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t
45
46 # Kill all
47 # sudo docker kill $(sudo docker ps -q)
48
49 # Kill all image-based
50 # sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest)
51
52 # DockerHub tag update
53 # t=ultralytics/yolov5:latest tnew=ultralytics/yolov5:v6.2 && sudo docker pull $t && sudo docker tag $t $tnew && sudo docker push $tnew
54
55 # Clean up
56 # docker system prune -a --volumes
57
58 # Update Ubuntu drivers
59 # https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/
60
61 # DDP test
62 # python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3
63
64 # GCP VM from Image
65 # docker.io/ultralytics/yolov5:latest
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Builds ultralytics/yolov5:latest-arm64 image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
3 # Image is aarch64-compatible for Apple M1 and other ARM architectures i.e. Jetson Nano and Raspberry Pi
4
5 # Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
6 FROM arm64v8/ubuntu:20.04
7
8 # Downloads to user config dir
9 ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
10
11 # Install linux packages
12 RUN apt update
13 RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata
14 RUN apt install --no-install-recommends -y python3-pip git zip curl htop gcc libgl1-mesa-glx libglib2.0-0 libpython3-dev
15 # RUN alias python=python3
16
17 # Install pip packages
18 COPY requirements.txt .
19 RUN python3 -m pip install --upgrade pip wheel
20 RUN pip install --no-cache -r requirements.txt gsutil notebook \
21 tensorflow-aarch64
22 # tensorflowjs \
23 # onnx onnx-simplifier onnxruntime \
24 # coremltools openvino-dev \
25
26 # Create working directory
27 RUN mkdir -p /usr/src/app
28 WORKDIR /usr/src/app
29
30 # Copy contents
31 # COPY . /usr/src/app (issues as not a .git directory)
32 RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
33
34
35 # Usage Examples -------------------------------------------------------------------------------------------------------
36
37 # Build and Push
38 # t=ultralytics/yolov5:latest-M1 && sudo docker build --platform linux/arm64 -f utils/docker/Dockerfile-arm64 -t $t . && sudo docker push $t
39
40 # Pull and Run
41 # t=ultralytics/yolov5:latest-M1 && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 # Builds ultralytics/yolov5:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
3 # Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv5 deployments
4
5 # Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
6 FROM ubuntu:20.04
7
8 # Downloads to user config dir
9 ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
10
11 # Install linux packages
12 RUN apt update
13 RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata
14 RUN apt install --no-install-recommends -y python3-pip git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev
15 # RUN alias python=python3
16
17 # Install pip packages
18 COPY requirements.txt .
19 RUN python3 -m pip install --upgrade pip wheel
20 RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \
21 coremltools onnx onnx-simplifier onnxruntime tensorflow-cpu tensorflowjs \
22 # openvino-dev \
23 --extra-index-url https://download.pytorch.org/whl/cpu
24
25 # Create working directory
26 RUN mkdir -p /usr/src/app
27 WORKDIR /usr/src/app
28
29 # Copy contents
30 # COPY . /usr/src/app (issues as not a .git directory)
31 RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app
32
33
34 # Usage Examples -------------------------------------------------------------------------------------------------------
35
36 # Build and Push
37 # t=ultralytics/yolov5:latest-cpu && sudo docker build -f utils/docker/Dockerfile-cpu -t $t . && sudo docker push $t
38
39 # Pull and Run
40 # t=ultralytics/yolov5:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Download utils
4 """
5
6 import logging
7 import os
8 import platform
9 import subprocess
10 import time
11 import urllib
12 from pathlib import Path
13 from zipfile import ZipFile
14
15 import requests
16 import torch
17
18
19 def is_url(url, check=True):
20 # Check if string is URL and check if URL exists
21 try:
22 url = str(url)
23 result = urllib.parse.urlparse(url)
24        assert all([result.scheme, result.netloc])  # check it is a URL
25 return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online
26 except (AssertionError, urllib.request.HTTPError):
27 return False
28
29
30 def gsutil_getsize(url=''):
31 # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
32 s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
33    return int(s.split(' ')[0]) if len(s) else 0  # bytes (int() instead of eval() on shell output)
34
35
36 def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
37 # Return downloadable file size in bytes
38 response = requests.head(url, allow_redirects=True)
39 return int(response.headers.get('content-length', -1))
40
41
42 def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
43 # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
44 from utils.general import LOGGER
45
46 file = Path(file)
47 assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
48 try: # url1
49 LOGGER.info(f'Downloading {url} to {file}...')
50 torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
51 assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
52 except Exception as e: # url2
53 if file.exists():
54 file.unlink() # remove partial downloads
55 LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
56 os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
57 finally:
58 if not file.exists() or file.stat().st_size < min_bytes: # check
59 if file.exists():
60 file.unlink() # remove partial downloads
61 LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}")
62 LOGGER.info('')
63
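# Example usage (a sketch; the URL mirrors the release-asset URL that attempt_download() builds below):
#   safe_download('yolov5s.pt',
#                 url='https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5s.pt',
#                 min_bytes=1E5)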
64
65 def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
66 # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc.
67 from utils.general import LOGGER
68
69 def github_assets(repository, version='latest'):
70 # Return GitHub repo tag (i.e. 'v6.2') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])
71 if version != 'latest':
72 version = f'tags/{version}' # i.e. tags/v6.2
73 response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api
74 return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets
75
76 file = Path(str(file).strip().replace("'", ''))
77 if not file.exists():
78 # URL specified
79 name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
80 if str(file).startswith(('http:/', 'https:/')): # download
81 url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
82 file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
83 if Path(file).is_file():
84 LOGGER.info(f'Found {url} locally at {file}') # file already exists
85 else:
86 safe_download(file=file, url=url, min_bytes=1E5)
87 return file
88
89 # GitHub assets
90 assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
91 try:
92 tag, assets = github_assets(repo, release)
93 except Exception:
94 try:
95 tag, assets = github_assets(repo) # latest release
96 except Exception:
97 try:
98 tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
99 except Exception:
100 tag = release
101
102 file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
103 if name in assets:
104 url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror
105 safe_download(
106 file,
107 url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
108 min_bytes=1E5,
109 error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
110
111 return str(file)
112
113
114 def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
115 # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
116 t = time.time()
117 file = Path(file)
118 cookie = Path('cookie') # gdrive cookie
119 print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
120 if file.exists():
121 file.unlink() # remove existing file
122 if cookie.exists():
123 cookie.unlink() # remove existing cookie
124
125 # Attempt file download
126 out = "NUL" if platform.system() == "Windows" else "/dev/null"
127 os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
128 if os.path.exists('cookie'): # large file
129 s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
130 else: # small file
131 s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
132 r = os.system(s) # execute, capture return
133 if cookie.exists():
134 cookie.unlink() # remove existing cookie
135
136 # Error check
137 if r != 0:
138 if file.exists():
139 file.unlink() # remove partial
140 print('Download error ') # raise Exception('Download error')
141 return r
142
143 # Unzip if archive
144 if file.suffix == '.zip':
145 print('unzipping... ', end='')
146 ZipFile(file).extractall(path=file.parent) # unzip
147 file.unlink() # remove zip
148
149 print(f'Done ({time.time() - t:.1f}s)')
150 return r
151
152
153 def get_token(cookie="./cookie"):
154 with open(cookie) as f:
155 for line in f:
156 if "download" in line:
157 return line.split()[-1]
158 return ""
159
160
161 # Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
162 #
163 #
164 # def upload_blob(bucket_name, source_file_name, destination_blob_name):
165 # # Uploads a file to a bucket
166 # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
167 #
168 # storage_client = storage.Client()
169 # bucket = storage_client.get_bucket(bucket_name)
170 # blob = bucket.blob(destination_blob_name)
171 #
172 # blob.upload_from_filename(source_file_name)
173 #
174 # print('File {} uploaded to {}.'.format(
175 # source_file_name,
176 # destination_blob_name))
177 #
178 #
179 # def download_blob(bucket_name, source_blob_name, destination_file_name):
180 # # Uploads a blob from a bucket
181 # storage_client = storage.Client()
182 # bucket = storage_client.get_bucket(bucket_name)
183 # blob = bucket.blob(source_blob_name)
184 #
185 # blob.download_to_filename(destination_file_name)
186 #
187 # print('Blob {} downloaded to {}.'.format(
188 # source_blob_name,
189 # destination_file_name))
1 # Flask REST API
2
3 [REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are
4 commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API
5 created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
6
7 ## Requirements
8
9 [Flask](https://palletsprojects.com/p/flask/) is required. Install with:
10
11 ```shell
12 $ pip install Flask
13 ```
14
15 ## Run
16
17 After installing Flask, run:
18
19 ```shell
20 $ python3 restapi.py --port 5000
21 ```
22
23 Then use [curl](https://curl.se/) to perform a request:
24
25 ```shell
26 $ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
27 ```
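
The same request can be made from Python. Below is a minimal sketch using
[requests](https://docs.python-requests.org/en/master/); the complete script ships as `example_request.py`:

```python
import requests

with open("zidane.jpg", "rb") as f:
    response = requests.post("http://localhost:5000/v1/object-detection/yolov5s", files={"image": f.read()})

print(response.json())
```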
28
29 The model inference results are returned as a JSON response:
30
31 ```json
32 [
33 {
34 "class": 0,
35 "confidence": 0.8900438547,
36 "height": 0.9318675399,
37 "name": "person",
38 "width": 0.3264600933,
39 "xcenter": 0.7438579798,
40 "ycenter": 0.5207948685
41 },
42 {
43 "class": 0,
44 "confidence": 0.8440024257,
45 "height": 0.7155083418,
46 "name": "person",
47 "width": 0.6546785235,
48 "xcenter": 0.427829951,
49 "ycenter": 0.6334488392
50 },
51 {
52 "class": 27,
53 "confidence": 0.3771208823,
54 "height": 0.3902671337,
55 "name": "tie",
56 "width": 0.0696444362,
57 "xcenter": 0.3675483763,
58 "ycenter": 0.7991207838
59 },
60 {
61 "class": 27,
62 "confidence": 0.3527112305,
63 "height": 0.1540903747,
64 "name": "tie",
65 "width": 0.0336618312,
66 "xcenter": 0.7814827561,
67 "ycenter": 0.5065554976
68 }
69 ]
70 ```
71
72 An example Python script that performs inference using [requests](https://docs.python-requests.org/en/master/) is given
73 in `example_request.py`.
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Perform test request
4 """
5
6 import pprint
7
8 import requests
9
10 DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s"
11 IMAGE = "zidane.jpg"
12
13 # Read image
14 with open(IMAGE, "rb") as f:
15 image_data = f.read()
16
17 response = requests.post(DETECTION_URL, files={"image": image_data}).json()
18
19 pprint.pprint(response)
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Run a Flask REST API exposing one or more YOLOv5s models
4 """
5
6 import argparse
7 import io
8
9 import torch
10 from flask import Flask, request
11 from PIL import Image
12
13 app = Flask(__name__)
14 models = {}
15
16 DETECTION_URL = "/v1/object-detection/<model>"
17
18
19 @app.route(DETECTION_URL, methods=["POST"])
20 def predict(model):
21    if request.method != "POST":  # defensive check; the route only registers POST
22 return
23
24 if request.files.get("image"):
25 # Method 1
26 # with request.files["image"] as f:
27 # im = Image.open(io.BytesIO(f.read()))
28
29 # Method 2
30 im_file = request.files["image"]
31 im_bytes = im_file.read()
32 im = Image.open(io.BytesIO(im_bytes))
33
34 if model in models:
35 results = models[model](im, size=640) # reduce size=320 for faster inference
36 return results.pandas().xyxy[0].to_json(orient="records")
37
38
39 if __name__ == "__main__":
40 parser = argparse.ArgumentParser(description="Flask API exposing YOLOv5 model")
41 parser.add_argument("--port", default=5000, type=int, help="port number")
42 parser.add_argument('--model', nargs='+', default=['yolov5s'], help='model(s) to run, i.e. --model yolov5n yolov5s')
43 opt = parser.parse_args()
44
45 for m in opt.model:
46 models[m] = torch.hub.load("ultralytics/yolov5", m, force_reload=True, skip_validation=True)
47
48 app.run(host="0.0.0.0", port=opt.port) # debug=True causes Restarting with stat
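# Example: start the server with 'python restapi.py --port 5000 --model yolov5s yolov5m',
# then POST an image to /v1/object-detection/yolov5s as shown in the README.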
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 General utils
4 """
5
6 import contextlib
7 import glob
8 import inspect
9 import logging
10 import math
11 import os
12 import platform
13 import random
14 import re
15 import shutil
16 import signal
17 import sys
18 import time
19 import urllib
20 from copy import deepcopy
21 from datetime import datetime
22 from itertools import repeat
23 from multiprocessing.pool import ThreadPool
24 from pathlib import Path
25 from subprocess import check_output
26 from typing import Optional
27 from zipfile import ZipFile
28
29 import cv2
30 import numpy as np
31 import pandas as pd
32 import pkg_resources as pkg
33 import torch
34 import torchvision
35 import yaml
36
37 from utils import TryExcept, emojis
38 from utils.downloads import gsutil_getsize
39 from utils.metrics import box_iou, fitness
40
41 FILE = Path(__file__).resolve()
42 ROOT = FILE.parents[1] # YOLOv5 root directory
43 RANK = int(os.getenv('RANK', -1))
44
45 # Settings
46 NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
47 DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets')) # global datasets directory
48 AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode
49 VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
50 FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
51
52 torch.set_printoptions(linewidth=320, precision=5, profile='long')
53 np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
54 pd.options.display.max_columns = 10
55 cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
56 os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS) # NumExpr max threads
57 os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'Darwin' else str(NUM_THREADS)  # OpenMP (PyTorch and SciPy); platform.system() returns 'Darwin' on macOS
58
59
60 def is_ascii(s=''):
61 # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7)
62 s = str(s) # convert list, tuple, None, etc. to str
63 return len(s.encode().decode('ascii', 'ignore')) == len(s)
64
65
66 def is_chinese(s='人工智能'):
67 # Is string composed of any Chinese characters?
68 return bool(re.search('[\u4e00-\u9fff]', str(s)))
69
70
71 def is_colab():
72 # Is environment a Google Colab instance?
73 return 'COLAB_GPU' in os.environ
74
75
76 def is_kaggle():
77 # Is environment a Kaggle Notebook?
78 return os.environ.get('PWD') == '/kaggle/working' and os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com'
79
80
81 def is_docker() -> bool:
82 """Check if the process runs inside a docker container."""
83 if Path("/.dockerenv").exists():
84 return True
85 try: # check if docker is in control groups
86 with open("/proc/self/cgroup") as file:
87 return any("docker" in line for line in file)
88 except OSError:
89 return False
90
91
92 def is_writeable(dir, test=False):
93 # Return True if directory has write permissions, test opening a file with write permissions if test=True
94 if not test:
95 return os.access(dir, os.W_OK) # possible issues on Windows
96 file = Path(dir) / 'tmp.txt'
97 try:
98 with open(file, 'w'): # open file with write permissions
99 pass
100 file.unlink() # remove file
101 return True
102 except OSError:
103 return False
104
105
106 def set_logging(name=None, verbose=VERBOSE):
107 # Sets level and returns logger
108 if is_kaggle() or is_colab():
109 for h in logging.root.handlers:
110 logging.root.removeHandler(h) # remove all handlers associated with the root logger object
111 rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings
112 level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
113 log = logging.getLogger(name)
114 log.setLevel(level)
115 handler = logging.StreamHandler()
116 handler.setFormatter(logging.Formatter("%(message)s"))
117 handler.setLevel(level)
118 log.addHandler(handler)
119
120
121 set_logging() # run before defining LOGGER
122 LOGGER = logging.getLogger("yolov5") # define globally (used in train.py, val.py, detect.py, etc.)
123 if platform.system() == 'Windows':
124 for fn in LOGGER.info, LOGGER.warning:
125        setattr(LOGGER, fn.__name__, lambda x, fn=fn: fn(emojis(x)))  # emoji safe logging; fn=fn binds each function, not the last loop value
126
127
128 def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'):
129 # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required.
130 env = os.getenv(env_var)
131 if env:
132 path = Path(env) # use environment variable
133 else:
134 cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs
135 path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir
136 path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable
137 path.mkdir(exist_ok=True) # make if required
138 return path
139
140
141 CONFIG_DIR = user_config_dir() # Ultralytics settings dir
142
143
144 class Profile(contextlib.ContextDecorator):
145 # YOLOv5 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager
146 def __init__(self, t=0.0):
147 self.t = t
148 self.cuda = torch.cuda.is_available()
149
150 def __enter__(self):
151 self.start = self.time()
152 return self
153
154 def __exit__(self, type, value, traceback):
155 self.dt = self.time() - self.start # delta-time
156 self.t += self.dt # accumulate dt
157
158 def time(self):
159 if self.cuda:
160 torch.cuda.synchronize()
161 return time.time()
162
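# Example usage (a minimal sketch; 'model' and 'im' are placeholders for any callable and input):
#   with Profile() as p:
#       _ = model(im)
#   print(f'{p.dt:.3f}s elapsed, {p.t:.3f}s accumulated')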
163
164 class Timeout(contextlib.ContextDecorator):
165 # YOLOv5 Timeout class. Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager
166 def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True):
167 self.seconds = int(seconds)
168 self.timeout_message = timeout_msg
169 self.suppress = bool(suppress_timeout_errors)
170
171 def _timeout_handler(self, signum, frame):
172 raise TimeoutError(self.timeout_message)
173
174 def __enter__(self):
175 if platform.system() != 'Windows': # not supported on Windows
176 signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM
177 signal.alarm(self.seconds) # start countdown for SIGALRM to be raised
178
179 def __exit__(self, exc_type, exc_val, exc_tb):
180 if platform.system() != 'Windows':
181 signal.alarm(0) # Cancel SIGALRM if it's scheduled
182 if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError
183 return True
184
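# Example usage (a sketch; Timeout is a no-op on Windows, where SIGALRM is unavailable):
#   with Timeout(5, timeout_msg='operation timed out'):
#       slow_function()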
185
186 class WorkingDirectory(contextlib.ContextDecorator):
187 # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
188 def __init__(self, new_dir):
189 self.dir = new_dir # new dir
190 self.cwd = Path.cwd().resolve() # current dir
191
192 def __enter__(self):
193 os.chdir(self.dir)
194
195 def __exit__(self, exc_type, exc_val, exc_tb):
196 os.chdir(self.cwd)
197
198
199 def methods(instance):
200 # Get class/instance methods
201 return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")]
202
203
204 def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
205 # Print function arguments (optional args dict)
206 x = inspect.currentframe().f_back # previous frame
207 file, _, func, _, _ = inspect.getframeinfo(x)
208 if args is None: # get args automatically
209 args, _, _, frm = inspect.getargvalues(x)
210 args = {k: v for k, v in frm.items() if k in args}
211 try:
212 file = Path(file).resolve().relative_to(ROOT).with_suffix('')
213 except ValueError:
214 file = Path(file).stem
215 s = (f'{file}: ' if show_file else '') + (f'{func}: ' if show_func else '')
216 LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items()))
217
218
219 def init_seeds(seed=0, deterministic=False):
220 # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html
221 random.seed(seed)
222 np.random.seed(seed)
223 torch.manual_seed(seed)
224 torch.cuda.manual_seed(seed)
225 torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe
226 # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287
227 if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213
228 torch.use_deterministic_algorithms(True)
229 torch.backends.cudnn.deterministic = True
230 os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
231 os.environ['PYTHONHASHSEED'] = str(seed)
232
233
234 def intersect_dicts(da, db, exclude=()):
235 # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
236 return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape}
237
238
239 def get_default_args(func):
240 # Get func() default arguments
241 signature = inspect.signature(func)
242 return {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty}
243
244
245 def get_latest_run(search_dir='.'):
246 # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
247 last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
248 return max(last_list, key=os.path.getctime) if last_list else ''
249
250
251 def file_age(path=__file__):
252 # Return days since last file update
253 dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
254 return dt.days # + dt.seconds / 86400 # fractional days
255
256
257 def file_date(path=__file__):
258 # Return human-readable file modification date, i.e. '2021-3-26'
259 t = datetime.fromtimestamp(Path(path).stat().st_mtime)
260 return f'{t.year}-{t.month}-{t.day}'
261
262
263 def file_size(path):
264 # Return file/dir size (MB)
265 mb = 1 << 20 # bytes to MiB (1024 ** 2)
266 path = Path(path)
267 if path.is_file():
268 return path.stat().st_size / mb
269 elif path.is_dir():
270 return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb
271 else:
272 return 0.0
273
274
275 def check_online():
276 # Check internet connectivity
277 import socket
278 try:
279 socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility
280 return True
281 except OSError:
282 return False
283
284
285 def git_describe(path=ROOT): # path must be a directory
286 # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
287 try:
288 assert (Path(path) / '.git').is_dir()
289 return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1]
290 except Exception:
291 return ''
292
293
294 @TryExcept()
295 @WorkingDirectory(ROOT)
296 def check_git_status(repo='ultralytics/yolov5', branch='master'):
297 # YOLOv5 status check, recommend 'git pull' if code is out of date
298 url = f'https://github.com/{repo}'
299 msg = f', for updates see {url}'
300 s = colorstr('github: ') # string
301 assert Path('.git').exists(), s + 'skipping check (not a git repository)' + msg
302 assert check_online(), s + 'skipping check (offline)' + msg
303
304 splits = re.split(pattern=r'\s', string=check_output('git remote -v', shell=True).decode())
305 matches = [repo in s for s in splits]
306 if any(matches):
307 remote = splits[matches.index(True) - 1]
308 else:
309 remote = 'ultralytics'
310 check_output(f'git remote add {remote} {url}', shell=True)
311 check_output(f'git fetch {remote}', shell=True, timeout=5) # git fetch
312 local_branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out
313 n = int(check_output(f'git rev-list {local_branch}..{remote}/{branch} --count', shell=True)) # commits behind
314 if n > 0:
315 pull = 'git pull' if remote == 'origin' else f'git pull {remote} {branch}'
316 s += f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. Use `{pull}` or `git clone {url}` to update."
317 else:
318 s += f'up to date with {url} ✅'
319 LOGGER.info(s)
320
321
322 def check_python(minimum='3.7.0'):
323 # Check current python version vs. required python version
324 check_version(platform.python_version(), minimum, name='Python ', hard=True)
325
326
327 def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False):
328 # Check version vs. required version
329 current, minimum = (pkg.parse_version(x) for x in (current, minimum))
330 result = (current == minimum) if pinned else (current >= minimum) # bool
331 s = f'WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed' # string
332 if hard:
333 assert result, emojis(s) # assert min requirements met
334 if verbose and not result:
335 LOGGER.warning(s)
336 return result
337
338
339 @TryExcept()
340 def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''):
341 # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages or single package str)
342 prefix = colorstr('red', 'bold', 'requirements:')
343 check_python() # check python version
344 if isinstance(requirements, Path): # requirements.txt file
345 file = requirements.resolve()
346 assert file.exists(), f"{prefix} {file} not found, check failed."
347 with file.open() as f:
348 requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
349 elif isinstance(requirements, str):
350 requirements = [requirements]
351
352 s = ''
353 n = 0
354 for r in requirements:
355 try:
356 pkg.require(r)
357 except (pkg.VersionConflict, pkg.DistributionNotFound): # exception if requirements not met
358 s += f'"{r}" '
359 n += 1
360
361 if s and install and AUTOINSTALL: # check environment variable
362 LOGGER.info(f"{prefix} YOLOv5 requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...")
363 try:
364 assert check_online(), "AutoUpdate skipped (offline)"
365 LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode())
366 source = file if 'file' in locals() else requirements
367 s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
368 f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
369 LOGGER.info(s)
370 except Exception as e:
371 LOGGER.warning(f'{prefix} ❌ {e}')
372
373
374 def check_img_size(imgsz, s=32, floor=0):
375 # Verify image size is a multiple of stride s in each dimension
376 if isinstance(imgsz, int): # integer i.e. img_size=640
377 new_size = max(make_divisible(imgsz, int(s)), floor)
378 else: # list i.e. img_size=[640, 480]
379 imgsz = list(imgsz) # convert to list if tuple
380 new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
381 if new_size != imgsz:
382 LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
383 return new_size
384
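# e.g. check_img_size(641, s=32) warns and returns 672, the next multiple of 32 >= 641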
385
386 def check_imshow():
387 # Check if environment supports image displays
388 try:
389 assert not is_docker(), 'cv2.imshow() is disabled in Docker environments'
390 assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments'
391 cv2.imshow('test', np.zeros((1, 1, 3)))
392 cv2.waitKey(1)
393 cv2.destroyAllWindows()
394 cv2.waitKey(1)
395 return True
396 except Exception as e:
397 LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
398 return False
399
400
401 def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
402 # Check file(s) for acceptable suffix
403 if file and suffix:
404 if isinstance(suffix, str):
405 suffix = [suffix]
406 for f in file if isinstance(file, (list, tuple)) else [file]:
407 s = Path(f).suffix.lower() # file suffix
408 if len(s):
409 assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}"
410
411
412 def check_yaml(file, suffix=('.yaml', '.yml')):
413 # Search/download YAML file (if necessary) and return path, checking suffix
414 return check_file(file, suffix)
415
416
417 def check_file(file, suffix=''):
418 # Search/download file (if necessary) and return path
419 check_suffix(file, suffix) # optional
420 file = str(file) # convert to str()
421 if Path(file).is_file() or not file: # exists
422 return file
423 elif file.startswith(('http:/', 'https:/')): # download
424 url = file # warning: Pathlib turns :// -> :/
425 file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth
426 if Path(file).is_file():
427 LOGGER.info(f'Found {url} locally at {file}') # file already exists
428 else:
429 LOGGER.info(f'Downloading {url} to {file}...')
430 torch.hub.download_url_to_file(url, file)
431 assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
432 return file
433 elif file.startswith('clearml://'): # ClearML Dataset ID
434 assert 'clearml' in sys.modules, "ClearML is not installed, so cannot use ClearML dataset. Try running 'pip install clearml'."
435 return file
436 else: # search
437 files = []
438 for d in 'data', 'models', 'utils': # search directories
439 files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file
440 assert len(files), f'File not found: {file}' # assert file was found
441 assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique
442 return files[0] # return file
443
444
445 def check_font(font=FONT, progress=False):
446 # Download font to CONFIG_DIR if necessary
447 font = Path(font)
448 file = CONFIG_DIR / font.name
449 if not font.exists() and not file.exists():
450 url = f'https://ultralytics.com/assets/{font.name}'
451 LOGGER.info(f'Downloading {url} to {file}...')
452 torch.hub.download_url_to_file(url, str(file), progress=progress)
453
454
455 def check_dataset(data, autodownload=True):
456 # Download, check and/or unzip dataset if not found locally
457
458 # Download (optional)
459 extract_dir = ''
460 if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
461 download(data, dir=f'{DATASETS_DIR}/{Path(data).stem}', unzip=True, delete=False, curl=False, threads=1)
462 data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
463 extract_dir, autodownload = data.parent, False
464
465 # Read yaml (optional)
466 if isinstance(data, (str, Path)):
467 data = yaml_load(data) # dictionary
468
469 # Checks
470 for k in 'train', 'val', 'names':
471 assert k in data, f"data.yaml '{k}:' field missing ❌"
472 if isinstance(data['names'], (list, tuple)): # old array format
473 data['names'] = dict(enumerate(data['names'])) # convert to dict
474 data['nc'] = len(data['names'])
475
476 # Resolve paths
477 path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
478 if not path.is_absolute():
479 path = (ROOT / path).resolve()
480 for k in 'train', 'val', 'test':
481 if data.get(k): # prepend path
482 if isinstance(data[k], str):
483 x = (path / data[k]).resolve()
484 if not x.exists() and data[k].startswith('../'):
485 x = (path / data[k][3:]).resolve()
486 data[k] = str(x)
487 else:
488 data[k] = [str((path / x).resolve()) for x in data[k]]
489
490 # Parse yaml
491 train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
492 if val:
493 val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
494 if not all(x.exists() for x in val):
495 LOGGER.info('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()])
496 if not s or not autodownload:
497 raise Exception('Dataset not found ❌')
498 t = time.time()
499 if s.startswith('http') and s.endswith('.zip'): # URL
500 f = Path(s).name # filename
501 LOGGER.info(f'Downloading {s} to {f}...')
502 torch.hub.download_url_to_file(s, f)
503 Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root
504 ZipFile(f).extractall(path=DATASETS_DIR) # unzip
505 Path(f).unlink() # remove zip
506 r = None # success
507 elif s.startswith('bash '): # bash script
508 LOGGER.info(f'Running {s} ...')
509 r = os.system(s)
510 else: # python script
511 r = exec(s, {'yaml': data}) # return None
512 dt = f'({round(time.time() - t, 1)}s)'
513 s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
514 LOGGER.info(f"Dataset download {s}")
515 check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts
516 return data # dictionary
517
518
519 def check_amp(model):
520 # Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation
521 from models.common import AutoShape, DetectMultiBackend
522
523 def amp_allclose(model, im):
524 # All close FP32 vs AMP results
525 m = AutoShape(model, verbose=False) # model
526 a = m(im).xywhn[0] # FP32 inference
527 m.amp = True
528 b = m(im).xywhn[0] # AMP inference
529 return a.shape == b.shape and torch.allclose(a, b, atol=0.1) # close to 10% absolute tolerance
530
531 prefix = colorstr('AMP: ')
532 device = next(model.parameters()).device # get model device
533 if device.type in ('cpu', 'mps'):
534 return False # AMP only used on CUDA devices
535 f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check
536 im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3))
537 try:
538 assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
539 LOGGER.info(f'{prefix}checks passed ✅')
540 return True
541 except Exception:
542 help_url = 'https://github.com/ultralytics/yolov5/issues/7908'
543 LOGGER.warning(f'{prefix}checks failed ❌, disabling Automatic Mixed Precision. See {help_url}')
544 return False
545
546
547 def yaml_load(file='data.yaml'):
548 # Single-line safe yaml loading
549 with open(file, errors='ignore') as f:
550 return yaml.safe_load(f)
551
552
553 def yaml_save(file='data.yaml', data={}):
554 # Single-line safe yaml saving
555 with open(file, 'w') as f:
556 yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)
557
558
559 def url2file(url):
560 # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
561 url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/
562 return Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth
563
564
565 def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry=3):
566 # Multithreaded file download and unzip function, used in data.yaml for autodownload
567 def download_one(url, dir):
568 # Download 1 file
569 success = True
570 if Path(url).is_file():
571 f = Path(url) # filename
572 else: # does not exist
573 f = dir / Path(url).name
574 LOGGER.info(f'Downloading {url} to {f}...')
575 for i in range(retry + 1):
576 if curl:
577 s = 'sS' if threads > 1 else '' # silent
578 r = os.system(
579 f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue
580 success = r == 0
581 else:
582 torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
583 success = f.is_file()
584 if success:
585 break
586 elif i < retry:
587 LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
588 else:
589 LOGGER.warning(f'❌ Failed to download {url}...')
590
591 if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
592 LOGGER.info(f'Unzipping {f}...')
593 if f.suffix == '.zip':
594 ZipFile(f).extractall(path=dir) # unzip
595 elif f.suffix == '.tar':
596 os.system(f'tar xf {f} --directory {f.parent}') # unzip
597 elif f.suffix == '.gz':
598 os.system(f'tar xfz {f} --directory {f.parent}') # unzip
599 if delete:
600 f.unlink() # remove zip
601
602 dir = Path(dir)
603 dir.mkdir(parents=True, exist_ok=True) # make directory
604 if threads > 1:
605 pool = ThreadPool(threads)
606 pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multithreaded
607 pool.close()
608 pool.join()
609 else:
610 for u in [url] if isinstance(url, (str, Path)) else url:
611 download_one(u, dir)
612
613
614 def make_divisible(x, divisor):
615 # Returns nearest x divisible by divisor
616 if isinstance(divisor, torch.Tensor):
617 divisor = int(divisor.max()) # to int
618 return math.ceil(x / divisor) * divisor
619
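# e.g. make_divisible(97, 32) -> 128, since ceil(97 / 32) = 4 and 4 * 32 = 128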
620
621 def clean_str(s):
622 # Cleans a string by replacing special characters with underscore _
623 return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
624
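# e.g. clean_str('exp#1 (test)') -> 'exp_1 _test_'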
625
626 def one_cycle(y1=0.0, y2=1.0, steps=100):
627 # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf
628 return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
629
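# e.g. one_cycle(0.1, 1.0, 100)(50) == 0.55: at the midpoint cos(pi/2) = 0, so y = (y1 + y2) / 2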
630
631 def colorstr(*input):
632 # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
633 *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string
634 colors = {
635 'black': '\033[30m', # basic colors
636 'red': '\033[31m',
637 'green': '\033[32m',
638 'yellow': '\033[33m',
639 'blue': '\033[34m',
640 'magenta': '\033[35m',
641 'cyan': '\033[36m',
642 'white': '\033[37m',
643 'bright_black': '\033[90m', # bright colors
644 'bright_red': '\033[91m',
645 'bright_green': '\033[92m',
646 'bright_yellow': '\033[93m',
647 'bright_blue': '\033[94m',
648 'bright_magenta': '\033[95m',
649 'bright_cyan': '\033[96m',
650 'bright_white': '\033[97m',
651 'end': '\033[0m', # misc
652 'bold': '\033[1m',
653 'underline': '\033[4m'}
654 return ''.join(colors[x] for x in args) + f'{string}' + colors['end']
655
656
657 def labels_to_class_weights(labels, nc=80):
658 # Get class weights (inverse frequency) from training labels
659 if labels[0] is None: # no labels loaded
660 return torch.Tensor()
661
662 labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO
663 classes = labels[:, 0].astype(int) # labels = [class xywh]
664 weights = np.bincount(classes, minlength=nc) # occurrences per class
665
666 # Prepend gridpoint count (for uCE training)
667 # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image
668 # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start
669
670 weights[weights == 0] = 1 # replace empty bins with 1
671 weights = 1 / weights # number of targets per class
672 weights /= weights.sum() # normalize
673 return torch.from_numpy(weights).float()
674
675
676 def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
677 # Produces image weights based on class_weights and image contents
678 # Usage: index = random.choices(range(n), weights=image_weights, k=1) # weighted image sample
679 class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])
680 return (class_weights.reshape(1, nc) * class_counts).sum(1)
681
682
683 def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
684 # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
685 # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
686 # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
687 # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
688 # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
689 return [
690 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
691 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
692 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
693
694
695 def xyxy2xywh(x):
696 # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
697 y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
698 y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
699 y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
700 y[:, 2] = x[:, 2] - x[:, 0] # width
701 y[:, 3] = x[:, 3] - x[:, 1] # height
702 return y
703
704
705 def xywh2xyxy(x):
706 # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
707 y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
708 y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
709 y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
710 y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
711 y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
712 return y
713
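# Note: xyxy2xywh and xywh2xyxy are inverse transforms (up to floating-point rounding),
# so xywh2xyxy(xyxy2xywh(boxes)) recovers the input boxes.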
714
715 def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
716 # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
717 y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
718 y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
719 y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y
720 y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x
721 y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y
722 return y
723
724
725 def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
726 # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
727 if clip:
728 clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip
729 y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
730 y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center
731 y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center
732 y[:, 2] = (x[:, 2] - x[:, 0]) / w # width
733 y[:, 3] = (x[:, 3] - x[:, 1]) / h # height
734 return y
735
736
737 def xyn2xy(x, w=640, h=640, padw=0, padh=0):
738 # Convert normalized segments into pixel segments, shape (n,2)
739 y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
740 y[:, 0] = w * x[:, 0] + padw # top left x
741 y[:, 1] = h * x[:, 1] + padh # top left y
742 return y
743
744
745 def segment2box(segment, width=640, height=640):
746 # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
747 x, y = segment.T # segment xy
748 inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
749    x, y = x[inside], y[inside]
750 return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy
751
752
753 def segments2boxes(segments):
754 # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
755 boxes = []
756 for s in segments:
757 x, y = s.T # segment xy
758 boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy
759 return xyxy2xywh(np.array(boxes)) # cls, xywh
760
761
762 def resample_segments(segments, n=1000):
763 # Up-sample an (n,2) segment
764 for i, s in enumerate(segments):
765 s = np.concatenate((s, s[0:1, :]), axis=0)
766 x = np.linspace(0, len(s) - 1, n)
767 xp = np.arange(len(s))
768        segments[i] = np.concatenate([np.interp(x, xp, s[:, j]) for j in range(2)]).reshape(2, -1).T  # segment xy
769 return segments
770
771
772 def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
773 # Rescale boxes (xyxy) from img1_shape to img0_shape
774 if ratio_pad is None: # calculate from img0_shape
775 gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
776 pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
777 else:
778 gain = ratio_pad[0][0]
779 pad = ratio_pad[1]
780
781 boxes[:, [0, 2]] -= pad[0] # x padding
782 boxes[:, [1, 3]] -= pad[1] # y padding
783 boxes[:, :4] /= gain
784 clip_boxes(boxes, img0_shape)
785 return boxes
786
787
788 def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None):
789 # Rescale coords (xyxy) from img1_shape to img0_shape
790 if ratio_pad is None: # calculate from img0_shape
791 gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
792 pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
793 else:
794 gain = ratio_pad[0][0]
795 pad = ratio_pad[1]
796
797 segments[:, 0] -= pad[0] # x padding
798 segments[:, 1] -= pad[1] # y padding
799 segments /= gain
800 clip_segments(segments, img0_shape)
801 return segments
802
803
804 def clip_boxes(boxes, shape):
805 # Clip boxes (xyxy) to image shape (height, width)
806 if isinstance(boxes, torch.Tensor): # faster individually
807 boxes[:, 0].clamp_(0, shape[1]) # x1
808 boxes[:, 1].clamp_(0, shape[0]) # y1
809 boxes[:, 2].clamp_(0, shape[1]) # x2
810 boxes[:, 3].clamp_(0, shape[0]) # y2
811 else: # np.array (faster grouped)
812 boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
813 boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
814
815
816 def clip_segments(boxes, shape):
817 # Clip segments (xy1,xy2,...) to image shape (height, width)
818 if isinstance(boxes, torch.Tensor): # faster individually
819 boxes[:, 0].clamp_(0, shape[1]) # x
820 boxes[:, 1].clamp_(0, shape[0]) # y
821 else: # np.array (faster grouped)
822 boxes[:, 0] = boxes[:, 0].clip(0, shape[1]) # x
823 boxes[:, 1] = boxes[:, 1].clip(0, shape[0]) # y
824
825
826 def non_max_suppression(
827 prediction,
828 conf_thres=0.25,
829 iou_thres=0.45,
830 classes=None,
831 agnostic=False,
832 multi_label=False,
833 labels=(),
834 max_det=300,
835 nm=0, # number of masks
836 ):
837 """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
838
839 Returns:
840 list of detections, on (n,6) tensor per image [xyxy, conf, cls]
841 """
842
843    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
844 prediction = prediction[0] # select only inference output
845
846 device = prediction.device
847 mps = 'mps' in device.type # Apple MPS
848 if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
849 prediction = prediction.cpu()
850 bs = prediction.shape[0] # batch size
851 nc = prediction.shape[2] - nm - 5 # number of classes
852 xc = prediction[..., 4] > conf_thres # candidates
853
854 # Checks
855 assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
856    assert 0 <= iou_thres <= 1, f'Invalid IoU threshold {iou_thres}, valid values are between 0.0 and 1.0'
857
858 # Settings
859 # min_wh = 2 # (pixels) minimum box width and height
860 max_wh = 7680 # (pixels) maximum box width and height
861 max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
862 time_limit = 0.5 + 0.05 * bs # seconds to quit after
863 redundant = True # require redundant detections
864 multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
865 merge = False # use merge-NMS
866
867 t = time.time()
868 mi = 5 + nc # mask start index
869 output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
870 for xi, x in enumerate(prediction): # image index, image inference
871 # Apply constraints
872 # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
873 x = x[xc[xi]] # confidence
874
875 # Cat apriori labels if autolabelling
876 if labels and len(labels[xi]):
877 lb = labels[xi]
878 v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
879 v[:, :4] = lb[:, 1:5] # box
880 v[:, 4] = 1.0 # conf
881 v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
882 x = torch.cat((x, v), 0)
883
884 # If none remain process next image
885 if not x.shape[0]:
886 continue
887
888 # Compute conf
889 x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
890
891 # Box/Mask
892        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
893 mask = x[:, mi:] # zero columns if no masks
894
895 # Detections matrix nx6 (xyxy, conf, cls)
896 if multi_label:
897 i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
898 x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
899 else: # best class only
900 conf, j = x[:, 5:mi].max(1, keepdim=True)
901 x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
902
903 # Filter by class
904 if classes is not None:
905 x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
906
907 # Apply finite constraint
908 # if not torch.isfinite(x).all():
909 # x = x[torch.isfinite(x).all(1)]
910
911 # Check shape
912 n = x.shape[0] # number of boxes
913 if not n: # no boxes
914 continue
915 elif n > max_nms: # excess boxes
916 x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
917 else:
918 x = x[x[:, 4].argsort(descending=True)] # sort by confidence
919
920 # Batched NMS
921 c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
922 boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
923 i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
924 if i.shape[0] > max_det: # limit detections
925 i = i[:max_det]
926 if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
927 # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
928 iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
929 weights = iou * scores[None] # box weights
930 x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
931 if redundant:
932 i = i[iou.sum(1) > 1] # require redundancy
933
934 output[xi] = x[i]
935 if mps:
936 output[xi] = output[xi].to(device)
937 if (time.time() - t) > time_limit:
938 LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
939 break # time limit exceeded
940
941 return output
942
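# Example usage (a minimal sketch with random inputs; 85 = 4 box + 1 obj + 80 classes):
#   pred = torch.randn(1, 25200, 85)  # (batch, boxes, xywh + obj + cls)
#   det = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]  # (n, 6) tensor of [xyxy, conf, cls]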
943
944 def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer()
945 # Strip optimizer from 'f' to finalize training, optionally save as 's'
946 x = torch.load(f, map_location=torch.device('cpu'))
947 if x.get('ema'):
948 x['model'] = x['ema'] # replace model with ema
949 for k in 'optimizer', 'best_fitness', 'wandb_id', 'ema', 'updates': # keys
950 x[k] = None
951 x['epoch'] = -1
952 x['model'].half() # to FP16
953 for p in x['model'].parameters():
954 p.requires_grad = False
955 torch.save(x, s or f)
956 mb = os.path.getsize(s or f) / 1E6 # filesize
957 LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
958
959
960 def print_mutation(keys, results, hyp, save_dir, bucket, prefix=colorstr('evolve: ')):
961 evolve_csv = save_dir / 'evolve.csv'
962 evolve_yaml = save_dir / 'hyp_evolve.yaml'
963 keys = tuple(keys) + tuple(hyp.keys()) # [results + hyps]
964 keys = tuple(x.strip() for x in keys)
965 vals = results + tuple(hyp.values())
966 n = len(keys)
967
968 # Download (optional)
969 if bucket:
970 url = f'gs://{bucket}/evolve.csv'
971 if gsutil_getsize(url) > (evolve_csv.stat().st_size if evolve_csv.exists() else 0):
972 os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local
973
974 # Log to evolve.csv
975 s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header
976 with open(evolve_csv, 'a') as f:
977 f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n')
978
979 # Save yaml
980 with open(evolve_yaml, 'w') as f:
981 data = pd.read_csv(evolve_csv)
982 data = data.rename(columns=lambda x: x.strip()) # strip keys
983        i = np.argmax(fitness(data.values[:, :4]))  # index of the best generation by fitness
984 generations = len(data)
985 f.write('# YOLOv5 Hyperparameter Evolution Results\n' + f'# Best generation: {i}\n' +
986 f'# Last generation: {generations - 1}\n' + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) +
987 '\n' + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n')
988 yaml.safe_dump(data.loc[i][7:].to_dict(), f, sort_keys=False)
989
990 # Print to screen
991 LOGGER.info(prefix + f'{generations} generations finished, current result:\n' + prefix +
992 ', '.join(f'{x.strip():>20s}' for x in keys) + '\n' + prefix + ', '.join(f'{x:20.5g}'
993 for x in vals) + '\n\n')
994
995 if bucket:
996 os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload
997
998
999 def apply_classifier(x, model, img, im0):
1000 # Apply a second stage classifier to YOLO outputs
1001 # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
1002 im0 = [im0] if isinstance(im0, np.ndarray) else im0
1003 for i, d in enumerate(x): # per image
1004 if d is not None and len(d):
1005 d = d.clone()
1006
1007 # Reshape and pad cutouts
1008 b = xyxy2xywh(d[:, :4]) # boxes
1009 b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square
1010 b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad
1011 d[:, :4] = xywh2xyxy(b).long()
1012
1013 # Rescale boxes from img_size to im0 size
1014 scale_boxes(img.shape[2:], d[:, :4], im0[i].shape)
1015
1016 # Classes
1017 pred_cls1 = d[:, 5].long()
1018 ims = []
1019 for a in d:
1020 cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
1021 im = cv2.resize(cutout, (224, 224)) # BGR
1022
1023                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW (3x224x224)
1024 im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32
1025 im /= 255 # 0 - 255 to 0.0 - 1.0
1026 ims.append(im)
1027
1028 pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction
1029 x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections
1030
1031 return x
1032
1033
1034 def increment_path(path, exist_ok=False, sep='', mkdir=False):
1035 # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
1036 path = Path(path) # os-agnostic
1037 if path.exists() and not exist_ok:
1038 path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
1039
1040 # Method 1
1041 for n in range(2, 9999):
1042 p = f'{path}{sep}{n}{suffix}' # increment path
1043            if not os.path.exists(p):  # first free increment found
1044 break
1045 path = Path(p)
1046
1047 # Method 2 (deprecated)
1048 # dirs = glob.glob(f"{path}{sep}*") # similar paths
1049 # matches = [re.search(rf"{path.stem}{sep}(\d+)", d) for d in dirs]
1050 # i = [int(m.groups()[0]) for m in matches if m] # indices
1051 # n = max(i) + 1 if i else 2 # increment number
1052 # path = Path(f"{path}{sep}{n}{suffix}") # increment path
1053
1054 if mkdir:
1055 path.mkdir(parents=True, exist_ok=True) # make directory
1056
1057 return path
1058
1059
1060 # OpenCV Chinese-friendly functions ------------------------------------------------------------------------------------
1061 imshow_ = cv2.imshow # copy to avoid recursion errors
1062
1063
1064 def imread(path, flags=cv2.IMREAD_COLOR):
1065 return cv2.imdecode(np.fromfile(path, np.uint8), flags)
1066
1067
1068 def imwrite(path, im):
1069 try:
1070 cv2.imencode(Path(path).suffix, im)[1].tofile(path)
1071 return True
1072 except Exception:
1073 return False
1074
1075
1076 def imshow(path, im):
1077 imshow_(path.encode('unicode_escape').decode(), im)
1078
1079
1080 cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine
1081
1082 # Variables ------------------------------------------------------------------------------------------------------------
1083 NCOLS = 0 if is_docker() else shutil.get_terminal_size().columns # terminal window size for tqdm
1 FROM gcr.io/google-appengine/python
2
3 # Create a virtualenv for dependencies. This isolates these packages from
4 # system-level packages.
5 # Use -p python3 or -p python3.7 to select python version. Default is version 2.
6 RUN virtualenv /env -p python3
7
8 # Setting these environment variables are the same as running
9 # source /env/bin/activate.
10 ENV VIRTUAL_ENV /env
11 ENV PATH /env/bin:$PATH
12
13 RUN apt-get update && apt-get install -y python-opencv
14
15 # Copy the application's requirements.txt and run pip to install all
16 # dependencies into the virtualenv.
17 ADD requirements.txt /app/requirements.txt
18 RUN pip install -r /app/requirements.txt
19
20 # Add the application source code.
21 ADD . /app
22
23 # Run a WSGI server to serve the application. gunicorn must be declared as
24 # a dependency in requirements.txt.
25 CMD gunicorn -b :$PORT main:app
1 # Add these requirements to your app on top of the existing ones
2 pip==21.1
3 Flask==1.0.2
4 gunicorn==19.9.0
1 runtime: custom
2 env: flex
3
4 service: yolov5app
5
6 liveness_check:
7 initial_delay_sec: 600
8
9 manual_scaling:
10 instances: 1
11 resources:
12 cpu: 1
13 memory_gb: 4
14 disk_size_gb: 20
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Logging utils
4 """
5
6 import os
7 import warnings
8 from pathlib import Path
9
10 import pkg_resources as pkg
11 import torch
12 from torch.utils.tensorboard import SummaryWriter
13
14 from utils.general import LOGGER, colorstr, cv2
15 from utils.loggers.clearml.clearml_utils import ClearmlLogger
16 from utils.loggers.wandb.wandb_utils import WandbLogger
17 from utils.plots import plot_images, plot_labels, plot_results
18 from utils.torch_utils import de_parallel
19
20 LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet')  # *.csv, TensorBoard, Weights & Biases, ClearML, Comet
21 RANK = int(os.getenv('RANK', -1))
22
23 try:
24 import wandb
25
26 assert hasattr(wandb, '__version__') # verify package import not local dir
27 if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in {0, -1}:
28 try:
29 wandb_login_success = wandb.login(timeout=30)
30 except wandb.errors.UsageError: # known non-TTY terminal issue
31 wandb_login_success = False
32 if not wandb_login_success:
33 wandb = None
34 except (ImportError, AssertionError):
35 wandb = None
36
37 try:
38 import clearml
39
40 assert hasattr(clearml, '__version__') # verify package import not local dir
41 except (ImportError, AssertionError):
42 clearml = None
43
44 try:
45 if RANK not in [0, -1]:
46 comet_ml = None
47 else:
48 import comet_ml
49
50 assert hasattr(comet_ml, '__version__') # verify package import not local dir
51 from utils.loggers.comet import CometLogger
52
53 except (ModuleNotFoundError, ImportError, AssertionError):
54 comet_ml = None
55
56
57 class Loggers:
58 # YOLOv5 Loggers class
59 def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
60 self.save_dir = save_dir
61 self.weights = weights
62 self.opt = opt
63 self.hyp = hyp
64 self.plots = not opt.noplots # plot results
65 self.logger = logger # for printing results to console
66 self.include = include
67 self.keys = [
68 'train/box_loss',
69 'train/obj_loss',
70 'train/cls_loss', # train loss
71 'metrics/precision',
72 'metrics/recall',
73 'metrics/mAP_0.5',
74 'metrics/mAP_0.5:0.95', # metrics
75 'val/box_loss',
76 'val/obj_loss',
77 'val/cls_loss', # val loss
78 'x/lr0',
79 'x/lr1',
80 'x/lr2'] # params
81 self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95']
82 for k in LOGGERS:
83 setattr(self, k, None) # init empty logger dictionary
84 self.csv = True # always log to csv
85
86 # Messages
87 if not wandb:
88 prefix = colorstr('Weights & Biases: ')
89 s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs in Weights & Biases"
90 self.logger.info(s)
91 if not clearml:
92 prefix = colorstr('ClearML: ')
93 s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML"
94 self.logger.info(s)
95 if not comet_ml:
96 prefix = colorstr('Comet: ')
97 s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet"
98 self.logger.info(s)
99 # TensorBoard
100 s = self.save_dir
101 if 'tb' in self.include and not self.opt.evolve:
102 prefix = colorstr('TensorBoard: ')
103 self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
104 self.tb = SummaryWriter(str(s))
105
106 # W&B
107 if wandb and 'wandb' in self.include:
108 wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://')
109 run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None
110 self.opt.hyp = self.hyp # add hyperparameters
111 self.wandb = WandbLogger(self.opt, run_id)
112 # Temporary warning: nested artifacts are not supported after wandb 0.12.10
113 if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.11'):
114 s = "YOLOv5 temporarily requires wandb version 0.12.10 or below. Some features may not work as expected."
115 self.logger.warning(s)
116 else:
117 self.wandb = None
118
119 # ClearML
120 if clearml and 'clearml' in self.include:
121 self.clearml = ClearmlLogger(self.opt, self.hyp)
122 else:
123 self.clearml = None
124
125 # Comet
126 if comet_ml and 'comet' in self.include:
127 if isinstance(self.opt.resume, str) and self.opt.resume.startswith("comet://"):
128 run_id = self.opt.resume.split("/")[-1]
129 self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id)
130
131 else:
132 self.comet_logger = CometLogger(self.opt, self.hyp)
133
134 else:
135 self.comet_logger = None
136
137 @property
138 def remote_dataset(self):
139 # Get data_dict if custom dataset artifact link is provided
140 data_dict = None
141 if self.clearml:
142 data_dict = self.clearml.data_dict
143 if self.wandb:
144 data_dict = self.wandb.data_dict
145 if self.comet_logger:
146 data_dict = self.comet_logger.data_dict
147
148 return data_dict
149
150 def on_train_start(self):
151 if self.comet_logger:
152 self.comet_logger.on_train_start()
153
154 def on_pretrain_routine_start(self):
155 if self.comet_logger:
156 self.comet_logger.on_pretrain_routine_start()
157
158 def on_pretrain_routine_end(self, labels, names):
159 # Callback runs on pre-train routine end
160 if self.plots:
161 plot_labels(labels, names, self.save_dir)
162 paths = self.save_dir.glob('*labels*.jpg') # training labels
163 if self.wandb:
164 self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
165 # if self.clearml:
166 # pass # ClearML saves these images automatically using hooks
167 if self.comet_logger:
168 self.comet_logger.on_pretrain_routine_end(paths)
169
170 def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
171 log_dict = dict(zip(self.keys[0:3], vals))
172 # Callback runs on train batch end
173 # ni: number integrated batches (since train start)
174 if self.plots:
175 if ni < 3:
176 f = self.save_dir / f'train_batch{ni}.jpg' # filename
177 plot_images(imgs, targets, paths, f)
178 if ni == 0 and self.tb and not self.opt.sync_bn:
179 log_tensorboard_graph(self.tb, model, imgsz=(self.opt.imgsz, self.opt.imgsz))
180 if ni == 10 and (self.wandb or self.clearml):
181 files = sorted(self.save_dir.glob('train*.jpg'))
182 if self.wandb:
183 self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]})
184 if self.clearml:
185 self.clearml.log_debug_samples(files, title='Mosaics')
186
187 if self.comet_logger:
188 self.comet_logger.on_train_batch_end(log_dict, step=ni)
189
190 def on_train_epoch_end(self, epoch):
191 # Callback runs on train epoch end
192 if self.wandb:
193 self.wandb.current_epoch = epoch + 1
194
195 if self.comet_logger:
196 self.comet_logger.on_train_epoch_end(epoch)
197
198 def on_val_start(self):
199 if self.comet_logger:
200 self.comet_logger.on_val_start()
201
202 def on_val_image_end(self, pred, predn, path, names, im):
203 # Callback runs on val image end
204 if self.wandb:
205 self.wandb.val_one_image(pred, predn, path, names, im)
206 if self.clearml:
207 self.clearml.log_image_with_boxes(path, pred, names, im)
208
209 def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
210 if self.comet_logger:
211 self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
212
213 def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
214 # Callback runs on val end
215 if self.wandb or self.clearml:
216 files = sorted(self.save_dir.glob('val*.jpg'))
217 if self.wandb:
218 self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]})
219 if self.clearml:
220 self.clearml.log_debug_samples(files, title='Validation')
221
222 if self.comet_logger:
223 self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
224
225 def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
226 # Callback runs at the end of each fit (train+val) epoch
227 x = dict(zip(self.keys, vals))
228 if self.csv:
229 file = self.save_dir / 'results.csv'
230 n = len(x) + 1 # number of cols
231 s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header
232 with open(file, 'a') as f:
233 f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
234
235 if self.tb:
236 for k, v in x.items():
237 self.tb.add_scalar(k, v, epoch)
238 elif self.clearml: # log to ClearML if TensorBoard not used
239 for k, v in x.items():
240 title, series = k.split('/')
241 self.clearml.task.get_logger().report_scalar(title, series, v, epoch)
242
243 if self.wandb:
244 if best_fitness == fi:
245 best_results = [epoch] + vals[3:7]
246 for i, name in enumerate(self.best_keys):
247 self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary
248 self.wandb.log(x)
249 self.wandb.end_epoch(best_result=best_fitness == fi)
250
251 if self.clearml:
252 self.clearml.current_epoch_logged_images = set() # reset epoch image limit
253 self.clearml.current_epoch += 1
254
255 if self.comet_logger:
256 self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
257
258 def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
259 # Callback runs on model save event
260 if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
261 if self.wandb:
262 self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
263 if self.clearml:
264 self.clearml.task.update_output_model(model_path=str(last),
265 model_name='Latest Model',
266 auto_delete_file=False)
267
268 if self.comet_logger:
269 self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
270
271 def on_train_end(self, last, best, epoch, results):
272 # Callback runs on training end, i.e. saving best model
273 if self.plots:
274 plot_results(file=self.save_dir / 'results.csv') # save results.png
275 files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
276 files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter
277 self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}")
278
279 if self.tb and not self.clearml: # These images are already captured by ClearML by now, we don't want doubles
280 for f in files:
281 self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
282
283 if self.wandb:
284 self.wandb.log(dict(zip(self.keys[3:10], results)))
285 self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]})
286 # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
287 if not self.opt.evolve:
288 wandb.log_artifact(str(best if best.exists() else last),
289 type='model',
290 name=f'run_{self.wandb.wandb_run.id}_model',
291 aliases=['latest', 'best', 'stripped'])
292 self.wandb.finish_run()
293
294 if self.clearml and not self.opt.evolve:
295 self.clearml.task.update_output_model(model_path=str(best if best.exists() else last),
296 name='Best Model',
297 auto_delete_file=False)
298
299 if self.comet_logger:
300 final_results = dict(zip(self.keys[3:10], results))
301 self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
302
303 def on_params_update(self, params: dict):
304 # Update hyperparams or configs of the experiment
305 if self.wandb:
306 self.wandb.wandb_run.config.update(params, allow_val_change=True)
307 if self.comet_logger:
308 self.comet_logger.on_params_update(params)
309
310
311 class GenericLogger:
312 """
313 YOLOv5 General purpose logger for non-task specific logging
314 Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...)
315 Arguments
316 opt: Run arguments
317 console_logger: Console logger
318 include: loggers to include
319 """
320
321 def __init__(self, opt, console_logger, include=('tb', 'wandb')):
322 # init default loggers
323 self.save_dir = Path(opt.save_dir)
324 self.include = include
325 self.console_logger = console_logger
326 self.csv = self.save_dir / 'results.csv' # CSV logger
327 if 'tb' in self.include:
328 prefix = colorstr('TensorBoard: ')
329 self.console_logger.info(
330 f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/")
331 self.tb = SummaryWriter(str(self.save_dir))
332
333 if wandb and 'wandb' in self.include:
334 self.wandb = wandb.init(project=web_project_name(str(opt.project)),
335 name=None if opt.name == "exp" else opt.name,
336 config=opt)
337 else:
338 self.wandb = None
339
340 def log_metrics(self, metrics, epoch):
341 # Log metrics dictionary to all loggers
342 if self.csv:
343 keys, vals = list(metrics.keys()), list(metrics.values())
344 n = len(metrics) + 1 # number of cols
345 s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header
346 with open(self.csv, 'a') as f:
347 f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
348
349 if self.tb:
350 for k, v in metrics.items():
351 self.tb.add_scalar(k, v, epoch)
352
353 if self.wandb:
354 self.wandb.log(metrics, step=epoch)
355
356 def log_images(self, files, name='Images', epoch=0):
357 # Log images to all loggers
358 files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path
359 files = [f for f in files if f.exists()] # filter by exists
360
361 if self.tb:
362 for f in files:
363 self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
364
365 if self.wandb:
366 self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch)
367
368 def log_graph(self, model, imgsz=(640, 640)):
369 # Log model graph to all loggers
370 if self.tb:
371 log_tensorboard_graph(self.tb, model, imgsz)
372
373 def log_model(self, model_path, epoch=0, metadata={}):
374 # Log model to all loggers
375 if self.wandb:
376 art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata)
377 art.add_file(str(model_path))
378 wandb.log_artifact(art)
379
380 def update_params(self, params):
381 # Update the parameters logged
382 if self.wandb:
383 wandb.run.config.update(params, allow_val_change=True)
384
385
386 def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
387 # Log model graph to TensorBoard
388 try:
389 p = next(model.parameters()) # for device, type
390 imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz # expand
391 im = torch.zeros((1, 3, *imgsz)).to(p.device).type_as(p) # input image (WARNING: must be zeros, not empty)
392 with warnings.catch_warnings():
393 warnings.simplefilter('ignore') # suppress jit trace warning
394 tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), [])
395 except Exception as e:
396 LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}')
397
398
399 def web_project_name(project):
400 # Convert local project name to web project name
401 if not project.startswith('runs/train'):
402 return project
403 suffix = '-Classify' if project.endswith('-cls') else '-Segment' if project.endswith('-seg') else ''
404 return f'YOLOv5{suffix}'
1 # ClearML Integration
2
3 <img align="center" src="https://github.com/thepycoder/clearml_screenshots/raw/main/logos_dark.png#gh-light-mode-only" alt="Clear|ML"><img align="center" src="https://github.com/thepycoder/clearml_screenshots/raw/main/logos_light.png#gh-dark-mode-only" alt="Clear|ML">
4
5 ## About ClearML
6
7 [ClearML](https://cutt.ly/yolov5-tutorial-clearml) is an [open-source](https://github.com/allegroai/clearml) toolbox designed to save you time ⏱️.
8
9 🔨 Track every YOLOv5 training run in the <b>experiment manager</b>
10
11 🔧 Version and easily access your custom training data with the integrated ClearML <b>Data Versioning Tool</b>
12
13 🔦 <b>Remotely train and monitor</b> your YOLOv5 training runs using ClearML Agent
14
15 🔬 Get the very best mAP using ClearML <b>Hyperparameter Optimization</b>
16
17 🔭 Turn your newly trained <b>YOLOv5 model into an API</b> with just a few commands using ClearML Serving
18
19 <br />
20 And so much more. It's up to you how many of these tools you want to use: you can stick to the experiment manager, or chain them all together into an impressive pipeline!
21 <br />
22 <br />
23
24 ![ClearML scalars dashboard](https://github.com/thepycoder/clearml_screenshots/raw/main/experiment_manager_with_compare.gif)
25
26
27 <br />
28 <br />
29
30 ## 🦾 Setting Things Up
31
32 To keep track of your experiments and/or data, ClearML needs to communicate with a server. You have two options to get one:
33
34 Either sign up for free to the [ClearML Hosted Service](https://cutt.ly/yolov5-tutorial-clearml) or set up your own server ([see here](https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server)). Even the server is open-source, so if you're dealing with sensitive data, you should be good to go!
35
36 1. Install the `clearml` python package:
37
38 ```bash
39 pip install clearml
40 ```
41
42 1. Connect the ClearML SDK to the server by [creating credentials](https://app.clear.ml/settings/workspace-configuration) (in the top right, go to Settings -> Workspace -> Create new credentials), then execute the command below and follow the instructions:
43
44 ```bash
45 clearml-init
46 ```
47
48 That's it! You're done 😎
49
50 <br />
51
52 ## 🚀 Training YOLOv5 With ClearML
53
54 To enable ClearML experiment tracking, simply install the ClearML pip package.
55
56 ```bash
57 pip install clearml
58 ```
59
60 This will enable integration with the YOLOv5 training script. Every training run from now on will be captured and stored by the ClearML experiment manager. If you want to change the `project_name` or `task_name`, head over to our custom logger, `utils/loggers/clearml/clearml_utils.py`, and change them there (see the sketch after the command below).
61
62 ```bash
63 python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache
64 ```
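
For reference, a condensed sketch of the relevant call inside `utils/loggers/clearml/clearml_utils.py` (shortened from the full file; edit the two highlighted arguments):

```python
# Sketch of the Task.init call in clearml_utils.py; changing these two
# arguments renames the ClearML project and task used for your runs.
from clearml import Task

task = Task.init(
    project_name='YOLOv5',  # <-- change the project name here
    task_name='training',   # <-- change the task name here
    tags=['YOLOv5'],
    output_uri=True,
)
```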
65
66 This will capture:
67 - Source code + uncommitted changes
68 - Installed packages
69 - (Hyper)parameters
70 - Model files (use `--save-period n` to save a checkpoint every n epochs)
71 - Console output
72 - Scalars (mAP_0.5, mAP_0.5:0.95, precision, recall, losses, learning rates, ...)
73 - General info such as machine details, runtime, creation date etc.
74 - All produced plots such as label correlogram and confusion matrix
75 - Images with bounding boxes per epoch
76 - Mosaic per epoch
77 - Validation images per epoch
78 - ...
79
80 That's a lot, right? 🤯
81 Now, we can visualize all of this information in the ClearML UI to get an overview of our training progress. Add custom columns to the table view (such as mAP_0.5) so you can easily sort on the best performing model. Or select multiple experiments and directly compare them!
82
83 There's even more we can do with all of this information, like hyperparameter optimization and remote execution, so keep reading if you want to see how that works!
84
85 <br />
86
87 ## 🔗 Dataset Version Management
88
89 Versioning your data separately from your code is generally a good idea and makes it easy to acquire the latest version too. This repository supports supplying a dataset version ID, and it will make sure to get the data if it's not there yet (a condensed sketch of the lookup follows the animation below). Next to that, this workflow also saves the used dataset ID as part of the task parameters, so you will always know for sure which data was used in which experiment!
90
91 ![ClearML Dataset Interface](https://github.com/thepycoder/clearml_screenshots/raw/main/clearml_data.gif)
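
For the curious, here is a condensed sketch of how the integration resolves such a dataset version ID into a local folder (shortened from `construct_dataset()` in `utils/loggers/clearml/clearml_utils.py`):

```python
# Sketch: resolve a ClearML dataset version ID to a local copy of the data.
from pathlib import Path

from clearml import Dataset

dataset_id = 'clearml://<your_dataset_id>'.replace('clearml://', '')
dataset_root_path = Path(Dataset.get(dataset_id=dataset_id).get_local_copy())
print(dataset_root_path)  # local folder holding the versioned dataset
```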
92
93 ### Prepare Your Dataset
94
95 The YOLOv5 repository supports a number of different datasets by using yaml files containing their information. By default, datasets are downloaded to the `../datasets` folder relative to the repository root folder. So if you downloaded the `coco128` dataset using the link in the yaml or with the scripts provided by yolov5, you get this folder structure:
96
97 ```
98 ..
99 |_ yolov5
100 |_ datasets
101 |_ coco128
102 |_ images
103 |_ labels
104 |_ LICENSE
105 |_ README.txt
106 ```
107 But this can be any dataset you wish. Feel free to use your own, as long as you keep to this folder structure.
108
109 Next, ⚠️**copy the corresponding yaml file to the root of the dataset folder**⚠️. This yaml file contains the information ClearML will need to properly use the dataset. You can make this yourself too, of course, just follow the structure of the example yamls.
110
111 Basically we need the following keys: `path`, `train`, `test`, `val`, `nc`, `names` (a minimal example follows the folder tree below).
112
113 ```
114 ..
115 |_ yolov5
116 |_ datasets
117 |_ coco128
118 |_ images
119 |_ labels
120 |_ coco128.yaml # <---- HERE!
121 |_ LICENSE
122 |_ README.txt
123 ```
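
If you prefer to generate the file programmatically, here is a minimal sketch that writes a yaml with exactly those keys (the values are illustrative, modeled on coco128; adjust them to your own dataset):

```python
# Sketch: write a minimal dataset yaml with the keys ClearML needs.
# All values below are illustrative placeholders.
import yaml

dataset_definition = {
    'path': '../datasets/coco128',
    'train': 'images/train2017',
    'val': 'images/train2017',
    'test': None,
    'nc': 80,  # number of classes
    'names': ['person', 'bicycle', 'car'],  # truncated here; list all nc names
}
with open('coco128.yaml', 'w') as f:
    yaml.safe_dump(dataset_definition, f, sort_keys=False)
```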
124
125 ### Upload Your Dataset
126
127 To get this dataset into ClearML as a versioned dataset, go to the dataset root folder and run the following command:
128 ```bash
129 cd coco128
130 clearml-data sync --project YOLOv5 --name coco128 --folder .
131 ```
132
133 The command `clearml-data sync` is actually a shorthand command. You could also run these commands one after the other:
134 ```bash
135 # Optionally add --parent <parent_dataset_id> if you want to base
136 # this version on another dataset version, so no duplicate files are uploaded!
137 clearml-data create --name coco128 --project YOLOv5
138 clearml-data add --files .
139 clearml-data close
140 ```
141
142 ### Run Training Using A ClearML Dataset
143
144 Now that you have a ClearML dataset, you can very simply use it to train custom YOLOv5 🚀 models!
145
146 ```bash
147 python train.py --img 640 --batch 16 --epochs 3 --data clearml://<your_dataset_id> --weights yolov5s.pt --cache
148 ```
149
150 <br />
151
152 ## 👀 Hyperparameter Optimization
153
154 Now that we have our experiments and data versioned, it's time to take a look at what we can build on top!
155
156 Using the code information, installed packages and environment details, the experiment itself is now **completely reproducible**. In fact, ClearML allows you to clone an experiment and even change its parameters. We can then just rerun it with these new parameters automatically; this is basically what HPO does!
157
158 To **run hyperparameter optimization locally**, we've included a pre-made script for you. Just make sure a training task has been run at least once so it is in the ClearML experiment manager; we will essentially clone it and change its hyperparameters.
159
160 You'll need to fill in the ID of this `template task` in the script found at `utils/loggers/clearml/hpo.py` and then just run it :) (a condensed sketch of the relevant lines follows the commands below). You can change `task.execute_locally()` to `task.execute()` to put it in a ClearML queue and have a remote agent work on it instead.
161
162 ```bash
163 # To use optuna, install it first, otherwise you can change the optimizer to just be RandomSearch
164 pip install optuna
165 python utils/loggers/clearml/hpo.py
166 ```
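
For orientation, a condensed sketch of the two places to edit in that script (heavily shortened from `utils/loggers/clearml/hpo.py`, which ships with this repository):

```python
# Sketch (condensed from utils/loggers/clearml/hpo.py): point the optimizer at a
# previously run training task and choose where the sweep executes.
from clearml.automation import HyperParameterOptimizer, UniformParameterRange
from clearml.automation.optuna import OptimizerOptuna

optimizer = HyperParameterOptimizer(
    base_task_id='<your_template_task_id>',  # 1) the template task's ID from the ClearML UI
    hyper_parameters=[UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1)],
    objective_metric_title='metrics',
    objective_metric_series='mAP_0.5',
    objective_metric_sign='max',
    optimizer_class=OptimizerOptuna,
)
optimizer.start_locally()  # 2) run the sweep on this machine (use an agent/queue for remote runs)
```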
167
168 ![HPO](https://github.com/thepycoder/clearml_screenshots/raw/main/hpo.png)
169
170 ## 🤯 Remote Execution (advanced)
171
172 Running HPO locally is really handy, but what if we want to run our experiments on a remote machine instead? Maybe you have access to a very powerful GPU machine on-site or you have some budget to use cloud GPUs.
173 This is where the ClearML Agent comes into play. Check out what the agent can do here:
174
175 - [YouTube video](https://youtu.be/MX3BrXnaULs)
176 - [Documentation](https://clear.ml/docs/latest/docs/clearml_agent)
177
178 In short: every experiment tracked by the experiment manager contains enough information to reproduce it on a different machine (installed packages, uncommitted changes etc.). So a ClearML agent does just that: it listens to a queue for incoming tasks and when it finds one, it recreates the environment and runs it while still reporting scalars, plots etc. to the experiment manager.
179
180 You can turn any machine (a cloud VM, a local GPU machine, your own laptop ... ) into a ClearML agent by simply running:
181 ```bash
182 clearml-agent daemon --queue <queues_to_listen_to> [--docker]
183 ```
184
185 ### Cloning, Editing And Enqueuing
186
187 With our agent running, we can give it some work. Remember from the HPO section that we can clone a task and edit the hyperparameters? We can do that from the interface too!
188
189 🪄 Clone the experiment by right-clicking it
190
191 🎯 Edit the hyperparameters to what you wish them to be
192
193 ⏳ Enqueue the task to any of the queues by right-clicking it
194
195 ![Enqueue a task from the UI](https://github.com/thepycoder/clearml_screenshots/raw/main/enqueue.gif)
196
197 ### Executing A Task Remotely
198
199 Now you can clone a task like we explained above, or simply mark your current script by adding `task.execute_remotely()`, and on execution it will be put into a queue for the agent to start working on!
200
201 To run the YOLOv5 training script remotely, all you have to do is add this line to the train.py script after the ClearML logger has been instantiated:
202 ```python
203 # ...
204 # Loggers
205 data_dict = None
206 if RANK in {-1, 0}:
207 loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance
208 if loggers.clearml:
209 loggers.clearml.task.execute_remotely(queue='my_queue') # <------ ADD THIS LINE
210 # data_dict is either None if the user did not choose a ClearML dataset, or is filled in by ClearML
211 data_dict = loggers.clearml.data_dict
212 # ...
213 ```
214 When running the training script after this change, Python will run the script up until that line, after which it will package the code and send it to the queue instead!
215
216 ### Autoscaling workers
217
218 ClearML comes with autoscalers too! This tool will automatically spin up new remote machines in the cloud of your choice (AWS, GCP, Azure) and turn them into ClearML agents for you whenever there are experiments detected in the queue. Once the tasks are processed, the autoscaler will automatically shut down the remote machines and you stop paying!
219
220 Check out the autoscalers getting started video below.
221
222 [![Watch the video](https://img.youtube.com/vi/j4XVMAaUt3E/0.jpg)](https://youtu.be/j4XVMAaUt3E)
1 """Main Logger class for ClearML experiment tracking."""
2 import glob
3 import re
4 from pathlib import Path
5
6 import numpy as np
7 import yaml
8
9 from utils.plots import Annotator, colors
10
11 try:
12 import clearml
13 from clearml import Dataset, Task
14
15 assert hasattr(clearml, '__version__') # verify package import not local dir
16 except (ImportError, AssertionError):
17 clearml = None
18
19
20 def construct_dataset(clearml_info_string):
21 """Load in a clearml dataset and fill the internal data_dict with its contents.
22 """
23 dataset_id = clearml_info_string.replace('clearml://', '')
24 dataset = Dataset.get(dataset_id=dataset_id)
25 dataset_root_path = Path(dataset.get_local_copy())
26
27 # We'll search for the yaml file definition in the dataset
28 yaml_filenames = list(glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml")))
29 if len(yaml_filenames) > 1:
30 raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains '
31 'the dataset definition this way.')
32 elif len(yaml_filenames) == 0:
33 raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file '
34 'inside the dataset root path.')
35 with open(yaml_filenames[0]) as f:
36 dataset_definition = yaml.safe_load(f)
37
38 assert set(dataset_definition.keys()).issuperset(
39 {'train', 'test', 'val', 'nc', 'names'}
40 ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')"
41
42 data_dict = dict()
43 data_dict['train'] = str(
44 (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None
45 data_dict['test'] = str(
46 (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None
47 data_dict['val'] = str(
48 (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None
49 data_dict['nc'] = dataset_definition['nc']
50 data_dict['names'] = dataset_definition['names']
51
52 return data_dict
53
54
55 class ClearmlLogger:
56 """Log training runs, datasets, models, and predictions to ClearML.
57
58 This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default,
59 this information includes hyperparameters, system configuration and metrics, model metrics, code information and
60 basic data metrics and analyses.
61
62 By providing additional command line arguments to train.py, datasets,
63 models and predictions can also be logged.
64 """
65
66 def __init__(self, opt, hyp):
67 """
68 - Initialize ClearML Task, this object will capture the experiment
69 - Upload dataset version to ClearML Data if opt.upload_dataset is True
70
71 arguments:
72 opt (namespace) -- Commandline arguments for this run
73 hyp (dict) -- Hyperparameters for this run
74
75 """
76 self.current_epoch = 0
77 # Keep track of the number of logged images to enforce a limit
78 self.current_epoch_logged_images = set()
79 # Maximum number of images to log to ClearML per epoch
80 self.max_imgs_to_log_per_epoch = 16
81 # Get the interval of epochs when bounding box images should be logged
82 self.bbox_interval = opt.bbox_interval
83 self.clearml = clearml
84 self.task = None
85 self.data_dict = None
86 if self.clearml:
87 self.task = Task.init(
88 project_name='YOLOv5',
89 task_name='training',
90 tags=['YOLOv5'],
91 output_uri=True,
92 auto_connect_frameworks={'pytorch': False}
93 # We disconnect pytorch auto-detection, because we added manual model save points in the code
94 )
95 # ClearML's hooks will already grab all general parameters
96 # Only the hyperparameters coming from the yaml config file
97 # will have to be added manually!
98 self.task.connect(hyp, name='Hyperparameters')
99
100 # Get ClearML Dataset Version if requested
101 if opt.data.startswith('clearml://'):
102 # data_dict should have the following keys:
103 # names, nc (number of classes), test, train, val (all three relative paths to ../datasets)
104 self.data_dict = construct_dataset(opt.data)
105 # Set opt.data to the resolved data_dict because wandb will crash without this
106 # information, and opt is the best way to pass it along
107 opt.data = self.data_dict
108
109 def log_debug_samples(self, files, title='Debug Samples'):
110 """
111 Log files (images) as debug samples in the ClearML task.
112
113 arguments:
114 files (List(PosixPath)) a list of file paths in PosixPath format
115 title (str) A title that groups together images with the same values
116 """
117 for f in files:
118 if f.exists():
119 it = re.search(r'_batch(\d+)', f.name)
120 iteration = int(it.groups()[0]) if it else 0
121 self.task.get_logger().report_image(title=title,
122 series=f.name.replace(it.group(), ''),
123 local_path=str(f),
124 iteration=iteration)
125
126 def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25):
127 """
128 Draw the bounding boxes on a single image and report the result as a ClearML debug sample.
129
130 arguments:
131 image_path (PosixPath) the path to the original image file
132 boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
133 class_names (dict): dict containing mapping of class int to class name
134 image (Tensor): A torch tensor containing the actual image data
135 """
136 if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0:
137 # Log every bbox_interval times and deduplicate for any intermittent extra eval runs
138 if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images:
139 im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2))
140 annotator = Annotator(im=im, pil=True)
141 for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])):
142 color = colors(i)
143
144 class_name = class_names[int(class_nr)]
145 confidence_percentage = round(float(conf) * 100, 2)
146 label = f"{class_name}: {confidence_percentage}%"
147
148 if conf > conf_threshold:
149 annotator.rectangle(box.cpu().numpy(), outline=color)
150 annotator.box_label(box.cpu().numpy(), label=label, color=color)
151
152 annotated_image = annotator.result()
153 self.task.get_logger().report_image(title='Bounding Boxes',
154 series=image_path.name,
155 iteration=self.current_epoch,
156 image=annotated_image)
157 self.current_epoch_logged_images.add(image_path)
1 from clearml import Task
2 # Connecting ClearML with the current process,
3 # from here on everything is logged automatically
4 from clearml.automation import HyperParameterOptimizer, UniformParameterRange
5 from clearml.automation.optuna import OptimizerOptuna
6
7 task = Task.init(project_name='Hyper-Parameter Optimization',
8 task_name='YOLOv5',
9 task_type=Task.TaskTypes.optimizer,
10 reuse_last_task_id=False)
11
12 # Example use case:
13 optimizer = HyperParameterOptimizer(
14 # This is the experiment we want to optimize
15 base_task_id='<your_template_task_id>',
16 # here we define the hyper-parameters to optimize
17 # Notice: The parameter name should exactly match what you see in the UI: <section_name>/<parameter>
18 # For example, here we see in the base experiment a section named "General";
19 # under it, a parameter named "batch_size" becomes "General/batch_size"
20 # If you have `argparse` for example, then arguments will appear under the "Args" section,
21 # and you should instead pass "Args/batch_size"
22 hyper_parameters=[
23 UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1),
24 UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0),
25 UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98),
26 UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001),
27 UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0),
28 UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95),
29 UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2),
30 UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2),
31 UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0),
32 UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0),
33 UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0),
34 UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0),
35 UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7),
36 UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0),
37 UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0),
38 UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1),
39 UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9),
40 UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9),
41 UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0),
42 UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9),
43 UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9),
44 UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0),
45 UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001),
46 UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0),
47 UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0),
48 UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0),
49 UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0),
50 UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)],
51 # this is the objective metric we want to maximize/minimize
52 objective_metric_title='metrics',
53 objective_metric_series='mAP_0.5',
54 # now we decide if we want to maximize it or minimize it (accuracy we maximize)
55 objective_metric_sign='max',
56 # Let us limit the number of concurrent experiments;
57 # this in turn will make sure we don't bombard the scheduler with experiments.
58 # If we have an auto-scaler connected, this, by proxy, will limit the number of machines
59 max_number_of_concurrent_tasks=1,
60 # this is the optimizer class (actually doing the optimization)
61 # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band)
62 optimizer_class=OptimizerOptuna,
63 # If specified only the top K performing Tasks will be kept, the others will be automatically archived
64 save_top_k_tasks_only=5,
65 compute_time_limit=None,
66 total_max_jobs=20,
67 min_iteration_per_job=None,
68 max_iteration_per_job=None,
69 )
70
71 # Report every 10 seconds; this is way too often, but we are testing here
72 optimizer.set_report_period(10 / 60)
73 # You can also use the line below instead to run all the optimizer tasks locally, without using queues or agent
74 # an_optimizer.start_locally(job_complete_callback=job_complete_callback)
75 # set the time limit for the optimization process (2 hours)
76 optimizer.set_time_limit(in_minutes=120.0)
77 # Start the optimization process in the local environment
78 optimizer.start_locally()
79 # wait until process is done (notice we are controlling the optimization process in the background)
80 optimizer.wait()
81 # make sure background optimization stopped
82 optimizer.stop()
83
84 print('We are done, good bye')
1 <img src="https://cdn.comet.ml/img/notebook_logo.png">
2
3 # YOLOv5 with Comet
4
5 This guide will cover how to use YOLOv5 with [Comet](https://bit.ly/yolov5-readme-comet).
6
7 # About Comet
8
9 Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models.
10
11 Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://bit.ly/yolov5-colab-comet-panels)!
12 Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!
13
14 # Getting Started
15
16 ## Install Comet
17
18 ```shell
19 pip install comet_ml
20 ```
21
22 ## Configure Comet Credentials
23
24 There are two ways to configure Comet with YOLOv5.
25
26 You can either set your credentials through environment variables
27
28 **Environment Variables**
29
30 ```shell
31 export COMET_API_KEY=<Your Comet API Key>
32 export COMET_PROJECT_NAME=<Your Comet Project Name> # This will default to 'yolov5'
33 ```
34
35 Or create a `.comet.config` file in your working directory and set your credentials there.
36
37 **Comet Configuration File**
38
39 ```
40 [comet]
41 api_key=<Your Comet API Key>
42 project_name=<Your Comet Project Name> # This will default to 'yolov5'
43 ```
44
45 ## Run the Training Script
46
47 ```shell
48 # Train YOLOv5s on COCO128 for 5 epochs
49 python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt
50 ```
51
52 That's it! Comet will automatically log your hyperparameters, command line arguments, and training and validation metrics. You can visualize and analyze your runs in the Comet UI.
53
54 <img width="1920" alt="yolo-ui" src="https://user-images.githubusercontent.com/7529846/187608607-ff89c3d5-1b8b-4743-a974-9275301b0524.png">
55
56 # Try out an Example!
57 Check out an example of a [completed run here](https://www.comet.com/examples/comet-example-yolov5/a0e29e0e9b984e4a822db2a62d0cb357?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
58
59 Or better yet, try it out yourself in this Colab Notebook
60
61 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)
62
63 # Log automatically
64
65 By default, Comet will log the following items
66
67 ## Metrics
68 - Box Loss, Object Loss, Classification Loss for the training and validation data
69 - mAP_0.5, mAP_0.5:0.95 metrics for the validation data.
70 - Precision and Recall for the validation data
71
72 ## Parameters
73
74 - Model Hyperparameters
75 - All parameters passed through the command line options
76
77 ## Visualizations
78
79 - Confusion Matrix of the model predictions on the validation data
80 - Plots for the PR and F1 curves across all classes
81 - Correlogram of the Class Labels
82
83 # Configure Comet Logging
84
85 Comet can be configured to log additional data either through command line flags passed to the training script
86 or through environment variables.
87
88 ```shell
89 export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online
90 export COMET_MODEL_NAME=<your model name> #Set the name for the saved model. Defaults to yolov5
91 export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true
92 export COMET_MAX_IMAGE_UPLOADS=<number of allowed images to upload to Comet> # Controls how many total image predictions to log to Comet. Defaults to 100.
93 export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false
94 export COMET_DEFAULT_CHECKPOINT_FILENAME=<your checkpoint filename> # Set this if you would like to resume training from a different checkpoint. Defaults to 'last.pt'
95 export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false.
96 export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions
97 ```
98
99 ## Logging Checkpoints with Comet
100
101 Logging Models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the
102 logged checkpoints to Comet based on the interval value provided by `save-period`.
103
104 ```shell
105 python train.py \
106 --img 640 \
107 --batch 16 \
108 --epochs 5 \
109 --data coco128.yaml \
110 --weights yolov5s.pt \
111 --save-period 1
112 ```
113
114 ## Logging Model Predictions
115
116 By default, model predictions (images, ground truth labels and bounding boxes) will be logged to Comet.
117
118 You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch.
119
120 **Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly. For example, with 640 validation images and a batch size of 32 there are 20 validation batches per epoch, so `--bbox_interval 2` logs predictions for 10 of them.
121
122 Here is an [example project using the Panel](https://www.comet.com/examples/comet-example-yolov5?shareable=YcwMiJaZSXfcEXpGOHDD12vA1&ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration)
123
124
125 ```shell
126 python train.py \
127 --img 640 \
128 --batch 16 \
129 --epochs 5 \
130 --data coco128.yaml \
131 --weights yolov5s.pt \
132 --bbox_interval 2
133 ```
134
135 ### Controlling the number of Prediction Images logged to Comet
136
137 When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.
138
139 ```shell
140 env COMET_MAX_IMAGE_UPLOADS=200 python train.py \
141 --img 640 \
142 --batch 16 \
143 --epochs 5 \
144 --data coco128.yaml \
145 --weights yolov5s.pt \
146 --bbox_interval 1
147 ```
148
149 ### Logging Class Level Metrics
150
151 Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, f1 for each class.
152
153 ```shell
154 env COMET_LOG_PER_CLASS_METRICS=true python train.py \
155 --img 640 \
156 --batch 16 \
157 --epochs 5 \
158 --data coco128.yaml \
159 --weights yolov5s.pt
160 ```
161
162 ## Uploading a Dataset to Comet Artifacts
163
164 If you would like to store your data using [Comet Artifacts](https://www.comet.com/docs/v2/guides/data-management/using-artifacts/#learn-more?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration), you can do so using the `upload_dataset` flag.
165
166 The dataset must be organized in the way described in the [YOLOv5 documentation](https://docs.ultralytics.com/tutorials/train-custom-datasets/#3-organize-directories). The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file.
167
168 ```shell
169 python train.py \
170 --img 640 \
171 --batch 16 \
172 --epochs 5 \
173 --data coco128.yaml \
174 --weights yolov5s.pt \
175 --upload_dataset
176 ```
177
178 You can find the uploaded dataset in the Artifacts tab in your Comet Workspace.
179 <img width="1073" alt="artifact-1" src="https://user-images.githubusercontent.com/7529846/186929193-162718bf-ec7b-4eb9-8c3b-86b3763ef8ea.png">
180
181 You can preview the data directly in the Comet UI.
182 <img width="1082" alt="artifact-2" src="https://user-images.githubusercontent.com/7529846/186929215-432c36a9-c109-4eb0-944b-84c2786590d6.png">
183
184 Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file.
185 <img width="963" alt="artifact-3" src="https://user-images.githubusercontent.com/7529846/186929256-9d44d6eb-1a19-42de-889a-bcbca3018f2e.png">
186
187 ### Using a saved Artifact
188
189 If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL.
190
191 ```
192 # contents of artifact.yaml file
193 path: "comet://<workspace name>/<artifact name>:<artifact version or alias>"
194 ```
195 Then pass this file to your training script in the following way:
196
197 ```shell
198 python train.py \
199 --img 640 \
200 --batch 16 \
201 --epochs 5 \
202 --data artifact.yaml \
203 --weights yolov5s.pt
204 ```
205
206 Artifacts also allow you to track the lineage of data as it flows through your Experimentation workflow. Here you can see a graph that shows you all the experiments that have used your uploaded dataset.
207 <img width="1391" alt="artifact-4" src="https://user-images.githubusercontent.com/7529846/186929264-4c4014fa-fe51-4f3c-a5c5-f6d24649b1b4.png">
208
209 ## Resuming a Training Run
210
211 If your training run is interrupted for any reason, e.g. disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path.
212
213 The Run Path has the following format `comet://<your workspace name>/<your project name>/<experiment id>`.
214
215 This will restore the run to its state before the interruption, which includes restoring the model from a checkpoint, restoring all hyperparameters and training arguments, and downloading Comet dataset Artifacts if they were used in the original run. The resumed run will continue logging to the existing Experiment in the Comet UI.
216
217 ```shell
218 python train.py \
219 --resume "comet://<your run path>"
220 ```
221
222 ## Hyperparameter Search with the Comet Optimizer
223
224 YOLOv5 is also integrated with Comet's Optimizer, making it simple to visualize hyperparameter sweeps in the Comet UI.
225
226 ### Configuring an Optimizer Sweep
227
228 To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json`.
229
230 ```shell
231 python utils/loggers/comet/hpo.py \
232 --comet_optimizer_config "utils/loggers/comet/optimizer_config.json"
233 ```
234
235 The `hpo.py` script accepts the same arguments as `train.py`. If you wish to pass additional arguments to your sweep, simply add them after
236 the script.
237
238 ```shell
239 python utils/loggers/comet/hpo.py \
240 --comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \
241 --save-period 1 \
242 --bbox_interval 1
243 ```
244
245 ### Running a Sweep in Parallel
246
247 ```shell
248 comet optimizer -j <set number of workers> utils/loggers/comet/hpo.py \
249 utils/loggers/comet/optimizer_config.json
250 ```
251
252 ### Visualizing Results
253
254 Comet provides a number of ways to visualize the results of your sweep. Take a look at a [project with a completed sweep here](https://www.comet.com/examples/comet-example-yolov5/view/PrlArHGuuhDTKC1UuBmTtOSXD/panels?ref=yolov5&utm_source=yolov5&utm_medium=affilliate&utm_campaign=yolov5_comet_integration).
255
256 <img width="1626" alt="hyperparameter-yolo" src="https://user-images.githubusercontent.com/7529846/186914869-7dc1de14-583f-4323-967b-c9a66a29e495.png">
1 import glob
2 import json
3 import logging
4 import os
5 import sys
6 from pathlib import Path
7
8 logger = logging.getLogger(__name__)
9
10 FILE = Path(__file__).resolve()
11 ROOT = FILE.parents[3] # YOLOv5 root directory
12 if str(ROOT) not in sys.path:
13 sys.path.append(str(ROOT)) # add ROOT to PATH
14
15 try:
16 import comet_ml
17
18 # Project Configuration
19 config = comet_ml.config.get_config()
20 COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
21 except (ModuleNotFoundError, ImportError):
22 comet_ml = None
23 COMET_PROJECT_NAME = None
24
25 import PIL
26 import torch
27 import torchvision.transforms as T
28 import yaml
29
30 from utils.dataloaders import img2label_paths
31 from utils.general import check_dataset, scale_boxes, xywh2xyxy
32 from utils.metrics import box_iou
33
34 COMET_PREFIX = "comet://"
35
36 COMET_MODE = os.getenv("COMET_MODE", "online")
37
38 # Model Saving Settings
39 COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
40
41 # Dataset Artifact Settings
42 COMET_UPLOAD_DATASET = os.getenv("COMET_UPLOAD_DATASET", "false").lower() == "true"
43
44 # Evaluation Settings
45 COMET_LOG_CONFUSION_MATRIX = os.getenv("COMET_LOG_CONFUSION_MATRIX", "true").lower() == "true"
46 COMET_LOG_PREDICTIONS = os.getenv("COMET_LOG_PREDICTIONS", "true").lower() == "true"
47 COMET_MAX_IMAGE_UPLOADS = int(os.getenv("COMET_MAX_IMAGE_UPLOADS", 100))
48
49 # Confusion Matrix Settings
50 CONF_THRES = float(os.getenv("CONF_THRES", 0.001))
51 IOU_THRES = float(os.getenv("IOU_THRES", 0.6))
52
53 # Batch Logging Settings
54 COMET_LOG_BATCH_METRICS = os.getenv("COMET_LOG_BATCH_METRICS", "false").lower() == "true"
55 COMET_BATCH_LOGGING_INTERVAL = int(os.getenv("COMET_BATCH_LOGGING_INTERVAL", 1))  # cast to int: os.getenv returns a str when the variable is set
56 COMET_PREDICTION_LOGGING_INTERVAL = int(os.getenv("COMET_PREDICTION_LOGGING_INTERVAL", 1))
57 COMET_LOG_PER_CLASS_METRICS = os.getenv("COMET_LOG_PER_CLASS_METRICS", "false").lower() == "true"
58
59 RANK = int(os.getenv("RANK", -1))
60
61 to_pil = T.ToPILImage()
62
63
64 class CometLogger:
65 """Log metrics, parameters, source code, models and much more
66 with Comet
67 """
68
69 def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None:
70 self.job_type = job_type
71 self.opt = opt
72 self.hyp = hyp
73
74 # Comet Flags
75 self.comet_mode = COMET_MODE
76
77 self.save_model = opt.save_period > -1
78 self.model_name = COMET_MODEL_NAME
79
80 # Batch Logging Settings
81 self.log_batch_metrics = COMET_LOG_BATCH_METRICS
82 self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL
83
84 # Dataset Artifact Settings
85 self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET
86 self.resume = self.opt.resume
87
88 # Default parameters to pass to Experiment objects
89 self.default_experiment_kwargs = {
90 "log_code": False,
91 "log_env_gpu": True,
92 "log_env_cpu": True,
93 "project_name": COMET_PROJECT_NAME,}
94 self.default_experiment_kwargs.update(experiment_kwargs)
95 self.experiment = self._get_experiment(self.comet_mode, run_id)
96
97 self.data_dict = self.check_dataset(self.opt.data)
98 self.class_names = self.data_dict["names"]
99 self.num_classes = self.data_dict["nc"]
100
101 self.logged_images_count = 0
102 self.max_images = COMET_MAX_IMAGE_UPLOADS
103
104 if run_id is None:
105 self.experiment.log_other("Created from", "YOLOv5")
106 if not isinstance(self.experiment, comet_ml.OfflineExperiment):
107 workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:]
108 self.experiment.log_other(
109 "Run Path",
110 f"{workspace}/{project_name}/{experiment_id}",
111 )
112 self.log_parameters(vars(opt))
113 self.log_parameters(self.opt.hyp)
114 self.log_asset_data(
115 self.opt.hyp,
116 name="hyperparameters.json",
117 metadata={"type": "hyp-config-file"},
118 )
119 self.log_asset(
120 f"{self.opt.save_dir}/opt.yaml",
121 metadata={"type": "opt-config-file"},
122 )
123
124 self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX
125
126 if hasattr(self.opt, "conf_thres"):
127 self.conf_thres = self.opt.conf_thres
128 else:
129 self.conf_thres = CONF_THRES
130 if hasattr(self.opt, "iou_thres"):
131 self.iou_thres = self.opt.iou_thres
132 else:
133 self.iou_thres = IOU_THRES
134
135 self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres})
136
137 self.comet_log_predictions = COMET_LOG_PREDICTIONS
138 if self.opt.bbox_interval == -1:
139 self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10
140 else:
141 self.comet_log_prediction_interval = self.opt.bbox_interval
142
143 if self.comet_log_predictions:
144 self.metadata_dict = {}
145 self.logged_image_names = []
146
147 self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS
148
149 self.experiment.log_others({
150 "comet_mode": COMET_MODE,
151 "comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS,
152 "comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS,
153 "comet_log_batch_metrics": COMET_LOG_BATCH_METRICS,
154 "comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX,
155 "comet_model_name": COMET_MODEL_NAME,})
156
157 # Check if running the Experiment with the Comet Optimizer
158 if hasattr(self.opt, "comet_optimizer_id"):
159 self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id)
160 self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective)
161 self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric)
162 self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp))
163
164 def _get_experiment(self, mode, experiment_id=None):
165 if mode == "offline":
166 if experiment_id is not None:
167 return comet_ml.ExistingOfflineExperiment(
168 previous_experiment=experiment_id,
169 **self.default_experiment_kwargs,
170 )
171
172 return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,)
173
174 else:
175 try:
176 if experiment_id is not None:
177 return comet_ml.ExistingExperiment(
178 previous_experiment=experiment_id,
179 **self.default_experiment_kwargs,
180 )
181
182 return comet_ml.Experiment(**self.default_experiment_kwargs)
183
184 except ValueError:
185 logger.warning("COMET WARNING: "
186 "Comet credentials have not been set. "
187 "Comet will default to offline logging. "
188 "Please set your credentials to enable online logging.")
189 return self._get_experiment("offline", experiment_id)
190
192
193 def log_metrics(self, log_dict, **kwargs):
194 self.experiment.log_metrics(log_dict, **kwargs)
195
196 def log_parameters(self, log_dict, **kwargs):
197 self.experiment.log_parameters(log_dict, **kwargs)
198
199 def log_asset(self, asset_path, **kwargs):
200 self.experiment.log_asset(asset_path, **kwargs)
201
202 def log_asset_data(self, asset, **kwargs):
203 self.experiment.log_asset_data(asset, **kwargs)
204
205 def log_image(self, img, **kwargs):
206 self.experiment.log_image(img, **kwargs)
207
208 def log_model(self, path, opt, epoch, fitness_score, best_model=False):
209 if not self.save_model:
210 return
211
212 model_metadata = {
213 "fitness_score": fitness_score[-1],
214 "epochs_trained": epoch + 1,
215 "save_period": opt.save_period,
216 "total_epochs": opt.epochs,}
217
218 model_files = glob.glob(f"{path}/*.pt")
219 for model_path in model_files:
220 name = Path(model_path).name
221
222 self.experiment.log_model(
223 self.model_name,
224 file_or_folder=model_path,
225 file_name=name,
226 metadata=model_metadata,
227 overwrite=True,
228 )
229
230 def check_dataset(self, data_file):
231 with open(data_file) as f:
232 data_config = yaml.safe_load(f)
233
234 if data_config['path'].startswith(COMET_PREFIX):
235 path = data_config['path'].replace(COMET_PREFIX, "")
236 data_dict = self.download_dataset_artifact(path)
237
238 return data_dict
239
240 self.log_asset(self.opt.data, metadata={"type": "data-config-file"})
241
242 return check_dataset(data_file)
243
244 def log_predictions(self, image, labelsn, path, shape, predn):
245 if self.logged_images_count >= self.max_images:
246 return
247 detections = predn[predn[:, 4] > self.conf_thres]
248 iou = box_iou(labelsn[:, 1:], detections[:, :4])
249 mask, _ = torch.where(iou > self.iou_thres)
250 if len(mask) == 0:
251 return
252
253 filtered_detections = detections[mask]
254 filtered_labels = labelsn[mask]
255
256 image_id = path.split("/")[-1].split(".")[0]
257 image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}"
258 if image_name not in self.logged_image_names:
259 native_scale_image = PIL.Image.open(path)
260 self.log_image(native_scale_image, name=image_name)
261 self.logged_image_names.append(image_name)
262
263 metadata = []
264 for cls, *xyxy in filtered_labels.tolist():
265 metadata.append({
266 "label": f"{self.class_names[int(cls)]}-gt",
267 "score": 100,
268 "box": {
269 "x": xyxy[0],
270 "y": xyxy[1],
271 "x2": xyxy[2],
272 "y2": xyxy[3]},})
273 for *xyxy, conf, cls in filtered_detections.tolist():
274 metadata.append({
275 "label": f"{self.class_names[int(cls)]}",
276 "score": conf * 100,
277 "box": {
278 "x": xyxy[0],
279 "y": xyxy[1],
280 "x2": xyxy[2],
281 "y2": xyxy[3]},})
282
283 self.metadata_dict[image_name] = metadata
284 self.logged_images_count += 1
285
286 return
287
288 def preprocess_prediction(self, image, labels, shape, pred):
289 nl, _ = labels.shape[0], pred.shape[0]
290
291 # Predictions
292 if self.opt.single_cls:
293 pred[:, 5] = 0
294
295 predn = pred.clone()
296 scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])
297
298 labelsn = None
299 if nl:
300 tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
301 scale_boxes(image.shape[1:], tbox, shape[0], shape[1]) # native-space labels
302 labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
303 scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) # native-space pred
304
305 return predn, labelsn
306
307 def add_assets_to_artifact(self, artifact, path, asset_path, split):
308 img_paths = sorted(glob.glob(f"{asset_path}/*"))
309 label_paths = img2label_paths(img_paths)
310
311 for image_file, label_file in zip(img_paths, label_paths):
312 image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file])
313
314 try:
315 artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split})
316 artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split})
317 except ValueError as e:
318 logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.')
319 logger.error(f"COMET ERROR: {e}")
320 continue
321
322 return artifact
323
324 def upload_dataset_artifact(self):
325 dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset")
326 path = str((ROOT / Path(self.data_dict["path"])).resolve())
327
328 metadata = self.data_dict.copy()
329 for key in ["train", "val", "test"]:
330 split_path = metadata.get(key)
331 if split_path is not None:
332 metadata[key] = split_path.replace(path, "")
333
334 artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata)
335 for key in metadata.keys():
336 if key in ["train", "val", "test"]:
337 if isinstance(self.upload_dataset, str) and (key != self.upload_dataset):
338 continue
339
340 asset_path = self.data_dict.get(key)
341 if asset_path is not None:
342 artifact = self.add_assets_to_artifact(artifact, path, asset_path, key)
343
344 self.experiment.log_artifact(artifact)
345
346 return
347
348 def download_dataset_artifact(self, artifact_path):
349 logged_artifact = self.experiment.get_artifact(artifact_path)
350 artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name)
351 logged_artifact.download(artifact_save_dir)
352
353 metadata = logged_artifact.metadata
354 data_dict = metadata.copy()
355 data_dict["path"] = artifact_save_dir
356
357 metadata_names = metadata.get("names")
358 if isinstance(metadata_names, dict):
359 data_dict["names"] = {int(k): v for k, v in metadata_names.items()}
360 elif isinstance(metadata_names, list):
361 data_dict["names"] = {i: v for i, v in enumerate(metadata_names)}
362 else:
363 raise ValueError("Invalid 'names' field in dataset yaml file. Please use a list or dictionary")
364
365 data_dict = self.update_data_paths(data_dict)
366 return data_dict
367
368 def update_data_paths(self, data_dict):
369 path = data_dict.get("path", "")
370
371 for split in ["train", "val", "test"]:
372 if data_dict.get(split):
373 split_path = data_dict.get(split)
374 data_dict[split] = (f"{path}/{split_path}" if isinstance(split_path, str) else [
375 f"{path}/{x}" for x in split_path])
376
377 return data_dict
378
379 def on_pretrain_routine_end(self, paths):
380 if self.opt.resume:
381 return
382
383 for path in paths:
384 self.log_asset(str(path))
385
386 if self.upload_dataset:
387 if not self.resume:
388 self.upload_dataset_artifact()
389
390 return
391
392 def on_train_start(self):
393 self.log_parameters(self.hyp)
394
395 def on_train_epoch_start(self):
396 return
397
398 def on_train_epoch_end(self, epoch):
399 self.experiment.curr_epoch = epoch
400
401 return
402
403 def on_train_batch_start(self):
404 return
405
406 def on_train_batch_end(self, log_dict, step):
407 self.experiment.curr_step = step
408 if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
409 self.log_metrics(log_dict, step=step)
410
411 return
412
413 def on_train_end(self, files, save_dir, last, best, epoch, results):
414 if self.comet_log_predictions:
415 curr_epoch = self.experiment.curr_epoch
416 self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch)
417
418 for f in files:
419 self.log_asset(f, metadata={"epoch": epoch})
420 self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch})
421
422 if not self.opt.evolve:
423 model_path = str(best if best.exists() else last)
424 name = Path(model_path).name
425 if self.save_model:
426 self.experiment.log_model(
427 self.model_name,
428 file_or_folder=model_path,
429 file_name=name,
430 overwrite=True,
431 )
432
433 # Check if running Experiment with Comet Optimizer
434 if hasattr(self.opt, 'comet_optimizer_id'):
435 metric = results.get(self.opt.comet_optimizer_metric)
436 self.experiment.log_other('optimizer_metric_value', metric)
437
438 self.finish_run()
439
440 def on_val_start(self):
441 return
442
443 def on_val_batch_start(self):
444 return
445
446 def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
447 if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
448 return
449
450 for si, pred in enumerate(outputs):
451 if len(pred) == 0:
452 continue
453
454 image = images[si]
455 labels = targets[targets[:, 0] == si, 1:]
456 shape = shapes[si]
457 path = paths[si]
458 predn, labelsn = self.preprocess_prediction(image, labels, shape, pred)
459 if labelsn is not None:
460 self.log_predictions(image, labelsn, path, shape, predn)
461
462 return
463
464 def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
465 if self.comet_log_per_class_metrics:
466 if self.num_classes > 1:
467 for i, c in enumerate(ap_class):
468 class_name = self.class_names[c]
469 self.experiment.log_metrics(
470 {
471 'mAP@.5': ap50[i],
472 'mAP@.5:.95': ap[i],
473 'precision': p[i],
474 'recall': r[i],
475 'f1': f1[i],
476 'true_positives': tp[i],
477 'false_positives': fp[i],
478 'support': nt[c]},
479 prefix=class_name)
480
481 if self.comet_log_confusion_matrix:
482 epoch = self.experiment.curr_epoch
483 class_names = list(self.class_names.values())
484 class_names.append("background")
485 num_classes = len(class_names)
486
487 self.experiment.log_confusion_matrix(
488 matrix=confusion_matrix.matrix,
489 max_categories=num_classes,
490 labels=class_names,
491 epoch=epoch,
492 column_label='Actual Category',
493 row_label='Predicted Category',
494 file_name=f"confusion-matrix-epoch-{epoch}.json",
495 )
496
497 def on_fit_epoch_end(self, result, epoch):
498 self.log_metrics(result, epoch=epoch)
499
500 def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
501 if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
502 self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
503
504 def on_params_update(self, params):
505 self.log_parameters(params)
506
507 def finish_run(self):
508 self.experiment.end()
1 import logging
2 import os
3 from urllib.parse import urlparse
4
5 try:
6 import comet_ml
7 except (ModuleNotFoundError, ImportError):
8 comet_ml = None
9
10 import yaml
11
12 logger = logging.getLogger(__name__)
13
14 COMET_PREFIX = "comet://"
15 COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5")
16 COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt")
17
18
19 def download_model_checkpoint(opt, experiment):
20 model_dir = f"{opt.project}/{experiment.name}"
21 os.makedirs(model_dir, exist_ok=True)
22
23 model_name = COMET_MODEL_NAME
24 model_asset_list = experiment.get_model_asset_list(model_name)
25
26 if len(model_asset_list) == 0:
27 logger.error(f"COMET ERROR: No checkpoints found for model name: {model_name}")
28 return
29
30 model_asset_list = sorted(
31 model_asset_list,
32 key=lambda x: x["step"],
33 reverse=True,
34 )
35 logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list}
36
37 resource_url = urlparse(opt.weights)
38 checkpoint_filename = resource_url.query
39
40 if checkpoint_filename:
41 asset_id = logged_checkpoint_map.get(checkpoint_filename)
42 else:
43 asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME)
44 checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME
45
46 if asset_id is None:
47 logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment")
48 return
49
50 try:
51 logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}")
52 asset_filename = checkpoint_filename
53
54 model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
55 model_download_path = f"{model_dir}/{asset_filename}"
56 with open(model_download_path, "wb") as f:
57 f.write(model_binary)
58
59 opt.weights = model_download_path
60
61 except Exception as e:
62 logger.warning("COMET WARNING: Unable to download checkpoint from Comet")
63 logger.exception(e)
64
65
66 def set_opt_parameters(opt, experiment):
67 """Update the opts Namespace with parameters
68 from Comet's ExistingExperiment when resuming a run
69
70 Args:
71 opt (argparse.Namespace): Namespace of command line options
72 experiment (comet_ml.APIExperiment): Comet API Experiment object
73 """
74 asset_list = experiment.get_asset_list()
75 resume_string = opt.resume
76
77 for asset in asset_list:
78 if asset["fileName"] == "opt.yaml":
79 asset_id = asset["assetId"]
80 asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False)
81 opt_dict = yaml.safe_load(asset_binary)
82 for key, value in opt_dict.items():
83 setattr(opt, key, value)
84 opt.resume = resume_string
85
86 # Save hyperparameters to YAML file
87 # Necessary to pass checks in training script
88 save_dir = f"{opt.project}/{experiment.name}"
89 os.makedirs(save_dir, exist_ok=True)
90
91 hyp_yaml_path = f"{save_dir}/hyp.yaml"
92 with open(hyp_yaml_path, "w") as f:
93 yaml.dump(opt.hyp, f)
94 opt.hyp = hyp_yaml_path
95
96
97 def check_comet_weights(opt):
98 """Downloads model weights from Comet and updates the
99 weights path to point to the saved weights location
100
101 Args:
102 opt (argparse.Namespace): Command Line arguments passed
103 to YOLOv5 training script
104
105 Returns:
106 None/bool: Return True if weights are successfully downloaded
107 else return None
108 """
109 if comet_ml is None:
110 return
111
112 if isinstance(opt.weights, str):
113 if opt.weights.startswith(COMET_PREFIX):
114 api = comet_ml.API()
115 resource = urlparse(opt.weights)
116 experiment_path = f"{resource.netloc}{resource.path}"
117 experiment = api.get(experiment_path)
118 download_model_checkpoint(opt, experiment)
119 return True
120
121 return None
122
123
124 def check_comet_resume(opt):
125 """Restores run parameters to their original state based on the model checkpoint
126 and logged Experiment parameters.
127
128 Args:
129 opt (argparse.Namespace): Command Line arguments passed
130 to YOLOv5 training script
131
132 Returns:
133 None/bool: Return True if the run is restored successfully
134 else return None
135 """
136 if comet_ml is None:
137 return
138
139 if isinstance(opt.resume, str):
140 if opt.resume.startswith(COMET_PREFIX):
141 api = comet_ml.API()
142 resource = urlparse(opt.resume)
143 experiment_path = f"{resource.netloc}{resource.path}"
144 experiment = api.get(experiment_path)
145 set_opt_parameters(opt, experiment)
146 download_model_checkpoint(opt, experiment)
147
148 return True
149
150 return None
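
# Usage sketch (the workspace/project/experiment IDs below are placeholders, not real runs):
# both helpers above key off the comet:// URI scheme. For example,
#   python train.py --weights "comet://<workspace>/<project>/<experiment_id>?best.pt"
# downloads the named checkpoint (the URI query string selects the file and falls back to
# COMET_DEFAULT_CHECKPOINT_FILENAME when omitted), while
#   python train.py --resume "comet://<workspace>/<project>/<experiment_id>"
# restores the run's opt.yaml parameters and checkpoint before resuming training.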
1 import argparse
2 import json
3 import logging
4 import os
5 import sys
6 from pathlib import Path
7
8 import comet_ml
9
10 logger = logging.getLogger(__name__)
11
12 FILE = Path(__file__).resolve()
13 ROOT = FILE.parents[3] # YOLOv5 root directory
14 if str(ROOT) not in sys.path:
15 sys.path.append(str(ROOT)) # add ROOT to PATH
16
17 from train import train
18 from utils.callbacks import Callbacks
19 from utils.general import increment_path
20 from utils.torch_utils import select_device
21
22 # Project Configuration
23 config = comet_ml.config.get_config()
24 COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5")
25
26
27 def get_args(known=False):
28 parser = argparse.ArgumentParser()
29 parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
30 parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
31 parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
32 parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
33 parser.add_argument('--epochs', type=int, default=300, help='total training epochs')
34 parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
35 parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
36 parser.add_argument('--rect', action='store_true', help='rectangular training')
37 parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
38 parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
39 parser.add_argument('--noval', action='store_true', help='only validate final epoch')
40 parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
41 parser.add_argument('--noplots', action='store_true', help='save no plot files')
42 parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
43 parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
44 parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
45 parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
46 parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
47 parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
48 parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
49 parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
50 parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
51 parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
52 parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
53 parser.add_argument('--name', default='exp', help='save to project/name')
54 parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
55 parser.add_argument('--quad', action='store_true', help='quad dataloader')
56 parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
57 parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
58 parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
59 parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
60 parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
61 parser.add_argument('--seed', type=int, default=0, help='Global training seed')
62 parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
63
64 # Weights & Biases arguments
65 parser.add_argument('--entity', default=None, help='W&B: Entity')
66 parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
67 parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
68 parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
69
70 # Comet Arguments
71 parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.")
72 parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.")
73 parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.")
74 parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.")
75 parser.add_argument("--comet_optimizer_workers",
76 type=int,
77 default=1,
78 help="Comet: Number of Parallel Workers to use with the Comet Optimizer.")
79
80 return parser.parse_known_args()[0] if known else parser.parse_args()
81
82
83 def run(parameters, opt):
84 hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}
85
86 opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
87 opt.batch_size = parameters.get("batch_size")
88 opt.epochs = parameters.get("epochs")
89
90 device = select_device(opt.device, batch_size=opt.batch_size)
91 train(hyp_dict, opt, device, callbacks=Callbacks())
92
93
94 if __name__ == "__main__":
95 opt = get_args(known=True)
96
97 opt.weights = str(opt.weights)
98 opt.cfg = str(opt.cfg)
99 opt.data = str(opt.data)
100 opt.project = str(opt.project)
101
102 optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
103 if optimizer_id is None:
104 with open(opt.comet_optimizer_config) as f:
105 optimizer_config = json.load(f)
106 optimizer = comet_ml.Optimizer(optimizer_config)
107 else:
108 optimizer = comet_ml.Optimizer(optimizer_id)
109
110 opt.comet_optimizer_id = optimizer.id
111 status = optimizer.status()
112
113 opt.comet_optimizer_objective = status["spec"]["objective"]
114 opt.comet_optimizer_metric = status["spec"]["metric"]
115
116 logger.info("COMET INFO: Starting Hyperparameter Sweep")
117 for parameter in optimizer.get_parameters():
118 run(parameter["parameters"], opt)
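
# Launch sketch (paths are those of this repo; other values are placeholders): running
#   python utils/loggers/comet/hpo.py --comet_optimizer_config utils/loggers/comet/optimizer_config.json
# builds a comet_ml.Optimizer from the JSON config and trains one run per sampled parameter
# set; if COMET_OPTIMIZER_ID is set in the environment, the existing sweep with that ID is
# reused instead of creating a new one.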
1 {
2 "algorithm": "random",
3 "parameters": {
4 "anchor_t": {
5 "type": "discrete",
6 "values": [
7 2,
8 8
9 ]
10 },
11 "batch_size": {
12 "type": "discrete",
13 "values": [
14 16,
15 32,
16 64
17 ]
18 },
19 "box": {
20 "type": "discrete",
21 "values": [
22 0.02,
23 0.2
24 ]
25 },
26 "cls": {
27 "type": "discrete",
28 "values": [
29 0.2
30 ]
31 },
32 "cls_pw": {
33 "type": "discrete",
34 "values": [
35 0.5
36 ]
37 },
38 "copy_paste": {
39 "type": "discrete",
40 "values": [
41 1
42 ]
43 },
44 "degrees": {
45 "type": "discrete",
46 "values": [
47 0,
48 45
49 ]
50 },
51 "epochs": {
52 "type": "discrete",
53 "values": [
54 5
55 ]
56 },
57 "fl_gamma": {
58 "type": "discrete",
59 "values": [
60 0
61 ]
62 },
63 "fliplr": {
64 "type": "discrete",
65 "values": [
66 0
67 ]
68 },
69 "flipud": {
70 "type": "discrete",
71 "values": [
72 0
73 ]
74 },
75 "hsv_h": {
76 "type": "discrete",
77 "values": [
78 0
79 ]
80 },
81 "hsv_s": {
82 "type": "discrete",
83 "values": [
84 0
85 ]
86 },
87 "hsv_v": {
88 "type": "discrete",
89 "values": [
90 0
91 ]
92 },
93 "iou_t": {
94 "type": "discrete",
95 "values": [
96 0.7
97 ]
98 },
99 "lr0": {
100 "type": "discrete",
101 "values": [
102 1e-05,
103 0.1
104 ]
105 },
106 "lrf": {
107 "type": "discrete",
108 "values": [
109 0.01,
110 1
111 ]
112 },
113 "mixup": {
114 "type": "discrete",
115 "values": [
116 1
117 ]
118 },
119 "momentum": {
120 "type": "discrete",
121 "values": [
122 0.6
123 ]
124 },
125 "mosaic": {
126 "type": "discrete",
127 "values": [
128 0
129 ]
130 },
131 "obj": {
132 "type": "discrete",
133 "values": [
134 0.2
135 ]
136 },
137 "obj_pw": {
138 "type": "discrete",
139 "values": [
140 0.5
141 ]
142 },
143 "optimizer": {
144 "type": "categorical",
145 "values": [
146 "SGD",
147 "Adam",
148 "AdamW"
149 ]
150 },
151 "perspective": {
152 "type": "discrete",
153 "values": [
154 0
155 ]
156 },
157 "scale": {
158 "type": "discrete",
159 "values": [
160 0
161 ]
162 },
163 "shear": {
164 "type": "discrete",
165 "values": [
166 0
167 ]
168 },
169 "translate": {
170 "type": "discrete",
171 "values": [
172 0
173 ]
174 },
175 "warmup_bias_lr": {
176 "type": "discrete",
177 "values": [
178 0,
179 0.2
180 ]
181 },
182 "warmup_epochs": {
183 "type": "discrete",
184 "values": [
185 5
186 ]
187 },
188 "warmup_momentum": {
189 "type": "discrete",
190 "values": [
191 0,
192 0.95
193 ]
194 },
195 "weight_decay": {
196 "type": "discrete",
197 "values": [
198 0,
199 0.001
200 ]
201 }
202 },
203 "spec": {
204 "maxCombo": 0,
205 "metric": "metrics/mAP_0.5",
206 "objective": "maximize"
207 },
208 "trials": 1
209 }
1 📚 This guide explains how to use **Weights & Biases** (W&B) with YOLOv5 🚀. UPDATED 29 September 2021.
2
3 - [About Weights & Biases](#about-weights-&-biases)
4 - [First-Time Setup](#first-time-setup)
5 - [Viewing runs](#viewing-runs)
6 - [Disabling wandb](#disabling-wandb)
7 - [Advanced Usage: Dataset Versioning and Evaluation](#advanced-usage)
8 - [Reports: Share your work with the world!](#reports)
9
10 ## About Weights & Biases
11
12 Think of [W&B](https://wandb.ai/site?utm_campaign=repo_yolo_wandbtutorial) like GitHub for machine learning models. With a few lines of code, save everything you need to debug, compare and reproduce your models — architecture, hyperparameters, git commits, model weights, GPU usage, and even datasets and predictions.
13
14 Used by top researchers including teams at OpenAI, Lyft, GitHub, and MILA, W&B is part of the new standard of best practices for machine learning. Here is how W&B can help you optimize your machine learning workflows:
15
16 - [Debug](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Free-2) model performance in real time
17 - [GPU usage](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#System-4) visualized automatically
18 - [Custom charts](https://wandb.ai/wandb/customizable-charts/reports/Powerful-Custom-Charts-To-Debug-Model-Peformance--VmlldzoyNzY4ODI) for powerful, extensible visualization
19 - [Share insights](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Share-8) interactively with collaborators
20 - [Optimize hyperparameters](https://docs.wandb.com/sweeps) efficiently
21 - [Track](https://docs.wandb.com/artifacts) datasets, pipelines, and production models
22
23 ## First-Time Setup
24
25 <details open>
26 <summary> Toggle Details </summary>
27 When you first train, W&B will prompt you to create a new account and will generate an **API key** for you. If you are an existing user you can retrieve your key from https://wandb.ai/authorize. This key is used to tell W&B where to log your data. You only need to supply your key once, and then it is remembered on the same device.
28
29 W&B will create a cloud **project** (default is 'YOLOv5') for your training runs, and each new training run will be provided a unique run **name** within that project as project/name. You can also manually set your project and run name as:
30
31 ```shell
32 $ python train.py --project ... --name ...
33 ```
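
If you train on a remote machine or in CI where the interactive prompt is inconvenient, you can supply the key up front through the `WANDB_API_KEY` environment variable. A minimal sketch (the key value is a placeholder; your real key comes from https://wandb.ai/authorize):

```shell
$ export WANDB_API_KEY=xxxxxxxxxxxxxxxx  # placeholder key
$ python train.py --project ... --name ...
```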
34
35 YOLOv5 notebook example: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <a href="https://www.kaggle.com/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
36 <img width="960" alt="Screen Shot 2021-09-29 at 10 23 13 PM" src="https://user-images.githubusercontent.com/26833433/135392431-1ab7920a-c49d-450a-b0b0-0c86ec86100e.png">
37
38 </details>
39
40 ## Viewing Runs
41
42 <details open>
43 <summary> Toggle Details </summary>
44 Run information streams from your environment to the W&B cloud console as you train. This allows you to monitor and even cancel runs in <b>real time</b>. All important information is logged:
45
46 - Training & Validation losses
47 - Metrics: Precision, Recall, mAP@0.5, mAP@0.5:0.95
48 - Learning Rate over time
49 - A bounding box debugging panel, showing the training progress over time
50 - GPU: Type, **GPU Utilization**, power, temperature, **CUDA memory usage**
51 - System: Disk I/O, CPU utilization, RAM usage
52 - Your trained model as a W&B Artifact
53 - Environment: OS and Python types, Git repository and state, **training command**
54
55 <p align="center"><img width="900" alt="Weights & Biases dashboard" src="https://user-images.githubusercontent.com/26833433/135390767-c28b050f-8455-4004-adb0-3b730386e2b2.png"></p>
56 </details>
57
58 ## Disabling wandb
59
60 - Training after running `wandb disabled` inside that directory creates no wandb run
61 ![Screenshot (84)](https://user-images.githubusercontent.com/15766192/143441777-c780bdd7-7cb4-4404-9559-b4316030a985.png)
62
63 - To enable wandb again, run `wandb online`
64 ![Screenshot (85)](https://user-images.githubusercontent.com/15766192/143441866-7191b2cb-22f0-4e0f-ae64-2dc47dc13078.png)
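
The same toggling can be scripted with the standard wandb CLI; a minimal sketch:

```shell
$ wandb disabled   # runs started in this directory will not log to W&B
$ python train.py  # trains without creating a wandb run
$ wandb online     # re-enable logging
```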
65
66 ## Advanced Usage
67
68 You can leverage W&B artifacts and Tables integration to easily visualize and manage your datasets, models and training evaluations. Here are some quick examples to get you started.
69
70 <details open>
71 <h3> 1: Train and Log Evaluation simultaneously </h3>
72 This is an extension of the previous section, but it will also start training after uploading the dataset. <b>This also logs the evaluation Table.</b>
73 The evaluation Table compares your predictions and ground truths across the validation set for each epoch. It uses references to the already uploaded datasets,
74 so no images will be uploaded from your system more than once.
75 <details open>
76 <summary> <b>Usage</b> </summary>
77 <b>Code</b> <code> $ python train.py --upload_dataset val</code>
78
79 ![Screenshot from 2021-11-21 17-40-06](https://user-images.githubusercontent.com/15766192/142761183-c1696d8c-3f38-45ab-991a-bb0dfd98ae7d.png)
80
81 </details>
82
83 <h3>2. Visualize and Version Datasets</h3>
84 Log, visualize, dynamically query, and understand your data with <a href='https://docs.wandb.ai/guides/data-vis/tables'>W&B Tables</a>. You can use the following command to log your dataset as a W&B Table. This will generate a <code>{dataset}_wandb.yaml</code> file which can be used to train from the dataset artifact.
85 <details>
86 <summary> <b>Usage</b> </summary>
87 <b>Code</b> <code> $ python utils/loggers/wandb/log_dataset.py --project ... --name ... --data .. </code>
88
89 ![Screenshot (64)](https://user-images.githubusercontent.com/15766192/128486078-d8433890-98a3-4d12-8986-b6c0e3fc64b9.png)
90
91 </details>
92
93 <h3> 3: Train using dataset artifact </h3>
94 When you upload a dataset as described in the first section, you get a new config file with `_wandb` appended to its name. This file contains the information that
95 can be used to train a model directly from the dataset artifact. <b>This also logs evaluation.</b>
96 <details>
97 <summary> <b>Usage</b> </summary>
98 <b>Code</b> <code> $ python train.py --data {data}_wandb.yaml </code>
99
100 ![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png)
101
102 </details>
103
104 <h3> 4: Save model checkpoints as artifacts </h3>
105 To enable saving and versioning checkpoints of your experiment, pass `--save_period n` with the base command, where `n` represents the checkpoint interval.
106 You can also log both the dataset and model checkpoints simultaneously. If not passed, only the final model will be logged.
107
108 <details>
109 <summary> <b>Usage</b> </summary>
110 <b>Code</b> <code> $ python train.py --save_period 1 </code>
111
112 ![Screenshot (68)](https://user-images.githubusercontent.com/15766192/128726138-ec6c1f60-639d-437d-b4ee-3acd9de47ef3.png)
113
114 </details>
115
116 </details>
117
118 <h3> 5: Resume runs from checkpoint artifacts. </h3>
119 Any run can be resumed using artifacts if the <code>--resume</code> argument starts with the <code>wandb-artifact://</code> prefix followed by the run path, i.e. <code>wandb-artifact://username/project/runid</code>. This doesn't require the model checkpoint to be present on the local system.
120
121 <details>
122 <summary> <b>Usage</b> </summary>
123 <b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
124
125 ![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
126
127 </details>
128
129 <h3> 6: Resume runs from dataset artifact & checkpoint artifacts. </h3>
130 <b> Local dataset or model checkpoints are not required. This can be used to resume runs directly on a different device. </b>
131 The syntax is the same as in the previous section, but you'll need to log both the dataset and model checkpoints as artifacts, i.e. set both <code>--upload_dataset</code> (or
132 train from a <code>_wandb.yaml</code> file) and <code>--save_period</code>
133
134 <details>
135 <summary> <b>Usage</b> </summary>
136 <b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
137
138 ![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
139
140 </details>
141
142 </details>
143
144 <h3> Reports </h3>
145 W&B Reports can be created from your saved runs for sharing online. Once a report is created you will receive a link you can use to publicly share your results. Here is an example report created from the COCO128 tutorial trainings of all four YOLOv5 models ([link](https://wandb.ai/glenn-jocher/yolov5_tutorial/reports/YOLOv5-COCO128-Tutorial-Results--VmlldzozMDI5OTY)).
146
147 <img width="900" alt="Weights & Biases Reports" src="https://user-images.githubusercontent.com/26833433/135394029-a17eaf86-c6c1-4b1d-bb80-b90e83aaffa7.png">
148
149 ## Environments
150
151 YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
152
153 - **Google Colab and Kaggle** notebooks with free GPU: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <a href="https://www.kaggle.com/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
154 - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
155 - **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)
156 - **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) <a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>
157
158 ## Status
159
160 ![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)
161
162 If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.
1 import argparse
2
3 from wandb_utils import WandbLogger
4
5 from utils.general import LOGGER
6
7 WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
8
9
10 def create_dataset_artifact(opt):
11 logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused
12 if not logger.wandb:
13 LOGGER.info("Install wandb using `pip install wandb` to log the dataset")
14
15
16 if __name__ == '__main__':
17 parser = argparse.ArgumentParser()
18 parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
19 parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
20 parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
21 parser.add_argument('--entity', default=None, help='W&B entity')
22 parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run')
23
24 opt = parser.parse_args()
25 opt.resume = False # Explicitly disallow resume check for dataset upload job
26
27 create_dataset_artifact(opt)
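
# Usage sketch (argument values are placeholders): from the YOLOv5 root,
#   python utils/loggers/wandb/log_dataset.py --project YOLOv5 --data data/coco128.yaml --name log_dataset
# logs the dataset referenced by data/coco128.yaml as a W&B artifact via WandbLogger's
# 'Dataset Creation' job type.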
1 import sys
2 from pathlib import Path
3
4 import wandb
5
6 FILE = Path(__file__).resolve()
7 ROOT = FILE.parents[3] # YOLOv5 root directory
8 if str(ROOT) not in sys.path:
9 sys.path.append(str(ROOT)) # add ROOT to PATH
10
11 from train import parse_opt, train
12 from utils.callbacks import Callbacks
13 from utils.general import increment_path
14 from utils.torch_utils import select_device
15
16
17 def sweep():
18 wandb.init()
19 # Get hyp dict from sweep agent. Copy because train() modifies parameters which confuses wandb.
20 hyp_dict = vars(wandb.config).get("_items").copy()
21
22 # Workaround: get necessary opt args
23 opt = parse_opt(known=True)
24 opt.batch_size = hyp_dict.get("batch_size")
25 opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
26 opt.epochs = hyp_dict.get("epochs")
27 opt.nosave = True
28 opt.data = hyp_dict.get("data")
29 opt.weights = str(opt.weights)
30 opt.cfg = str(opt.cfg)
31 opt.data = str(opt.data)
32 opt.hyp = str(opt.hyp)
33 opt.project = str(opt.project)
34 device = select_device(opt.device, batch_size=opt.batch_size)
35
36 # train
37 train(hyp_dict, opt, device, callbacks=Callbacks())
38
39
40 if __name__ == "__main__":
41 sweep()
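
# Note: sweep() is designed to be launched by a `wandb agent`, which injects the sampled
# hyperparameters into wandb.config; the search space it samples from is defined in
# sweep.yaml.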
1 # Hyperparameters for training
2 # To set a range,
3 # provide min and max values as:
4 # parameter:
5 #
6 # min: scalar
7 # max: scalar
8 # OR
9 #
10 # set a specific list of values to search over:
11 # parameter:
12 # values: [scalar1, scalar2, scalar3...]
13 #
14 # You can use the grid, random, and bayesian search strategies
15 # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration
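#
# A typical launch sequence from the YOLOv5 root (a sketch; entity, project and sweep_id are placeholders):
#   wandb sweep utils/loggers/wandb/sweep.yaml      # registers the sweep and prints its ID
#   wandb agent <entity>/<project>/<sweep_id>       # runs sweep.py with sampled hyperparameters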
16
17 program: utils/loggers/wandb/sweep.py
18 method: random
19 metric:
20 name: metrics/mAP_0.5
21 goal: maximize
22
23 parameters:
24 # hyperparameters: set either min, max range or values list
25 data:
26 value: "data/coco128.yaml"
27 batch_size:
28 values: [64]
29 epochs:
30 values: [10]
31
32 lr0:
33 distribution: uniform
34 min: 1e-5
35 max: 1e-1
36 lrf:
37 distribution: uniform
38 min: 0.01
39 max: 1.0
40 momentum:
41 distribution: uniform
42 min: 0.6
43 max: 0.98
44 weight_decay:
45 distribution: uniform
46 min: 0.0
47 max: 0.001
48 warmup_epochs:
49 distribution: uniform
50 min: 0.0
51 max: 5.0
52 warmup_momentum:
53 distribution: uniform
54 min: 0.0
55 max: 0.95
56 warmup_bias_lr:
57 distribution: uniform
58 min: 0.0
59 max: 0.2
60 box:
61 distribution: uniform
62 min: 0.02
63 max: 0.2
64 cls:
65 distribution: uniform
66 min: 0.2
67 max: 4.0
68 cls_pw:
69 distribution: uniform
70 min: 0.5
71 max: 2.0
72 obj:
73 distribution: uniform
74 min: 0.2
75 max: 4.0
76 obj_pw:
77 distribution: uniform
78 min: 0.5
79 max: 2.0
80 iou_t:
81 distribution: uniform
82 min: 0.1
83 max: 0.7
84 anchor_t:
85 distribution: uniform
86 min: 2.0
87 max: 8.0
88 fl_gamma:
89 distribution: uniform
90 min: 0.0
91 max: 4.0
92 hsv_h:
93 distribution: uniform
94 min: 0.0
95 max: 0.1
96 hsv_s:
97 distribution: uniform
98 min: 0.0
99 max: 0.9
100 hsv_v:
101 distribution: uniform
102 min: 0.0
103 max: 0.9
104 degrees:
105 distribution: uniform
106 min: 0.0
107 max: 45.0
108 translate:
109 distribution: uniform
110 min: 0.0
111 max: 0.9
112 scale:
113 distribution: uniform
114 min: 0.0
115 max: 0.9
116 shear:
117 distribution: uniform
118 min: 0.0
119 max: 10.0
120 perspective:
121 distribution: uniform
122 min: 0.0
123 max: 0.001
124 flipud:
125 distribution: uniform
126 min: 0.0
127 max: 1.0
128 fliplr:
129 distribution: uniform
130 min: 0.0
131 max: 1.0
132 mosaic:
133 distribution: uniform
134 min: 0.0
135 max: 1.0
136 mixup:
137 distribution: uniform
138 min: 0.0
139 max: 1.0
140 copy_paste:
141 distribution: uniform
142 min: 0.0
143 max: 1.0
1 """Utilities and tools for tracking runs with Weights & Biases."""
2
3 import logging
4 import os
5 import sys
6 from contextlib import contextmanager
7 from pathlib import Path
8 from typing import Dict
9
10 import yaml
11 from tqdm import tqdm
12
13 FILE = Path(__file__).resolve()
14 ROOT = FILE.parents[3] # YOLOv5 root directory
15 if str(ROOT) not in sys.path:
16 sys.path.append(str(ROOT)) # add ROOT to PATH
17
18 from utils.dataloaders import LoadImagesAndLabels, img2label_paths
19 from utils.general import LOGGER, check_dataset, check_file
20
21 try:
22 import wandb
23
24 assert hasattr(wandb, '__version__') # verify package import not local dir
25 except (ImportError, AssertionError):
26 wandb = None
27
28 RANK = int(os.getenv('RANK', -1))
29 WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
30
31
32 def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
33 return from_string[len(prefix):]
34
35
36 def check_wandb_config_file(data_config_file):
37 wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1)) # updated data.yaml path
38 if Path(wandb_config).is_file():
39 return wandb_config
40 return data_config_file
41
42
43 def check_wandb_dataset(data_file):
44 is_trainset_wandb_artifact = False
45 is_valset_wandb_artifact = False
46 if isinstance(data_file, dict):
47 # In that case another dataset manager has already processed it and we don't have to
48 return data_file
49 if check_file(data_file) and data_file.endswith('.yaml'):
50 with open(data_file, errors='ignore') as f:
51 data_dict = yaml.safe_load(f)
52 is_trainset_wandb_artifact = isinstance(data_dict['train'],
53 str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX)
54 is_valset_wandb_artifact = isinstance(data_dict['val'],
55 str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX)
56 if is_trainset_wandb_artifact or is_valset_wandb_artifact:
57 return data_dict
58 else:
59 return check_dataset(data_file)
60
61
62 def get_run_info(run_path):
63 run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
64 run_id = run_path.stem
65 project = run_path.parent.stem
66 entity = run_path.parent.parent.stem
67 model_artifact_name = 'run_' + run_id + '_model'
68 return entity, project, run_id, model_artifact_name
69
70
71 def check_wandb_resume(opt):
72 if RANK not in [-1, 0]: process_wandb_config_ddp_mode(opt)
73 if isinstance(opt.resume, str):
74 if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
75 if RANK not in [-1, 0]: # For resuming DDP runs
76 entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
77 api = wandb.Api()
78 artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest')
79 modeldir = artifact.download()
80 opt.weights = str(Path(modeldir) / "last.pt")
81 return True
82 return None
83
84
85 def process_wandb_config_ddp_mode(opt):
86 with open(check_file(opt.data), errors='ignore') as f:
87 data_dict = yaml.safe_load(f) # data dict
88 train_dir, val_dir = None, None
89 if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
90 api = wandb.Api()
91 train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
92 train_dir = train_artifact.download()
93 train_path = Path(train_dir) / 'data/images/'
94 data_dict['train'] = str(train_path)
95
96 if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
97 api = wandb.Api()
98 val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
99 val_dir = val_artifact.download()
100 val_path = Path(val_dir) / 'data/images/'
101 data_dict['val'] = str(val_path)
102 if train_dir or val_dir:
103 ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml')
104 with open(ddp_data_path, 'w') as f:
105 yaml.safe_dump(data_dict, f)
106 opt.data = ddp_data_path
107
108
109 class WandbLogger():
110 """Log training runs, datasets, models, and predictions to Weights & Biases.
111
112 This logger sends information to W&B at wandb.ai. By default, this information
113 includes hyperparameters, system configuration and metrics, model metrics,
114 and basic data metrics and analyses.
115
116 By providing additional command line arguments to train.py, datasets,
117 models and predictions can also be logged.
118
119 For more on how this logger is used, see the Weights & Biases documentation:
120 https://docs.wandb.com/guides/integrations/yolov5
121 """
122
123 def __init__(self, opt, run_id=None, job_type='Training'):
124 """
125 - Initialize WandbLogger instance
126 - Upload dataset if opt.upload_dataset is True
127 - Setup training processes if job_type is 'Training'
128
129 arguments:
130 opt (namespace) -- Commandline arguments for this run
131 run_id (str) -- Run ID of W&B run to be resumed
132 job_type (str) -- To set the job_type for this run
133
134 """
135 # Temporary fix
136 if opt.upload_dataset:
137 opt.upload_dataset = False
138 LOGGER.info("Dataset upload functionality is temporarily disabled due to a bug.")
139
140 # Pre-training routine --
141 self.job_type = job_type
142 self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run
143 self.val_artifact, self.train_artifact = None, None
144 self.train_artifact_path, self.val_artifact_path = None, None
145 self.result_artifact = None
146 self.val_table, self.result_table = None, None
147 self.bbox_media_panel_images = []
148 self.val_table_path_map = None
149 self.max_imgs_to_log = 16
150 self.wandb_artifact_data_dict = None
151 self.data_dict = None
152 # It's more elegant to stick to 1 wandb.init call,
153 # but useful config data is overwritten in the WandbLogger's wandb.init call
154 if isinstance(opt.resume, str): # checks resume from artifact
155 if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
156 entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
157 model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
158 assert wandb, 'install wandb to resume wandb runs'
159 # Resume wandb-artifact:// runs here | workaround for not overwriting wandb.config
160 self.wandb_run = wandb.init(id=run_id,
161 project=project,
162 entity=entity,
163 resume='allow',
164 allow_val_change=True)
165 opt.resume = model_artifact_name
166 elif self.wandb:
167 self.wandb_run = wandb.init(config=opt,
168 resume="allow",
169 project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
170 entity=opt.entity,
171 name=opt.name if opt.name != 'exp' else None,
172 job_type=job_type,
173 id=run_id,
174 allow_val_change=True) if not wandb.run else wandb.run
175 if self.wandb_run:
176 if self.job_type == 'Training':
177 if opt.upload_dataset:
178 if not opt.resume:
179 self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)
180
181 if isinstance(opt.data, dict):
182 # This means another dataset manager has already processed the dataset info (e.g. ClearML)
183 # and they will have stored the already processed dict in opt.data
184 self.data_dict = opt.data
185 elif opt.resume:
186 # resume from artifact
187 if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
188 self.data_dict = dict(self.wandb_run.config.data_dict)
189 else: # local resume
190 self.data_dict = check_wandb_dataset(opt.data)
191 else:
192 self.data_dict = check_wandb_dataset(opt.data)
193 self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict
194
195 # write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
196 self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict}, allow_val_change=True)
197 self.setup_training(opt)
198
199 if self.job_type == 'Dataset Creation':
200 self.wandb_run.config.update({"upload_dataset": True})
201 self.data_dict = self.check_and_upload_dataset(opt)
202
203 def check_and_upload_dataset(self, opt):
204 """
205 Check if the dataset format is compatible and upload it as W&B artifact
206
207 arguments:
208 opt (namespace)-- Commandline arguments for current run
209
210 returns:
211 Updated dataset info dictionary where local dataset paths are replaced by WANDB_ARTIFACT_PREFIX links.
212 """
213 assert wandb, 'Install wandb to upload dataset'
214 config_path = self.log_dataset_artifact(opt.data, opt.single_cls,
215 'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
216 with open(config_path, errors='ignore') as f:
217 wandb_data_dict = yaml.safe_load(f)
218 return wandb_data_dict
219
220 def setup_training(self, opt):
221 """
222 Setup the necessary processes for training YOLO models:
223 - Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
224 - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
225 - Setup log_dict, initialize bbox_interval
226
227 arguments:
228 opt (namespace) -- commandline arguments for this run
229
230 """
231 self.log_dict, self.current_epoch = {}, 0
232 self.bbox_interval = opt.bbox_interval
233 if isinstance(opt.resume, str):
234 modeldir, _ = self.download_model_artifact(opt)
235 if modeldir:
236 self.weights = Path(modeldir) / "last.pt"
237 config = self.wandb_run.config
238 opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp, opt.imgsz = str(
239 self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs,\
240 config.hyp, config.imgsz
241 data_dict = self.data_dict
242 if self.val_artifact is None: # If --upload_dataset is set, use the existing artifact, don't download
243 self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(
244 data_dict.get('train'), opt.artifact_alias)
245 self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(
246 data_dict.get('val'), opt.artifact_alias)
247
248 if self.train_artifact_path is not None:
249 train_path = Path(self.train_artifact_path) / 'data/images/'
250 data_dict['train'] = str(train_path)
251 if self.val_artifact_path is not None:
252 val_path = Path(self.val_artifact_path) / 'data/images/'
253 data_dict['val'] = str(val_path)
254
255 if self.val_artifact is not None:
256 self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
257 columns = ["epoch", "id", "ground truth", "prediction"]
258 columns.extend(self.data_dict['names'])
259 self.result_table = wandb.Table(columns)
260 self.val_table = self.val_artifact.get("val")
261 if self.val_table_path_map is None:
262 self.map_val_table_path()
263 if opt.bbox_interval == -1:
264 self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
265 if opt.evolve or opt.noplots:
266 self.bbox_interval = opt.bbox_interval = opt.epochs + 1 # disable bbox_interval
267 train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
268 # Update the data_dict to point to the local artifacts dir
269 if train_from_artifact:
270 self.data_dict = data_dict
271
272 def download_dataset_artifact(self, path, alias):
273 """
274 download the dataset artifact if the path starts with WANDB_ARTIFACT_PREFIX
275
276 arguments:
277 path -- path of the dataset to be used for training
278 alias (str)-- alias of the artifact to be download/used for training
279
280 returns:
281 (str, wandb.Artifact) -- path of the downloaded dataset and its corresponding artifact object if the dataset
282 is found, otherwise returns (None, None)
283 """
284 if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
285 artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
286 dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/"))
287 assert dataset_artifact is not None, "Error: W&B dataset artifact doesn't exist"
288 datadir = dataset_artifact.download()
289 return datadir, dataset_artifact
290 return None, None
291
292 def download_model_artifact(self, opt):
293 """
294 download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX
295
296 arguments:
297 opt (namespace) -- Commandline arguments for this run
298 """
299 if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
300 model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest")
301 assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist'
302 modeldir = model_artifact.download()
303 # epochs_trained = model_artifact.metadata.get('epochs_trained')
304 total_epochs = model_artifact.metadata.get('total_epochs')
305 is_finished = total_epochs is None
306 assert not is_finished, 'training is finished, can only resume incomplete runs.'
307 return modeldir, model_artifact
308 return None, None
309
310 def log_model(self, path, opt, epoch, fitness_score, best_model=False):
311 """
312 Log the model checkpoint as W&B artifact
313
314 arguments:
315 path (Path) -- Path of directory containing the checkpoints
316 opt (namespace) -- Command line arguments for this run
317 epoch (int) -- Current epoch number
318 fitness_score (float) -- fitness score for current epoch
319 best_model (boolean) -- Boolean representing if the current checkpoint is the best yet.
320 """
321 model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model',
322 type='model',
323 metadata={
324 'original_url': str(path),
325 'epochs_trained': epoch + 1,
326 'save period': opt.save_period,
327 'project': opt.project,
328 'total_epochs': opt.epochs,
329 'fitness_score': fitness_score})
330 model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
331 wandb.log_artifact(model_artifact,
332 aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
333 LOGGER.info(f"Saving model artifact on epoch {epoch + 1}")
334
335 def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
336 """
337 Log the dataset as W&B artifact and return the new data file with W&B links
338
339 arguments:
340 data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
341 single_class (boolean) -- train multi-class data as single-class
342 project (str) -- project name. Used to construct the artifact path
343 overwrite_config (boolean) -- overwrites the data.yaml file if set to true, otherwise creates a new
344 file with a _wandb postfix, e.g. data_wandb.yaml
345
346 returns:
347 the new .yaml file with artifact links. it can be used to start training directly from artifacts
348 """
349 upload_dataset = self.wandb_run.config.upload_dataset
350 log_val_only = isinstance(upload_dataset, str) and upload_dataset == 'val'
351 self.data_dict = check_dataset(data_file) # parse and check
352 data = dict(self.data_dict)
353 nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
354 names = {k: v for k, v in enumerate(names)} # to index dictionary
355
356 # log train set
357 if not log_val_only:
358 self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(data['train'], rect=True, batch_size=1),
359 names,
360 name='train') if data.get('train') else None
361 if data.get('train'):
362 data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')
363
364 self.val_artifact = self.create_dataset_table(
365 LoadImagesAndLabels(data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None
366 if data.get('val'):
367 data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')
368
369 path = Path(data_file)
370 # create a _wandb.yaml file with artifacts links if both train and test set are logged
371 if not log_val_only:
372 path = (path.stem if overwrite_config else path.stem + '_wandb') + '.yaml' # updated data.yaml path
373 path = ROOT / 'data' / path
374 data.pop('download', None)
375 data.pop('path', None)
376 with open(path, 'w') as f:
377 yaml.safe_dump(data, f)
378 LOGGER.info(f"Created dataset config file {path}")
379
380 if self.job_type == 'Training': # builds correct artifact pipeline graph
381 if not log_val_only:
382 self.wandb_run.log_artifact(
383 self.train_artifact) # calling use_artifact downloads the dataset. NOT NEEDED!
384 self.wandb_run.use_artifact(self.val_artifact)
385 self.val_artifact.wait()
386 self.val_table = self.val_artifact.get('val')
387 self.map_val_table_path()
388 else:
389 self.wandb_run.log_artifact(self.train_artifact)
390 self.wandb_run.log_artifact(self.val_artifact)
391 return path
392
393 def map_val_table_path(self):
394 """
395 Map the validation dataset Table: name of file -> its id in the W&B Table.
396 Useful for referencing artifacts for evaluation.
397 """
398 self.val_table_path_map = {}
399 LOGGER.info("Mapping dataset")
400 for i, data in enumerate(tqdm(self.val_table.data)):
401 self.val_table_path_map[data[3]] = data[0]
402
403 def create_dataset_table(self, dataset: LoadImagesAndLabels, class_to_id: Dict[int, str], name: str = 'dataset'):
404 """
405 Create and return W&B artifact containing W&B Table of the dataset.
406
407 arguments:
408 dataset -- instance of LoadImagesAndLabels class used to iterate over the data to build Table
409 class_to_id -- hash map that maps class ids to labels
410 name -- name of the artifact
411
412 returns:
413 dataset artifact to be logged or used
414 """
415 # TODO: Explore multiprocessing to split this loop in parallel | This is essential for speeding up logging
416 artifact = wandb.Artifact(name=name, type="dataset")
417 img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
418 img_files = tqdm(dataset.im_files) if not img_files else img_files
419 for img_file in img_files:
420 if Path(img_file).is_dir():
421 artifact.add_dir(img_file, name='data/images')
422 labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
423 artifact.add_dir(labels_path, name='data/labels')
424 else:
425 artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
426 label_file = Path(img2label_paths([img_file])[0])
427 if label_file.exists():
428 artifact.add_file(str(label_file), name='data/labels/' + label_file.name)
429 table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
430 class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()])
431 for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
432 box_data, img_classes = [], {}
433 for cls, *xywh in labels[:, 1:].tolist():
434 cls = int(cls)
435 box_data.append({
436 "position": {
437 "middle": [xywh[0], xywh[1]],
438 "width": xywh[2],
439 "height": xywh[3]},
440 "class_id": cls,
441 "box_caption": "%s" % (class_to_id[cls])})
442 img_classes[cls] = class_to_id[cls]
443 boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space
444 table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()),
445 Path(paths).name)
446 artifact.add(table, name)
447 return artifact
448
449 def log_training_progress(self, predn, path, names):
450 """
451 Build evaluation Table. Uses reference from validation dataset table.
452
453 arguments:
454 predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class]
455 path (str): local path of the current evaluation image
456 names (dict(int, str)): hash map that maps class ids to labels
457 """
458 class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()])
459 box_data = []
460 avg_conf_per_class = [0] * len(self.data_dict['names'])
461 pred_class_count = {}
462 for *xyxy, conf, cls in predn.tolist():
463 if conf >= 0.25:
464 cls = int(cls)
465 box_data.append({
466 "position": {
467 "minX": xyxy[0],
468 "minY": xyxy[1],
469 "maxX": xyxy[2],
470 "maxY": xyxy[3]},
471 "class_id": cls,
472 "box_caption": f"{names[cls]} {conf:.3f}",
473 "scores": {
474 "class_score": conf},
475 "domain": "pixel"})
476 avg_conf_per_class[cls] += conf
477
478 if cls in pred_class_count:
479 pred_class_count[cls] += 1
480 else:
481 pred_class_count[cls] = 1
482
483 for pred_class in pred_class_count.keys():
484 avg_conf_per_class[pred_class] = avg_conf_per_class[pred_class] / pred_class_count[pred_class]
485
486 boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
487 id = self.val_table_path_map[Path(path).name]
488 self.result_table.add_data(self.current_epoch, id, self.val_table.data[id][1],
489 wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set),
490 *avg_conf_per_class)
491
492 def val_one_image(self, pred, predn, path, names, im):
493 """
494 Log validation data for one image. Updates the result Table if the validation dataset is uploaded and logs the bbox media panel.
495
496 arguments:
497 pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
498 predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class]
499 path (str): local path of the current evaluation image; names (dict) maps class ids to labels; im is the image array being logged
500 """
501 if self.val_table and self.result_table: # Log Table if Val dataset is uploaded as artifact
502 self.log_training_progress(predn, path, names)
503
504 if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
505 if self.current_epoch % self.bbox_interval == 0:
506 box_data = [{
507 "position": {
508 "minX": xyxy[0],
509 "minY": xyxy[1],
510 "maxX": xyxy[2],
511 "maxY": xyxy[3]},
512 "class_id": int(cls),
513 "box_caption": f"{names[int(cls)]} {conf:.3f}",
514 "scores": {
515 "class_score": conf},
516 "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
517 boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
518 self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name))
519
520 def log(self, log_dict):
521 """
522 save the metrics to the logging dictionary
523
524 arguments:
525 log_dict (Dict) -- metrics/media to be logged in current step
526 """
527 if self.wandb_run:
528 for key, value in log_dict.items():
529 self.log_dict[key] = value
530
531 def end_epoch(self, best_result=False):
532 """
533 commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.
534
535 arguments:
536 best_result (boolean): Boolean representing if the result of this evaluation is best or not
537 """
538 if self.wandb_run:
539 with all_logging_disabled():
540 if self.bbox_media_panel_images:
541 self.log_dict["BoundingBoxDebugger"] = self.bbox_media_panel_images
542 try:
543 wandb.log(self.log_dict)
544 except BaseException as e:
545 LOGGER.info(
546 f"An error occurred in wandb logger. The training will proceed without interruption. More info\n{e}"
547 )
548 self.wandb_run.finish()
549 self.wandb_run = None
550
551 self.log_dict = {}
552 self.bbox_media_panel_images = []
553 if self.result_artifact:
554 self.result_artifact.add(self.result_table, 'result')
555 wandb.log_artifact(self.result_artifact,
556 aliases=[
557 'latest', 'last', 'epoch ' + str(self.current_epoch),
558 ('best' if best_result else '')])
559
560 wandb.log({"evaluation": self.result_table})
561 columns = ["epoch", "id", "ground truth", "prediction"]
562 columns.extend(self.data_dict['names'])
563 self.result_table = wandb.Table(columns)
564 self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
565
566 def finish_run(self):
567 """
568 Log metrics if any and finish the current W&B run
569 """
570 if self.wandb_run:
571 if self.log_dict:
572 with all_logging_disabled():
573 wandb.log(self.log_dict)
574 wandb.run.finish()
575
576
577 @contextmanager
578 def all_logging_disabled(highest_level=logging.CRITICAL):
579 """ source - https://gist.github.com/simon-weber/7853144
580 A context manager that will prevent any logging messages triggered during the body from being processed.
581 :param highest_level: the maximum logging level in use.
582 This would only need to be changed if a custom level greater than CRITICAL is defined.
583 """
584 previous_level = logging.root.manager.disable
585 logging.disable(highest_level)
586 try:
587 yield
588 finally:
589 logging.disable(previous_level)
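A minimal usage sketch of all_logging_disabled (illustrative only; relies on the module-level logging import):

    with all_logging_disabled():
        logging.getLogger('noisy').error('suppressed')   # dropped: everything up to CRITICAL is disabled
    logging.getLogger('noisy').warning('visible again')  # normal logging resumes after the block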
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Loss functions
4 """
5
6 import torch
7 import torch.nn as nn
8
9 from utils.metrics import bbox_iou
10 from utils.torch_utils import de_parallel
11
12
13 def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
14 # return positive, negative label smoothing BCE targets
15 return 1.0 - 0.5 * eps, 0.5 * eps
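For example (the values follow directly from the formula above):

    cp, cn = smooth_BCE(eps=0.1)
    print(cp, cn)  # ~0.95 positive target, ~0.05 negative target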
16
17
18 class BCEBlurWithLogitsLoss(nn.Module):
19 # BCEWithLogitsLoss() with reduced missing label effects.
20 def __init__(self, alpha=0.05):
21 super().__init__()
22 self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss()
23 self.alpha = alpha
24
25 def forward(self, pred, true):
26 loss = self.loss_fcn(pred, true)
27 pred = torch.sigmoid(pred) # prob from logits
28 dx = pred - true # reduce only missing label effects
29 # dx = (pred - true).abs() # reduce missing label and false label effects
30 alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
31 loss *= alpha_factor
32 return loss.mean()
33
34
35 class FocalLoss(nn.Module):
36 # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
37 def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
38 super().__init__()
39 self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
40 self.gamma = gamma
41 self.alpha = alpha
42 self.reduction = loss_fcn.reduction
43 self.loss_fcn.reduction = 'none' # required to apply FL to each element
44
45 def forward(self, pred, true):
46 loss = self.loss_fcn(pred, true)
47 # p_t = torch.exp(-loss)
48 # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
49
50 # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
51 pred_prob = torch.sigmoid(pred) # prob from logits
52 p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
53 alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
54 modulating_factor = (1.0 - p_t) ** self.gamma
55 loss *= alpha_factor * modulating_factor
56
57 if self.reduction == 'mean':
58 return loss.mean()
59 elif self.reduction == 'sum':
60 return loss.sum()
61 else: # 'none'
62 return loss
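A minimal usage sketch of FocalLoss (illustrative only; wraps a plain BCEWithLogitsLoss as the comment above requires):

    def _focal_loss_demo():
        bce = nn.BCEWithLogitsLoss()                     # base criterion, as required
        focal = FocalLoss(bce, gamma=1.5, alpha=0.25)    # wrap it, as ComputeLoss does when fl_gamma > 0
        pred = torch.randn(8, 4)                         # raw logits
        true = torch.randint(0, 2, (8, 4)).float()       # binary targets
        return focal(pred, true)                         # scalar: 'mean' reduction inherited from bce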
63
64
65 class QFocalLoss(nn.Module):
66 # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
67 def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
68 super().__init__()
69 self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
70 self.gamma = gamma
71 self.alpha = alpha
72 self.reduction = loss_fcn.reduction
73 self.loss_fcn.reduction = 'none' # required to apply FL to each element
74
75 def forward(self, pred, true):
76 loss = self.loss_fcn(pred, true)
77
78 pred_prob = torch.sigmoid(pred) # prob from logits
79 alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
80 modulating_factor = torch.abs(true - pred_prob) ** self.gamma
81 loss *= alpha_factor * modulating_factor
82
83 if self.reduction == 'mean':
84 return loss.mean()
85 elif self.reduction == 'sum':
86 return loss.sum()
87 else: # 'none'
88 return loss
89
90
91 class ComputeLoss:
92 sort_obj_iou = False
93
94 # Compute losses
95 def __init__(self, model, autobalance=False):
96 device = next(model.parameters()).device # get model device
97 h = model.hyp # hyperparameters
98
99 # Define criteria
100 BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
101 BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))
102
103 # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
104 self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets
105
106 # Focal loss
107 g = h['fl_gamma'] # focal loss gamma
108 if g > 0:
109 BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
110
111 m = de_parallel(model).model[-1] # Detect() module
112 self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7
113 self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index
114 self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
115 self.na = m.na # number of anchors
116 self.nc = m.nc # number of classes
117 self.nl = m.nl # number of layers
118 self.anchors = m.anchors
119 self.device = device
120
121 def __call__(self, p, targets): # predictions, targets
122 lcls = torch.zeros(1, device=self.device) # class loss
123 lbox = torch.zeros(1, device=self.device) # box loss
124 lobj = torch.zeros(1, device=self.device) # object loss
125 tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets
126
127 # Losses
128 for i, pi in enumerate(p): # layer index, layer predictions
129 b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
130 tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
131
132 n = b.shape[0] # number of targets
133 if n:
134 # pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1) # faster, requires torch 1.8.0
135 pxy, pwh, _, pcls = pi[b, a, gj, gi].split((2, 2, 1, self.nc), 1) # target-subset of predictions
136
137 # Regression
138 pxy = pxy.sigmoid() * 2 - 0.5
139 pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
140 pbox = torch.cat((pxy, pwh), 1) # predicted box
141 iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target)
142 lbox += (1.0 - iou).mean() # iou loss
143
144 # Objectness
145 iou = iou.detach().clamp(0).type(tobj.dtype)
146 if self.sort_obj_iou:
147 j = iou.argsort()
148 b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
149 if self.gr < 1:
150 iou = (1.0 - self.gr) + self.gr * iou
151 tobj[b, a, gj, gi] = iou # iou ratio
152
153 # Classification
154 if self.nc > 1: # cls loss (only if multiple classes)
155 t = torch.full_like(pcls, self.cn, device=self.device) # targets
156 t[range(n), tcls[i]] = self.cp
157 lcls += self.BCEcls(pcls, t) # BCE
158
159 # Append targets to text file
160 # with open('targets.txt', 'a') as file:
161 # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
162
163 obji = self.BCEobj(pi[..., 4], tobj)
164 lobj += obji * self.balance[i] # obj loss
165 if self.autobalance:
166 self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
167
168 if self.autobalance:
169 self.balance = [x / self.balance[self.ssi] for x in self.balance]
170 lbox *= self.hyp['box']
171 lobj *= self.hyp['obj']
172 lcls *= self.hyp['cls']
173 bs = tobj.shape[0] # batch size
174
175 return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
176
177 def build_targets(self, p, targets):
178 # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
179 na, nt = self.na, targets.shape[0] # number of anchors, targets
180 tcls, tbox, indices, anch = [], [], [], []
181 gain = torch.ones(7, device=self.device) # normalized to gridspace gain
182 ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
183 targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None]), 2) # append anchor indices
184
185 g = 0.5 # bias
186 off = torch.tensor(
187 [
188 [0, 0],
189 [1, 0],
190 [0, 1],
191 [-1, 0],
192 [0, -1], # j,k,l,m
193 # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
194 ],
195 device=self.device).float() * g # offsets
196
197 for i in range(self.nl):
198 anchors, shape = self.anchors[i], p[i].shape
199 gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain
200
201 # Match targets to anchors
202 t = targets * gain # shape(3,n,7)
203 if nt:
204 # Matches
205 r = t[..., 4:6] / anchors[:, None] # wh ratio
206 j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare
207 # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
208 t = t[j] # filter
209
210 # Offsets
211 gxy = t[:, 2:4] # grid xy
212 gxi = gain[[2, 3]] - gxy # inverse
213 j, k = ((gxy % 1 < g) & (gxy > 1)).T
214 l, m = ((gxi % 1 < g) & (gxi > 1)).T
215 j = torch.stack((torch.ones_like(j), j, k, l, m))
216 t = t.repeat((5, 1, 1))[j]
217 offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
218 else:
219 t = targets[0]
220 offsets = 0
221
222 # Define
223 bc, gxy, gwh, a = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors
224 a, (b, c) = a.long().view(-1), bc.long().T # anchors, image, class
225 gij = (gxy - offsets).long()
226 gi, gj = gij.T # grid indices
227
228 # Append
229 indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid
230 tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
231 anch.append(anchors[a]) # anchors
232 tcls.append(c) # class
233
234 return tcls, tbox, indices, anch
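A hedged usage sketch of ComputeLoss (illustrative only; `model` is assumed to be a YOLOv5 detection model with `.hyp` set, `targets` an (n, 6) tensor of (image, class, x, y, w, h) rows):

    def _compute_loss_demo(model, imgs, targets):
        compute_loss = ComputeLoss(model)                # builds criteria from model.hyp and Detect() anchors
        preds = model(imgs)                              # list of per-layer predictions in training mode
        loss, loss_items = compute_loss(preds, targets)
        return loss, loss_items                          # total loss (scaled by batch size), detached (lbox, lobj, lcls)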
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Model validation metrics
4 """
5
6 import math
7 import warnings
8 from pathlib import Path
9
10 import matplotlib.pyplot as plt
11 import numpy as np
12 import torch
13
14 from utils import TryExcept, threaded
15
16
17 def fitness(x):
18 # Model fitness as a weighted combination of metrics
19 w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
20 return (x[:, :4] * w).sum(1)
21
22
23 def smooth(y, f=0.05):
24 # Box filter of fraction f
25 nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd)
26 p = np.ones(nf // 2) # ones padding
27 yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded
28 return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed
29
30
31 def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""):
32 """ Compute the average precision, given the recall and precision curves.
33 Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
34 # Arguments
35 tp: True positives (nparray, nx1 or nx10).
36 conf: Objectness value from 0-1 (nparray).
37 pred_cls: Predicted object classes (nparray).
38 target_cls: True object classes (nparray).
39 plot: Plot precision-recall curve at mAP@0.5
40 save_dir: Plot save directory
41 # Returns
42 The average precision as computed in py-faster-rcnn.
43 """
44
45 # Sort by objectness
46 i = np.argsort(-conf)
47 tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
48
49 # Find unique classes
50 unique_classes, nt = np.unique(target_cls, return_counts=True)
51 nc = unique_classes.shape[0] # number of classes
52
53 # Create Precision-Recall curve and compute AP for each class
54 px, py = np.linspace(0, 1, 1000), [] # for plotting
55 ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
56 for ci, c in enumerate(unique_classes):
57 i = pred_cls == c
58 n_l = nt[ci] # number of labels
59 n_p = i.sum() # number of predictions
60 if n_p == 0 or n_l == 0:
61 continue
62
63 # Accumulate FPs and TPs
64 fpc = (1 - tp[i]).cumsum(0)
65 tpc = tp[i].cumsum(0)
66
67 # Recall
68 recall = tpc / (n_l + eps) # recall curve
69 r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
70
71 # Precision
72 precision = tpc / (tpc + fpc) # precision curve
73 p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
74
75 # AP from recall-precision curve
76 for j in range(tp.shape[1]):
77 ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
78 if plot and j == 0:
79 py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
80
81 # Compute F1 (harmonic mean of precision and recall)
82 f1 = 2 * p * r / (p + r + eps)
83 names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data
84 names = dict(enumerate(names)) # to dict
85 if plot:
86 plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)
87 plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1')
88 plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision')
89 plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall')
90
91 i = smooth(f1.mean(0), 0.1).argmax() # max F1 index
92 p, r, f1 = p[:, i], r[:, i], f1[:, i]
93 tp = (r * nt).round() # true positives
94 fp = (tp / (p + eps) - tp).round() # false positives
95 return tp, fp, p, r, f1, ap, unique_classes.astype(int)
96
97
98 def compute_ap(recall, precision):
99 """ Compute the average precision, given the recall and precision curves
100 # Arguments
101 recall: The recall curve (list)
102 precision: The precision curve (list)
103 # Returns
104 Average precision, precision curve, recall curve
105 """
106
107 # Append sentinel values to beginning and end
108 mrec = np.concatenate(([0.0], recall, [1.0]))
109 mpre = np.concatenate(([1.0], precision, [0.0]))
110
111 # Compute the precision envelope
112 mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
113
114 # Integrate area under curve
115 method = 'interp' # methods: 'continuous', 'interp'
116 if method == 'interp':
117 x = np.linspace(0, 1, 101) # 101-point interp (COCO)
118 ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
119 else: # 'continuous'
120 i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
121 ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
122
123 return ap, mpre, mrec
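A toy example of the 101-point interpolation above (numbers are illustrative only):

    recall = np.array([0.1, 0.4, 0.8])
    precision = np.array([1.0, 0.8, 0.6])
    ap, mpre, mrec = compute_ap(recall, precision)
    print(f'AP = {ap:.3f}')  # area under the interpolated precision envelope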
124
125
126 class ConfusionMatrix:
127 # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
128 def __init__(self, nc, conf=0.25, iou_thres=0.45):
129 self.matrix = np.zeros((nc + 1, nc + 1))
130 self.nc = nc # number of classes
131 self.conf = conf
132 self.iou_thres = iou_thres
133
134 def process_batch(self, detections, labels):
135 """
136 Update the confusion matrix with one batch of detections and labels.
137 Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
138 Arguments:
139 detections (Array[N, 6]), x1, y1, x2, y2, conf, class
140 labels (Array[M, 5]), class, x1, y1, x2, y2
141 Returns:
142 None, updates confusion matrix accordingly
143 """
144 if detections is None:
145 gt_classes = labels.int()
146 for gc in gt_classes:
147 self.matrix[self.nc, gc] += 1 # background FN
148 return
149
150 detections = detections[detections[:, 4] > self.conf]
151 gt_classes = labels[:, 0].int()
152 detection_classes = detections[:, 5].int()
153 iou = box_iou(labels[:, 1:], detections[:, :4])
154
155 x = torch.where(iou > self.iou_thres)
156 if x[0].shape[0]:
157 matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
158 if x[0].shape[0] > 1:
159 matches = matches[matches[:, 2].argsort()[::-1]]
160 matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
161 matches = matches[matches[:, 2].argsort()[::-1]]
162 matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
163 else:
164 matches = np.zeros((0, 3))
165
166 n = matches.shape[0] > 0
167 m0, m1, _ = matches.transpose().astype(int)
168 for i, gc in enumerate(gt_classes):
169 j = m0 == i
170 if n and sum(j) == 1:
171 self.matrix[detection_classes[m1[j]], gc] += 1 # correct
172 else:
173 self.matrix[self.nc, gc] += 1 # true background
174
175 if n:
176 for i, dc in enumerate(detection_classes):
177 if not any(m1 == i):
178 self.matrix[dc, self.nc] += 1 # predicted background
179
180 def matrix(self):
181 return self.matrix
182
183 def tp_fp(self):
184 tp = self.matrix.diagonal() # true positives
185 fp = self.matrix.sum(1) - tp # false positives
186 # fn = self.matrix.sum(0) - tp # false negatives (missed detections)
187 return tp[:-1], fp[:-1] # remove background class
188
189 @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure: ')
190 def plot(self, normalize=True, save_dir='', names=()):
191 import seaborn as sn
192
193 array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns
194 array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
195
196 fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
197 nc, nn = self.nc, len(names) # number of classes, names
198 sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size
199 labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels
200 ticklabels = (names + ['background']) if labels else "auto"
201 with warnings.catch_warnings():
202 warnings.simplefilter('ignore') # suppress empty matrix RuntimeWarning: All-NaN slice encountered
203 sn.heatmap(array,
204 ax=ax,
205 annot=nc < 30,
206 annot_kws={
207 "size": 8},
208 cmap='Blues',
209 fmt='.2f',
210 square=True,
211 vmin=0.0,
212 xticklabels=ticklabels,
213 yticklabels=ticklabels).set_facecolor((1, 1, 1))
214 ax.set_xlabel('True')
215 ax.set_ylabel('Predicted')
216 ax.set_title('Confusion Matrix')
217 fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
218 plt.close(fig)
219
220 def print(self):
221 for i in range(self.nc + 1):
222 print(' '.join(map(str, self.matrix[i])))
223
224
225 def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
226 # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4)
227
228 # Get the coordinates of bounding boxes
229 if xywh: # transform from xywh to xyxy
230 (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, 1), box2.chunk(4, 1)
231 w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
232 b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
233 b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
234 else: # x1, y1, x2, y2 = box1
235 b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, 1)
236 b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, 1)
237 w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
238 w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
239
240 # Intersection area
241 inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
242 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
243
244 # Union Area
245 union = w1 * h1 + w2 * h2 - inter + eps
246
247 # IoU
248 iou = inter / union
249 if CIoU or DIoU or GIoU:
250 cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width
251 ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
252 if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
253 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
254 rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2
255 if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
256 v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2)
257 with torch.no_grad():
258 alpha = v / (v - iou + (1 + eps))
259 return iou - (rho2 / c2 + v * alpha) # CIoU
260 return iou - rho2 / c2 # DIoU
261 c_area = cw * ch + eps # convex area
262 return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf
263 return iou # IoU
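A quick sanity check for the variants above (illustrative only), using two overlapping unit boxes in xywh form:

    b1 = torch.tensor([[0.5, 0.5, 1.0, 1.0]])   # unit box centred at (0.5, 0.5)
    b2 = torch.tensor([[1.0, 0.5, 1.0, 1.0]])   # same box shifted right by 0.5
    print(bbox_iou(b1, b2).item())              # plain IoU = 0.5 / 1.5 ≈ 0.333
    print(bbox_iou(b1, b2, CIoU=True).item())   # ≈ 0.256: IoU minus the centre-distance penalty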
264
265
266 def box_area(box):
267 # box = xyxy(4,n)
268 return (box[2] - box[0]) * (box[3] - box[1])
269
270
271 def box_iou(box1, box2, eps=1e-7):
272 # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
273 """
274 Return intersection-over-union (Jaccard index) of boxes.
275 Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
276 Arguments:
277 box1 (Tensor[N, 4])
278 box2 (Tensor[M, 4])
279 Returns:
280 iou (Tensor[N, M]): the NxM matrix containing the pairwise
281 IoU values for every element in boxes1 and boxes2
282 """
283
284 # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
285 (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1)
286 inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
287
288 # IoU = inter / (area1 + area2 - inter)
289 return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps)
290
291
292 def bbox_ioa(box1, box2, eps=1e-7):
293 """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
294 box1: np.array of shape(4)
295 box2: np.array of shape(nx4)
296 returns: np.array of shape(n)
297 """
298
299 # Get the coordinates of bounding boxes
300 b1_x1, b1_y1, b1_x2, b1_y2 = box1
301 b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
302
303 # Intersection area
304 inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
305 (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
306
307 # box2 area
308 box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps
309
310 # Intersection over box2 area
311 return inter_area / box2_area
312
313
314 def wh_iou(wh1, wh2, eps=1e-7):
315 # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
316 wh1 = wh1[:, None] # [N,1,2]
317 wh2 = wh2[None] # [1,M,2]
318 inter = torch.min(wh1, wh2).prod(2) # [N,M]
319 return inter / (wh1.prod(2) + wh2.prod(2) - inter + eps) # iou = inter / (area1 + area2 - inter)
320
321
322 # Plots ----------------------------------------------------------------------------------------------------------------
323
324
325 @threaded
326 def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
327 # Precision-recall curve
328 fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
329 py = np.stack(py, axis=1)
330
331 if 0 < len(names) < 21: # display per-class legend if < 21 classes
332 for i, y in enumerate(py.T):
333 ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision)
334 else:
335 ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
336
337 ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
338 ax.set_xlabel('Recall')
339 ax.set_ylabel('Precision')
340 ax.set_xlim(0, 1)
341 ax.set_ylim(0, 1)
342 ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
343 ax.set_title('Precision-Recall Curve')
344 fig.savefig(save_dir, dpi=250)
345 plt.close(fig)
346
347
348 @threaded
349 def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'):
350 # Metric-confidence curve
351 fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
352
353 if 0 < len(names) < 21: # display per-class legend if < 21 classes
354 for i, y in enumerate(py):
355 ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric)
356 else:
357 ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric)
358
359 y = smooth(py.mean(0), 0.05)
360 ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}')
361 ax.set_xlabel(xlabel)
362 ax.set_ylabel(ylabel)
363 ax.set_xlim(0, 1)
364 ax.set_ylim(0, 1)
365 ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
366 ax.set_title(f'{ylabel}-Confidence Curve')
367 fig.savefig(save_dir, dpi=250)
368 plt.close(fig)
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Plotting utils
4 """
5
6 import contextlib
7 import math
8 import os
9 from copy import copy
10 from pathlib import Path
11 from urllib.error import URLError
12
13 import cv2
14 import matplotlib
15 import matplotlib.pyplot as plt
16 import numpy as np
17 import pandas as pd
18 import seaborn as sn
19 import torch
20 from PIL import Image, ImageDraw, ImageFont
21
22 from utils import TryExcept, threaded
23 from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path,
24 is_ascii, xywh2xyxy, xyxy2xywh)
25 from utils.metrics import fitness
26 from utils.segment.general import scale_image
27
28 # Settings
29 RANK = int(os.getenv('RANK', -1))
30 matplotlib.rc('font', **{'size': 11})
31 matplotlib.use('Agg') # for writing to files only
32
33
34 class Colors:
35 # Ultralytics color palette https://ultralytics.com/
36 def __init__(self):
37 # hex = matplotlib.colors.TABLEAU_COLORS.values()
38 hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
39 '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
40 self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
41 self.n = len(self.palette)
42
43 def __call__(self, i, bgr=False):
44 c = self.palette[int(i) % self.n]
45 return (c[2], c[1], c[0]) if bgr else c
46
47 @staticmethod
48 def hex2rgb(h): # rgb order (PIL)
49 return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
50
51
52 colors = Colors() # create instance for 'from utils.plots import colors'
53
54
55 def check_pil_font(font=FONT, size=10):
56 # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
57 font = Path(font)
58 font = font if font.exists() else (CONFIG_DIR / font.name)
59 try:
60 return ImageFont.truetype(str(font) if font.exists() else font.name, size)
61 except Exception: # download if missing
62 try:
63 check_font(font)
64 return ImageFont.truetype(str(font), size)
65 except TypeError:
66 check_requirements('Pillow>=8.4.0') # known issue https://github.com/ultralytics/yolov5/issues/5374
67 except URLError: # not online
68 return ImageFont.load_default()
69
70
71 class Annotator:
72 # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
73 def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
74 assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
75 non_ascii = not is_ascii(example) # non-latin labels, i.e. asian, arabic, cyrillic
76 self.pil = pil or non_ascii
77 if self.pil: # use PIL
78 self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
79 self.draw = ImageDraw.Draw(self.im)
80 self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font,
81 size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12))
82 else: # use cv2
83 self.im = im
84 self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width
85
86 def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
87 # Add one xyxy box to image with label
88 if self.pil or not is_ascii(label):
89 self.draw.rectangle(box, width=self.lw, outline=color) # box
90 if label:
91 w, h = self.font.getsize(label) # text width, height
92 outside = box[1] - h >= 0 # label fits outside box
93 self.draw.rectangle(
94 (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
95 box[1] + 1 if outside else box[1] + h + 1),
96 fill=color,
97 )
98 # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0
99 self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
100 else: # cv2
101 p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
102 cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
103 if label:
104 tf = max(self.lw - 1, 1) # font thickness
105 w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height
106 outside = p1[1] - h >= 3
107 p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
108 # cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled
109 # cv2.rectangle(self.im, p1, p2, color, 1, cv2.LINE_AA) # outline
110 cv2.putText(self.im,
111 # label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
112 label, (p1[0], p1[1] - 2),
113 0,
114 # self.lw / 3,
115 self.lw / 5,
116 txt_color,
117 # thickness=tf,
118 thickness=1,
119 lineType=cv2.LINE_AA)
120
121 def masks(self, masks, colors, im_gpu=None, alpha=0.5):
122 """Plot masks at once.
123 Args:
124 masks (tensor): predicted masks on cuda, shape: [n, h, w]
125 colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
126 im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
127 alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
128 """
129 if self.pil:
130 # convert to numpy first
131 self.im = np.asarray(self.im).copy()
132 if im_gpu is None:
133 # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
134 if len(masks) == 0:
135 return
136 if isinstance(masks, torch.Tensor):
137 masks = torch.as_tensor(masks, dtype=torch.uint8)
138 masks = masks.permute(1, 2, 0).contiguous()
139 masks = masks.cpu().numpy()
140 # masks = np.ascontiguousarray(masks.transpose(1, 2, 0))
141 masks = scale_image(masks.shape[:2], masks, self.im.shape)
142 masks = np.asarray(masks, dtype=np.float32)
143 colors = np.asarray(colors, dtype=np.float32) # shape(n,3)
144 s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together
145 masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3)
146 self.im[:] = masks * alpha + self.im * (1 - s * alpha)
147 else:
148 if len(masks) == 0:
149 self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
150 colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
151 colors = colors[:, None, None] # shape(n,1,1,3)
152 masks = masks.unsqueeze(3) # shape(n,h,w,1)
153 masks_color = masks * (colors * alpha) # shape(n,h,w,3)
154
155 inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1)
156 mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3)
157
158 im_gpu = im_gpu.flip(dims=[0]) # flip channel
159 im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3)
160 im_gpu = im_gpu * inv_alph_masks[-1] + mcs
161 im_mask = (im_gpu * 255).byte().cpu().numpy()
162 self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape)
163 if self.pil:
164 # convert im back to PIL and update draw
165 self.fromarray(self.im)
166
167 def rectangle(self, xy, fill=None, outline=None, width=1):
168 # Add rectangle to image (PIL-only)
169 self.draw.rectangle(xy, fill, outline, width)
170
171 def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
172 # Add text to image (PIL-only)
173 if anchor == 'bottom': # start y from font bottom
174 w, h = self.font.getsize(text) # text width, height
175 xy[1] += 1 - h
176 self.draw.text(xy, text, fill=txt_color, font=self.font)
177
178 def fromarray(self, im):
179 # Update self.im from a numpy array
180 self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
181 self.draw = ImageDraw.Draw(self.im)
182
183 def result(self):
184 # Return annotated image as array
185 return np.asarray(self.im)
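A short usage sketch of Annotator via the cv2 branch (illustrative only; the box and label are made up):

    def _annotator_demo():
        im = np.zeros((480, 640, 3), dtype=np.uint8)          # blank BGR canvas
        ann = Annotator(im, line_width=2, example='person')   # ASCII example -> cv2 path
        ann.box_label([50, 60, 200, 220], 'person 0.91', color=colors(0))
        return ann.result()                                   # annotated array, e.g. for cv2.imwrite()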
186
187
188 def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
189 """
190 x: Features to be visualized
191 module_type: Module type
192 stage: Module stage within model
193 n: Maximum number of feature maps to plot
194 save_dir: Directory to save results
195 """
196 if 'Detect' not in module_type:
197 batch, channels, height, width = x.shape # batch, channels, height, width
198 if height > 1 and width > 1:
199 f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename
200
201 blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels
202 n = min(n, channels) # number of plots
203 fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # ceil(n/8) rows x 8 cols
204 ax = ax.ravel()
205 plt.subplots_adjust(wspace=0.05, hspace=0.05)
206 for i in range(n):
207 ax[i].imshow(blocks[i].squeeze()) # cmap='gray'
208 ax[i].axis('off')
209
210 LOGGER.info(f'Saving {f}... ({n}/{channels})')
211 plt.savefig(f, dpi=300, bbox_inches='tight')
212 plt.close()
213 np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy()) # npy save
214
215
216 def hist2d(x, y, n=100):
217 # 2d histogram used in labels.png and evolve.png
218 xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n)
219 hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges))
220 xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1)
221 yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1)
222 return np.log(hist[xidx, yidx])
223
224
225 def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
226 from scipy.signal import butter, filtfilt
227
228 # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
229 def butter_lowpass(cutoff, fs, order):
230 nyq = 0.5 * fs
231 normal_cutoff = cutoff / nyq
232 return butter(order, normal_cutoff, btype='low', analog=False)
233
234 b, a = butter_lowpass(cutoff, fs, order=order)
235 return filtfilt(b, a, data) # forward-backward filter
236
237
238 def output_to_target(output, max_det=300):
239 # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
240 targets = []
241 for i, o in enumerate(output):
242 box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
243 j = torch.full((conf.shape[0], 1), i)
244 targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
245 return torch.cat(targets, 0).numpy()
246
247
248 @threaded
249 def plot_images(images, targets, paths=None, fname='images.jpg', names=None):
250 # Plot image grid with labels
251 if isinstance(images, torch.Tensor):
252 images = images.cpu().float().numpy()
253 if isinstance(targets, torch.Tensor):
254 targets = targets.cpu().numpy()
255
256 max_size = 1920 # max image size
257 max_subplots = 16 # max image subplots, i.e. 4x4
258 bs, _, h, w = images.shape # batch size, _, height, width
259 bs = min(bs, max_subplots) # limit plot images
260 ns = np.ceil(bs ** 0.5) # number of subplots (square)
261 if np.max(images[0]) <= 1:
262 images *= 255 # de-normalise (optional)
263
264 # Build Image
265 mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
266 for i, im in enumerate(images):
267 if i == max_subplots: # cap the mosaic at max_subplots images
268 break
269 x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
270 im = im.transpose(1, 2, 0)
271 mosaic[y:y + h, x:x + w, :] = im
272
273 # Resize (optional)
274 scale = max_size / ns / max(h, w)
275 if scale < 1:
276 h = math.ceil(scale * h)
277 w = math.ceil(scale * w)
278 mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
279
280 # Annotate
281 fs = int((h + w) * ns * 0.01) # font size
282 annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
283 for i in range(i + 1):
284 x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
285 annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders
286 if paths:
287 annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames
288 if len(targets) > 0:
289 ti = targets[targets[:, 0] == i] # image targets
290 boxes = xywh2xyxy(ti[:, 2:6]).T
291 classes = ti[:, 1].astype('int')
292 labels = ti.shape[1] == 6 # labels if no conf column
293 conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred)
294
295 if boxes.shape[1]:
296 if boxes.max() <= 1.01: # if normalized with tolerance 0.01
297 boxes[[0, 2]] *= w # scale to pixels
298 boxes[[1, 3]] *= h
299 elif scale < 1: # absolute coords need scale if image scales
300 boxes *= scale
301 boxes[[0, 2]] += x
302 boxes[[1, 3]] += y
303 for j, box in enumerate(boxes.T.tolist()):
304 cls = classes[j]
305 color = colors(cls)
306 cls = names[cls] if names else cls
307 if labels or conf[j] > 0.25: # 0.25 conf thresh
308 label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
309 annotator.box_label(box, label, color=color)
310 annotator.im.save(fname) # save
311
312
313 def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
314 # Plot LR simulating training for full epochs
315 optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals
316 y = []
317 for _ in range(epochs):
318 scheduler.step()
319 y.append(optimizer.param_groups[0]['lr'])
320 plt.plot(y, '.-', label='LR')
321 plt.xlabel('epoch')
322 plt.ylabel('LR')
323 plt.grid()
324 plt.xlim(0, epochs)
325 plt.ylim(0)
326 plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
327 plt.close()
328
329
330 def plot_val_txt(): # from utils.plots import *; plot_val()
331 # Plot val.txt histograms
332 x = np.loadtxt('val.txt', dtype=np.float32)
333 box = xyxy2xywh(x[:, :4])
334 cx, cy = box[:, 0], box[:, 1]
335
336 fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True)
337 ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
338 ax.set_aspect('equal')
339 plt.savefig('hist2d.png', dpi=300)
340
341 fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True)
342 ax[0].hist(cx, bins=600)
343 ax[1].hist(cy, bins=600)
344 plt.savefig('hist1d.png', dpi=200)
345
346
347 def plot_targets_txt(): # from utils.plots import *; plot_targets_txt()
348 # Plot targets.txt histograms
349 x = np.loadtxt('targets.txt', dtype=np.float32).T
350 s = ['x targets', 'y targets', 'width targets', 'height targets']
351 fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
352 ax = ax.ravel()
353 for i in range(4):
354 ax[i].hist(x[i], bins=100, label=f'{x[i].mean():.3g} +/- {x[i].std():.3g}')
355 ax[i].legend()
356 ax[i].set_title(s[i])
357 plt.savefig('targets.jpg', dpi=200)
358
359
360 def plot_val_study(file='', dir='', x=None): # from utils.plots import *; plot_val_study()
361 # Plot file=study.txt generated by val.py (or plot all study*.txt in dir)
362 save_dir = Path(file).parent if file else Path(dir)
363 plot2 = False # plot additional results
364 if plot2:
365 ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel()
366
367 fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
368 # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]:
369 for f in sorted(save_dir.glob('study*.txt')):
370 y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
371 x = np.arange(y.shape[1]) if x is None else np.array(x)
372 if plot2:
373 s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_preprocess (ms/img)', 't_inference (ms/img)', 't_NMS (ms/img)']
374 for i in range(7):
375 ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
376 ax[i].set_title(s[i])
377
378 j = y[3].argmax() + 1
379 ax2.plot(y[5, 1:j],
380 y[3, 1:j] * 1E2,
381 '.-',
382 linewidth=2,
383 markersize=8,
384 label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO'))
385
386 ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
387 'k.-',
388 linewidth=2,
389 markersize=8,
390 alpha=.25,
391 label='EfficientDet')
392
393 ax2.grid(alpha=0.2)
394 ax2.set_yticks(np.arange(20, 60, 5))
395 ax2.set_xlim(0, 57)
396 ax2.set_ylim(25, 55)
397 ax2.set_xlabel('GPU Speed (ms/img)')
398 ax2.set_ylabel('COCO AP val')
399 ax2.legend(loc='lower right')
400 f = save_dir / 'study.png'
401 print(f'Saving {f}...')
402 plt.savefig(f, dpi=300)
403
404
405 @TryExcept() # known issue https://github.com/ultralytics/yolov5/issues/5395
406 def plot_labels(labels, names=(), save_dir=Path('')):
407 # plot dataset labels
408 LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
409 c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes
410 nc = int(c.max() + 1) # number of classes
411 x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
412
413 # seaborn correlogram
414 sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
415 plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
416 plt.close()
417
418 # matplotlib labels
419 matplotlib.use('svg') # faster
420 ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
421 y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
422 with contextlib.suppress(Exception): # color histogram bars by class
423 [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # known issue #3195
424 ax[0].set_ylabel('instances')
425 if 0 < len(names) < 30:
426 ax[0].set_xticks(range(len(names)))
427 ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
428 else:
429 ax[0].set_xlabel('classes')
430 sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
431 sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)
432
433 # rectangles
434 labels[:, 1:3] = 0.5 # center
435 labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000
436 img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
437 for cls, *box in labels[:1000]:
438 ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot
439 ax[1].imshow(img)
440 ax[1].axis('off')
441
442 for a in [0, 1, 2, 3]:
443 for s in ['top', 'right', 'left', 'bottom']:
444 ax[a].spines[s].set_visible(False)
445
446 plt.savefig(save_dir / 'labels.jpg', dpi=200)
447 matplotlib.use('Agg')
448 plt.close()
449
450
451 def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=False, f=Path('images.jpg')):
452 # Show classification image grid with labels (optional) and predictions (optional)
453 from utils.augmentations import denormalize
454
455 names = names or [f'class{i}' for i in range(1000)]
456 blocks = torch.chunk(denormalize(im.clone()).cpu().float(), len(im),
457 dim=0) # split the batch into len(im) single-image blocks
458 n = min(len(blocks), nmax) # number of plots
459 m = min(8, round(n ** 0.5)) # 8 x 8 default
460 fig, ax = plt.subplots(math.ceil(n / m), m) # ceil(n/m) rows x m cols
461 ax = ax.ravel() if m > 1 else [ax]
462 # plt.subplots_adjust(wspace=0.05, hspace=0.05)
463 for i in range(n):
464 ax[i].imshow(blocks[i].squeeze().permute((1, 2, 0)).numpy().clip(0.0, 1.0))
465 ax[i].axis('off')
466 if labels is not None:
467 s = names[labels[i]] + (f'—{names[pred[i]]}' if pred is not None else '')
468 ax[i].set_title(s, fontsize=8, verticalalignment='top')
469 plt.savefig(f, dpi=300, bbox_inches='tight')
470 plt.close()
471 if verbose:
472 LOGGER.info(f"Saving {f}")
473 if labels is not None:
474 LOGGER.info('True: ' + ' '.join(f'{names[i]:3s}' for i in labels[:nmax]))
475 if pred is not None:
476 LOGGER.info('Predicted:' + ' '.join(f'{names[i]:3s}' for i in pred[:nmax]))
477 return f
478
479
480 def plot_evolve(evolve_csv='path/to/evolve.csv'): # from utils.plots import *; plot_evolve()
481 # Plot evolve.csv hyp evolution results
482 evolve_csv = Path(evolve_csv)
483 data = pd.read_csv(evolve_csv)
484 keys = [x.strip() for x in data.columns]
485 x = data.values
486 f = fitness(x)
487 j = np.argmax(f) # max fitness index
488 plt.figure(figsize=(10, 12), tight_layout=True)
489 matplotlib.rc('font', **{'size': 8})
490 print(f'Best results from row {j} of {evolve_csv}:')
491 for i, k in enumerate(keys[7:]):
492 v = x[:, 7 + i]
493 mu = v[j] # best single result
494 plt.subplot(6, 5, i + 1)
495 plt.scatter(v, f, c=hist2d(v, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
496 plt.plot(mu, f.max(), 'k+', markersize=15)
497 plt.title(f'{k} = {mu:.3g}', fontdict={'size': 9})
498 if i % 5 != 0:
499 plt.yticks([])
500 print(f'{k:>15}: {mu:.3g}')
501 f = evolve_csv.with_suffix('.png') # filename
502 plt.savefig(f, dpi=200)
503 plt.close()
504 print(f'Saved {f}')
505
506
507 def plot_results(file='path/to/results.csv', dir=''):
508 # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
509 save_dir = Path(file).parent if file else Path(dir)
510 fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
511 ax = ax.ravel()
512 files = list(save_dir.glob('results*.csv'))
513 assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
514 for f in files:
515 try:
516 data = pd.read_csv(f)
517 s = [x.strip() for x in data.columns]
518 x = data.values[:, 0]
519 for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]):
520 y = data.values[:, j].astype('float')
521 # y[y == 0] = np.nan # don't show zero values
522 ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8)
523 ax[i].set_title(s[j], fontsize=12)
524 # if j in [8, 9, 10]: # share train and val loss y axes
525 # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
526 except Exception as e:
527 LOGGER.info(f'Warning: Plotting error for {f}: {e}')
528 ax[1].legend()
529 fig.savefig(save_dir / 'results.png', dpi=200)
530 plt.close()
531
532
533 def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
534 # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
535 ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
536 s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
537 files = list(Path(save_dir).glob('frames*.txt'))
538 for fi, f in enumerate(files):
539 try:
540 results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows
541 n = results.shape[1] # number of rows
542 x = np.arange(start, min(stop, n) if stop else n)
543 results = results[:, x]
544 t = (results[0] - results[0].min()) # set t0=0s
545 results[0] = x
546 for i, a in enumerate(ax):
547 if i < len(results):
548 label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
549 a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
550 a.set_title(s[i])
551 a.set_xlabel('time (s)')
552 # if fi == len(files) - 1:
553 # a.set_ylim(bottom=0)
554 for side in ['top', 'right']:
555 a.spines[side].set_visible(False)
556 else:
557 a.remove()
558 except Exception as e:
559 print(f'Warning: Plotting error for {f}; {e}')
560 ax[1].legend()
561 plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
562
563
564 def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
565 # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
566 xyxy = torch.tensor(xyxy).view(-1, 4)
567 b = xyxy2xywh(xyxy) # boxes
568 if square:
569 b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square
570 b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad
571 xyxy = xywh2xyxy(b).long()
572 clip_boxes(xyxy, im.shape)
573 crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
574 if save:
575 file.parent.mkdir(parents=True, exist_ok=True) # make directory
576 f = str(increment_path(file).with_suffix('.jpg'))
577 # cv2.imwrite(f, crop) # save BGR, https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue
578 Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0) # save RGB
579 return crop
File mode changed
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Image augmentation functions
4 """
5
6 import math
7 import random
8
9 import cv2
10 import numpy as np
11
12 from ..augmentations import box_candidates
13 from ..general import resample_segments, segment2box
14
15
16 def mixup(im, labels, segments, im2, labels2, segments2):
17 # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
18 r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
19 im = (im * r + im2 * (1 - r)).astype(np.uint8)
20 labels = np.concatenate((labels, labels2), 0)
21 segments = np.concatenate((segments, segments2), 0)
22 return im, labels, segments
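A minimal sketch of the blend above (illustrative only; the empty label/segment arrays are placeholders):

    def _mixup_demo():
        im1 = np.full((64, 64, 3), 255, dtype=np.uint8)   # white image
        im2 = np.zeros((64, 64, 3), dtype=np.uint8)       # black image
        empty = np.zeros((0, 5)), np.zeros((0, 2))        # placeholder labels, segments
        im, labels, segments = mixup(im1, *empty, im2, *empty)
        return im  # roughly mid-grey: Beta(32, 32) concentrates r near 0.5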
23
24
25 def random_perspective(im,
26 targets=(),
27 segments=(),
28 degrees=10,
29 translate=.1,
30 scale=.1,
31 shear=10,
32 perspective=0.0,
33 border=(0, 0)):
34 # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
35 # targets = [cls, xyxy]
36
37 height = im.shape[0] + border[0] * 2 # shape(h,w,c)
38 width = im.shape[1] + border[1] * 2
39
40 # Center
41 C = np.eye(3)
42 C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
43 C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
44
45 # Perspective
46 P = np.eye(3)
47 P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
48 P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
49
50 # Rotation and Scale
51 R = np.eye(3)
52 a = random.uniform(-degrees, degrees)
53 # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
54 s = random.uniform(1 - scale, 1 + scale)
55 # s = 2 ** random.uniform(-scale, scale)
56 R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
57
58 # Shear
59 S = np.eye(3)
60 S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
61 S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
62
63 # Translation
64 T = np.eye(3)
65 T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels)
66 T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels)
67
68 # Combined rotation matrix
69 M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
70 if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
71 if perspective:
72 im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
73 else: # affine
74 im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
75
76 # Visualize
77 # import matplotlib.pyplot as plt
78 # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
79 # ax[0].imshow(im[:, :, ::-1]) # base
80 # ax[1].imshow(im2[:, :, ::-1]) # warped
81
82 # Transform label coordinates
83 n = len(targets)
84 new_segments = []
85 if n:
86 new = np.zeros((n, 4))
87 segments = resample_segments(segments) # upsample
88 for i, segment in enumerate(segments):
89 xy = np.ones((len(segment), 3))
90 xy[:, :2] = segment
91 xy = xy @ M.T # transform
92 xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine
93
94 # clip
95 new[i] = segment2box(xy, width, height)
96 new_segments.append(xy)
97
98 # filter candidates
99 i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01)
100 targets = targets[i]
101 targets[:, 1:5] = new[i]
102 new_segments = np.array(new_segments)[i]
103
104 return im, targets, new_segments
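A hedged usage sketch of random_perspective (illustrative only): with no targets, only the image is warped.

    def _random_perspective_demo():
        im = np.full((640, 640, 3), 114, dtype=np.uint8)  # grey canvas matching the border fill
        im, targets, segments = random_perspective(im, degrees=10, translate=0.1, scale=0.1, shear=10)
        return im.shape  # (640, 640, 3): canvas size unchanged because border=(0, 0)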
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Dataloaders
4 """
5
6 import os
7 import random
8
9 import cv2
10 import numpy as np
11 import torch
12 from torch.utils.data import DataLoader, distributed
13
14 from ..augmentations import augment_hsv, copy_paste, letterbox
15 from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker
16 from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
17 from ..torch_utils import torch_distributed_zero_first
18 from .augmentations import mixup, random_perspective
19
20 RANK = int(os.getenv('RANK', -1))
21
22
23 def create_dataloader(path,
24 imgsz,
25 batch_size,
26 stride,
27 single_cls=False,
28 hyp=None,
29 augment=False,
30 cache=False,
31 pad=0.0,
32 rect=False,
33 rank=-1,
34 workers=8,
35 image_weights=False,
36 quad=False,
37 prefix='',
38 shuffle=False,
39 mask_downsample_ratio=1,
40 overlap_mask=False):
41 if rect and shuffle:
42 LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
43 shuffle = False
44 with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
45 dataset = LoadImagesAndLabelsAndMasks(
46 path,
47 imgsz,
48 batch_size,
49 augment=augment, # augmentation
50 hyp=hyp, # hyperparameters
51 rect=rect, # rectangular batches
52 cache_images=cache,
53 single_cls=single_cls,
54 stride=int(stride),
55 pad=pad,
56 image_weights=image_weights,
57 prefix=prefix,
58 downsample_ratio=mask_downsample_ratio,
59 overlap=overlap_mask)
60
61 batch_size = min(batch_size, len(dataset))
62 nd = torch.cuda.device_count() # number of CUDA devices
63 nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
64 sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
65 loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
66 generator = torch.Generator()
67 generator.manual_seed(6148914691236517205 + RANK)
68 return loader(
69 dataset,
70 batch_size=batch_size,
71 shuffle=shuffle and sampler is None,
72 num_workers=nw,
73 sampler=sampler,
74 pin_memory=True,
75 collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
76 worker_init_fn=seed_worker,
77 generator=generator,
78 ), dataset
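A hedged usage sketch of create_dataloader (illustrative only; the dataset path and `hyp` dict are placeholders):

    def _segment_dataloader_demo(hyp):
        loader, dataset = create_dataloader('data/coco128-seg/images/train2017',  # hypothetical path
                                            imgsz=640,
                                            batch_size=16,
                                            stride=32,
                                            hyp=hyp,
                                            augment=True,
                                            shuffle=True,
                                            overlap_mask=True)
        imgs, labels, paths, shapes, masks = next(iter(loader))  # one collated batch
        return imgs.shape, masks.shape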
79
80
81 class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
82
83 def __init__(
84 self,
85 path,
86 img_size=640,
87 batch_size=16,
88 augment=False,
89 hyp=None,
90 rect=False,
91 image_weights=False,
92 cache_images=False,
93 single_cls=False,
94 stride=32,
95 pad=0,
96 prefix="",
97 downsample_ratio=1,
98 overlap=False,
99 ):
100 super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls,
101 stride, pad, prefix)
102 self.downsample_ratio = downsample_ratio
103 self.overlap = overlap
104
105 def __getitem__(self, index):
106 index = self.indices[index] # linear, shuffled, or image_weights
107
108 hyp = self.hyp
109 mosaic = self.mosaic and random.random() < hyp['mosaic']
110 masks = []
111 if mosaic:
112 # Load mosaic
113 img, labels, segments = self.load_mosaic(index)
114 shapes = None
115
116 # MixUp augmentation
117 if random.random() < hyp["mixup"]:
118 img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))
119
120 else:
121 # Load image
122 img, (h0, w0), (h, w) = self.load_image(index)
123
124 # Letterbox
125 shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
126 img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
127 shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
128
129 labels = self.labels[index].copy()
130 # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy
131 segments = self.segments[index].copy()
132 if len(segments):
133 for i_s in range(len(segments)):
134 segments[i_s] = xyn2xy(
135 segments[i_s],
136 ratio[0] * w,
137 ratio[1] * h,
138 padw=pad[0],
139 padh=pad[1],
140 )
141 if labels.size: # normalized xywh to pixel xyxy format
142 labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
143
144 if self.augment:
145 img, labels, segments = random_perspective(img,
146 labels,
147 segments=segments,
148 degrees=hyp["degrees"],
149 translate=hyp["translate"],
150 scale=hyp["scale"],
151 shear=hyp["shear"],
152 perspective=hyp["perspective"])
153
154 nl = len(labels) # number of labels
155 if nl:
156 labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
157 if self.overlap:
158 masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
159 segments,
160 downsample_ratio=self.downsample_ratio)
161 masks = masks[None] # (640, 640) -> (1, 640, 640)
162 labels = labels[sorted_idx]
163 else:
164 masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)
165
166 masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
167 self.downsample_ratio, img.shape[1] //
168 self.downsample_ratio))
169 # TODO: albumentations support
170 if self.augment:
171 # Albumentations
172 # some augmentations won't change boxes and masks,
173 # so leave them as-is for now.
174 img, labels = self.albumentations(img, labels)
175 nl = len(labels) # update after albumentations
176
177 # HSV color-space
178 augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])
179
180 # Flip up-down
181 if random.random() < hyp["flipud"]:
182 img = np.flipud(img)
183 if nl:
184 labels[:, 2] = 1 - labels[:, 2]
185 masks = torch.flip(masks, dims=[1])
186
187 # Flip left-right
188 if random.random() < hyp["fliplr"]:
189 img = np.fliplr(img)
190 if nl:
191 labels[:, 1] = 1 - labels[:, 1]
192 masks = torch.flip(masks, dims=[2])
193
194 # Cutouts # labels = cutout(img, labels, p=0.5)
195
196 labels_out = torch.zeros((nl, 6))
197 if nl:
198 labels_out[:, 1:] = torch.from_numpy(labels)
199
200 # Convert
201 img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
202 img = np.ascontiguousarray(img)
203
204 return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
205
206 def load_mosaic(self, index):
207 # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
208 labels4, segments4 = [], []
209 s = self.img_size
210 yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
211
212 # 3 additional image indices
213 indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
214 for i, index in enumerate(indices):
215 # Load image
216 img, _, (h, w) = self.load_image(index)
217
218 # place img in img4
219 if i == 0: # top left
220 img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
221 x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
222 x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
223 elif i == 1: # top right
224 x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
225 x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
226 elif i == 2: # bottom left
227 x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
228 x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
229 elif i == 3: # bottom right
230 x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
231 x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
232
233 img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
234 padw = x1a - x1b
235 padh = y1a - y1b
236
237 labels, segments = self.labels[index].copy(), self.segments[index].copy()
238
239 if labels.size:
240 labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
241 segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
242 labels4.append(labels)
243 segments4.extend(segments)
244
245 # Concat/clip labels
246 labels4 = np.concatenate(labels4, 0)
247 for x in (labels4[:, 1:], *segments4):
248 np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
249 # img4, labels4 = replicate(img4, labels4) # replicate
250
251 # Augment
252 img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"])
253 img4, labels4, segments4 = random_perspective(img4,
254 labels4,
255 segments4,
256 degrees=self.hyp["degrees"],
257 translate=self.hyp["translate"],
258 scale=self.hyp["scale"],
259 shear=self.hyp["shear"],
260 perspective=self.hyp["perspective"],
261 border=self.mosaic_border) # border to remove
262 return img4, labels4, segments4
263
264 @staticmethod
265 def collate_fn(batch):
266 img, label, path, shapes, masks = zip(*batch) # transposed
267 batched_masks = torch.cat(masks, 0)
268 for i, l in enumerate(label):
269 l[:, 0] = i # add target image index for build_targets()
270 return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks
271
272
273 def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
274 """
275 Args:
276 img_size (tuple): The image size.
277 polygons (np.ndarray): [N, M], N is the number of polygons,
278 M is the number of coordinate values (2 x the number of points).
279 """
280 mask = np.zeros(img_size, dtype=np.uint8)
281 polygons = np.asarray(polygons)
282 polygons = polygons.astype(np.int32)
283 shape = polygons.shape
284 polygons = polygons.reshape(shape[0], -1, 2)
285 cv2.fillPoly(mask, polygons, color=color)
286 nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
287 # NOTE: fill the polygon first and then resize, so the loss is
288 # computed the same way as when mask-ratio=1.
289 mask = cv2.resize(mask, (nw, nh))
290 return mask
291
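# Sanity-check sketch for polygon2mask: rasterize one square polygon
# (xyxyxyxy, illustrative coordinates) and downsample the mask by 2.
def _polygon2mask_demo():
    square = np.array([[10, 10, 50, 10, 50, 50, 10, 50]], dtype=np.float32)
    mask = polygon2mask((64, 64), square, color=1, downsample_ratio=2)
    assert mask.shape == (32, 32) and mask.max() == 1  # filled region survives the resize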
292
293 def polygons2masks(img_size, polygons, color, downsample_ratio=1):
294 """
295 Args:
296 img_size (tuple): The image size.
297 polygons (list[np.ndarray]): each element holds one instance's
298 polygon(s) as an [N, M] array, N polygons with M coordinate
299 values (2 x the number of points) each.
300 """
301 masks = []
302 for si in range(len(polygons)):
303 mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
304 masks.append(mask)
305 return np.array(masks)
306
307
308 def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
309 """Return a (640, 640) overlap mask."""
310 masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
311 dtype=np.int32 if len(segments) > 255 else np.uint8)
312 areas = []
313 ms = []
314 for si in range(len(segments)):
315 mask = polygon2mask(
316 img_size,
317 [segments[si].reshape(-1)],
318 downsample_ratio=downsample_ratio,
319 color=1,
320 )
321 ms.append(mask)
322 areas.append(mask.sum())
323 areas = np.asarray(areas)
324 index = np.argsort(-areas)
325 ms = np.array(ms)[index]
326 for i in range(len(segments)):
327 mask = ms[i] * (i + 1)
328 masks = masks + mask
329 masks = np.clip(masks, a_min=0, a_max=i + 1)
330 return masks, index
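
# Sketch of the overlap encoding above: two nested squares become a single
# index mask whose pixel values are 1-based instance ids, with larger
# instances drawn first so smaller ones stay on top (illustrative sizes).
def _polygons2masks_overlap_demo():
    big = np.array([0, 0, 40, 0, 40, 40, 0, 40], dtype=np.float32)
    small = np.array([20, 20, 30, 20, 30, 30, 20, 30], dtype=np.float32)
    masks, index = polygons2masks_overlap((64, 64), [big, small], downsample_ratio=1)
    assert masks.max() == 2 and index[0] == 0  # big instance sorted first by area
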
1 import cv2
2 import numpy as np
3 import torch
4 import torch.nn.functional as F
5
6
7 def crop_mask(masks, boxes):
8 """
9 "Crop" predicted masks by zeroing out everything not in the predicted bbox.
10 Vectorized by Chong (thanks Chong).
11
12 Args:
13 - masks should be a size [n, h, w] tensor of masks
14 - boxes should be a size [n, 4] tensor of xyxy bbox coords in mask-pixel units
15 """
16
17 n, h, w = masks.shape
18 x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
19 r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # cols shape(1,1,w)
20 c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,h,1)
21
22 return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
23
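# Small sketch: everything outside each box is zeroed. One all-ones 8x8 mask
# and a box over its top-left quadrant keep exactly 4x4 = 16 pixels.
def _crop_mask_demo():
    masks = torch.ones(1, 8, 8)
    boxes = torch.tensor([[0., 0., 4., 4.]])  # xyxy in mask pixels
    assert crop_mask(masks, boxes).sum() == 16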
24
25 def process_mask_upsample(protos, masks_in, bboxes, shape):
26 """
27 Crop after upsample.
28 protos: [mask_dim, mask_h, mask_w]
29 masks_in: [n, mask_dim], n is number of masks after nms
30 bboxes: [n, 4], n is number of masks after nms
31 shape: input image size, (h, w)
32
33 return: binary masks of shape (n, h, w)
34 """
35
36 c, mh, mw = protos.shape # CHW
37 masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
38 masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
39 masks = crop_mask(masks, bboxes) # CHW
40 return masks.gt_(0.5)
41
42
43 def process_mask(protos, masks_in, bboxes, shape, upsample=False):
44 """
45 Crop before upsample.
46 protos: [mask_dim, mask_h, mask_w]
47 masks_in: [n, mask_dim], n is number of masks after nms
48 bboxes: [n, 4], n is number of masks after nms
49 shape: input image size, (h, w)
50
51 return: binary masks, (n, h, w) if upsample else (n, mask_h, mask_w)
52 """
53
54 c, mh, mw = protos.shape # CHW
55 ih, iw = shape
56 masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW
57
58 downsampled_bboxes = bboxes.clone()
59 downsampled_bboxes[:, 0] *= mw / iw
60 downsampled_bboxes[:, 2] *= mw / iw
61 downsampled_bboxes[:, 3] *= mh / ih
62 downsampled_bboxes[:, 1] *= mh / ih
63
64 masks = crop_mask(masks, downsampled_bboxes) # CHW
65 if upsample:
66 masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
67 return masks.gt_(0.5)
68
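# Shape-level sketch of process_mask with random tensors standing in for the
# network outputs (32 prototypes at 160x160, 5 detections on a 640x640 input):
def _process_mask_demo():
    protos = torch.randn(32, 160, 160)  # [mask_dim, mask_h, mask_w]
    masks_in = torch.randn(5, 32)       # mask coefficients after NMS
    bboxes = torch.tensor([[100., 100., 300., 300.]]).repeat(5, 1)
    out = process_mask(protos, masks_in, bboxes, shape=(640, 640), upsample=True)
    assert out.shape == (5, 640, 640)  # binary {0, 1} masks at input resolution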
69
70 def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
71 """
72 im1_shape: model input shape, [h, w]
73 im0_shape: original image shape, [h, w, 3]
74 masks: [h, w, num]
75 """
76 # Rescale coordinates (xyxy) from im1_shape to im0_shape
77 if ratio_pad is None: # calculate from im0_shape
78 gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new
79 pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
80 else:
81 pad = ratio_pad[1]
82 top, left = int(pad[1]), int(pad[0]) # y, x
83 bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
84
85 if len(masks.shape) < 2:
86 raise ValueError(f'masks should have 2 or 3 dimensions, but got {len(masks.shape)}')
87 masks = masks[top:bottom, left:right]
88 # masks = masks.permute(2, 0, 1).contiguous()
89 # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0]
90 # masks = masks.permute(1, 2, 0).contiguous()
91 masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
92
93 if len(masks.shape) == 2:
94 masks = masks[:, :, None]
95 return masks
96
97
98 def mask_iou(mask1, mask2, eps=1e-7):
99 """
100 mask1: [N, n], N is the number of predicted objects
101 mask2: [M, n], M is the number of gt objects
102 Note: n means image_w x image_h
103
104 return: masks iou, [N, M]
105 """
106 intersection = torch.matmul(mask1, mask2.t()).clamp(0)
107 union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection
108 return intersection / (union + eps)
109
110
111 def masks_iou(mask1, mask2, eps=1e-7):
112 """
113 mask1: [N, n], N is the number of predicted objects
114 mask2: [N, n], N is the number of gt objects (paired 1:1 with mask1)
115 Note: n means image_w x image_h
116
117 return: masks iou, (N, )
118 """
119 intersection = (mask1 * mask2).sum(1).clamp(0) # (N, )
120 union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection
121 return intersection / (union + eps)
122
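# Worked sketch for mask_iou on flattened binary masks: identical masks give
# IoU 1.0 (up to eps), disjoint masks give 0.0.
def _mask_iou_demo():
    a = torch.tensor([[1., 1., 0., 0.]])                    # (N=1, n=4)
    b = torch.tensor([[1., 1., 0., 0.], [0., 0., 1., 1.]])  # (M=2, n=4)
    assert torch.allclose(mask_iou(a, b), torch.tensor([[1.0, 0.0]]), atol=1e-4)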
123
124 def masks2segments(masks, strategy='largest'):
125 # Convert masks(n,160,160) into segments(n,xy)
126 segments = []
127 for x in masks.int().cpu().numpy().astype('uint8'):
128 c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
129 if strategy == 'concat': # concatenate all segments
130 c = np.concatenate([x.reshape(-1, 2) for x in c])
131 elif strategy == 'largest': # select largest segment
132 c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
133 segments.append(c.astype('float32'))
134 return segments
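
# Sketch: recover a contour from a rasterized instance mask. The square is
# drawn directly with OpenCV, so the example is self-contained.
def _masks2segments_demo():
    m = np.zeros((32, 32), np.uint8)
    cv2.rectangle(m, (8, 8), (24, 24), 1, thickness=-1)  # filled square instance
    segs = masks2segments(torch.from_numpy(m)[None])     # default strategy='largest'
    assert len(segs) == 1 and segs[0].shape[1] == 2      # (num_points, 2) xy contour
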
1 import torch
2 import torch.nn as nn
3 import torch.nn.functional as F
4
5 from ..general import xywh2xyxy
6 from ..loss import FocalLoss, smooth_BCE
7 from ..metrics import bbox_iou
8 from ..torch_utils import de_parallel
9 from .general import crop_mask
10
11
12 class ComputeLoss:
13 # Compute losses
14 def __init__(self, model, autobalance=False, overlap=False):
15 self.sort_obj_iou = False
16 self.overlap = overlap
17 device = next(model.parameters()).device # get model device
18 h = model.hyp # hyperparameters
19 self.device = device
20
21 # Define criteria
22 BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
23 BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))
24
25 # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
26 self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets
27
28 # Focal loss
29 g = h['fl_gamma'] # focal loss gamma
30 if g > 0:
31 BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
32
33 m = de_parallel(model).model[-1] # Detect() module
34 self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7
35 self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index
36 self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
37 self.na = m.na # number of anchors
38 self.nc = m.nc # number of classes
39 self.nl = m.nl # number of layers
40 self.nm = m.nm # number of masks
41 self.anchors = m.anchors
42 self.device = device
43
44 def __call__(self, preds, targets, masks): # predictions, targets, model
45 p, proto = preds
46 bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width
47 lcls = torch.zeros(1, device=self.device)
48 lbox = torch.zeros(1, device=self.device)
49 lobj = torch.zeros(1, device=self.device)
50 lseg = torch.zeros(1, device=self.device)
51 tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets
52
53 # Losses
54 for i, pi in enumerate(p): # layer index, layer predictions
55 b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
56 tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
57
58 n = b.shape[0] # number of targets
59 if n:
60 pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions
61
62 # Box regression
63 pxy = pxy.sigmoid() * 2 - 0.5
64 pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
65 pbox = torch.cat((pxy, pwh), 1) # predicted box
66 iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target)
67 lbox += (1.0 - iou).mean() # iou loss
68
69 # Objectness
70 iou = iou.detach().clamp(0).type(tobj.dtype)
71 if self.sort_obj_iou:
72 j = iou.argsort()
73 b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
74 if self.gr < 1:
75 iou = (1.0 - self.gr) + self.gr * iou
76 tobj[b, a, gj, gi] = iou # iou ratio
77
78 # Classification
79 if self.nc > 1: # cls loss (only if multiple classes)
80 t = torch.full_like(pcls, self.cn, device=self.device) # targets
81 t[range(n), tcls[i]] = self.cp
82 lcls += self.BCEcls(pcls, t) # BCE
83
84 # Mask regression
85 if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample
86 masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]
87 marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized
88 mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
89 for bi in b.unique():
90 j = b == bi # matching index
91 if self.overlap:
92 mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
93 else:
94 mask_gti = masks[tidxs[i]][j]
95 lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])
96
97 obji = self.BCEobj(pi[..., 4], tobj)
98 lobj += obji * self.balance[i] # obj loss
99 if self.autobalance:
100 self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
101
102 if self.autobalance:
103 self.balance = [x / self.balance[self.ssi] for x in self.balance]
104 lbox *= self.hyp["box"]
105 lobj *= self.hyp["obj"]
106 lcls *= self.hyp["cls"]
107 lseg *= self.hyp["box"] / bs
108
109 loss = lbox + lobj + lcls + lseg
110 return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
111
112 def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
113 # Mask loss for one image
114 pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80)
115 loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
116 return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
117
118 def build_targets(self, p, targets):
119 # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
120 na, nt = self.na, targets.shape[0] # number of anchors, targets
121 tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
122 gain = torch.ones(8, device=self.device) # normalized to gridspace gain
123 ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
124 if self.overlap:
125 batch = p[0].shape[0]
126 ti = []
127 for i in range(batch):
128 num = (targets[:, 0] == i).sum() # find number of targets of each image
129 ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num)
130 ti = torch.cat(ti, 1) # (na, nt)
131 else:
132 ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1)
133 targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices
134
135 g = 0.5 # bias
136 off = torch.tensor(
137 [
138 [0, 0],
139 [1, 0],
140 [0, 1],
141 [-1, 0],
142 [0, -1], # j,k,l,m
143 # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
144 ],
145 device=self.device).float() * g # offsets
146
147 for i in range(self.nl):
148 anchors, shape = self.anchors[i], p[i].shape
149 gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain
150
151 # Match targets to anchors
152 t = targets * gain # shape(3,n,7)
153 if nt:
154 # Matches
155 r = t[..., 4:6] / anchors[:, None] # wh ratio
156 j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare
157 # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
158 t = t[j] # filter
159
160 # Offsets
161 gxy = t[:, 2:4] # grid xy
162 gxi = gain[[2, 3]] - gxy # inverse
163 j, k = ((gxy % 1 < g) & (gxy > 1)).T
164 l, m = ((gxi % 1 < g) & (gxi > 1)).T
165 j = torch.stack((torch.ones_like(j), j, k, l, m))
166 t = t.repeat((5, 1, 1))[j]
167 offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
168 else:
169 t = targets[0]
170 offsets = 0
171
172 # Define
173 bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors
174 (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class
175 gij = (gxy - offsets).long()
176 gi, gj = gij.T # grid indices
177
178 # Append
179 indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid
180 tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
181 anch.append(anchors[a]) # anchors
182 tcls.append(c) # class
183 tidxs.append(tidx)
184 xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized
185
186 return tcls, tbox, indices, anch, tidxs, xywhn
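
# Standalone sketch of the mask-loss composition used in single_mask_loss:
# per-instance coefficients (n, 32) are matrix-multiplied with flattened
# prototypes, the per-pixel BCE map is cropped to each box, then normalized
# by box area. All tensors below are random stand-ins for network outputs.
def _single_mask_loss_demo():
    n, nm, h, w = 3, 32, 80, 80
    pred_coef, proto = torch.randn(n, nm), torch.randn(nm, h, w)
    gt_mask = torch.randint(0, 2, (n, h, w)).float()
    pred_mask = (pred_coef @ proto.view(nm, -1)).view(-1, h, w)  # (n, 80, 80)
    bce = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
    xyxy = torch.tensor([[10., 10., 50., 50.]]).repeat(n, 1)
    area = torch.full((n,), (40 * 40) / (w * h))  # normalized box areas
    per_obj = crop_mask(bce, xyxy).mean(dim=(1, 2)) / area
    assert per_obj.shape == (n,)
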
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Model validation metrics
4 """
5
6 import numpy as np
7
8 from ..metrics import ap_per_class
9
10
11 def fitness(x):
12 # Model fitness as a weighted combination of metrics
13 w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
14 return (x[:, :8] * w).sum(1)
15
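# Worked example: fitness weights mAP@0.5 at 0.1 and mAP@0.5:0.95 at 0.9 for
# boxes and masks alike, ignoring precision and recall. One metrics row is
# [P(B), R(B), mAP50(B), mAP(B), P(M), R(M), mAP50(M), mAP(M)]:
def _fitness_demo():
    x = np.array([[0.8, 0.7, 0.6, 0.5, 0.8, 0.7, 0.6, 0.5]])
    assert np.isclose(fitness(x)[0], 1.02)  # 0.1*0.6 + 0.9*0.5, twice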
16
17 def ap_per_class_box_and_mask(
18 tp_m,
19 tp_b,
20 conf,
21 pred_cls,
22 target_cls,
23 plot=False,
24 save_dir=".",
25 names=(),
26 ):
27 """
28 Args:
29 tp_b: tp of boxes.
30 tp_m: tp of masks.
31 for the remaining arguments, see `ap_per_class`.
32 """
33 results_boxes = ap_per_class(tp_b,
34 conf,
35 pred_cls,
36 target_cls,
37 plot=plot,
38 save_dir=save_dir,
39 names=names,
40 prefix="Box")[2:]
41 results_masks = ap_per_class(tp_m,
42 conf,
43 pred_cls,
44 target_cls,
45 plot=plot,
46 save_dir=save_dir,
47 names=names,
48 prefix="Mask")[2:]
49
50 results = {
51 "boxes": {
52 "p": results_boxes[0],
53 "r": results_boxes[1],
54 "ap": results_boxes[3],
55 "f1": results_boxes[2],
56 "ap_class": results_boxes[4]},
57 "masks": {
58 "p": results_masks[0],
59 "r": results_masks[1],
60 "ap": results_masks[3],
61 "f1": results_masks[2],
62 "ap_class": results_masks[4]}}
63 return results
64
65
66 class Metric:
67
68 def __init__(self) -> None:
69 self.p = [] # (nc, )
70 self.r = [] # (nc, )
71 self.f1 = [] # (nc, )
72 self.all_ap = [] # (nc, 10)
73 self.ap_class_index = [] # (nc, )
74
75 @property
76 def ap50(self):
77 """AP@0.5 of all classes.
78 Return:
79 (nc, ) or [].
80 """
81 return self.all_ap[:, 0] if len(self.all_ap) else []
82
83 @property
84 def ap(self):
85 """AP@0.5:0.95
86 Return:
87 (nc, ) or [].
88 """
89 return self.all_ap.mean(1) if len(self.all_ap) else []
90
91 @property
92 def mp(self):
93 """mean precision of all classes.
94 Return:
95 float.
96 """
97 return self.p.mean() if len(self.p) else 0.0
98
99 @property
100 def mr(self):
101 """mean recall of all classes.
102 Return:
103 float.
104 """
105 return self.r.mean() if len(self.r) else 0.0
106
107 @property
108 def map50(self):
109 """Mean AP@0.5 of all classes.
110 Return:
111 float.
112 """
113 return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
114
115 @property
116 def map(self):
117 """Mean AP@0.5:0.95 of all classes.
118 Return:
119 float.
120 """
121 return self.all_ap.mean() if len(self.all_ap) else 0.0
122
123 def mean_results(self):
124 """Mean of results, return mp, mr, map50, map"""
125 return (self.mp, self.mr, self.map50, self.map)
126
127 def class_result(self, i):
128 """class-aware result, return p[i], r[i], ap50[i], ap[i]"""
129 return (self.p[i], self.r[i], self.ap50[i], self.ap[i])
130
131 def get_maps(self, nc):
132 maps = np.zeros(nc) + self.map
133 for i, c in enumerate(self.ap_class_index):
134 maps[c] = self.ap[i]
135 return maps
136
137 def update(self, results):
138 """
139 Args:
140 results: tuple(p, r, ap, f1, ap_class)
141 """
142 p, r, all_ap, f1, ap_class_index = results
143 self.p = p
144 self.r = r
145 self.all_ap = all_ap
146 self.f1 = f1
147 self.ap_class_index = ap_class_index
148
149
150 class Metrics:
151 """Metric for boxes and masks."""
152
153 def __init__(self) -> None:
154 self.metric_box = Metric()
155 self.metric_mask = Metric()
156
157 def update(self, results):
158 """
159 Args:
160 results: Dict{'boxes': Dict{}, 'masks': Dict{}}
161 """
162 self.metric_box.update(list(results["boxes"].values()))
163 self.metric_mask.update(list(results["masks"].values()))
164
165 def mean_results(self):
166 return self.metric_box.mean_results() + self.metric_mask.mean_results()
167
168 def class_result(self, i):
169 return self.metric_box.class_result(i) + self.metric_mask.class_result(i)
170
171 def get_maps(self, nc):
172 return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)
173
174 @property
175 def ap_class_index(self):
176 # boxes and masks have the same ap_class_index
177 return self.metric_box.ap_class_index
178
179
180 KEYS = [
181 "train/box_loss",
182 "train/seg_loss", # train loss
183 "train/obj_loss",
184 "train/cls_loss",
185 "metrics/precision(B)",
186 "metrics/recall(B)",
187 "metrics/mAP_0.5(B)",
188 "metrics/mAP_0.5:0.95(B)", # metrics
189 "metrics/precision(M)",
190 "metrics/recall(M)",
191 "metrics/mAP_0.5(M)",
192 "metrics/mAP_0.5:0.95(M)", # metrics
193 "val/box_loss",
194 "val/seg_loss", # val loss
195 "val/obj_loss",
196 "val/cls_loss",
197 "x/lr0",
198 "x/lr1",
199 "x/lr2",]
200
201 BEST_KEYS = [
202 "best/epoch",
203 "best/precision(B)",
204 "best/recall(B)",
205 "best/mAP_0.5(B)",
206 "best/mAP_0.5:0.95(B)",
207 "best/precision(M)",
208 "best/recall(M)",
209 "best/mAP_0.5(M)",
210 "best/mAP_0.5:0.95(M)",]
1 import contextlib
2 import math
3 from pathlib import Path
4
5 import cv2
6 import matplotlib.pyplot as plt
7 import numpy as np
8 import pandas as pd
9 import torch
10
11 from .. import threaded
12 from ..general import xywh2xyxy
13 from ..plots import Annotator, colors
14
15
16 @threaded
17 def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None):
18 # Plot image grid with labels
19 if isinstance(images, torch.Tensor):
20 images = images.cpu().float().numpy()
21 if isinstance(targets, torch.Tensor):
22 targets = targets.cpu().numpy()
23 if isinstance(masks, torch.Tensor):
24 masks = masks.cpu().numpy().astype(int)
25
26 max_size = 1920 # max image size
27 max_subplots = 16 # max image subplots, i.e. 4x4
28 bs, _, h, w = images.shape # batch size, _, height, width
29 bs = min(bs, max_subplots) # limit plot images
30 ns = np.ceil(bs ** 0.5) # number of subplots (square)
31 if np.max(images[0]) <= 1:
32 images *= 255 # de-normalise (optional)
33
34 # Build Image
35 mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
36 for i, im in enumerate(images):
37 if i == max_subplots: # if last batch has fewer images than we expect
38 break
39 x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
40 im = im.transpose(1, 2, 0)
41 mosaic[y:y + h, x:x + w, :] = im
42
43 # Resize (optional)
44 scale = max_size / ns / max(h, w)
45 if scale < 1:
46 h = math.ceil(scale * h)
47 w = math.ceil(scale * w)
48 mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
49
50 # Annotate
51 fs = int((h + w) * ns * 0.01) # font size
52 annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
53 for i in range(i + 1):
54 x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
55 annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders
56 if paths:
57 annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames
58 if len(targets) > 0:
59 idx = targets[:, 0] == i
60 ti = targets[idx] # image targets
61
62 boxes = xywh2xyxy(ti[:, 2:6]).T
63 classes = ti[:, 1].astype('int')
64 labels = ti.shape[1] == 6 # labels if no conf column
65 conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred)
66
67 if boxes.shape[1]:
68 if boxes.max() <= 1.01: # if normalized with tolerance 0.01
69 boxes[[0, 2]] *= w # scale to pixels
70 boxes[[1, 3]] *= h
71 elif scale < 1: # absolute coords need scale if image scales
72 boxes *= scale
73 boxes[[0, 2]] += x
74 boxes[[1, 3]] += y
75 for j, box in enumerate(boxes.T.tolist()):
76 cls = classes[j]
77 color = colors(cls)
78 cls = names[cls] if names else cls
79 if labels or conf[j] > 0.25: # 0.25 conf thresh
80 label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
81 annotator.box_label(box, label, color=color)
82
83 # Plot masks
84 if len(masks):
85 if masks.max() > 1.0: # means masks are in overlap (index) format
86 image_masks = masks[[i]] # (1, 640, 640)
87 nl = len(ti)
88 index = np.arange(nl).reshape(nl, 1, 1) + 1
89 image_masks = np.repeat(image_masks, nl, axis=0)
90 image_masks = np.where(image_masks == index, 1.0, 0.0)
91 else:
92 image_masks = masks[idx]
93
94 im = np.asarray(annotator.im).copy()
95 for j, box in enumerate(boxes.T.tolist()):
96 if labels or conf[j] > 0.25: # 0.25 conf thresh
97 color = colors(classes[j])
98 mh, mw = image_masks[j].shape
99 if mh != h or mw != w:
100 mask = image_masks[j].astype(np.uint8)
101 mask = cv2.resize(mask, (w, h))
102 mask = mask.astype(bool)
103 else:
104 mask = image_masks[j].astype(bool)
105 with contextlib.suppress(Exception):
106 im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
107 annotator.fromarray(im)
108 annotator.im.save(fname) # save
109
110
111 def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
112 # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
113 save_dir = Path(file).parent if file else Path(dir)
114 fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
115 ax = ax.ravel()
116 files = list(save_dir.glob("results*.csv"))
117 assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."
118 for f in files:
119 try:
120 data = pd.read_csv(f)
121 index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] +
122 0.1 * data.values[:, 11])
123 s = [x.strip() for x in data.columns]
124 x = data.values[:, 0]
125 for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]):
126 y = data.values[:, j]
127 # y[y == 0] = np.nan # don't show zero values
128 ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2)
129 if best:
130 # best
131 ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3)
132 ax[i].set_title(s[j] + f"\n{round(y[index], 5)}")
133 else:
134 # last
135 ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3)
136 ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}")
137 # if j in [8, 9, 10]: # share train and val loss y axes
138 # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
139 except Exception as e:
140 print(f"Warning: Plotting error for {f}: {e}")
141 ax[1].legend()
142 fig.savefig(save_dir / "results.png", dpi=200)
143 plt.close()
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 PyTorch utils
4 """
5
6 import math
7 import os
8 import platform
9 import subprocess
10 import time
11 import warnings
12 from contextlib import contextmanager
13 from copy import deepcopy
14 from pathlib import Path
15
16 import torch
17 import torch.distributed as dist
18 import torch.nn as nn
19 import torch.nn.functional as F
20 from torch.nn.parallel import DistributedDataParallel as DDP
21
22 from utils.general import LOGGER, check_version, colorstr, file_date, git_describe
23
24 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
25 RANK = int(os.getenv('RANK', -1))
26 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
27
28 try:
29 import thop # for FLOPs computation
30 except ImportError:
31 thop = None
32
33 # Suppress PyTorch warnings
34 warnings.filterwarnings('ignore', message='User provided device_type of \'cuda\', but CUDA is not available. Disabling')
35
36
37 def smart_inference_mode(torch_1_9=check_version(torch.__version__, '1.9.0')):
38 # Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator
39 def decorate(fn):
40 return (torch.inference_mode if torch_1_9 else torch.no_grad)()(fn)
41
42 return decorate
43
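# Usage sketch: decorate any inference entry point; gradients are disabled
# inside the call whichever branch (inference_mode or no_grad) is taken.
def _smart_inference_mode_demo():
    @smart_inference_mode()
    def double(x):
        return x * 2
    y = double(torch.ones(3, requires_grad=True))
    assert not y.requires_grad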
44
45 def smartCrossEntropyLoss(label_smoothing=0.0):
46 # Returns nn.CrossEntropyLoss with label smoothing enabled for torch>=1.10.0
47 if check_version(torch.__version__, '1.10.0'):
48 return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
49 if label_smoothing > 0:
50 LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0')
51 return nn.CrossEntropyLoss()
52
53
54 def smart_DDP(model):
55 # Model DDP creation with checks
56 assert not check_version(torch.__version__, '1.12.0', pinned=True), \
57 'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \
58 'Please upgrade or downgrade torch to use DDP. See https://github.com/ultralytics/yolov5/issues/8395'
59 if check_version(torch.__version__, '1.11.0'):
60 return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True)
61 else:
62 return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
63
64
65 def reshape_classifier_output(model, n=1000):
66 # Update a TorchVision classification model to class count 'n' if required
67 from models.common import Classify
68 name, m = list((model.model if hasattr(model, 'model') else model).named_children())[-1] # last module
69 if isinstance(m, Classify): # YOLOv5 Classify() head
70 if m.linear.out_features != n:
71 m.linear = nn.Linear(m.linear.in_features, n)
72 elif isinstance(m, nn.Linear): # ResNet, EfficientNet
73 if m.out_features != n:
74 setattr(model, name, nn.Linear(m.in_features, n))
75 elif isinstance(m, nn.Sequential):
76 types = [type(x) for x in m]
77 if nn.Linear in types:
78 i = types.index(nn.Linear) # nn.Linear index
79 if m[i].out_features != n:
80 m[i] = nn.Linear(m[i].in_features, n)
81 elif nn.Conv2d in types:
82 i = types.index(nn.Conv2d) # nn.Conv2d index
83 if m[i].out_channels != n:
84 m[i] = nn.Conv2d(m[i].in_channels, n, m[i].kernel_size, m[i].stride, bias=m[i].bias)
85
86
87 @contextmanager
88 def torch_distributed_zero_first(local_rank: int):
89 # Decorator to make all processes in distributed training wait for each local_master to do something
90 if local_rank not in [-1, 0]:
91 dist.barrier(device_ids=[local_rank])
92 yield
93 if local_rank == 0:
94 dist.barrier(device_ids=[0])
95
96
97 def device_count():
98 # Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Supports Linux and Windows
99 assert platform.system() in ('Linux', 'Windows'), 'device_count() only supported on Linux or Windows'
100 try:
101 cmd = 'nvidia-smi -L | wc -l' if platform.system() == 'Linux' else 'nvidia-smi -L | find /c /v ""' # Windows
102 return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1])
103 except Exception:
104 return 0
105
106
107 def select_device(device='', batch_size=0, newline=True):
108 # device = None or 'cpu' or 0 or '0' or '0,1,2,3'
109 s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
110 device = str(device).strip().lower().replace('cuda:', '').replace('none', '') # to string, 'cuda:0' to '0'
111 cpu = device == 'cpu'
112 mps = device == 'mps' # Apple Metal Performance Shaders (MPS)
113 if cpu or mps:
114 os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
115 elif device: # non-cpu device requested
116 os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - must be before assert is_available()
117 assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \
118 f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"
119
120 if not cpu and not mps and torch.cuda.is_available(): # prefer GPU if available
121 devices = device.split(',') if device else '0' # range(torch.cuda.device_count()) # i.e. 0,1,6,7
122 n = len(devices) # device count
123 if n > 1 and batch_size > 0: # check batch_size is divisible by device_count
124 assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
125 space = ' ' * (len(s) + 1)
126 for i, d in enumerate(devices):
127 p = torch.cuda.get_device_properties(i)
128 s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
129 arg = 'cuda:0'
130 elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available(): # prefer MPS if available
131 s += 'MPS\n'
132 arg = 'mps'
133 else: # revert to CPU
134 s += 'CPU\n'
135 arg = 'cpu'
136
137 if not newline:
138 s = s.rstrip()
139 LOGGER.info(s)
140 return torch.device(arg)
141
142
143 def time_sync():
144 # PyTorch-accurate time
145 if torch.cuda.is_available():
146 torch.cuda.synchronize()
147 return time.time()
148
149
150 def profile(input, ops, n=10, device=None):
151 """ YOLOv5 speed/memory/FLOPs profiler
152 Usage:
153 input = torch.randn(16, 3, 640, 640)
154 m1 = lambda x: x * torch.sigmoid(x)
155 m2 = nn.SiLU()
156 profile(input, [m1, m2], n=100) # profile over 100 iterations
157 """
158 results = []
159 if not isinstance(device, torch.device):
160 device = select_device(device)
161 print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
162 f"{'input':>24s}{'output':>24s}")
163
164 for x in input if isinstance(input, list) else [input]:
165 x = x.to(device)
166 x.requires_grad = True
167 for m in ops if isinstance(ops, list) else [ops]:
168 m = m.to(device) if hasattr(m, 'to') else m # device
169 m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
170 tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward
171 try:
172 flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs
173 except Exception:
174 flops = 0
175
176 try:
177 for _ in range(n):
178 t[0] = time_sync()
179 y = m(x)
180 t[1] = time_sync()
181 try:
182 _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
183 t[2] = time_sync()
184 except Exception: # no backward method
185 # print(e) # for debug
186 t[2] = float('nan')
187 tf += (t[1] - t[0]) * 1000 / n # ms per op forward
188 tb += (t[2] - t[1]) * 1000 / n # ms per op backward
189 mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB)
190 s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes
191 p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters
192 print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
193 results.append([p, flops, mem, tf, tb, s_in, s_out])
194 except Exception as e:
195 print(e)
196 results.append(None)
197 torch.cuda.empty_cache()
198 return results
199
200
201 def is_parallel(model):
202 # Returns True if model is of type DP or DDP
203 return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
204
205
206 def de_parallel(model):
207 # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
208 return model.module if is_parallel(model) else model
209
210
211 def initialize_weights(model):
212 for m in model.modules():
213 t = type(m)
214 if t is nn.Conv2d:
215 pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
216 elif t is nn.BatchNorm2d:
217 m.eps = 1e-3
218 m.momentum = 0.03
219 elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
220 m.inplace = True
221
222
223 def find_modules(model, mclass=nn.Conv2d):
224 # Finds layer indices matching module class 'mclass'
225 return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
226
227
228 def sparsity(model):
229 # Return global model sparsity
230 a, b = 0, 0
231 for p in model.parameters():
232 a += p.numel()
233 b += (p == 0).sum()
234 return b / a
235
236
237 def prune(model, amount=0.3):
238 # Prune model to requested global sparsity
239 import torch.nn.utils.prune as prune
240 for name, m in model.named_modules():
241 if isinstance(m, nn.Conv2d):
242 prune.l1_unstructured(m, name='weight', amount=amount) # prune
243 prune.remove(m, 'weight') # make permanent
244 LOGGER.info(f'Model pruned to {sparsity(model):.3g} global sparsity')
245
246
247 def fuse_conv_and_bn(conv, bn):
248 # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
249 fusedconv = nn.Conv2d(conv.in_channels,
250 conv.out_channels,
251 kernel_size=conv.kernel_size,
252 stride=conv.stride,
253 padding=conv.padding,
254 dilation=conv.dilation,
255 groups=conv.groups,
256 bias=True).requires_grad_(False).to(conv.weight.device)
257
258 # Prepare filters
259 w_conv = conv.weight.clone().view(conv.out_channels, -1)
260 w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
261 fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
262
263 # Prepare spatial bias
264 b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
265 b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
266 fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
267
268 return fusedconv
269
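# Verification sketch: in eval mode the fused conv must reproduce conv+bn
# outputs on random data (tolerance accounts for float rounding).
def _fuse_conv_and_bn_demo():
    conv = nn.Conv2d(3, 8, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(8).eval()
    bn.running_mean.uniform_(-1, 1)
    bn.running_var.uniform_(0.5, 1.5)
    x = torch.randn(1, 3, 16, 16)
    fused = fuse_conv_and_bn(conv, bn)
    assert torch.allclose(bn(conv(x)), fused(x), atol=1e-4)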
270
271 def model_info(model, verbose=False, imgsz=640):
272 # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
273 n_p = sum(x.numel() for x in model.parameters()) # number parameters
274 n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
275 if verbose:
276 print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}")
277 for i, (name, p) in enumerate(model.named_parameters()):
278 name = name.replace('module_list.', '')
279 print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
280 (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
281
282 try: # FLOPs
283 p = next(model.parameters())
284 stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 # max stride
285 im = torch.empty((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format
286 flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0] / 1E9 * 2 # stride GFLOPs
287 imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz] # expand if int/float
288 fs = f', {flops * imgsz[0] / stride * imgsz[1] / stride:.1f} GFLOPs' # 640x640 GFLOPs
289 except Exception:
290 fs = ''
291
292 name = Path(model.yaml_file).stem.replace('yolov5', 'YOLOv5') if hasattr(model, 'yaml_file') else 'Model'
293 LOGGER.info(f"{name} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
294
295
296 def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
297 # Scales img(bs,3,y,x) by ratio constrained to gs-multiple
298 if ratio == 1.0:
299 return img
300 h, w = img.shape[2:]
301 s = (int(h * ratio), int(w * ratio)) # new size
302 img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
303 if not same_shape: # pad/crop img
304 h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w))
305 return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
306
307
308 def copy_attr(a, b, include=(), exclude=()):
309 # Copy attributes from b to a, options to only include [...] and to exclude [...]
310 for k, v in b.__dict__.items():
311 if (len(include) and k not in include) or k.startswith('_') or k in exclude:
312 continue
313 else:
314 setattr(a, k, v)
315
316
317 def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5):
318 # YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay
319 g = [], [], [] # optimizer parameter groups
320 bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d()
321 for v in model.modules():
322 if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias (no decay)
323 g[2].append(v.bias)
324 if isinstance(v, bn): # weight (no decay)
325 g[1].append(v.weight)
326 elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay)
327 g[0].append(v.weight)
328
329 if name == 'Adam':
330 optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999)) # adjust beta1 to momentum
331 elif name == 'AdamW':
332 optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
333 elif name == 'RMSProp':
334 optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum)
335 elif name == 'SGD':
336 optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
337 else:
338 raise NotImplementedError(f'Optimizer {name} not implemented.')
339
340 optimizer.add_param_group({'params': g[0], 'weight_decay': decay}) # add g0 with weight_decay
341 optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0}) # add g1 (BatchNorm2d weights)
342 LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups "
343 f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias")
344 return optimizer
345
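# Usage sketch on a toy model (shapes don't matter, we never run a forward
# pass): biases and normalization weights get no weight decay, while the
# remaining weights get the requested decay.
def _smart_optimizer_demo():
    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.Linear(8, 4))
    opt = smart_optimizer(model, name='SGD', lr=0.01, momentum=0.9, decay=1e-4)
    decays = [pg['weight_decay'] for pg in opt.param_groups]
    assert decays == [0.0, 1e-4, 0.0]  # biases, decayed weights, norm weights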
346
347 def smart_hub_load(repo='ultralytics/yolov5', model='yolov5s', **kwargs):
348 # YOLOv5 torch.hub.load() wrapper with smart error/issue handling
349 if check_version(torch.__version__, '1.9.1'):
350 kwargs['skip_validation'] = True # validation causes GitHub API rate limit errors
351 if check_version(torch.__version__, '1.12.0'):
352 kwargs['trust_repo'] = True # argument required starting in torch 1.12
353 try:
354 return torch.hub.load(repo, model, **kwargs)
355 except Exception:
356 return torch.hub.load(repo, model, force_reload=True, **kwargs)
357
358
359 def smart_resume(ckpt, optimizer, ema=None, weights='yolov5s.pt', epochs=300, resume=True):
360 # Resume training from a partially trained checkpoint
361 best_fitness = 0.0
362 start_epoch = ckpt['epoch'] + 1
363 if ckpt['optimizer'] is not None:
364 optimizer.load_state_dict(ckpt['optimizer']) # optimizer
365 best_fitness = ckpt['best_fitness']
366 if ema and ckpt.get('ema'):
367 ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) # EMA
368 ema.updates = ckpt['updates']
369 if resume:
370 assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.\n' \
371 f"Start a new training without --resume, i.e. 'python train.py --weights {weights}'"
372 LOGGER.info(f'Resuming training from {weights} from epoch {start_epoch} to {epochs} total epochs')
373 if epochs < start_epoch:
374 LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
375 epochs += ckpt['epoch'] # finetune additional epochs
376 return best_fitness, start_epoch, epochs
377
378
379 class EarlyStopping:
380 # YOLOv5 simple early stopper
381 def __init__(self, patience=30):
382 self.best_fitness = 0.0 # i.e. mAP
383 self.best_epoch = 0
384 self.patience = patience or float('inf') # epochs to wait after fitness stops improving to stop
385 self.possible_stop = False # possible stop may occur next epoch
386
387 def __call__(self, epoch, fitness):
388 if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training
389 self.best_epoch = epoch
390 self.best_fitness = fitness
391 delta = epoch - self.best_epoch # epochs without improvement
392 self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch
393 stop = delta >= self.patience # stop training if patience exceeded
394 if stop:
395 LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. '
396 f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n'
397 f'To update EarlyStopping(patience={self.patience}) pass a new patience value, '
398 f'i.e. `python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.')
399 return stop
400
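# Usage sketch: with patience=2, a fitness curve that peaks at epoch 1 and
# then stalls triggers the stop at epoch 3.
def _early_stopping_demo():
    stopper = EarlyStopping(patience=2)
    stops = [stopper(epoch, f) for epoch, f in enumerate([0.1, 0.5, 0.4, 0.4])]
    assert stops == [False, False, False, True]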
401
402 class ModelEMA:
403 """ Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models
404 Keeps a moving average of everything in the model state_dict (parameters and buffers)
405 For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
406 """
407
408 def __init__(self, model, decay=0.9999, tau=2000, updates=0):
409 # Create EMA
410 self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA
411 self.updates = updates # number of EMA updates
412 self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs)
413 for p in self.ema.parameters():
414 p.requires_grad_(False)
415
416 def update(self, model):
417 # Update EMA parameters
418 self.updates += 1
419 d = self.decay(self.updates)
420
421 msd = de_parallel(model).state_dict() # model state_dict
422 for k, v in self.ema.state_dict().items():
423 if v.dtype.is_floating_point: # true for FP16 and FP32
424 v *= d
425 v += (1 - d) * msd[k].detach()
426 # assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype} and model {msd[k].dtype} must be FP32'
427
428 def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
429 # Update EMA attributes
430 copy_attr(self.ema, model, include, exclude)
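
# Usage sketch: keep an EMA shadow of a toy model and update it once per
# optimizer step; the decay ramp makes early updates track the model closely.
def _model_ema_demo():
    model = nn.Linear(4, 2)
    ema = ModelEMA(model, decay=0.9999, tau=2000)
    for _ in range(3):  # stand-ins for training steps
        ema.update(model)
    assert ema.updates == 3 and not any(p.requires_grad for p in ema.ema.parameters())
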
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """ Utils to interact with the Triton Inference Server
3 """
4
5 import typing
6 from urllib.parse import urlparse
7
8 import torch
9
10
11 class TritonRemoteModel:
12 """ A wrapper over a model served by the Triton Inference Server. It can
13 be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
14 as input and returns them as outputs.
15 """
16
17 def __init__(self, url: str):
18 """
19 Keyword arguments:
20 url: Fully qualified address of the Triton server, e.g. grpc://localhost:8000
21 """
22
23 parsed_url = urlparse(url)
24 if parsed_url.scheme == "grpc":
25 from tritonclient.grpc import InferenceServerClient, InferInput
26
27 self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client
28 model_repository = self.client.get_model_repository_index()
29 self.model_name = model_repository.models[0].name
30 self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)
31
32 def create_input_placeholders() -> typing.List[InferInput]:
33 return [
34 InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
35
36 else:
37 from tritonclient.http import InferenceServerClient, InferInput
38
39 self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client
40 model_repository = self.client.get_model_repository_index()
41 self.model_name = model_repository[0]['name']
42 self.metadata = self.client.get_model_metadata(self.model_name)
43
44 def create_input_placeholders() -> typing.List[InferInput]:
45 return [
46 InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
47
48 self._create_input_placeholders_fn = create_input_placeholders
49
50 @property
51 def runtime(self):
52 """Returns the model runtime"""
53 return self.metadata.get("backend", self.metadata.get("platform"))
54
55 def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
56 """ Invokes the model. Parameters can be provided via args or kwargs.
57 args, if provided, are assumed to match the order of inputs of the model.
58 kwargs are matched with the model input names.
59 """
60 inputs = self._create_inputs(*args, **kwargs)
61 response = self.client.infer(model_name=self.model_name, inputs=inputs)
62 result = []
63 for output in self.metadata['outputs']:
64 tensor = torch.as_tensor(response.as_numpy(output['name']))
65 result.append(tensor)
66 return result[0] if len(result) == 1 else result
67
68 def _create_inputs(self, *args, **kwargs):
69 args_len, kwargs_len = len(args), len(kwargs)
70 if not args_len and not kwargs_len:
71 raise RuntimeError("No inputs provided.")
72 if args_len and kwargs_len:
73 raise RuntimeError("Cannot specify args and kwargs at the same time")
74
75 placeholders = self._create_input_placeholders_fn()
76 if args_len:
77 if args_len != len(placeholders):
78 raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.")
79 for input, value in zip(placeholders, args):
80 input.set_data_from_numpy(value.cpu().numpy())
81 else:
82 for input in placeholders:
83 value = kwargs[input.name]
84 input.set_data_from_numpy(value.cpu().numpy())
85 return placeholders
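
# Usage sketch (assumption: a Triton server is already serving exactly one
# model at the URL below; the input shape is a placeholder for that model):
def _triton_demo():
    model = TritonRemoteModel('http://localhost:8000')
    print('runtime:', model.runtime)
    out = model(torch.zeros(1, 3, 640, 640))  # positional args follow model input order
    print(type(out))
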
1 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2 """
3 Validate a trained YOLOv5 detection model on a detection dataset
4
5 Usage:
6 $ python val.py --weights yolov5s.pt --data coco128.yaml --img 640
7
8 Usage - formats:
9 $ python val.py --weights yolov5s.pt # PyTorch
10 yolov5s.torchscript # TorchScript
11 yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
12 yolov5s.xml # OpenVINO
13 yolov5s.engine # TensorRT
14 yolov5s.mlmodel # CoreML (macOS-only)
15 yolov5s_saved_model # TensorFlow SavedModel
16 yolov5s.pb # TensorFlow GraphDef
17 yolov5s.tflite # TensorFlow Lite
18 yolov5s_edgetpu.tflite # TensorFlow Edge TPU
19 yolov5s_paddle_model # PaddlePaddle
20 """
21
22 import argparse
23 import json
24 import os
25 import sys
26 from pathlib import Path
27
28 import numpy as np
29 import torch
30 from tqdm import tqdm
31
32 FILE = Path(__file__).resolve()
33 ROOT = FILE.parents[0] # YOLOv5 root directory
34 if str(ROOT) not in sys.path:
35 sys.path.append(str(ROOT)) # add ROOT to PATH
36 ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
37
38 from models.common import DetectMultiBackend
39 from utils.callbacks import Callbacks
40 from utils.dataloaders import create_dataloader
41 from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml,
42 coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args,
43 scale_boxes, xywh2xyxy, xyxy2xywh)
44 from utils.metrics import ConfusionMatrix, ap_per_class, box_iou
45 from utils.plots import output_to_target, plot_images, plot_val_study
46 from utils.torch_utils import select_device, smart_inference_mode
47
48
49 def save_one_txt(predn, save_conf, shape, file):
50 # Save one txt result
51 gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh
52 for *xyxy, conf, cls in predn.tolist():
53 xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
54 line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
55 with open(file, 'a') as f:
56 f.write(('%g ' * len(line)).rstrip() % line + '\n')
57
58
59 def save_one_json(predn, jdict, path, class_map):
60 # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
61 image_id = int(path.stem) if path.stem.isnumeric() else path.stem
62 box = xyxy2xywh(predn[:, :4]) # xywh
63 box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
64 for p, b in zip(predn.tolist(), box.tolist()):
65 jdict.append({
66 'image_id': image_id,
67 'category_id': class_map[int(p[5])],
68 'bbox': [round(x, 3) for x in b],
69 'score': round(p[4], 5)})
70
71
72 def process_batch(detections, labels, iouv):
73 """
74 Return correct prediction matrix
75 Arguments:
76 detections (array[N, 6]), x1, y1, x2, y2, conf, class
77 labels (array[M, 5]), class, x1, y1, x2, y2
78 Returns:
79 correct (array[N, 10]), for 10 IoU levels
80 """
81 correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
82 iou = box_iou(labels[:, 1:], detections[:, :4])
83 correct_class = labels[:, 0:1] == detections[:, 5]
84 for i in range(len(iouv)):
85 x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
86 if x[0].shape[0]:
87 matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou]
88 if x[0].shape[0] > 1:
89 matches = matches[matches[:, 2].argsort()[::-1]]
90 matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
91 # matches = matches[matches[:, 2].argsort()[::-1]]
92 matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
93 correct[matches[:, 1].astype(int), i] = True
94 return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
95
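# Worked sketch: a detection that exactly matches a label of the same class
# is marked correct at every IoU threshold.
def _process_batch_demo():
    iouv = torch.linspace(0.5, 0.95, 10)
    det = torch.tensor([[0., 0., 10., 10., 0.9, 0.]])  # x1, y1, x2, y2, conf, cls
    lab = torch.tensor([[0., 0., 0., 10., 10.]])       # cls, x1, y1, x2, y2
    correct = process_batch(det, lab, iouv)
    assert correct.shape == (1, 10) and correct.all()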
96
97 @smart_inference_mode()
98 def run(
99 data,
100 weights=None, # model.pt path(s)
101 batch_size=32, # batch size
102 imgsz=640, # inference size (pixels)
103 conf_thres=0.001, # confidence threshold
104 iou_thres=0.6, # NMS IoU threshold
105 max_det=300, # maximum detections per image
106 task='val', # train, val, test, speed or study
107 device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
108 workers=8, # max dataloader workers (per RANK in DDP mode)
109 single_cls=False, # treat as single-class dataset
110 augment=False, # augmented inference
111 verbose=False, # verbose output
112 save_txt=False, # save results to *.txt
113 save_hybrid=False, # save label+prediction hybrid results to *.txt
114 save_conf=False, # save confidences in --save-txt labels
115 save_json=False, # save a COCO-JSON results file
116 project=ROOT / 'runs/val', # save to project/name
117 name='exp', # save to project/name
118 exist_ok=False, # existing project/name ok, do not increment
119 half=True, # use FP16 half-precision inference
120 dnn=False, # use OpenCV DNN for ONNX inference
121 model=None,
122 dataloader=None,
123 save_dir=Path(''),
124 plots=True,
125 callbacks=Callbacks(),
126 compute_loss=None,
127 ):
128 # Initialize/load model and set device
129 training = model is not None
130 if training: # called by train.py
131 device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
132 half &= device.type != 'cpu' # half precision only supported on CUDA
133 model.half() if half else model.float()
134 else: # called directly
135 device = select_device(device, batch_size=batch_size)
136
137 # Directories
138 save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
139 (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
140
141 # Load model
142 model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
143 stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
144 imgsz = check_img_size(imgsz, s=stride) # check image size
145 half = model.fp16 # FP16 supported on limited backends with CUDA
146 if engine:
147 batch_size = model.batch_size
148 else:
149 device = model.device
150 if not (pt or jit):
151 batch_size = 1 # export.py models default to batch-size 1
152 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')
153
154 # Data
155 data = check_dataset(data) # check
156
157 # Configure
158 model.eval()
159 cuda = device.type != 'cpu'
160 is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset
161 nc = 1 if single_cls else int(data['nc']) # number of classes
162 iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
163 niou = iouv.numel()
164
165 # Dataloader
166 if not training:
167 if pt and not single_cls: # check --weights are trained on --data
168 ncm = model.model.nc
169 assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \
170 f'classes). Pass correct combination of --weights and --data that are trained together.'
171 model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
172 pad, rect = (0.0, False) if task == 'speed' else (0.5, pt) # square inference for benchmarks
173 task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images
174 dataloader = create_dataloader(data[task],
175 imgsz,
176 batch_size,
177 stride,
178 single_cls,
179 pad=pad,
180 rect=rect,
181 workers=workers,
182 prefix=colorstr(f'{task}: '))[0]
183
184 seen = 0
185 confusion_matrix = ConfusionMatrix(nc=nc)
186 names = model.names if hasattr(model, 'names') else model.module.names # get class names
187 if isinstance(names, (list, tuple)): # old format
188 names = dict(enumerate(names))
189 class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
190 s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95')
191 tp, fp, p, r, f1, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
192 dt = Profile(), Profile(), Profile() # profiling times
193 loss = torch.zeros(3, device=device)
194 jdict, stats, ap, ap_class = [], [], [], []
195 callbacks.run('on_val_start')
196 pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
197 for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
198 callbacks.run('on_val_batch_start')
199 with dt[0]:
200 if cuda:
201 im = im.to(device, non_blocking=True)
202 targets = targets.to(device)
203 im = im.half() if half else im.float() # uint8 to fp16/32
204 im /= 255 # 0 - 255 to 0.0 - 1.0
205 nb, _, height, width = im.shape # batch size, channels, height, width
206
207 # Inference
208 with dt[1]:
209 preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None)
210
211 # Loss
212 if compute_loss:
213 loss += compute_loss(train_out, targets)[1] # box, obj, cls
214
215 # NMS
216 targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels
217 lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
218 with dt[2]:
219 preds = non_max_suppression(preds,
220 conf_thres,
221 iou_thres,
222 labels=lb,
223 multi_label=True,
224 agnostic=single_cls,
225 max_det=max_det)
226
227 # Metrics
228 for si, pred in enumerate(preds):
229 labels = targets[targets[:, 0] == si, 1:]
230 nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions
231 path, shape = Path(paths[si]), shapes[si][0]
232 correct = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init
233 seen += 1
234
235 if npr == 0:
236 if nl:
237 stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0]))
238 if plots:
239 confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
240 continue
241
242 # Predictions
243 if single_cls:
244 pred[:, 5] = 0
245 predn = pred.clone()
246 scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred
247
248 # Evaluate
249 if nl:
250 tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
251 scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels
252 labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
253 correct = process_batch(predn, labelsn, iouv)
254 if plots:
255 confusion_matrix.process_batch(predn, labelsn)
256 stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls)
257
258 # Save/log
259 if save_txt:
260 save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
261 if save_json:
262 save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary
263 callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
264
265 # Plot images
266 if plots and batch_i < 3:
267 plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels
268 plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred
269
270 callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds)
271
272 # Compute metrics
273 stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy
274 if len(stats) and stats[0].any():
275 tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
276 ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
277 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
278 nt = np.bincount(stats[3].astype(int), minlength=nc) # number of targets per class
279
280 # Print results
281 pf = '%22s' + '%11i' * 2 + '%11.3g' * 4 # print format
282 LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
283 if nt.sum() == 0:
284 LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, cannot compute metrics without labels')
285
286 # Print results per class
287 if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
288 for i, c in enumerate(ap_class):
289 LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
290
291 # Print speeds
292 t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image
293 if not training:
294 shape = (batch_size, 3, imgsz, imgsz)
295 LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)
296
297 # Plots
298 if plots:
299 confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
300 callbacks.run('on_val_end', nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
301
302 # Save JSON
303 if save_json and len(jdict):
304 w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights
305 anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json
306 pred_json = str(save_dir / f"{w}_predictions.json") # predictions json
307 LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...')
308 with open(pred_json, 'w') as f:
309 json.dump(jdict, f)
310
311 try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
312 check_requirements('pycocotools')
313 from pycocotools.coco import COCO
314 from pycocotools.cocoeval import COCOeval
315
316 anno = COCO(anno_json) # init annotations api
317 pred = anno.loadRes(pred_json) # init predictions api
318 eval = COCOeval(anno, pred, 'bbox')
319 if is_coco:
320 eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate
321 eval.evaluate()
322 eval.accumulate()
323 eval.summarize()
324 map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
325 except Exception as e:
326 LOGGER.info(f'pycocotools unable to run: {e}')
327
328 # Return results
329 model.float() # for training
330 if not training:
331 s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
332 LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
333 maps = np.zeros(nc) + map
334 for i, c in enumerate(ap_class):
335 maps[c] = ap[i]
336 return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
337
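run() can also be invoked programmatically, which is how train.py validates during training. A hedged sketch of a direct call (the dataset YAML and weights paths are placeholders, not files shipped with this repo):

# Hypothetical direct call; 'data/coco128.yaml' and 'yolov5s.pt' are placeholders.
(mp, mr, map50, map5095, *losses), maps, times = run(
    data='data/coco128.yaml',
    weights='yolov5s.pt',
    batch_size=16,
    imgsz=640,
    half=False,  # FP16 only helps on CUDA; keep FP32 on CPU
)
print(f'P={mp:.3f} R={mr:.3f} mAP@0.5={map50:.3f} mAP@0.5:0.95={map5095:.3f}')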
338
339 def parse_opt():
340 parser = argparse.ArgumentParser()
341 parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
342 parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
343 parser.add_argument('--batch-size', type=int, default=32, help='batch size')
344 parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
345 parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
346 parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
347 parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image')
348 parser.add_argument('--task', default='val', help='train, val, test, speed or study')
349 parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
350 parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
351 parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
352 parser.add_argument('--augment', action='store_true', help='augmented inference')
353 parser.add_argument('--verbose', action='store_true', help='report mAP by class')
354 parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
355 parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt')
356 parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
357 parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file')
358 parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name')
359 parser.add_argument('--name', default='exp', help='save to project/name')
360 parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
361 parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
362 parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
363 opt = parser.parse_args()
364 opt.data = check_yaml(opt.data) # check YAML
365 opt.save_json |= opt.data.endswith('coco.yaml')
366 opt.save_txt |= opt.save_hybrid
367 print_args(vars(opt))
368 return opt
369
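Because parse_opt() reads sys.argv, a command-line invocation and a scripted call are interchangeable. A hedged sketch of the equivalence (flag values are illustrative):

import sys

# Equivalent to: python val.py --data data/coco128.yaml --weights yolov5s.pt --imgsz 640
sys.argv = ['val.py', '--data', 'data/coco128.yaml', '--weights', 'yolov5s.pt', '--imgsz', '640']
opt = parse_opt()   # also applies check_yaml and the save_json/save_txt coupling
main(opt)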
370
371 def main(opt):
372 check_requirements(exclude=('tensorboard', 'thop'))
373
374 if opt.task in ('train', 'val', 'test'): # run normally
375 if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466
376 LOGGER.info(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results')
377 if opt.save_hybrid:
378 LOGGER.info('WARNING ⚠️ --save-hybrid will return high mAP from hybrid labels, not from predictions alone')
379 run(**vars(opt))
380
381 else:
382 weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
383 opt.half = True # FP16 for fastest results
384 if opt.task == 'speed': # speed benchmarks
385 # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt...
386 opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
387 for opt.weights in weights:
388 run(**vars(opt), plots=False)
389
390 elif opt.task == 'study': # speed vs mAP benchmarks
391 # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt...
392 for opt.weights in weights:
393 f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to
394 x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis
395 for opt.imgsz in x: # img-size
396 LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...')
397 r, _, t = run(**vars(opt), plots=False)
398 y.append(r + t) # results and times
399 np.savetxt(f, y, fmt='%10.4g') # save
400 os.system('zip -r study.zip study_*.txt')
401 plot_val_study(x=x) # plot
402
403
404 if __name__ == "__main__":
405 opt = parse_opt()
406 main(opt)