
Hands-on: Getting YOLOv5 to Output Heatmaps (Grad-CAM)

Environment requirements



This guide was written against YOLOv5 release 7.0.

First, a look at the result (the sample heatmap images are not reproduced here); the results are for reference only.

Step 1:

Set up the YOLOv5 environment first; pip install -r requirements.txt is enough. My other posts describe the setup in detail, and the GPU/CUDA environment is up to you to configure.

Step 2:

Run YOLOv5 inference once to confirm everything works; you should get the usual detection output.

Step 3:

Add a file named main_gradcam.py in the project root directory. main_gradcam.py:

import os
import random
import time
import argparse
import numpy as np
from models.gradcam import YOLOV5GradCAM, YOLOV5GradCAMPP
from models.yolov5_object_detector import YOLOV5TorchObjectDetector
import cv2

# dataset class names
names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
         'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
         'teddy bear', 'hair drier', 'toothbrush']  # class names

# the three detection-head layers of the yolov5s network
target_layers = ['model_17_cv3_act', 'model_20_cv3_act', 'model_23_cv3_act']

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="yolov5s.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images/bus.jpg', help='input image path')
parser.add_argument('--output-dir', type=str, default='runs/result17', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true',
                    help='do not show label and box on the heatmap')
args = parser.parse_args()


def get_res_img(bbox, mask, res_img):
    mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(
        np.uint8)
    heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
    # n_heatmat = (Box.fill_outer_box(heatmap, bbox) / 255).astype(np.float32)
    n_heatmat = (heatmap / 255).astype(np.float32)
    res_img = res_img / 255
    res_img = cv2.add(res_img, n_heatmat)
    res_img = (res_img / res_img.max())
    return res_img, n_heatmat


def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # this is a bug in cv2. It does not put box on a converted image from torch unless it's buffered and read again!
    cv2.imwrite('temp.jpg', (img * 255).astype(np.uint8))
    img = cv2.imread('temp.jpg')

    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        outside = c1[1] - t_size[1] - 3 >= 0  # label fits outside box up
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 if outside else c1[1] + t_size[1] + 3
        outsize_right = c2[0] - img.shape[:2][1] > 0  # label fits outside box right
        c1 = c1[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c1[0], c1[1]
        c2 = c2[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c2[0], c2[1]
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2 if outside else c2[1] - 2), 0, tl / 3, [225, 255, 255],
                    thickness=tf, lineType=cv2.LINE_AA)
    return img


# run detection and Grad-CAM on a single image
def main(img_path):
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    device = args.device
    input_size = (args.img_size, args.img_size)
    # read the image (OpenCV loads BGR)
    img = cv2.imread(img_path)
    print('[INFO] Loading the model')
    # instantiate the YOLOv5 wrapper and get detections
    model = YOLOV5TorchObjectDetector(args.model_path, device, img_size=input_size, names=names)
    # img[..., ::-1]: BGR --> RGB
    # (480, 640, 3) --> (1, 3, 480, 640)
    torch_img = model.preprocessing(img[..., ::-1])
    tic = time.time()
    # iterate over the three detection layers
    for target_layer in target_layers:
        # choose the Grad-CAM variant
        if args.method == 'gradcam':
            saliency_method = YOLOV5GradCAM(model=model, layer_name=target_layer, img_size=input_size)
        elif args.method == 'gradcampp':
            saliency_method = YOLOV5GradCAMPP(model=model, layer_name=target_layer, img_size=input_size)
        masks, logits, [boxes, _, class_names, conf] = saliency_method(torch_img)  # run CAM and get predictions
        result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
        result = result[..., ::-1]  # convert to bgr
        # output settings (note: output_dir and the image name are concatenated directly)
        image_name = os.path.basename(img_path)  # image file name
        save_path = f'{args.output_dir}{image_name[:-4]}/{args.method}'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        print(f'[INFO] Saving the final image at {save_path}')
        # iterate over every detected object in the image
        for i, mask in enumerate(masks):
            res_img = result.copy()
            # box and class of this object
            bbox, cls_name = boxes[0][i], class_names[0][i]
            label = f'{cls_name}{conf[0][i]}'  # class + confidence score
            # overlay the heatmap for this object
            res_img, heat_map = get_res_img(bbox, mask, res_img)
            res_img = plot_one_box(bbox, res_img, label=label, color=colors[int(names.index(cls_name))],
                                   line_thickness=3)
            # resize back to the original image size
            res_img = cv2.resize(res_img, dsize=(img.shape[:-1][::-1]))
            output_path = f'{save_path}/{target_layer[6:8]}_{i}.jpg'
            cv2.imwrite(output_path, res_img)
            print(f'{target_layer[6:8]}_{i}.jpg done!!')
    print(f'Total time : {round(time.time() - tic, 4)} s')


if __name__ == '__main__':
    # the image path is a folder
    if os.path.isdir(args.img_path):
        img_list = os.listdir(args.img_path)
        print(img_list)
        for item in img_list:
            # build the full path of each image in the folder
            main(os.path.join(args.img_path, item))
    # single image
    else:
        main(args.img_path)
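The three entries in target_layers match the yolov5s head. If you use a different YOLOv5 variant or a custom model, the layer indices may differ, so here is a small sketch (not from the original post, and it assumes yolov5s.pt sits in the repo root) that lists candidate activation-layer names in the underscore form expected by the Grad-CAM code added in step 4:

from models.experimental import attempt_load

net = attempt_load('yolov5s.pt', inplace=False, fuse=False)
for name, _ in net.named_modules():
    if name.endswith('cv3.act'):       # C3 output activations; 17/20/23 are the head levels in yolov5s
        print(name.replace('.', '_'))  # e.g. model_17_cv3_act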

Step 4:

Add the following two .py files under the models folder: gradcam.py and yolov5_object_detector.py. gradcam.py:

import time
import torch
import torch.nn.functional as F


def find_yolo_layer(model, layer_name):
    """Find yolov5 layer to calculate GradCAM and GradCAM++

    Args:
        model: yolov5 model.
        layer_name (str): the name of layer with its hierarchical information.

    Return:
        target_layer: found layer
    """
    hierarchy = layer_name.split('_')
    target_layer = model.model._modules[hierarchy[0]]
    for h in hierarchy[1:]:
        target_layer = target_layer._modules[h]
    return target_layer


class YOLOV5GradCAM:

    # initialization: locate the target layer and register hooks on it
    def __init__(self, model, layer_name, img_size=(640, 640)):
        self.model = model
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            self.activations['value'] = output
            return None

        target_layer = find_yolo_layer(self.model, layer_name)
        # hook the layer so its forward activation and backward gradient are recorded
        target_layer.register_forward_hook(forward_hook)
        target_layer.register_full_backward_hook(backward_hook)

        device = 'cuda' if next(self.model.model.parameters()).is_cuda else 'cpu'
        self.model(torch.zeros(1, 3, *img_size, device=device))

    def forward(self, input_img, class_idx=True):
        """
        Args:
            input_img: input image with shape of (1, 3, H, W)
        Return:
            mask: saliency map of the same spatial dimension with input
            logit: model output
            preds: The object predictions
        """
        saliency_maps = []
        b, c, h, w = input_img.size()
        preds, logits = self.model(input_img)
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backward pass to obtain the gradients
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']
            activations = self.activations['value']
            b, k, u, v = gradients.size()
            alpha = gradients.view(b, k, -1).mean(2)
            weights = alpha.view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds

    def __call__(self, input_img):
        return self.forward(input_img)


class YOLOV5GradCAMPP(YOLOV5GradCAM):
    def __init__(self, model, layer_name, img_size=(640, 640)):
        super(YOLOV5GradCAMPP, self).__init__(model, layer_name, img_size)

    def forward(self, input_img, class_idx=True):
        saliency_maps = []
        b, c, h, w = input_img.size()
        tic = time.time()
        preds, logits = self.model(input_img)
        print("[INFO] model-forward took: ", round(time.time() - tic, 4), 'seconds')
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backward pass to obtain the gradients
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']  # dS/dA
            activations = self.activations['value']  # A
            b, k, u, v = gradients.size()
            alpha_num = gradients.pow(2)
            alpha_denom = gradients.pow(2).mul(2) + \
                          activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
            # torch.where(condition, x, y): keep alpha_denom where it is non-zero, otherwise use 1
            alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom, torch.ones_like(alpha_denom))
            alpha = alpha_num.div(alpha_denom + 1e-7)
            positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S)*dS/dA))
            weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds
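For orientation (this summary is mine, not part of the original post): with A^k the activations of the hooked layer and y^c the raw class score that is back-propagated, the two classes above follow the published formulas. Grad-CAM global-average-pools the gradients:

\alpha_k^c = \frac{1}{Z}\sum_{i,j}\frac{\partial y^c}{\partial A^k_{ij}}, \qquad
L^c = \mathrm{ReLU}\Big(\sum_k \alpha_k^c A^k\Big)

Grad-CAM++ instead uses per-pixel weights and positive gradients of exp(y^c), which is why the code multiplies by score.exp():

\alpha^{kc}_{ij} = \frac{\big(\partial y^c/\partial A^k_{ij}\big)^2}
{2\big(\partial y^c/\partial A^k_{ij}\big)^2 + \sum_{a,b} A^k_{ab}\big(\partial y^c/\partial A^k_{ab}\big)^3}, \qquad
w_k^c = \sum_{i,j}\alpha^{kc}_{ij}\,\mathrm{ReLU}\Big(\frac{\partial e^{y^c}}{\partial A^k_{ij}}\Big)

In both cases the map is ReLU'd, bilinearly upsampled to the input size, and min-max normalised before being returned.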

yolov5_object_detector.py:

import numpy as np
import torch
from models.experimental import attempt_load
from utils.general import xywh2xyxy
from utils.dataloaders import letterbox
import cv2
import time
import torchvision
import torch.nn as nn
from utils.metrics import box_iou


class YOLOV5TorchObjectDetector(nn.Module):
    def __init__(self, model_weight, device, img_size, names=None, mode='eval',
                 confidence=0.45, iou_thresh=0.45, agnostic_nms=False):
        super(YOLOV5TorchObjectDetector, self).__init__()
        self.device = device
        self.model = None
        self.img_size = img_size
        self.mode = mode
        self.confidence = confidence
        self.iou_thresh = iou_thresh
        self.agnostic = agnostic_nms
        self.model = attempt_load(model_weight, inplace=False, fuse=False)
        self.model.requires_grad_(True)
        self.model.to(device)
        if self.mode == 'train':
            self.model.train()
        else:
            self.model.eval()
        # fetch the names
        if names is None:
            self.names = ['your dataset classname']
        else:
            self.names = names

        # preventing cold start
        img = torch.zeros((1, 3, *self.img_size), device=device)
        self.model(img)

    @staticmethod
    def non_max_suppression(prediction, logits, conf_thres=0.3, iou_thres=0.45, classes=None, agnostic=False,
                            multi_label=False, labels=(), max_det=300):
        """Runs Non-Maximum Suppression (NMS) on inference and logits results

        Returns:
             list of detections, on (n,6) tensor per image [xyxy, conf, cls] and pruned input logits (n, number-classes)
        """
        nc = prediction.shape[2] - 5  # number of classes
        xc = prediction[..., 4] > conf_thres  # candidates

        # Checks
        assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
        assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
        time_limit = 10.0  # seconds to quit after
        redundant = True  # require redundant detections
        multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
        merge = False  # use merge-NMS

        t = time.time()
        output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
        logits_output = [torch.zeros((0, nc), device=logits.device)] * logits.shape[0]
        # logits_output = [torch.zeros((0, 80), device=logits.device)] * logits.shape[0]
        for xi, (x, log_) in enumerate(zip(prediction, logits)):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            log_ = log_[xc[xi]]

            # Cat apriori labels if autolabelling
            if labels and len(labels[xi]):
                l = labels[xi]
                v = torch.zeros((len(l), nc + 5), device=x.device)
                v[:, :4] = l[:, 1:5]  # box
                v[:, 4] = 1.0  # conf
                v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
                x = torch.cat((x, v), 0)

            # If none remain process next image
            if not x.shape[0]:
                continue

            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = xywh2xyxy(x[:, :4])

            # Detections matrix nx6 (xyxy, conf, cls)
            if multi_label:
                i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
                x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
            else:  # best class only
                conf, j = x[:, 5:].max(1, keepdim=True)
                x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
                log_ = log_[conf.view(-1) > conf_thres]

            # Filter by class
            if classes is not None:
                x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
            if i.shape[0] > max_det:  # limit detections
                i = i[:max_det]
            if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy

            output[xi] = x[i]
            logits_output[xi] = log_[i]
            assert log_[i].shape[0] == x[i].shape[0]
            if (time.time() - t) > time_limit:
                print(f'WARNING: NMS time limit {time_limit}s exceeded')
                break  # time limit exceeded

        return output, logits_output

    @staticmethod
    def yolo_resize(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
        return letterbox(img, new_shape=new_shape, color=color, auto=auto, scaleFill=scaleFill, scaleup=scaleup)

    def forward(self, img):
        prediction, logits, _ = self.model(img, augment=False)
        prediction, logits = self.non_max_suppression(prediction, logits, self.confidence, self.iou_thresh,
                                                      classes=None, agnostic=self.agnostic)
        self.boxes, self.class_names, self.classes, self.confidences = [[[] for _ in range(img.shape[0])]
                                                                        for _ in range(4)]
        for i, det in enumerate(prediction):  # detections per image
            if len(det):
                for *xyxy, conf, cls in det:
                    # store integer box coordinates
                    bbox = [int(b) for b in xyxy]
                    self.boxes[i].append(bbox)
                    self.confidences[i].append(round(conf.item(), 2))
                    cls = int(cls.item())
                    self.classes[i].append(cls)
                    if self.names is not None:
                        self.class_names[i].append(self.names[cls])
                    else:
                        self.class_names[i].append(cls)
        return [self.boxes, self.classes, self.class_names, self.confidences], logits

    def preprocessing(self, img):
        if len(img.shape) != 4:
            img = np.expand_dims(img, axis=0)
        im0 = img.astype(np.uint8)
        img = np.array([self.yolo_resize(im, new_shape=self.img_size)[0] for im in im0])
        img = img.transpose((0, 3, 1, 2))
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img / 255.0
        return img
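The wrapper can also be used on its own. Below is a minimal sketch of mine (not from the original post); it only works after the Detect.forward change in step 5, it assumes yolov5s.pt and data/images/bus.jpg exist in the repo, and it uses placeholder class names (swap in the COCO list from main_gradcam.py for readable labels):

import cv2
import torch
from models.yolov5_object_detector import YOLOV5TorchObjectDetector

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# placeholder names: class indices as strings
detector = YOLOV5TorchObjectDetector('yolov5s.pt', device, img_size=(640, 640),
                                     names=[str(i) for i in range(80)])
img = cv2.imread('data/images/bus.jpg')              # BGR
torch_img = detector.preprocessing(img[..., ::-1])   # BGR -> RGB, letterbox, NCHW, scale to [0, 1]
[boxes, classes, class_names, confidences], logits = detector(torch_img)
print(boxes[0], classes[0], confidences[0])          # per-image lists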

Step 5:

Edit models/yolo.py; specifically, the forward function of the Detect class:

    def forward(self, x):
        z = []  # inference output
        logits_ = []  # modification 1
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                logits = x[i][..., 5:]  # modification 2

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))
                logits_.append(logits.view(bs, -1, self.no - 5))  # modification 3

        # return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
        return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # modification 4
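A quick way to confirm the edit took effect (a sketch of mine, run from the repo root and assuming the stock yolov5s.pt weights): in eval mode the model should now return three items, the decoded predictions, the raw per-anchor class logits, and the per-level feature maps.

import torch
from models.experimental import attempt_load

model = attempt_load('yolov5s.pt', inplace=False, fuse=False)
with torch.no_grad():
    pred, logits, feats = model(torch.zeros(1, 3, 640, 640))
print(pred.shape)    # expected: torch.Size([1, 25200, 85])
print(logits.shape)  # expected: torch.Size([1, 25200, 80])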

In case it is unclear how to apply the change, the full modified yolo.py is included below. yolo.py:

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
YOLO-specific modules

Usage:
    $ python models/yolo.py --cfg yolov5s.yaml
"""

import argparse
import contextlib
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if platform.system() != 'Windows':
    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
                               time_sync)

try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None


class Detect(nn.Module):
    # YOLOv5 Detect head for detection models
    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)

    def forward(self, x):
        z = []  # inference output
        logits_ = []  # modification 1
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                logits = x[i][..., 5:]  # modification 2

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))
                logits_.append(logits.view(bs, -1, self.no - 5))  # modification 3

        # return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
        return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # modification 4

    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
        yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compatibility
        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
        return grid, anchor_grid


class Segment(Detect):
    # YOLOv5 Segment head for segmentation models
    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
        super().__init__(nc, anchors, ch, inplace)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # number of outputs per anchor
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
        self.detect = Detect.forward

    def forward(self, x):
        p = self.proto(x[0])
        x = self.detect(self, x)
        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])


class BaseModel(nn.Module):
    # YOLOv5 base model
    def forward(self, x, profile=False, visualize=False):
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_once(self, x, profile=False, visualize=False):
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            if profile:
                self._profile_one_layer(m, x, dt)
            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output
            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)
        return x

    def _profile_one_layer(self, m, x, dt):
        c = m == self.model[-1]  # is final layer, copy input as inplace fix
        o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  module")
        LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}')
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        LOGGER.info('Fusing layers... ')
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
        self.info()
        return self

    def info(self, verbose=False, img_size=640):  # print model information
        model_info(self, verbose, img_size)

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self


class DetectionModel(BaseModel):
    # YOLOv5 detection model
    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg, encoding='ascii', errors='ignore') as f:
                self.yaml = yaml.safe_load(f)  # model dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        self.inplace = self.yaml.get('inplace', True)

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
            check_anchor_order(m)
            m.anchors /= m.stride.view(-1, 1, 1)
            self.stride = m.stride
            self._initialize_biases()  # only run once

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info('')

    def forward(self, x, augment=False, profile=False, visualize=False):
        if augment:
            return self._forward_augment(x)  # augmented inference, None
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_augment(self, x):
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self._forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        y = self._clip_augmented(y)  # clip augmented tails
        return torch.cat(y, 1), None  # augmented inference, train

    def _descale_pred(self, p, flips, scale, img_size):
        # de-scale predictions following augmented inference (inverse operation)
        if self.inplace:
            p[..., :4] /= scale  # de-scale
            if flips == 2:
                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
            elif flips == 3:
                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
        else:
            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
            if flips == 2:
                y = img_size[0] - y  # de-flip ud
            elif flips == 3:
                x = img_size[1] - x  # de-flip lr
            p = torch.cat((x, y, wh, p[..., 4:]), -1)
        return p

    def _clip_augmented(self, y):
        # Clip YOLOv5 augmented inference tails
        nl = self.model[-1].nl  # number of detection layers (P3-P5)
        g = sum(4 ** x for x in range(nl))  # grid points
        e = 1  # exclude layer count
        i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e))  # indices
        y[0] = y[0][:, :-i]  # large
        i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
        y[-1] = y[-1][:, i:]  # small
        return y

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)


Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility


class SegmentationModel(DetectionModel):
    # YOLOv5 segmentation model
    def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
        super().__init__(cfg, ch, nc, anchors)


class ClassificationModel(BaseModel):
    # YOLOv5 classification model
    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
        super().__init__()
        self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)

    def _from_detection_model(self, model, nc=1000, cutoff=10):
        # Create a YOLOv5 classification model from a YOLOv5 detection model
        if isinstance(model, DetectMultiBackend):
            model = model.model  # unwrap DetectMultiBackend
        model.model = model.model[:cutoff]  # backbone
        m = model.model[-1]  # last layer
        ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels  # ch into module
        c = Classify(ch, nc)  # Classify()
        c.i, c.f, c.type = m.i, m.f, 'models.common.Classify'  # index, from, type
        model.model[-1] = c  # replace
        self.model = model.model
        self.stride = model.stride
        self.save = []
        self.nc = nc

    def _from_yaml(self, cfg):
        # Create a YOLOv5 classification model from a *.yaml file
        self.model = None


def parse_model(d, ch):  # model_dict, input_channels(3)
    # Parse a YOLOv5 model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
                Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect, Segment}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, 8)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f}  {t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--profile', action='store_true', help='profile model speed')
    parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
    opt = parser.parse_args()
    opt.cfg = check_yaml(opt.cfg)  # check YAML
    print_args(vars(opt))
    device = select_device(opt.device)

    # Create model
    im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
    model = Model(opt.cfg).to(device)

    # Options
    if opt.line_profile:  # profile layer by layer
        model(im, profile=True)

    elif opt.profile:  # profile forward-backward
        results = profile(input=im, ops=[model], n=3)

    elif opt.test:  # test all models
        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
            try:
                _ = Model(cfg)
            except Exception as e:
                print(f'Error in {cfg}: {e}')

    else:  # report fused model summary
        model.fuse()

Step 6:

Run main_gradcam.py, for example: python main_gradcam.py --model-path yolov5s.pt --img-path data/images/bus.jpg --method gradcam (use --method gradcampp for Grad-CAM++). The argument list below can be adjusted as needed.

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="yolov5s.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images/bus.jpg', help='input image path')
parser.add_argument('--output-dir', type=str, default='runs/result17', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true',
                    help='do not show label and box on the heatmap')
args = parser.parse_args()

Done.
