
Step-by-Step: Generating Heatmaps from YOLOv5

Environment requirements



I am using YOLOv5 7.0.

First, a look at the result:

(The result is for reference only.)

Step 1

First, set up the YOLOv5 environment; running pip install -r requirements.txt inside the repository is enough. My other posts cover the environment setup in detail; configure the GPU environment (CUDA driver and toolkit) yourself.
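A quick way to confirm that PyTorch can actually see the GPU before moving on (a minimal check, assuming PyTorch is already installed):

```python
# Minimal environment sanity check: verify the PyTorch install and CUDA availability.
import torch

print(torch.__version__)                  # installed PyTorch version
print(torch.cuda.is_available())          # True if a usable GPU + CUDA build is present
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # name of the first GPU
```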

Step 2

Run YOLOv5 once (for example, python detect.py with the default weights) and make sure detection works. Sample output:
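If you prefer to smoke-test from Python rather than the command line, a sketch like the following should work from the repository root (run it before applying the later modifications to models/yolo.py, since those change the Detect head's return signature):

```python
# Quick detection smoke test using the repo's local hubconf.py (assumes this is run
# from the YOLOv5 root; yolov5s.pt is downloaded automatically if it is missing).
import torch

model = torch.hub.load('.', 'yolov5s', source='local')
results = model('data/images/bus.jpg')
results.print()  # prints detected classes and confidences
```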

Step 3

Add a file named main_gradcam.py in the project root folder:
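The script below applies Grad-CAM to the three Detect-head activation layers of yolov5s (model_17_cv3_act, model_20_cv3_act, model_23_cv3_act). If you are working with a different model and are unsure of the layer names, you can print the module hierarchy first — a minimal sketch, assuming a local checkpoint:

```python
# List candidate layer names of a YOLOv5 checkpoint. Dotted names such as
# 'model.17.cv3.act' correspond to the underscore-separated names used in
# main_gradcam.py (e.g. 'model_17_cv3_act'); for yolov5s the Detect-head
# activations are model.17/20/23.cv3.act.
from models.experimental import attempt_load

model = attempt_load('yolov5s.pt', inplace=False, fuse=False)
for name, module in model.named_modules():
    if name.endswith('cv3.act'):
        print(name)
```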

```python
import os
import random
import time
import argparse
import numpy as np
from models.gradcam import YOLOV5GradCAM, YOLOV5GradCAMPP
from models.yolov5_object_detector import YOLOV5TorchObjectDetector
import cv2

# Dataset class names
names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
         'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
         'teddy bear', 'hair drier', 'toothbrush']  # class names

# The three Detect layers of the yolov5s network
target_layers = ['model_17_cv3_act', 'model_20_cv3_act', 'model_23_cv3_act']

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="yolov5s.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images/bus.jpg', help='input image path')
parser.add_argument('--output-dir', type=str, default='runs/result17', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true',
                    help='do not show label and box on the heatmap')
args = parser.parse_args()


def get_res_img(bbox, mask, res_img):
    mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(
        np.uint8)
    heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
    # n_heatmat = (Box.fill_outer_box(heatmap, bbox) / 255).astype(np.float32)
    n_heatmat = (heatmap / 255).astype(np.float32)
    res_img = res_img / 255
    res_img = cv2.add(res_img, n_heatmat)
    res_img = (res_img / res_img.max())
    return res_img, n_heatmat


def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # this is a bug in cv2. It does not put box on a converted image from torch unless it's buffered and read again!
    cv2.imwrite('temp.jpg', (img * 255).astype(np.uint8))
    img = cv2.imread('temp.jpg')

    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        outside = c1[1] - t_size[1] - 3 >= 0  # label fits outside box up
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 if outside else c1[1] + t_size[1] + 3
        outsize_right = c2[0] - img.shape[:2][1] > 0  # label fits outside box right
        c1 = c1[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c1[0], c1[1]
        c2 = c2[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c2[0], c2[1]
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2 if outside else c2[1] - 2), 0, tl / 3, [225, 255, 255],
                    thickness=tf, lineType=cv2.LINE_AA)
    return img


# Run Grad-CAM on a single image
def main(img_path):
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    device = args.device
    input_size = (args.img_size, args.img_size)
    # Read the image (OpenCV loads it as BGR)
    img = cv2.imread(img_path)
    print('[INFO] Loading the model')
    # Instantiate the YOLOv5 wrapper that produces the detections
    model = YOLOV5TorchObjectDetector(args.model_path, device, img_size=input_size, names=names)
    # img[..., ::-1]: BGR --> RGB
    # (480, 640, 3) --> (1, 3, 480, 640)
    torch_img = model.preprocessing(img[..., ::-1])
    tic = time.time()
    # Loop over the three detection layers
    for target_layer in target_layers:
        # Select the grad-cam method
        if args.method == 'gradcam':
            saliency_method = YOLOV5GradCAM(model=model, layer_name=target_layer, img_size=input_size)
        elif args.method == 'gradcampp':
            saliency_method = YOLOV5GradCAMPP(model=model, layer_name=target_layer, img_size=input_size)
        masks, logits, [boxes, _, class_names, conf] = saliency_method(torch_img)
        # Recover the prediction image from the network input
        result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
        result = result[..., ::-1]  # convert to bgr
        # Output settings
        image_name = os.path.basename(img_path)  # image file name
        save_path = f'{args.output_dir}{image_name[:-4]}/{args.method}'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        print(f'[INFO] Saving the final image at {save_path}')
        # Loop over every detected object in the image
        for i, mask in enumerate(masks):
            res_img = result.copy()
            # Box position and class of the current object
            bbox, cls_name = boxes[0][i], class_names[0][i]
            label = f'{cls_name}{conf[0][i]}'  # class + confidence score
            # Heatmap for this object
            res_img, heat_map = get_res_img(bbox, mask, res_img)
            res_img = plot_one_box(bbox, res_img, label=label, color=colors[int(names.index(cls_name))],
                                   line_thickness=3)
            # Resize back to the original image size
            res_img = cv2.resize(res_img, dsize=(img.shape[:-1][::-1]))
            output_path = f'{save_path}/{target_layer[6:8]}_{i}.jpg'
            cv2.imwrite(output_path, res_img)
            print(f'{target_layer[6:8]}_{i}.jpg done!!')
    print(f'Total time : {round(time.time() - tic, 4)} s')


if __name__ == '__main__':
    # If the image path is a folder, process every image in it
    if os.path.isdir(args.img_path):
        img_list = os.listdir(args.img_path)
        print(img_list)
        for item in img_list:
            # Build the full path of each image in the folder
            main(os.path.join(args.img_path, item))
    # Single image
    else:
        main(args.img_path)
```

Step 4

Add the following two files under the models folder: gradcam.py and yolov5_object_detector.py. gradcam.py is given first.
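Before the code, a one-line summary of what gradcam.py computes. For a detection with class score $y^c$ and the chosen layer's activation maps $A^k$, the gradients are global-average-pooled into per-channel weights, and the weighted activations are summed and rectified (the standard Grad-CAM formulation; Grad-CAM++ differs only in how the weights are computed):

$$\alpha_k^c = \frac{1}{Z}\sum_i\sum_j \frac{\partial y^c}{\partial A^k_{ij}},\qquad
L^c_{\mathrm{Grad\text{-}CAM}} = \mathrm{ReLU}\Big(\sum_k \alpha_k^c A^k\Big)$$

This matches the code below, where alpha is the spatial mean of the gradients and the saliency map is the ReLU of the weighted activation sum, upsampled to the input size.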

```python
import time
import torch
import torch.nn.functional as F


def find_yolo_layer(model, layer_name):
    """Find yolov5 layer to calculate GradCAM and GradCAM++

    Args:
        model: yolov5 model.
        layer_name (str): the name of layer with its hierarchical information.

    Return:
        target_layer: found layer
    """
    hierarchy = layer_name.split('_')
    target_layer = model.model._modules[hierarchy[0]]
    for h in hierarchy[1:]:
        target_layer = target_layer._modules[h]
    return target_layer


class YOLOV5GradCAM:

    # Initialization: locate the target layer and register hooks on it
    def __init__(self, model, layer_name, img_size=(640, 640)):
        self.model = model
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            self.activations['value'] = output
            return None

        target_layer = find_yolo_layer(self.model, layer_name)
        # Capture the layer's forward output and its gradient so we can check the hooks recorded correctly
        target_layer.register_forward_hook(forward_hook)
        target_layer.register_full_backward_hook(backward_hook)

        device = 'cuda' if next(self.model.model.parameters()).is_cuda else 'cpu'
        self.model(torch.zeros(1, 3, *img_size, device=device))

    def forward(self, input_img, class_idx=True):
        """
        Args:
            input_img: input image with shape of (1, 3, H, W)
        Return:
            mask: saliency map of the same spatial dimension with input
            logit: model output
            preds: The object predictions
        """
        saliency_maps = []
        b, c, h, w = input_img.size()
        preds, logits = self.model(input_img)
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # Backpropagate to obtain the gradients
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']
            activations = self.activations['value']
            b, k, u, v = gradients.size()
            alpha = gradients.view(b, k, -1).mean(2)
            weights = alpha.view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds

    def __call__(self, input_img):
        return self.forward(input_img)


class YOLOV5GradCAMPP(YOLOV5GradCAM):
    def __init__(self, model, layer_name, img_size=(640, 640)):
        super(YOLOV5GradCAMPP, self).__init__(model, layer_name, img_size)

    def forward(self, input_img, class_idx=True):
        saliency_maps = []
        b, c, h, w = input_img.size()
        tic = time.time()
        preds, logits = self.model(input_img)
        print("[INFO] model-forward took: ", round(time.time() - tic, 4), 'seconds')
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # Backpropagate to obtain the gradients
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']  # dS/dA
            activations = self.activations['value']  # A
            b, k, u, v = gradients.size()
            alpha_num = gradients.pow(2)
            alpha_denom = gradients.pow(2).mul(2) + \
                          activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
            # torch.where(condition, x, y): return x where the condition holds, otherwise y
            alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom, torch.ones_like(alpha_denom))
            alpha = alpha_num.div(alpha_denom + 1e-7)
            positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S)*dS/dA))
            weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds
```
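The class hinges on PyTorch forward and full-backward hooks to capture the target layer's activations and gradients during a single forward/backward pass. A self-contained sketch of just that mechanism (plain PyTorch, not tied to YOLOv5):

```python
# Demonstrates the hook pattern used by YOLOV5GradCAM: a forward hook stores the
# layer's activations, a full backward hook stores the gradient w.r.t. its output.
import torch
import torch.nn as nn

activations, gradients = {}, {}
layer = nn.Conv2d(3, 8, kernel_size=3, padding=1)

layer.register_forward_hook(lambda module, inp, out: activations.update(value=out))
layer.register_full_backward_hook(lambda module, grad_in, grad_out: gradients.update(value=grad_out[0]))

x = torch.randn(1, 3, 32, 32, requires_grad=True)
score = layer(x).sum()   # any scalar score works for the demo
score.backward()
print(activations['value'].shape, gradients['value'].shape)  # both torch.Size([1, 8, 32, 32])
```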

The code of yolov5_object_detector.py is as follows:

```python
import numpy as np
import torch
from models.experimental import attempt_load
from utils.general import xywh2xyxy
from utils.dataloaders import letterbox
import cv2
import time
import torchvision
import torch.nn as nn
from utils.metrics import box_iou


class YOLOV5TorchObjectDetector(nn.Module):
    def __init__(self,
                 model_weight,
                 device,
                 img_size,
                 names=None,
                 mode='eval',
                 confidence=0.45,
                 iou_thresh=0.45,
                 agnostic_nms=False):
        super(YOLOV5TorchObjectDetector, self).__init__()
        self.device = device
        self.model = None
        self.img_size = img_size
        self.mode = mode
        self.confidence = confidence
        self.iou_thresh = iou_thresh
        self.agnostic = agnostic_nms
        self.model = attempt_load(model_weight, inplace=False, fuse=False)
        self.model.requires_grad_(True)
        self.model.to(device)
        if self.mode == 'train':
            self.model.train()
        else:
            self.model.eval()
        # fetch the names
        if names is None:
            self.names = ['your dataset classname']
        else:
            self.names = names
        # preventing cold start
        img = torch.zeros((1, 3, *self.img_size), device=device)
        self.model(img)

    @staticmethod
    def non_max_suppression(prediction, logits, conf_thres=0.3, iou_thres=0.45, classes=None, agnostic=False,
                            multi_label=False, labels=(), max_det=300):
        """Runs Non-Maximum Suppression (NMS) on inference and logits results

        Returns:
             list of detections, on (n,6) tensor per image [xyxy, conf, cls] and pruned input logits (n, number-classes)
        """
        nc = prediction.shape[2] - 5  # number of classes
        xc = prediction[..., 4] > conf_thres  # candidates

        # Checks
        assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
        assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
        time_limit = 10.0  # seconds to quit after
        redundant = True  # require redundant detections
        multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
        merge = False  # use merge-NMS

        t = time.time()
        output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
        logits_output = [torch.zeros((0, nc), device=logits.device)] * logits.shape[0]
        # logits_output = [torch.zeros((0, 80), device=logits.device)] * logits.shape[0]
        for xi, (x, log_) in enumerate(zip(prediction, logits)):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            log_ = log_[xc[xi]]

            # Cat apriori labels if autolabelling
            if labels and len(labels[xi]):
                l = labels[xi]
                v = torch.zeros((len(l), nc + 5), device=x.device)
                v[:, :4] = l[:, 1:5]  # box
                v[:, 4] = 1.0  # conf
                v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
                x = torch.cat((x, v), 0)

            # If none remain process next image
            if not x.shape[0]:
                continue

            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = xywh2xyxy(x[:, :4])

            # Detections matrix nx6 (xyxy, conf, cls)
            if multi_label:
                i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
                x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
            else:  # best class only
                conf, j = x[:, 5:].max(1, keepdim=True)
                x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
                log_ = log_[conf.view(-1) > conf_thres]

            # Filter by class
            if classes is not None:
                x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
            if i.shape[0] > max_det:  # limit detections
                i = i[:max_det]
            if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy

            output[xi] = x[i]
            logits_output[xi] = log_[i]
            assert log_[i].shape[0] == x[i].shape[0]
            if (time.time() - t) > time_limit:
                print(f'WARNING: NMS time limit {time_limit}s exceeded')
                break  # time limit exceeded

        return output, logits_output

    @staticmethod
    def yolo_resize(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
        return letterbox(img, new_shape=new_shape, color=color, auto=auto, scaleFill=scaleFill, scaleup=scaleup)

    def forward(self, img):
        prediction, logits, _ = self.model(img, augment=False)
        prediction, logits = self.non_max_suppression(prediction, logits, self.confidence, self.iou_thresh,
                                                      classes=None, agnostic=self.agnostic)
        self.boxes, self.class_names, self.classes, self.confidences = [[[] for _ in range(img.shape[0])] for _ in
                                                                        range(4)]
        for i, det in enumerate(prediction):  # detections per image
            if len(det):
                for *xyxy, conf, cls in det:
                    # Convert box coordinates to integers
                    bbox = [int(b) for b in xyxy]
                    self.boxes[i].append(bbox)
                    self.confidences[i].append(round(conf.item(), 2))
                    cls = int(cls.item())
                    self.classes[i].append(cls)
                    if self.names is not None:
                        self.class_names[i].append(self.names[cls])
                    else:
                        self.class_names[i].append(cls)
        return [self.boxes, self.classes, self.class_names, self.confidences], logits

    def preprocessing(self, img):
        if len(img.shape) != 4:
            img = np.expand_dims(img, axis=0)
        im0 = img.astype(np.uint8)
        img = np.array([self.yolo_resize(im, new_shape=self.img_size)[0] for im in im0])
        img = img.transpose((0, 3, 1, 2))
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img / 255.0
        return img
```

Step 5

Modify models/yolo.py.

Specifically, modify the forward function of the Detect class:

```python
    def forward(self, x):
        z = []  # inference output
        logits_ = []  # --- modification 1: container for the raw class logits
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                logits = x[i][..., 5:]  # --- modification 2: class scores before sigmoid

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))
                logits_.append(logits.view(bs, -1, self.no - 5))  # --- modification 3: flatten and collect the logits

        # return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
        return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # --- modification 4: also return the logits
```
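After this change, a model loaded through attempt_load returns three things at inference time: the decoded predictions, the raw per-class logits, and the per-scale feature maps. A quick sanity check (a sketch, assuming yolov5s.pt sits in the repository root and the modification above has been applied):

```python
# Verify the modified Detect head: in eval mode it now returns
# (predictions, class logits, per-scale feature maps).
import torch
from models.experimental import attempt_load

model = attempt_load('yolov5s.pt', inplace=False, fuse=False).eval()
pred, logits, feats = model(torch.zeros(1, 3, 640, 640))
print(pred.shape)    # e.g. torch.Size([1, 25200, 85]) for 80 classes
print(logits.shape)  # e.g. torch.Size([1, 25200, 80]) -- raw class scores before sigmoid
print(len(feats))    # 3 detection scales
```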

In case you are unsure how to modify yolo.py, the full modified file is given below. yolo.py:

```python
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
YOLO-specific modules

Usage:
    $ python models/yolo.py --cfg yolov5s.yaml
"""

import argparse
import contextlib
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if platform.system() != 'Windows':
    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
                               time_sync)

try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None


class Detect(nn.Module):
    # YOLOv5 Detect head for detection models
    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)

    def forward(self, x):
        z = []  # inference output
        logits_ = []  # --- modification 1: container for the raw class logits
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                logits = x[i][..., 5:]  # --- modification 2: class scores before sigmoid

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))
                logits_.append(logits.view(bs, -1, self.no - 5))  # --- modification 3: flatten and collect the logits

        # return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
        return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # --- modification 4: also return the logits

    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
        yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compatibility
        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
        return grid, anchor_grid


class Segment(Detect):
    # YOLOv5 Segment head for segmentation models
    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
        super().__init__(nc, anchors, ch, inplace)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # number of outputs per anchor
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
        self.detect = Detect.forward

    def forward(self, x):
        p = self.proto(x[0])
        x = self.detect(self, x)
        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])


class BaseModel(nn.Module):
    # YOLOv5 base model
    def forward(self, x, profile=False, visualize=False):
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_once(self, x, profile=False, visualize=False):
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            if profile:
                self._profile_one_layer(m, x, dt)
            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output
            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)
        return x

    def _profile_one_layer(self, m, x, dt):
        c = m == self.model[-1]  # is final layer, copy input as inplace fix
        o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s}{'GFLOPs':>10s}{'params':>10s} module")
        LOGGER.info(f'{dt[-1]:10.2f}{o:10.2f}{m.np:10.0f}{m.type}')
        if c:
            LOGGER.info(f"{sum(dt):10.2f}{'-':>10s}{'-':>10s} Total")

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        LOGGER.info('Fusing layers... ')
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
        self.info()
        return self

    def info(self, verbose=False, img_size=640):  # print model information
        model_info(self, verbose, img_size)

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self


class DetectionModel(BaseModel):
    # YOLOv5 detection model
    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg, encoding='ascii', errors='ignore') as f:
                self.yaml = yaml.safe_load(f)  # model dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        self.inplace = self.yaml.get('inplace', True)

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
            check_anchor_order(m)
            m.anchors /= m.stride.view(-1, 1, 1)
            self.stride = m.stride
            self._initialize_biases()  # only run once

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info('')

    def forward(self, x, augment=False, profile=False, visualize=False):
        if augment:
            return self._forward_augment(x)  # augmented inference, None
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_augment(self, x):
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self._forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        y = self._clip_augmented(y)  # clip augmented tails
        return torch.cat(y, 1), None  # augmented inference, train

    def _descale_pred(self, p, flips, scale, img_size):
        # de-scale predictions following augmented inference (inverse operation)
        if self.inplace:
            p[..., :4] /= scale  # de-scale
            if flips == 2:
                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
            elif flips == 3:
                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
        else:
            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
            if flips == 2:
                y = img_size[0] - y  # de-flip ud
            elif flips == 3:
                x = img_size[1] - x  # de-flip lr
            p = torch.cat((x, y, wh, p[..., 4:]), -1)
        return p

    def _clip_augmented(self, y):
        # Clip YOLOv5 augmented inference tails
        nl = self.model[-1].nl  # number of detection layers (P3-P5)
        g = sum(4 ** x for x in range(nl))  # grid points
        e = 1  # exclude layer count
        i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e))  # indices
        y[0] = y[0][:, :-i]  # large
        i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
        y[-1] = y[-1][:, i:]  # small
        return y

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)


Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility


class SegmentationModel(DetectionModel):
    # YOLOv5 segmentation model
    def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
        super().__init__(cfg, ch, nc, anchors)


class ClassificationModel(BaseModel):
    # YOLOv5 classification model
    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
        super().__init__()
        self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)

    def _from_detection_model(self, model, nc=1000, cutoff=10):
        # Create a YOLOv5 classification model from a YOLOv5 detection model
        if isinstance(model, DetectMultiBackend):
            model = model.model  # unwrap DetectMultiBackend
        model.model = model.model[:cutoff]  # backbone
        m = model.model[-1]  # last layer
        ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels  # ch into module
        c = Classify(ch, nc)  # Classify()
        c.i, c.f, c.type = m.i, m.f, 'models.common.Classify'  # index, from, type
        model.model[-1] = c  # replace
        self.model = model.model
        self.stride = model.stride
        self.save = []
        self.nc = nc

    def _from_yaml(self, cfg):
        # Create a YOLOv5 classification model from a *.yaml file
        self.model = None


def parse_model(d, ch):  # model_dict, input_channels(3)
    # Parse a YOLOv5 model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}{'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')}{act}")  # print
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
                Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect, Segment}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, 8)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f}{t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--profile', action='store_true', help='profile model speed')
    parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
    opt = parser.parse_args()
    opt.cfg = check_yaml(opt.cfg)  # check YAML
    print_args(vars(opt))
    device = select_device(opt.device)

    # Create model
    im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
    model = Model(opt.cfg).to(device)

    # Options
    if opt.line_profile:  # profile layer by layer
        model(im, profile=True)

    elif opt.profile:  # profile forward-backward
        results = profile(input=im, ops=[model], n=3)

    elif opt.test:  # test all models
        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
            try:
                _ = Model(cfg)
            except Exception as e:
                print(f'Error in {cfg}: {e}')

    else:  # report fused model summary
        model.fuse()
```

Step 6

Run main_gradcam.py. The argument list can be modified to suit your needs:

```python
# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="yolov5s.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images/bus.jpg', help='input image path')
parser.add_argument('--output-dir', type=str, default='runs/result17', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true',
                    help='do not show label and box on the heatmap')
args = parser.parse_args()
```

Done.

