diff --git a/.infer.py.swp b/.infer.py.swp new file mode 100644 index 000000000..025782281 Binary files /dev/null and b/.infer.py.swp differ diff --git a/README.md b/README.md index 0256fac99..dd2d2fc5a 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ A [PyTorch](http://pytorch.org/) implementation of [Single Shot MultiBox Detecto +### OK!!!! + this is my own branch + ### Table of Contents - Installation - Datasets diff --git a/data/1.jpg b/data/1.jpg new file mode 100644 index 000000000..fd3c47c57 Binary files /dev/null and b/data/1.jpg differ diff --git a/data/2.jpg b/data/2.jpg new file mode 100644 index 000000000..cace078d8 Binary files /dev/null and b/data/2.jpg differ diff --git a/data/coco.py b/data/coco.py index 765531761..4b331c905 100644 --- a/data/coco.py +++ b/data/coco.py @@ -29,7 +29,6 @@ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') - def get_label_map(label_file): label_map = {} labels = open(label_file, 'r') @@ -83,7 +82,7 @@ class COCODetection(data.Dataset): in the target (bbox) and transforms it. 
"""Run one forward pass of a trained SSD300 network on a single image.

The script loads ``data/1.jpg``, resizes it to the 300x300 network input,
restores the checkpoint named by ``weights`` and prints the shape of the
network output.
"""

import cv2
import torch

from ssd import build_ssd

# Number of classes the checkpoint was trained with.
num_classes = 81
image_path = "data/1.jpg"
# cv2.imread does not raise on failure; it returns None, checked below.
image = cv2.imread(image_path)
weights = "weights/ssd300_COCO_10000.pth"


def get_features_hook(self, input, output):
    """Print the shape of ``output``.

    The (module, input, output) signature matches what
    ``torch.nn.Module.register_forward_hook`` passes to a callback.
    NOTE(review): nothing in this script registers the hook yet.
    """
    print("hooks ", output.data.cpu().numpy().shape)


if __name__ == '__main__':
    if image is None:
        # Fail with a clear message instead of an opaque cv2.resize error.
        raise FileNotFoundError("could not read image: " + image_path)

    net = build_ssd('test', 300, num_classes)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host;
    # the network itself is never moved off the CPU in this script.
    net.load_state_dict(torch.load(weights, map_location='cpu'))
    net.eval()

    # HxWxC uint8 array -> 1xCxHxW float tensor.
    # NOTE(review): no mean subtraction or channel reordering is applied;
    # confirm this matches the preprocessing used during training.
    resized = cv2.resize(image, (300, 300))
    batch = torch.from_numpy(resized).float().permute(2, 0, 1).unsqueeze(0)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = net(batch)
    print(output.shape)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply greedy non-maximum suppression to one set of scored boxes.

    Boxes are visited from highest to lowest score; each visited box is kept
    and every remaining box whose IoU with it exceeds ``overlap`` is dropped.

    Args:
        boxes: (tensor) Box corners, Shape: [num_priors, 4]; columns are read
            as x1, y1, x2, y2.
        scores: (tensor) One score per box, Shape: [num_priors].
        overlap: (float) IoU threshold; boxes with IoU <= overlap survive.
        top_k: (int) Only the ``top_k`` highest-scoring boxes are considered.
    Return:
        A ``(keep, count)`` tuple. ``keep`` is a long tensor with
        ``scores.size(0)`` entries whose first ``count`` values are the
        indices of the kept boxes in descending score order; the remaining
        entries are zero padding.
    """
    keep = torch.zeros(scores.size(0), dtype=torch.long, device=scores.device)
    if boxes.numel() == 0:
        # Same (keep, count) shape as the normal path so callers can always
        # unpack two values.
        return keep, 0

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = (x2 - x1) * (y2 - y1)

    _, idx = scores.sort(0)  # ascending: best candidate is last
    idx = idx[-top_k:]       # indices of the top_k largest scores

    count = 0
    while idx.numel() > 0:
        i = idx[-1]          # current highest-scoring candidate
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]       # drop the kept element from the candidates

        # Intersection rectangle of box i with every remaining candidate;
        # clamping at zero handles non-overlapping pairs.
        inter_w = (torch.min(x2[idx], x2[i])
                   - torch.max(x1[idx], x1[i])).clamp(min=0.0)
        inter_h = (torch.min(y2[idx], y2[i])
                   - torch.max(y1[idx], y1[i])).clamp(min=0.0)
        inter = inter_w * inter_h

        # IoU = inter / (area(a) + area(b) - inter)
        union = (area[idx] - inter) + area[i]
        iou = inter / union

        # Keep only candidates that do not overlap box i too much.
        idx = idx[iou.le(overlap)]
    return keep, count
class paper_box(object):
    """Plain record bundling a box with an index and an (x, y) pair.

    Attributes are stored exactly as passed in. ``box_iou`` reads ``box`` as
    a 4-item sequence and uses items 0/2 for the horizontal extent and items
    1/3 for the vertical extent.
    """

    def __init__(self, index, x, y, box):
        self.index = index
        self.x = x
        self.y = y
        self.box = box


def box_iou(a, b):
    """Return the intersection-over-union of ``a.box`` and ``b.box``.

    Args:
        a, b: objects with a ``box`` attribute indexable as
            (x_min, y_min, x_max, y_max).
    Return:
        0 when the boxes are disjoint or their union has no area, otherwise
        intersection area divided by union area.
    """
    # Disjoint along the horizontal axis.
    if a.box[2] < b.box[0] or a.box[0] > b.box[2]:
        return 0
    # Disjoint along the vertical axis.
    if a.box[1] > b.box[3] or a.box[3] < b.box[1]:
        return 0

    width = min(a.box[2], b.box[2]) - max(a.box[0], b.box[0])
    # The top edge of the intersection is the larger of BOTH boxes' y_min.
    height = min(a.box[3], b.box[3]) - max(a.box[1], b.box[1])
    inter = width * height

    a_area = (a.box[2] - a.box[0]) * (a.box[3] - a.box[1])
    b_area = (b.box[2] - b.box[0]) * (b.box[3] - b.box[1])
    union = a_area + b_area - inter
    if union == 0:
        # Two degenerate (zero-area) boxes: avoid dividing by zero.
        return 0
    return inter / union
Decode location preds, @@ -36,27 +53,43 @@ def forward(self, loc_data, conf_data, prior_data): output = torch.zeros(num, self.num_classes, self.top_k, 5) conf_preds = conf_data.view(num, num_priors, self.num_classes).transpose(2, 1) + #next we will specific the exact layer and its output + #we get the all predicted boxes and its confidence + decoded_boxes = decode(loc_data[0], prior_data, self.variance) + conf_data = conf_data[0] + loc_data = loc_data[0] + all_boxes = torch.cat((decoded_boxes, conf_data), 1) +# for i in range(self.num_classes): +# index = [] +# for j in range(len(loc_data)): +# index.append(j) +# #in the specific class, we will do something specifical +# for j in range(len(loc_data)): +# for k in range(len(loc_data) - j): +# if conf_data[j][i] < conf_data[k][i]: +# index[j] = + return all_boxes # Decode predictions into bboxes. - for i in range(num): - decoded_boxes = decode(loc_data[i], prior_data, self.variance) - # For each class, perform nms - conf_scores = conf_preds[i].clone() - - for cl in range(1, self.num_classes): - c_mask = conf_scores[cl].gt(self.conf_thresh) - scores = conf_scores[cl][c_mask] - if scores.dim() == 0: - continue - l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) - boxes = decoded_boxes[l_mask].view(-1, 4) - # idx of highest scoring and non-overlapping boxes per class - ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) - output[i, cl, :count] = \ - torch.cat((scores[ids[:count]].unsqueeze(1), - boxes[ids[:count]]), 1) - flt = output.contiguous().view(num, -1, 5) - _, idx = flt[:, :, 0].sort(1, descending=True) - _, rank = idx.sort(1) - flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0) - return output +# for i in range(num): +# decoded_boxes = decode(loc_data[i], prior_data, self.variance) +# # For each class, perform nms +# conf_scores = conf_preds[i].clone() +# +# for cl in range(1, self.num_classes): +# c_mask = conf_scores[cl].gt(self.conf_thresh) +# scores = conf_scores[cl][c_mask] +# if 
def nms(all_boxes):
    """Placeholder for non-maximum suppression.

    No suppression is implemented yet: the argument is handed back untouched
    so callers can already be wired to this entry point.

    Args:
        all_boxes: the candidate detections; returned as-is.
    Return:
        The same ``all_boxes`` object that was passed in.
    """
    # TODO: implement the actual suppression step.
    return all_boxes
b/train.py @@ -150,19 +150,23 @@ def train(): batch_iterator = iter(data_loader) for iteration in range(args.start_iter, cfg['max_iter']): if args.visdom and iteration != 0 and (iteration % epoch_size == 0): + epoch += 1 update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None, 'append', epoch_size) # reset epoch loss counters loc_loss = 0 conf_loss = 0 - epoch += 1 if iteration in cfg['lr_steps']: step_index += 1 adjust_learning_rate(optimizer, args.gamma, step_index) # load train data - images, targets = next(batch_iterator) + try: + images, targets = next(batch_iterator) + except StopIteration: + batch_iterator = iter(data_loader) + images, targets = next(batch_iterator) if args.cuda: images = Variable(images.cuda()) @@ -180,15 +184,15 @@ def train(): loss.backward() optimizer.step() t1 = time.time() - loc_loss += loss_l.data[0] - conf_loss += loss_c.data[0] + loc_loss += loss_l.data + conf_loss += loss_c.data if iteration % 10 == 0: print('timer: %.4f sec.' % (t1 - t0)) - print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ') + print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data), end=' ') if args.visdom: - update_vis_plot(iteration, loss_l.data[0], loss_c.data[0], + update_vis_plot(iteration, loss_l.data, loss_c.data, iter_plot, epoch_plot, 'append') if iteration != 0 and iteration % 5000 == 0: