diff --git a/.infer.py.swp b/.infer.py.swp
new file mode 100644
index 000000000..025782281
Binary files /dev/null and b/.infer.py.swp differ
diff --git a/README.md b/README.md
index 0256fac99..dd2d2fc5a 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,9 @@ A [PyTorch](http://pytorch.org/) implementation of [Single Shot MultiBox Detecto
+### OK!!!!
+This is my own branch.
+
### Table of Contents
- Installation
- Datasets
diff --git a/data/1.jpg b/data/1.jpg
new file mode 100644
index 000000000..fd3c47c57
Binary files /dev/null and b/data/1.jpg differ
diff --git a/data/2.jpg b/data/2.jpg
new file mode 100644
index 000000000..cace078d8
Binary files /dev/null and b/data/2.jpg differ
diff --git a/data/coco.py b/data/coco.py
index 765531761..4b331c905 100644
--- a/data/coco.py
+++ b/data/coco.py
@@ -29,7 +29,6 @@
'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush')
-
def get_label_map(label_file):
label_map = {}
labels = open(label_file, 'r')
@@ -83,7 +82,7 @@ class COCODetection(data.Dataset):
in the target (bbox) and transforms it.
"""
- def __init__(self, root, image_set='trainval35k', transform=None,
+ def __init__(self, root, image_set='train2014', transform=None,
target_transform=COCOAnnotationTransform(), dataset_name='MS COCO'):
sys.path.append(osp.join(root, COCO_API))
from pycocotools.coco import COCO
diff --git a/data/config.py b/data/config.py
index 8999622cc..e16659c7f 100644
--- a/data/config.py
+++ b/data/config.py
@@ -27,7 +27,7 @@
}
coco = {
- 'num_classes': 201,
+ 'num_classes': 81,
'lr_steps': (280000, 360000, 400000),
'max_iter': 400000,
'feature_maps': [38, 19, 10, 5, 3, 1],
diff --git a/debug.sh b/debug.sh
new file mode 100644
index 000000000..1d72104ef
--- /dev/null
+++ b/debug.sh
@@ -0,0 +1 @@
+vim layers/functions/detection.py
diff --git a/infer.py b/infer.py
new file mode 100644
index 000000000..35ea2649b
--- /dev/null
+++ b/infer.py
@@ -0,0 +1,46 @@
+"""Run one SSD300 forward pass on a single image and print the output shape."""
+
+import torch
+import cv2
+from ssd import build_ssd
+
+num_classes = 81
+image = cv2.imread("data/1.jpg")
+weights = "weights/ssd300_COCO_10000.pth"
+
+#cv2.imshow("fafda", image)
+#cv2.waitKey()
+
+
+def get_features_hook(self, input, output):
+    """Print the shape of ``output``.
+
+    Takes the (module, input, output) arguments that
+    ``torch.nn.Module.register_forward_hook`` hands to a hook; this script
+    does not register it on any layer yet.
+    """
+    print("hooks ", output.data.cpu().numpy().shape)
+
+
+if __name__ == '__main__':
+    # cv2.imread returns None rather than raising when the file is missing
+    # or unreadable; fail here instead of deep inside cv2.resize.
+    if image is None:
+        raise FileNotFoundError("cannot read input image")
+    net = build_ssd('test', 300, num_classes)
+    # load weights to the net; map_location lets a checkpoint saved on GPU
+    # load on a CPU-only machine
+    net.load_state_dict(torch.load(weights, map_location='cpu'))
+    net.eval()  # inference behaviour for any dropout/batch-norm layers
+    # NOTE(review): the raw BGR uint8 pixels go in without mean subtraction or
+    # scaling -- confirm this matches the preprocessing used in training.
+    image = cv2.resize(image, (300, 300))
+    # H x W x C array -> 1 x C x H x W float tensor
+    image = torch.Tensor(image).permute(2, 0, 1).unsqueeze(0)
+    with torch.no_grad():  # forward pass only, no autograd graph needed
+        output = net(image)
+    print(output.shape)
+    # TODO: get the specific layer value (e.g. via get_features_hook)
+
+# print(net)
+
diff --git a/layers/box_utils.py b/layers/box_utils.py
index 84214947b..9b75e9aeb 100644
--- a/layers/box_utils.py
+++ b/layers/box_utils.py
@@ -172,68 +172,68 @@ def log_sum_exp(x):
# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
-def nms(boxes, scores, overlap=0.5, top_k=200):
- """Apply non-maximum suppression at test time to avoid detecting too many
- overlapping bounding boxes for a given object.
- Args:
- boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
- scores: (tensor) The class predscores for the img, Shape:[num_priors].
- overlap: (float) The overlap thresh for suppressing unnecessary boxes.
- top_k: (int) The Maximum number of box preds to consider.
- Return:
- The indices of the kept boxes with respect to num_priors.
- """
-
- keep = scores.new(scores.size(0)).zero_().long()
- if boxes.numel() == 0:
- return keep
- x1 = boxes[:, 0]
- y1 = boxes[:, 1]
- x2 = boxes[:, 2]
- y2 = boxes[:, 3]
- area = torch.mul(x2 - x1, y2 - y1)
- v, idx = scores.sort(0) # sort in ascending order
- # I = I[v >= 0.01]
- idx = idx[-top_k:] # indices of the top-k largest vals
- xx1 = boxes.new()
- yy1 = boxes.new()
- xx2 = boxes.new()
- yy2 = boxes.new()
- w = boxes.new()
- h = boxes.new()
-
- # keep = torch.Tensor()
- count = 0
- while idx.numel() > 0:
- i = idx[-1] # index of current largest val
- # keep.append(i)
- keep[count] = i
- count += 1
- if idx.size(0) == 1:
- break
- idx = idx[:-1] # remove kept element from view
- # load bboxes of next highest vals
- torch.index_select(x1, 0, idx, out=xx1)
- torch.index_select(y1, 0, idx, out=yy1)
- torch.index_select(x2, 0, idx, out=xx2)
- torch.index_select(y2, 0, idx, out=yy2)
- # store element-wise max with next highest score
- xx1 = torch.clamp(xx1, min=x1[i])
- yy1 = torch.clamp(yy1, min=y1[i])
- xx2 = torch.clamp(xx2, max=x2[i])
- yy2 = torch.clamp(yy2, max=y2[i])
- w.resize_as_(xx2)
- h.resize_as_(yy2)
- w = xx2 - xx1
- h = yy2 - yy1
- # check sizes of xx1 and xx2.. after each iteration
- w = torch.clamp(w, min=0.0)
- h = torch.clamp(h, min=0.0)
- inter = w*h
- # IoU = i / (area(a) + area(b) - i)
- rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
- union = (rem_areas - inter) + area[i]
- IoU = inter/union # store result in iou
- # keep only elements with an IoU <= overlap
- idx = idx[IoU.le(overlap)]
- return keep, count
+#def nms(boxes, scores, overlap=0.5, top_k=200):
+# """Apply non-maximum suppression at test time to avoid detecting too many
+# overlapping bounding boxes for a given object.
+# Args:
+# boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
+# scores: (tensor) The class predscores for the img, Shape:[num_priors].
+# overlap: (float) The overlap thresh for suppressing unnecessary boxes.
+# top_k: (int) The Maximum number of box preds to consider.
+# Return:
+# The indices of the kept boxes with respect to num_priors.
+# """
+#
+# keep = scores.new(scores.size(0)).zero_().long()
+# if boxes.numel() == 0:
+# return keep
+# x1 = boxes[:, 0]
+# y1 = boxes[:, 1]
+# x2 = boxes[:, 2]
+# y2 = boxes[:, 3]
+# area = torch.mul(x2 - x1, y2 - y1)
+# v, idx = scores.sort(0) # sort in ascending order
+# # I = I[v >= 0.01]
+# idx = idx[-top_k:] # indices of the top-k largest vals
+# xx1 = boxes.new()
+# yy1 = boxes.new()
+# xx2 = boxes.new()
+# yy2 = boxes.new()
+# w = boxes.new()
+# h = boxes.new()
+#
+# # keep = torch.Tensor()
+# count = 0
+# while idx.numel() > 0:
+# i = idx[-1] # index of current largest val
+# # keep.append(i)
+# keep[count] = i
+# count += 1
+# if idx.size(0) == 1:
+# break
+# idx = idx[:-1] # remove kept element from view
+# # load bboxes of next highest vals
+# torch.index_select(x1, 0, idx, out=xx1)
+# torch.index_select(y1, 0, idx, out=yy1)
+# torch.index_select(x2, 0, idx, out=xx2)
+# torch.index_select(y2, 0, idx, out=yy2)
+# # store element-wise max with next highest score
+# xx1 = torch.clamp(xx1, min=x1[i])
+# yy1 = torch.clamp(yy1, min=y1[i])
+# xx2 = torch.clamp(xx2, max=x2[i])
+# yy2 = torch.clamp(yy2, max=y2[i])
+# w.resize_as_(xx2)
+# h.resize_as_(yy2)
+# w = xx2 - xx1
+# h = yy2 - yy1
+# # check sizes of xx1 and xx2.. after each iteration
+# w = torch.clamp(w, min=0.0)
+# h = torch.clamp(h, min=0.0)
+# inter = w*h
+# # IoU = i / (area(a) + area(b) - i)
+# rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
+# union = (rem_areas - inter) + area[i]
+# IoU = inter/union # store result in iou
+# # keep only elements with an IoU <= overlap
+# idx = idx[IoU.le(overlap)]
+# return keep, count
diff --git a/layers/functions/1 b/layers/functions/1
new file mode 100644
index 000000000..553c2ae9b
--- /dev/null
+++ b/layers/functions/1
@@ -0,0 +1,4 @@
+#to do
+
+
+
diff --git a/layers/functions/detection.py b/layers/functions/detection.py
index 0d1ef8d30..50421a844 100644
--- a/layers/functions/detection.py
+++ b/layers/functions/detection.py
@@ -1,8 +1,25 @@
import torch
from torch.autograd import Function
-from ..box_utils import decode, nms
+from ..box_utils import decode
from data import voc as cfg
+class paper_box(object):
+    """Simple record holding an index, an x, a y and a box."""
+    def __init__(self, index, x, y, box):
+        # box_iou reads box[0]..box[3], so box must support integer indexing
+        self.index, self.x, self.y = index, x, y
+        self.box = box
+def box_iou(a, b):
+    """Return intersection-over-union of a.box and b.box, read as (x1, y1, x2, y2)."""
+    # Clamp at 0 so disjoint boxes contribute no overlap (replaces the early returns).
+    width = max(min(a.box[2], b.box[2]) - max(a.box[0], b.box[0]), 0)
+    # Overlap top edge is the larger y1 of the two boxes (was max(a.y1, a.y1)).
+    height = max(min(a.box[3], b.box[3]) - max(a.box[1], b.box[1]), 0)
+    inter = width * height
+    a_area = (a.box[2] - a.box[0]) * (a.box[3] - a.box[1])
+    b_area = (b.box[2] - b.box[0]) * (b.box[3] - b.box[1])
+    union = a_area + b_area - inter
+    return inter / union if union > 0 else 0  # degenerate boxes: no ZeroDivisionError
class Detect(Function):
"""At test time, Detect is the final layer of SSD. Decode location preds,
@@ -36,27 +53,43 @@ def forward(self, loc_data, conf_data, prior_data):
output = torch.zeros(num, self.num_classes, self.top_k, 5)
conf_preds = conf_data.view(num, num_priors,
self.num_classes).transpose(2, 1)
+        # Next, expose this layer's raw output instead of NMS-filtered detections:
+        # decode all predicted boxes (first image only) and attach their confidences.
+ decoded_boxes = decode(loc_data[0], prior_data, self.variance)
+ conf_data = conf_data[0]
+ loc_data = loc_data[0]
+ all_boxes = torch.cat((decoded_boxes, conf_data), 1)
+# for i in range(self.num_classes):
+# index = []
+# for j in range(len(loc_data)):
+# index.append(j)
+# #in the specific class, we will do something specifical
+# for j in range(len(loc_data)):
+# for k in range(len(loc_data) - j):
+# if conf_data[j][i] < conf_data[k][i]:
+# index[j] =
+ return all_boxes
# Decode predictions into bboxes.
- for i in range(num):
- decoded_boxes = decode(loc_data[i], prior_data, self.variance)
- # For each class, perform nms
- conf_scores = conf_preds[i].clone()
-
- for cl in range(1, self.num_classes):
- c_mask = conf_scores[cl].gt(self.conf_thresh)
- scores = conf_scores[cl][c_mask]
- if scores.dim() == 0:
- continue
- l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
- boxes = decoded_boxes[l_mask].view(-1, 4)
- # idx of highest scoring and non-overlapping boxes per class
- ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
- output[i, cl, :count] = \
- torch.cat((scores[ids[:count]].unsqueeze(1),
- boxes[ids[:count]]), 1)
- flt = output.contiguous().view(num, -1, 5)
- _, idx = flt[:, :, 0].sort(1, descending=True)
- _, rank = idx.sort(1)
- flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
- return output
+# for i in range(num):
+# decoded_boxes = decode(loc_data[i], prior_data, self.variance)
+# # For each class, perform nms
+# conf_scores = conf_preds[i].clone()
+#
+# for cl in range(1, self.num_classes):
+# c_mask = conf_scores[cl].gt(self.conf_thresh)
+# scores = conf_scores[cl][c_mask]
+# if scores.size(0) == 0:
+# continue
+# l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
+# boxes = decoded_boxes[l_mask].view(-1, 4)
+# # idx of highest scoring and non-overlapping boxes per class
+# ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
+# output[i, cl, :count] = \
+# torch.cat((scores[ids[:count]].unsqueeze(1),
+# boxes[ids[:count]]), 1)
+# flt = output.contiguous().view(num, -1, 5)
+# _, idx = flt[:, :, 0].sort(1, descending=True)
+# _, rank = idx.sort(1)
+# flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
+# return output
diff --git a/layers/functions/nms.py b/layers/functions/nms.py
new file mode 100644
index 000000000..057602440
--- /dev/null
+++ b/layers/functions/nms.py
@@ -0,0 +1,7 @@
+#to do
+
+def nms(all_boxes):  # placeholder: returns all_boxes unchanged; the real NMS is still to do
+ return all_boxes
+
+
+
diff --git "a/layers/functions/\357\274\201" "b/layers/functions/\357\274\201"
new file mode 100644
index 000000000..6ee5bf89e
--- /dev/null
+++ "b/layers/functions/\357\274\201"
@@ -0,0 +1,7 @@
+#to do
+
+def nms():
+    """Placeholder stub: always returns 0."""
+    return 0
+
+
diff --git a/layers/modules/multibox_loss.py b/layers/modules/multibox_loss.py
index fb49cf439..8acff30f7 100644
--- a/layers/modules/multibox_loss.py
+++ b/layers/modules/multibox_loss.py
@@ -94,8 +94,8 @@ def forward(self, predictions, targets):
loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
# Hard Negative Mining
- loss_c[pos] = 0 # filter out pos boxes for now
loss_c = loss_c.view(num, -1)
+ loss_c[pos] = 0 # filter out pos boxes for now
_, loss_idx = loss_c.sort(1, descending=True)
_, idx_rank = loss_idx.sort(1)
num_pos = pos.long().sum(1, keepdim=True)
@@ -111,7 +111,9 @@ def forward(self, predictions, targets):
# Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
- N = num_pos.data.sum()
+ N = num_pos.data.sum().double()
+ loss_l = loss_l.double()
+ loss_c = loss_c.double()
loss_l /= N
loss_c /= N
return loss_l, loss_c
diff --git a/train.py b/train.py
index 427dd9244..15dee06f6 100644
--- a/train.py
+++ b/train.py
@@ -150,19 +150,23 @@ def train():
batch_iterator = iter(data_loader)
for iteration in range(args.start_iter, cfg['max_iter']):
if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
+ epoch += 1
update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
'append', epoch_size)
# reset epoch loss counters
loc_loss = 0
conf_loss = 0
- epoch += 1
if iteration in cfg['lr_steps']:
step_index += 1
adjust_learning_rate(optimizer, args.gamma, step_index)
# load train data
- images, targets = next(batch_iterator)
+ try:
+ images, targets = next(batch_iterator)
+ except StopIteration:
+ batch_iterator = iter(data_loader)
+ images, targets = next(batch_iterator)
if args.cuda:
images = Variable(images.cuda())
@@ -180,15 +184,15 @@ def train():
loss.backward()
optimizer.step()
t1 = time.time()
- loc_loss += loss_l.data[0]
- conf_loss += loss_c.data[0]
+ loc_loss += loss_l.data
+ conf_loss += loss_c.data
if iteration % 10 == 0:
print('timer: %.4f sec.' % (t1 - t0))
- print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ')
+ print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data), end=' ')
if args.visdom:
- update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
+ update_vis_plot(iteration, loss_l.data, loss_c.data,
iter_plot, epoch_plot, 'append')
if iteration != 0 and iteration % 5000 == 0: