上篇博客提到,该任务是对原始数据中的每张图片(256x256)做 grid 级别的 label 预测。思路很简单:网络最后卷出的 feature map 是 4x4 的,不再经过 global average pooling 层,而是直接展平成 1x16 的向量,再过 sigmoid 激活函数即可(label 也要相应地变成 16 个字符,有点类似 OCR)。
def grabcut(img_name):
    """Create GrabCut-based segmentation labels for one annotated image.

    For every annotation of ``img_name`` found in the module-level ``ANNS``
    mapping a binary object mask is produced:

    * boxes smaller than ``MINI_AREA`` use the bounding box itself;
    * a box covering the whole image is shrunk by ``RECT_SHRINK`` on every
      side before being handed to ``cv.grabCut``;
    * every other box is passed to ``cv.grabCut`` unchanged.

    A GrabCut result that is empty falls back to "everything that is not
    definite background", and a result whose IoU with the bounding box is
    ``<= IOU_THRESHOLD`` is replaced by the bounding-box mask.

    Overlaps between objects are resolved in favour of the smaller object,
    each instance is written to ``grabcut_dir`` as a paletted PNG whose pixel
    value is the class id, and the merged label map is written to
    ``segmentation_label_dir``.

    Each annotation record is indexed as: ``[1]`` instance file name (its
    last ``_``-separated token, minus the extension, is the class id),
    ``[2]``..``[5]`` the box as ymin, xmin, ymax, xmax.

    NOTE(review): the original source had lost its indentation; the empty
    result fallback and the IoU check are applied here to every GrabCut
    result -- confirm against the intended nesting.

    Args:
        img_name: key into ``ANNS`` and base name of the ``.jpg`` file inside
            ``img_dir`` (trailing whitespace/newline is stripped).

    Raises:
        KeyError: if ``img_name`` has no entry in ``ANNS``.
        FileNotFoundError: if the image cannot be read by OpenCV.
    """
    annotations = ANNS[img_name]
    if not annotations:
        # Nothing to segment; previously this path died with a NameError.
        return

    # Read the image once: GrabCut never modifies it, so re-reading it for
    # every annotation only cost disk I/O.
    img_path = (img_dir + img_name).rstrip() + '.jpg'
    img = cv.imread(img_path)
    if img is None:
        raise FileNotFoundError('cannot read image: ' + img_path)
    img_h, img_w = img.shape[:2]

    masks = []  # entries: [binary uint8 mask, instance file name]
    for ann_info in annotations:
        grab_name = ann_info[1]
        # Integer pixel coordinates of the box (helper defined elsewhere).
        xmin, ymin, xmax, ymax = get_int_coor(
            ann_info[3], ann_info[2], ann_info[5], ann_info[4], img_w, img_h)
        box_w = xmax - xmin
        box_h = ymax - ymin

        box_mask = np.zeros((img_h, img_w), np.uint8)
        box_mask[ymin:ymax, xmin:xmax] = 1

        if box_w * box_h < MINI_AREA:
            # Too small for GrabCut: the box itself is the mask.  (The old
            # chained assignment bound the scalar 1 here instead of an array.)
            img_mask = box_mask.copy()
        else:
            if box_w * box_h == img_w * img_h:
                # The box is the whole image: shrink it so GrabCut still has
                # some pixels outside the rectangle to treat as background.
                rect = (RECT_SHRINK, RECT_SHRINK,
                        box_w - RECT_SHRINK * 2, box_h - RECT_SHRINK * 2)
            else:
                rect = (xmin, ymin, box_w, box_h)

            # Working buffers required by cv.grabCut.
            mask = np.zeros((img_h, img_w), np.uint8)
            bgd_model = np.zeros((1, 65), np.float64)
            fgd_model = np.zeros((1, 65), np.float64)
            cv.grabCut(img, mask, rect, bgd_model, fgd_model,
                       ITER_NUM, cv.GC_INIT_WITH_RECT)

            # 0 = definite background, 2 = probable background.
            img_mask = np.where((mask == 0) | (mask == 2), 0, 1).astype('uint8')
            if np.sum(img_mask) == 0:
                # GrabCut labelled everything background: keep whatever is
                # not *definite* background instead.
                img_mask = np.where((mask == 0), 0, 1).astype('uint8')

        # IoU between the mask and its box; a mask covering too little of
        # the box is replaced by the box.
        intersection = np.count_nonzero((box_mask == 1) & (img_mask == 1))
        union = np.count_nonzero((box_mask == 1) | (img_mask == 1))
        iou = intersection / union if union else 0.0
        if iou <= IOU_THRESHOLD:
            img_mask = box_mask

        masks.append([img_mask, grab_name])

    # Largest object first, so that every smaller object can be carved out
    # of the larger ones it overlaps.
    masks.sort(key=lambda entry: np.sum(entry[0]), reverse=True)

    merged = np.zeros((img_h, img_w))
    for j, (inst_mask, grab_name) in enumerate(masks):
        keep = inst_mask == 1
        for smaller_mask, _ in masks[j + 1:]:
            # Explicit boolean logic instead of relying on uint8 wrap-around
            # of `larger - smaller`.
            keep &= smaller_mask != 1

        # "<...>_<class_id>.<ext>" -> class_id
        class_id = int(grab_name.split('_')[-1].split('.')[0])
        inst_label = np.where(keep, class_id, 0).astype('uint8')

        # NOTE: scipy.misc.toimage was removed in SciPy 1.2; this call needs
        # an older SciPy (or a port to PIL.Image.fromarray + putpalette).
        scipy.misc.toimage(inst_label, cmin=0, cmax=255,
                           pal=tbvoc_info.colors_map,
                           mode='P').save((grabcut_dir).rstrip() + grab_name)

        # Instance labels are disjoint after the carving above, so summing
        # them yields the per-pixel class id.
        merged = merged + inst_label

    scipy.misc.toimage(merged, cmin=0, cmax=255,
                       pal=tbvoc_info.colors_map,
                       mode='P').save((segmentation_label_dir + img_name).rstrip() + '.png')
# NOTE(review): excerpt from the body of the dense-CRF refinement routine
# (presumably `run_densecrf`).  Its `def` line, the definitions of `width`
# and `height`, and whatever it returns are not part of this excerpt.

# The image has to be loaded with cv2.imread(); according to the original
# author the algorithm breaks when it is loaded with PIL.Image.open().
# TODO: find out why the PIL-loaded image fails.
img = cv.imread(os.path.join(img_dir, img_name).rstrip()+'.jpg')
img = cv.resize(img, (1632, 1216), interpolation=cv.INTER_LINEAR)

# Add a leading axis ([1, H, W]) and stack the complement in front of it:
# plane 0 is the class-0 probability, plane 1 the class-1 probability,
# giving [classes, H, W].
masks_pro = np.expand_dims(masks_pro, 0)
masks_pro = np.concatenate((1 - masks_pro, masks_pro), axis=0)

d = dcrf.DenseCRF2D(width, height, 2)

# The unary term must be flat ([classes, H*W]) and in C (row-major) order.
U = masks_pro.reshape(2, -1).copy(order='C')
# Sigmoid outputs of a binary classifier can be vanishingly small; clamp
# them from below before the logarithm is taken.
min_proba = 1e-12
U = np.where((U < min_proba), min_proba, U)

# Make sure both arrays are C-contiguous, which speeds up processing.
# reference: https://zhuanlan.zhihu.com/p/59767914
U = np.ascontiguousarray(U)
img = np.ascontiguousarray(img)

d.setUnaryEnergy(-np.log(U))
d.addPairwiseGaussian(sxy=3, compat=3)
d.addPairwiseBilateral(sxy=80, srgb=13, rgbim=img, compat=10)
Q = d.inference(5)

# Per pixel, pick the class (0 or 1) with the larger value in Q.
# NOTE: `map` shadows the builtin; the name is kept because code outside
# this excerpt may still refer to it.
map = np.argmax(Q, axis=0).reshape((height, width))
proba = np.array(map)
def update_label(predict_model, device): """load train_pairs.txt info for check the missed diagnosis objects""" #ann_info:[image name, image name_num_ class_id.png, bbox_ymin, # bbox_xmin,bbox_ymax, bbox_xmax, class_name] print('start to update...') ANNS = {} with open(dataset_pairs_dir, 'r') as da_p_txt: for ann_info in da_p_txt: # split the string line, get the list ann_info = ann_info.rstrip().split('###') if ann_info[0].rstrip() not in ANNS: ANNS[ann_info[0].rstrip()] = [] ANNS[ann_info[0].rstrip()].append(ann_info)
predict_model.eval()
# define the same image transformations transformations = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])
update_num = 0 print('updating progress:') with open(dataset_txt_dir, 'r') as da_txt: # don't use the code line below # or it will close the file and the whole programm end here (I guess) # I debug here for two hours...... #lines = len(da_txt.readlines()) for update_name in da_txt: update_num += 1 # in RGB [W, H, depth] img = Image.open(os.path.join(img_dir, update_name).rstrip()+'.jpg') img_w = img.size[0] img_h = img.size[1] img = img.resize((1632, 1216), Image.LANCZOS) input_ = transformations(img).float() # add batch_size dimension #[3, H, W]-->[1, 3, H, W] input_ = input_.unsqueeze_(0) input_ = input_.to(device) pred = predict_model(input_).view([1216, 1632]).data.cpu() #pred.shape[H,W] pred = np.array(pred) """crf smooth prediction""" crf_pred = run_densecrf(img_dir, update_name, pred)