Spaces:

onnx
/

yolov4

Runtime error

App Files Files Community

yolov4 / app.py

akhaliq HF Staff

Update app.py

1d9b661 about 4 years ago

raw

history blame contribute delete

10 kB

	import cv2
	import numpy as np
	from onnx import numpy_helper
	import onnx
	import os
	from PIL import Image
	from matplotlib.pyplot import imshow
	import onnxruntime as rt
	from scipy import special
	import colorsys
	import random
	import gradio as gr

	def image_preprocess(image, target_size, gt_boxes=None):

	ih, iw = target_size
	h, w, _ = image.shape

	scale = min(iw/w, ih/h)
	nw, nh = int(scale * w), int(scale * h)
	image_resized = cv2.resize(image, (nw, nh))

	image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0)
	dw, dh = (iw - nw) // 2, (ih-nh) // 2
	image_padded[dh:nh+dh, dw:nw+dw, :] = image_resized
	image_padded = image_padded / 255.

	if gt_boxes is None:
	return image_padded

	else:
	gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
	gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
	return image_padded, gt_boxes

	input_size = 416

	os.system("wget https://github.com/AK391/models/raw/main/vision/object_detection_segmentation/yolov4/model/yolov4.onnx")

	# Start from ORT 1.10, ORT requires explicitly setting the providers parameter if you want to use execution providers
	# other than the default CPU provider (as opposed to the previous behavior of providers getting set/registered by default
	# based on the build flags) when instantiating InferenceSession.
	# For example, if NVIDIA GPU is available and ORT Python package is built with CUDA, then call API as following:
	# rt.InferenceSession(path/to/model, providers=['CUDAExecutionProvider'])
	sess = rt.InferenceSession("yolov4.onnx")

	outputs = sess.get_outputs()



	def get_anchors(anchors_path, tiny=False):
	'''loads the anchors from a file'''
	with open(anchors_path) as f:
	anchors = f.readline()
	anchors = np.array(anchors.split(','), dtype=np.float32)
	return anchors.reshape(3, 3, 2)

	def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1,1,1]):
	'''define anchor boxes'''
	for i, pred in enumerate(pred_bbox):
	conv_shape = pred.shape
	output_size = conv_shape[1]
	conv_raw_dxdy = pred[:, :, :, :, 0:2]
	conv_raw_dwdh = pred[:, :, :, :, 2:4]
	xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size))
	xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2)

	xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
	xy_grid = xy_grid.astype(np.float)

	pred_xy = ((special.expit(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
	pred_wh = (np.exp(conv_raw_dwdh) * ANCHORS[i])
	pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1)

	pred_bbox = [np.reshape(x, (-1, np.shape(x)[-1])) for x in pred_bbox]
	pred_bbox = np.concatenate(pred_bbox, axis=0)
	return pred_bbox


	def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
	'''remove boundary boxs with a low detection probability'''
	valid_scale=[0, np.inf]
	pred_bbox = np.array(pred_bbox)

	pred_xywh = pred_bbox[:, 0:4]
	pred_conf = pred_bbox[:, 4]
	pred_prob = pred_bbox[:, 5:]

	# # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
	pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
	pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
	# # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
	org_h, org_w = org_img_shape
	resize_ratio = min(input_size / org_w, input_size / org_h)

	dw = (input_size - resize_ratio * org_w) / 2
	dh = (input_size - resize_ratio * org_h) / 2

	pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
	pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio

	# # (3) clip some boxes that are out of range
	pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
	np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
	invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
	pred_coor[invalid_mask] = 0

	# # (4) discard some invalid boxes
	bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
	scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))

	# # (5) discard some boxes with low scores
	classes = np.argmax(pred_prob, axis=-1)
	scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
	score_mask = scores > score_threshold
	mask = np.logical_and(scale_mask, score_mask)
	coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]

	return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)

	def bboxes_iou(boxes1, boxes2):
	'''calculate the Intersection Over Union value'''
	boxes1 = np.array(boxes1)
	boxes2 = np.array(boxes2)

	boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
	boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

	left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
	right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])

	inter_section = np.maximum(right_down - left_up, 0.0)
	inter_area = inter_section[..., 0] * inter_section[..., 1]
	union_area = boxes1_area + boxes2_area - inter_area
	ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)

	return ious

	def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
	"""
	:param bboxes: (xmin, ymin, xmax, ymax, score, class)

	Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
	https://github.com/bharatsingh430/soft-nms
	"""
	classes_in_img = list(set(bboxes[:, 5]))
	best_bboxes = []

	for cls in classes_in_img:
	cls_mask = (bboxes[:, 5] == cls)
	cls_bboxes = bboxes[cls_mask]

	while len(cls_bboxes) > 0:
	max_ind = np.argmax(cls_bboxes[:, 4])
	best_bbox = cls_bboxes[max_ind]
	best_bboxes.append(best_bbox)
	cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
	iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
	weight = np.ones((len(iou),), dtype=np.float32)

	assert method in ['nms', 'soft-nms']

	if method == 'nms':
	iou_mask = iou > iou_threshold
	weight[iou_mask] = 0.0

	if method == 'soft-nms':
	weight = np.exp(-(1.0 * iou ** 2 / sigma))

	cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
	score_mask = cls_bboxes[:, 4] > 0.
	cls_bboxes = cls_bboxes[score_mask]

	return best_bboxes

	def read_class_names(class_file_name):
	'''loads class name from a file'''
	names = {}
	with open(class_file_name, 'r') as data:
	for ID, name in enumerate(data):
	names[ID] = name.strip('\n')
	return names

	def draw_bbox(image, bboxes, classes=read_class_names("coco.names"), show_label=True):
	"""
	bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
	"""

	num_classes = len(classes)
	image_h, image_w, _ = image.shape
	hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
	colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
	colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))

	random.seed(0)
	random.shuffle(colors)
	random.seed(None)

	for i, bbox in enumerate(bboxes):
	coor = np.array(bbox[:4], dtype=np.int32)
	fontScale = 0.5
	score = bbox[4]
	class_ind = int(bbox[5])
	bbox_color = colors[class_ind]
	bbox_thick = int(0.6 * (image_h + image_w) / 600)
	c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
	cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

	if show_label:
	bbox_mess = '%s: %.2f' % (classes[class_ind], score)
	t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
	cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)
	cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
	fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)

	return image

	def inference(img):
	original_image = cv2.imread(img)
	original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
	original_image_size = original_image.shape[:2]

	image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
	image_data = image_data[np.newaxis, ...].astype(np.float32)

	print("Preprocessed image shape:",image_data.shape) # shape of the preprocessed input

	output_names = list(map(lambda output: output.name, outputs))
	input_name = sess.get_inputs()[0].name

	detections = sess.run(output_names, {input_name: image_data})
	print("Output shape:", list(map(lambda detection: detection.shape, detections)))

	ANCHORS = "./yolov4_anchors.txt"
	STRIDES = [8, 16, 32]
	XYSCALE = [1.2, 1.1, 1.05]

	ANCHORS = get_anchors(ANCHORS)
	STRIDES = np.array(STRIDES)



	pred_bbox = postprocess_bbbox(detections, ANCHORS, STRIDES, XYSCALE)
	bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
	bboxes = nms(bboxes, 0.213, method='nms')
	image = draw_bbox(original_image, bboxes)

	image = Image.fromarray(image)
	return image

	title="YOLOv4"
	description="YOLOv4 optimizes the speed and accuracy of object detection. It is two times faster than EfficientDet. It improves YOLOv3's AP and FPS by 10% and 12%, respectively, with mAP50 of 52.32 on the COCO 2017 dataset and FPS of 41.7 on Tesla 100."
	examples=[["example.png"]]
	gr.Interface(inference,gr.inputs.Image(type="filepath"),gr.outputs.Image(type="pil"),title=title,description=description,examples=examples).launch()