Spaces:

Leyogho
/

Core-AI-IMAGE

Build error

App Files Files Community

Core-AI-IMAGE / datasets /evaluation /captioning_evaluation.py

Leyogho

Core

edebe10 12 months ago

raw

history blame contribute delete

6.03 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	# --------------------------------------------------------
	# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
	# Copyright (c) 2022 Microsoft
	# Licensed under The MIT License [see LICENSE for details]
	# Modified by Xueyan Zou (xueyan@cs.wisc.edu)
	# --------------------------------------------------------

	import os
	import json
	import logging
	import itertools

	import detectron2.utils.comm as comm
	from detectron2.evaluation.evaluator import DatasetEvaluator

	from caption_pycocotools.coco import COCO
	from pycocoevalcap.eval import COCOEvalCap


	class CaptioningEvaluator(DatasetEvaluator):
	"""
	Evaluate AR for object proposals, AP for instance detection/segmentation, AP
	for keypoint detection outputs using COCO's metrics.
	See http://cocodataset.org/#detection-eval and
	http://cocodataset.org/#keypoints-eval to understand its metrics.
	The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means
	the metric cannot be computed (e.g. due to no predictions made).
	In addition to COCO, this evaluator is able to support any bounding box detection,
	instance segmentation, or keypoint detection dataset.
	"""

	def __init__(
	self,
	distributed=True,
	output_dir=None,
	gt_json=None,
	):
	"""
	Args:
	dataset_name (str): name of the dataset to be evaluated.
	It must have either the following corresponding metadata:
	"json_file": the path to the COCO format annotation
	Or it must be in detectron2's standard dataset format
	so it can be converted to COCO format automatically.
	tasks (tuple[str]): tasks that can be evaluated under the given
	configuration. A task is one of "bbox", "segm", "keypoints".
	By default, will infer this automatically from predictions.
	distributed (True): if True, will collect results from all ranks and run evaluation
	in the main process.
	Otherwise, will only evaluate the results in the current process.
	output_dir (str): optional, an output directory to dump all
	results predicted on the dataset. The dump contains two files:
	1. "instances_predictions.pth" a file that can be loaded with `torch.load` and
	contains all the results in the format they are produced by the model.
	2. "coco_instances_results.json" a json file in COCO's result format.
	max_dets_per_image (int): limit on the maximum number of detections per image.
	By default in COCO, this limit is to 100, but this can be customized
	to be greater, as is needed in evaluation metrics AP fixed and AP pool
	(see https://arxiv.org/pdf/2102.01066.pdf)
	This doesn't affect keypoint evaluation.
	use_fast_impl (bool): use a fast but unofficial implementation to compute AP.
	Although the results should be very close to the official implementation in COCO
	API, it is still recommended to compute results with the official API for use in
	papers. The faster implementation also uses more RAM.
	kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS.
	See http://cocodataset.org/#keypoints-eval
	When empty, it will use the defaults in COCO.
	Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
	allow_cached_coco (bool): Whether to use cached coco json from previous validation
	runs. You should set this to False if you need to use different validation data.
	Defaults to True.
	"""
	self._logger = logging.getLogger(__name__)
	self._distributed = distributed
	self._output_dir = output_dir
	self._gt_json = COCO(gt_json)

	def reset(self):
	self._gen_captions = []
	self._image_ids = []

	def process(self, inputs, outputs):
	"""
	Args:
	inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
	It is a list of dict. Each dict corresponds to an image and
	contains keys like "height", "width", "file_name", "image_id".
	outputs: the outputs of a COCO model. It is a list of dicts with key
	"instances" that contains :class:`Instances`.
	"""
	for output in outputs:
	self._image_ids.append(output['image_id'])
	self._gen_captions.append(output['captioning_text'])

	def evaluate(self, img_ids=None):
	"""
	Args:
	img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset
	"""

	if self._distributed:
	comm.synchronize()
	def gather(x, move=False):
	x = comm.gather(x)
	x = list(itertools.chain(*x))
	if move:
	x = [xx.to(self._gen_captions[0].device) for xx in x]
	return x
	gen_captions = gather(self._gen_captions)
	image_ids = gather(self._image_ids)
	if not comm.is_main_process():
	return {}
	else:
	gen_captions = self._gen_captions
	image_ids = self._image_ids

	assert len(gen_captions) == len(image_ids)
	pred_captions = [{"image_id": image_id, "caption": gen_caption} for image_id, gen_caption in zip(image_ids, gen_captions)]
	pred_pth = os.path.join(self._output_dir, 'results.json')
	json.dump(pred_captions, open(pred_pth, "w"))

	gt_captions = self._gt_json
	pred_captions = gt_captions.loadRes(pred_pth)

	cocoEval = COCOEvalCap(gt_captions, pred_captions)
	cocoEval.params['image_id'] = pred_captions.getImgIds()
	cocoEval.evaluate()
	return cocoEval.eval