# Copyright (c) Facebook, Inc. and its affiliates.
# --------------------------------------------------------
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Modified by Xueyan Zou (xueyan@cs.wisc.edu)
# --------------------------------------------------------
import os
import json
import logging
import itertools

import detectron2.utils.comm as comm
from detectron2.evaluation.evaluator import DatasetEvaluator

from caption_pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap


class CaptioningEvaluator(DatasetEvaluator):
    """
    Evaluate image captioning predictions against COCO caption-format annotations
    using the standard COCO caption metrics (BLEU, METEOR, ROUGE-L, CIDEr, SPICE)
    computed by pycocoevalcap.

    Predictions are collected per image via :meth:`process`, optionally gathered
    across distributed workers, dumped to a COCO caption result json, and scored
    in :meth:`evaluate`.
    """
    def __init__(
        self,
        distributed=True,
        output_dir=None,
        gt_json=None,
    ):
| """ | |
| Args: | |
| dataset_name (str): name of the dataset to be evaluated. | |
| It must have either the following corresponding metadata: | |
| "json_file": the path to the COCO format annotation | |
| Or it must be in detectron2's standard dataset format | |
| so it can be converted to COCO format automatically. | |
| tasks (tuple[str]): tasks that can be evaluated under the given | |
| configuration. A task is one of "bbox", "segm", "keypoints". | |
| By default, will infer this automatically from predictions. | |
| distributed (True): if True, will collect results from all ranks and run evaluation | |
| in the main process. | |
| Otherwise, will only evaluate the results in the current process. | |
| output_dir (str): optional, an output directory to dump all | |
| results predicted on the dataset. The dump contains two files: | |
| 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and | |
| contains all the results in the format they are produced by the model. | |
| 2. "coco_instances_results.json" a json file in COCO's result format. | |
| max_dets_per_image (int): limit on the maximum number of detections per image. | |
| By default in COCO, this limit is to 100, but this can be customized | |
| to be greater, as is needed in evaluation metrics AP fixed and AP pool | |
| (see https://arxiv.org/pdf/2102.01066.pdf) | |
| This doesn't affect keypoint evaluation. | |
| use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. | |
| Although the results should be very close to the official implementation in COCO | |
| API, it is still recommended to compute results with the official API for use in | |
| papers. The faster implementation also uses more RAM. | |
| kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. | |
| See http://cocodataset.org/#keypoints-eval | |
| When empty, it will use the defaults in COCO. | |
| Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. | |
| allow_cached_coco (bool): Whether to use cached coco json from previous validation | |
| runs. You should set this to False if you need to use different validation data. | |
| Defaults to True. | |
| """ | |
        self._logger = logging.getLogger(__name__)
        self._distributed = distributed
        self._output_dir = output_dir
        self._gt_json = COCO(gt_json)

    def reset(self):
        self._gen_captions = []
        self._image_ids = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to the model. It is a list of dict. Each dict
                corresponds to an image and contains keys like "height", "width",
                "file_name", "image_id".
            outputs: the outputs of the model. It is a list of dicts, one per image,
                each containing the keys "image_id" and "captioning_text".
        """
        for output in outputs:
            self._image_ids.append(output['image_id'])
            self._gen_captions.append(output['captioning_text'])

    def evaluate(self, img_ids=None):
        """
        Args:
            img_ids: a list of image IDs to evaluate on. Defaults to None for the
                whole dataset. (Currently unused; kept for interface compatibility.)
        """
        if self._distributed:
            comm.synchronize()

            def gather(x, move=False):
                x = comm.gather(x)
                x = list(itertools.chain(*x))
                if move:
                    x = [xx.to(self._gen_captions[0].device) for xx in x]
                return x

            gen_captions = gather(self._gen_captions)
            image_ids = gather(self._image_ids)
            if not comm.is_main_process():
                return {}
        else:
            gen_captions = self._gen_captions
            image_ids = self._image_ids

        assert len(gen_captions) == len(image_ids)

        # Dump predictions in COCO caption result format: one entry per image.
        pred_captions = [{"image_id": image_id, "caption": gen_caption}
                         for image_id, gen_caption in zip(image_ids, gen_captions)]
        pred_pth = os.path.join(self._output_dir, 'results.json')
        with open(pred_pth, "w") as f:
            json.dump(pred_captions, f)

        # Score the dumped predictions against the ground truth with pycocoevalcap.
        gt_captions = self._gt_json
        pred_captions = gt_captions.loadRes(pred_pth)
        cocoEval = COCOEvalCap(gt_captions, pred_captions)
        cocoEval.params['image_id'] = pred_captions.getImgIds()
        cocoEval.evaluate()
        return cocoEval.eval
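

# --------------------------------------------------------
# Usage sketch (illustrative only, not part of the original file). The
# annotation path and the inference loop below are assumptions; the evaluator
# only needs a COCO caption-format ground-truth json and, per image, an output
# dict carrying "image_id" and "captioning_text".
#
#   evaluator = CaptioningEvaluator(
#       distributed=False,
#       output_dir="./output",
#       gt_json="datasets/coco/annotations/captions_val2014.json",  # assumed path
#   )
#   evaluator.reset()
#   for inputs, outputs in run_inference(model, data_loader):  # hypothetical helper
#       evaluator.process(inputs, outputs)
#   metrics = evaluator.evaluate()  # e.g. {"Bleu_4": ..., "METEOR": ..., "CIDEr": ...}
# --------------------------------------------------------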