from seqeval.metrics import (
    classification_report,
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
)
from seqeval.scheme import IOB2
from types import SimpleNamespace
import logging
import re

logger = logging.getLogger(__name__)
def compute_nested_metrics(segments, vocabs):
    """
    Compute metrics for nested NER.
    :param segments: List[List[Nested.data.dataset.Token]] - list of segments
    :param vocabs: list of tag vocabularies, one per entity type
    :return: metrics - SimpleNamespace - micro/macro/weighted F1, recall, precision, accuracy
    """
    y, y_hat = list(), list()
    # We duplicate the dataset N times, where N is the number of entity types.
    # For each copy, we create y and y_hat: the first copy pairs ground-truth
    # and predicted labels for entity type GPE, another copy pairs them for
    # LOC, and so on.
    for i, vocab in enumerate(vocabs):
        # Tags containing "-" are the positional tags of this entity type
        # (e.g. B-GPE, I-GPE); the regex matches any of them.
        vocab_tags = [tag for tag in vocab.get_itos() if "-" in tag]
        r = re.compile("|".join(vocab_tags))
        # Keep the token's gold tag for this entity type, or "O" if it has none.
        y += [[(list(filter(r.match, token.gold_tag)) or ["O"])[0] for token in segment] for segment in segments]
        y_hat += [[token.pred_tag[i]["tag"] for token in segment] for segment in segments]
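    # After the loop, with N entity types and S segments, y and y_hat each hold
    # N * S label sequences: the first S aligned to vocabs[0], the next S to
    # vocabs[1], and so on.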
    logger.info("\n" + classification_report(y, y_hat, scheme=IOB2, digits=4))
    metrics = {
        "micro_f1": f1_score(y, y_hat, average="micro", scheme=IOB2),
        "macro_f1": f1_score(y, y_hat, average="macro", scheme=IOB2),
        "weights_f1": f1_score(y, y_hat, average="weighted", scheme=IOB2),
        "precision": precision_score(y, y_hat, scheme=IOB2),
        "recall": recall_score(y, y_hat, scheme=IOB2),
        "accuracy": accuracy_score(y, y_hat),
    }
    return SimpleNamespace(**metrics)
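
# A minimal sketch of how compute_nested_metrics expects its inputs, assuming
# tokens expose `gold_tag` (one gold tag per entity type) and `pred_tag` (one
# {"tag": ...} dict per entity type), and that each vocab exposes get_itos()
# like a torchtext Vocab. The SimpleNamespace stand-ins and the two entity
# types (GPE, LOC) are illustrative, not part of the original module.
def _demo_nested_metrics():
    vocabs = [
        SimpleNamespace(get_itos=lambda: ["O", "B-GPE", "I-GPE"]),
        SimpleNamespace(get_itos=lambda: ["O", "B-LOC", "I-LOC"]),
    ]
    # One segment of two tokens; slot 0 of gold_tag/pred_tag is the GPE layer,
    # slot 1 is the LOC layer.
    segment = [
        SimpleNamespace(gold_tag=["B-GPE", "O"], pred_tag=[{"tag": "B-GPE"}, {"tag": "O"}]),
        SimpleNamespace(gold_tag=["O", "B-LOC"], pred_tag=[{"tag": "O"}, {"tag": "B-LOC"}]),
    ]
    metrics = compute_nested_metrics([segment], vocabs)
    print(metrics.micro_f1, metrics.macro_f1)
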
def compute_single_label_metrics(segments):
    """
    Compute metrics for flat NER.
    :param segments: List[List[Nested.data.dataset.Token]] - list of segments
    :return: metrics - SimpleNamespace - micro/macro/weighted F1, recall, precision, accuracy
    """
    # Flat NER reads only the first gold tag and the first prediction per token.
    y = [[token.gold_tag[0] for token in segment] for segment in segments]
    y_hat = [[token.pred_tag[0]["tag"] for token in segment] for segment in segments]
    logger.info("\n" + classification_report(y, y_hat, scheme=IOB2, digits=4))
    metrics = {
        "micro_f1": f1_score(y, y_hat, average="micro", scheme=IOB2),
        "macro_f1": f1_score(y, y_hat, average="macro", scheme=IOB2),
        "weights_f1": f1_score(y, y_hat, average="weighted", scheme=IOB2),
        "precision": precision_score(y, y_hat, scheme=IOB2),
        "recall": recall_score(y, y_hat, scheme=IOB2),
        "accuracy": accuracy_score(y, y_hat),
    }
    return SimpleNamespace(**metrics)
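
# A minimal sketch of the flat case: each token carries a single gold tag and a
# single prediction. The PER tags below are illustrative stand-ins, not from
# the original module. Here one of two gold PER entities is missed, so
# precision is 1.0, recall 0.5, and micro F1 about 0.667.
def _demo_single_label_metrics():
    segment = [
        SimpleNamespace(gold_tag=["B-PER"], pred_tag=[{"tag": "B-PER"}]),
        SimpleNamespace(gold_tag=["O"], pred_tag=[{"tag": "O"}]),
        SimpleNamespace(gold_tag=["B-PER"], pred_tag=[{"tag": "O"}]),
    ]
    metrics = compute_single_label_metrics([segment])
    print(metrics.micro_f1, metrics.precision, metrics.recall)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    _demo_nested_metrics()
    _demo_single_label_metrics()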