Spaces:
Running
Running
| from nervaluate import Evaluator | |
| from sklearn.metrics import classification_report | |
| from token_level_output import get_token_output_labels | |
# Display names of the supported NER evaluation strategies.  Each entry is
# matched verbatim by get_evaluation_metric below to select an implementation.
EVALUATION_METRICS = [
    "Span Based Evaluation with Partial Overlap",
    "Token Based Evaluation with Micro Avg",
    "Token Based Evaluation with Macro Avg",
]
def get_span_eval(gt_ner_span, pred_ner_span, text):
    """Span-level F1 (``ent_type`` scheme) via nervaluate, rounded to 2 dp.

    The spans are wrapped in one-element lists because ``Evaluator`` expects
    one span list per document.  ``text`` is unused here; it is accepted so
    every metric function shares the same signature for dispatch.
    """
    evaluation = Evaluator(
        [gt_ner_span], [pred_ner_span], tags=["Disease", "Drug"]
    ).evaluate()
    # evaluate() returns (overall_results, results_per_tag); take the overall dict.
    overall = evaluation[0]
    return round(overall["ent_type"]["f1"], 2)
def get_token_micro_eval(gt_ner_span, pred_ner_span, text):
    """Token-level micro-averaged F1 over the Disease/Drug labels, rounded to 2 dp.

    Spans are first expanded to per-token labels via get_token_output_labels,
    then scored with sklearn's classification_report.
    """
    gt_labels = get_token_output_labels(gt_ner_span, text)
    pred_labels = get_token_output_labels(pred_ner_span, text)
    report = classification_report(
        gt_labels,
        pred_labels,
        labels=["Disease", "Drug"],
        output_dict=True,
    )
    # NOTE(review): assumes the token label set includes labels beyond
    # Disease/Drug (e.g. "O"), so the report contains a "micro avg" entry
    # rather than collapsing it into "accuracy" — confirm with callers.
    return round(report["micro avg"]["f1-score"], 2)
def get_token_macro_eval(gt_ner_span, pred_ner_span, text):
    """Token-level macro-averaged F1 over the Disease/Drug labels, rounded to 2 dp.

    Mirrors get_token_micro_eval but reads the "macro avg" row of the report,
    which weights each label equally regardless of support.
    """
    gt_labels = get_token_output_labels(gt_ner_span, text)
    pred_labels = get_token_output_labels(pred_ner_span, text)
    report = classification_report(
        gt_labels,
        pred_labels,
        labels=["Disease", "Drug"],
        output_dict=True,
    )
    return round(report["macro avg"]["f1-score"], 2)
| def get_evaluation_metric(metric_type, gt_ner_span, pred_ner_span, text): | |
| match metric_type: | |
| case "Span Based Evaluation with Partial Overlap": | |
| return get_span_eval(gt_ner_span, pred_ner_span, text) | |
| case "Token Based Evaluation with Micro Avg": | |
| return get_token_micro_eval(gt_ner_span, pred_ner_span, text) | |
| case "Token Based Evaluation with Macro Avg": | |
| return get_token_macro_eval(gt_ner_span, pred_ner_span, text) | |