import os
from typing import Any, Dict, List

from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)


class PIINER:
    """Thin wrapper around a HuggingFace token-classification (NER) pipeline.

    The tokenizer and model are loaded once at construction time and reused
    for every call to :meth:`extract_entities`.
    """

    def __init__(self, model_name: str = "dslim/bert-base-NER"):
        """Initialize the HuggingFace NER pipeline using the specified model.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the token-classification model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        self.ner_pipeline = pipeline(
            "ner",
            model=self.model,
            tokenizer=self.tokenizer,
            aggregation_strategy="simple",
        )

    def extract_entities(self, text: str) -> List[Dict[str, Any]]:
        """Perform NER on the input text and return the detected entities.

        Args:
            text: The text to analyse.

        Returns:
            The list produced by the NER pipeline. Each entity includes:
            ``entity_group``, ``word``, ``start``, ``end``, and ``score``.
            An empty list is returned for empty or whitespace-only text.
        """
        # Blank text cannot contain any entity, so skip the model call.
        # Non-str inputs are deliberately not intercepted here; they are
        # forwarded to the pipeline exactly as before.
        if isinstance(text, str) and not text.strip():
            return []

        return self.ner_pipeline(text)