Spaces:
Sleeping
Sleeping
| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline | |
| from typing import List, Dict, Any | |
| import os | |
class PIINER:
    """Thin wrapper around a Hugging Face token-classification (NER) pipeline.

    The tokenizer, the model and the assembled pipeline are kept on the
    instance as ``tokenizer``, ``model`` and ``ner_pipeline``.
    """

    def __init__(self, model_name: str = "dslim/bert-base-NER"):
        """Load the tokenizer and model, then build the NER pipeline.

        Args:
            model_name: Model identifier handed to ``from_pretrained`` for
                both the tokenizer and the token-classification model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        # aggregation_strategy="simple" asks the pipeline to report grouped
        # entities (``entity_group``) instead of one result per token.
        self.ner_pipeline = pipeline(
            "ner",
            model=self.model,
            tokenizer=self.tokenizer,
            aggregation_strategy="simple",
        )

    def extract_entities(self, text: str) -> List[Dict[str, Any]]:
        """Run NER on ``text`` and return the detected entities.

        Args:
            text: Input text to analyse.

        Returns:
            A list with one dict per entity, as produced by the pipeline
            (expected keys: ``entity_group``, ``word``, ``start``, ``end``
            and ``score``). ``score`` is converted to a built-in ``float``.
            Empty or whitespace-only input yields an empty list.
        """
        # Blank text cannot contain entities; skip the model call entirely.
        if not text or not text.strip():
            return []

        entities: List[Dict[str, Any]] = []
        for raw_entity in self.ner_pipeline(text):
            # Copy so the pipeline's own result objects are never mutated.
            entity = dict(raw_entity)
            # NOTE(review): the pipeline usually reports ``score`` as a NumPy
            # scalar, which ``json.dumps`` rejects; normalise to plain float.
            if "score" in entity:
                entity["score"] = float(entity["score"])
            entities.append(entity)
        return entities