| import numpy | |
| from transformers import TokenClassificationPipeline | |
| class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline): | |
| def __init__(self,**kwargs): | |
| super().__init__(**kwargs) | |
| x=self.model.config.label2id | |
| y=[k for k in x if not k.startswith("I-")] | |
| self.transition=numpy.full((len(x),len(x)),-numpy.inf) | |
| for k,v in x.items(): | |
| for j in ["I-"+k[2:]] if k.startswith("B-") else [k]+y if k.startswith("I-") else y: | |
| self.transition[v,x[j]]=0 | |
| def check_model_type(self,supported_models): | |
| pass | |
| def postprocess(self,model_outputs,**kwargs): | |
| if "logits" not in model_outputs: | |
| return self.postprocess(model_outputs[0],**kwargs) | |
| m=model_outputs["logits"][0].numpy() | |
| e=numpy.exp(m-numpy.max(m,axis=-1,keepdims=True)) | |
| z=e/e.sum(axis=-1,keepdims=True) | |
| for i in range(m.shape[0]-1,0,-1): | |
| m[i-1]+=numpy.max(m[i]+self.transition,axis=1) | |
| k=[numpy.argmax(m[0]+self.transition[0])] | |
| for i in range(1,m.shape[0]): | |
| k.append(numpy.argmax(m[i]+self.transition[k[-1]])) | |
| w=[{"entity":self.model.config.id2label[j],"start":s,"end":e,"score":z[i,j]} for i,((s,e),j) in enumerate(zip(model_outputs["offset_mapping"][0].tolist(),k)) if s<e] | |
| if "aggregation_strategy" in kwargs and kwargs["aggregation_strategy"]!="none": | |
| for i,t in reversed(list(enumerate(w))): | |
| p=t.pop("entity") | |
| if p.startswith("I-"): | |
| w[i-1]["score"]=min(w[i-1]["score"],t["score"]) | |
| w[i-1]["end"]=w.pop(i)["end"] | |
| elif p.startswith("B-"): | |
| t["entity_group"]=p[2:] | |
| else: | |
| t["entity_group"]=p | |
| for t in w: | |
| t["text"]=model_outputs["sentence"][t["start"]:t["end"]] | |
| return w | |