Commit 92f0b14 · 1 Parent(s): f5c22ed
model improved
Browse files
- maker.py +16 -0
- pytorch_model.bin +1 -1
maker.py
CHANGED
|
@@ -21,6 +21,7 @@ class UDgoeswithDataset(object):
|
|
| 21 |
if len(t)==10 and t[0].isdecimal():
|
| 22 |
c.append(t)
|
| 23 |
elif c!=[]:
|
|
|
|
| 24 |
v=tokenizer([t[1].replace(" ","_") for t in c],add_special_tokens=False)["input_ids"]
|
| 25 |
for i in range(len(v)-1,-1,-1):
|
| 26 |
for j in range(1,len(v[i])):
|
|
@@ -28,6 +29,21 @@ class UDgoeswithDataset(object):
|
|
| 28 |
y=["0"]+[t[0] for t in c]
|
| 29 |
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
|
| 30 |
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in c],sum(v,[])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
if len(v)<tokenizer.model_max_length-3:
|
| 32 |
self.ids.append([cls]+v+[sep])
|
| 33 |
self.tags.append([dep]+p+[dep])
|
|
|
|
| 21 |
if len(t)==10 and t[0].isdecimal():
|
| 22 |
c.append(t)
|
| 23 |
elif c!=[]:
|
| 24 |
+
d=list(c)
|
| 25 |
v=tokenizer([t[1].replace(" ","_") for t in c],add_special_tokens=False)["input_ids"]
|
| 26 |
for i in range(len(v)-1,-1,-1):
|
| 27 |
for j in range(1,len(v[i])):
|
|
|
|
| 29 |
y=["0"]+[t[0] for t in c]
|
| 30 |
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
|
| 31 |
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in c],sum(v,[])
|
| 32 |
+
if len(v)<tokenizer.model_max_length-3:
|
| 33 |
+
self.ids.append([cls]+v+[sep])
|
| 34 |
+
self.tags.append([dep]+p+[dep])
|
| 35 |
+
label=set(sum([self.tags[-1],list(label)],[]))
|
| 36 |
+
for i,k in enumerate(v):
|
| 37 |
+
self.ids.append([cls]+v[0:i]+[msk]+v[i+1:]+[sep,k])
|
| 38 |
+
self.tags.append([dep]+[t if h[j]==i+1 else dep for j,t in enumerate(p)]+[dep,dep])
|
| 39 |
+
c=d
|
| 40 |
+
v=tokenizer([t[1].replace("_"," ") for t in c],add_special_tokens=False)["input_ids"]
|
| 41 |
+
for i in range(len(v)-1,-1,-1):
|
| 42 |
+
for j in range(1,len(v[i])):
|
| 43 |
+
c.insert(i+1,[c[i][0],"_","_","X","_","_",c[i][0],"goeswith","_","_"])
|
| 44 |
+
y=["0"]+[t[0] for t in c]
|
| 45 |
+
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
|
| 46 |
+
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in c],sum(v,[])
|
| 47 |
if len(v)<tokenizer.model_max_length-3:
|
| 48 |
self.ids.append([cls]+v+[sep])
|
| 49 |
self.tags.append([dep]+p+[dep])
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 538828593
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a53188b23df9f7933ce159ec14e1da5057afde959f352f3e17d90faf444024d
|
| 3 |
size 538828593
|