Spaces:
Build error
Build error
danseith
commited on
Commit
·
eabdff9
1
Parent(s):
85ace6b
Minor typo fix
Browse files
app.py
CHANGED
|
@@ -30,10 +30,12 @@ def add_mask(text, size=1):
|
|
| 30 |
if '[MASK]' in split_text:
|
| 31 |
return text
|
| 32 |
idx = np.random.randint(len(split_text), size=size)
|
| 33 |
-
|
| 34 |
for i in idx:
|
|
|
|
| 35 |
split_text[i] = '[MASK]'
|
| 36 |
-
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
class TempScalePipe(FillMaskPipeline):
|
|
@@ -136,9 +138,10 @@ scrambler = pipeline("temp-scale", model="anferico/bert-for-patents")
|
|
| 136 |
def unmask(text, temp, rounds):
|
| 137 |
sampling = 'multi'
|
| 138 |
for round in range(rounds):
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
| 142 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
| 143 |
out = {item["token_str"]: item["score"] for item in res}
|
| 144 |
score_to_str = {out[k] : k for k in out.keys()}
|
|
@@ -149,7 +152,7 @@ def unmask(text, temp, rounds):
|
|
| 149 |
idx = np.random.randint(0, len(score_list))
|
| 150 |
score = score_list[idx]
|
| 151 |
new_token = score_to_str[score]
|
| 152 |
-
if len(list(new_token)) < 2 or new_token == masked:
|
| 153 |
continue
|
| 154 |
split_text[mask_pos] = '*' + new_token + '*'
|
| 155 |
text = ' '.join(split_text)
|
|
|
|
| 30 |
if '[MASK]' in split_text:
|
| 31 |
return text
|
| 32 |
idx = np.random.randint(len(split_text), size=size)
|
| 33 |
+
masked_strings = []
|
| 34 |
for i in idx:
|
| 35 |
+
masked_strings.append(split_text[i])
|
| 36 |
split_text[i] = '[MASK]'
|
| 37 |
+
masked_output = ' '.join(split_text)
|
| 38 |
+
return masked_output, masked_strings
|
| 39 |
|
| 40 |
|
| 41 |
class TempScalePipe(FillMaskPipeline):
|
|
|
|
| 138 |
def unmask(text, temp, rounds):
|
| 139 |
sampling = 'multi'
|
| 140 |
for round in range(rounds):
|
| 141 |
+
tp = add_mask(text, size=1)
|
| 142 |
+
masked_text, masked = tp[0], tp[1]
|
| 143 |
+
split_text = masked_text.split()
|
| 144 |
+
res = scrambler(masked_text, temp=temp, top_k=10)
|
| 145 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
| 146 |
out = {item["token_str"]: item["score"] for item in res}
|
| 147 |
score_to_str = {out[k] : k for k in out.keys()}
|
|
|
|
| 152 |
idx = np.random.randint(0, len(score_list))
|
| 153 |
score = score_list[idx]
|
| 154 |
new_token = score_to_str[score]
|
| 155 |
+
if len(list(new_token)) < 2 or new_token == masked[0]:
|
| 156 |
continue
|
| 157 |
split_text[mask_pos] = '*' + new_token + '*'
|
| 158 |
text = ' '.join(split_text)
|