Spaces:
Build error
Build error
danseith
committed on
Commit
·
616c8c6
1
Parent(s):
a95bc58
Added a warning to the single-edit description and added a list of words not to substitute.
Browse files
app.py
CHANGED
|
@@ -28,7 +28,7 @@ tab_one_examples = [['A crustless _ made from two slices of baked bread.'],
|
|
| 28 |
]
|
| 29 |
|
| 30 |
|
| 31 |
-
def add_mask(text
|
| 32 |
split_text = text.split()
|
| 33 |
|
| 34 |
# If the user supplies a mask, don't add more
|
|
@@ -36,13 +36,20 @@ def add_mask(text, size=1):
|
|
| 36 |
u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
|
| 37 |
split_text[u_pos] = '[MASK]'
|
| 38 |
return ' '.join(split_text), '[MASK]'
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
masked_output = ' '.join(split_text)
|
| 45 |
-
return masked_output,
|
| 46 |
|
| 47 |
|
| 48 |
class TempScalePipe(FillMaskPipeline):
|
|
@@ -154,8 +161,7 @@ def sample_output(out, sampling):
|
|
| 154 |
|
| 155 |
|
| 156 |
def unmask_single(text, temp=1):
|
| 157 |
-
|
| 158 |
-
masked_text, masked = tp[0], tp[1]
|
| 159 |
res = scrambler(masked_text, temp=temp, top_k=10)
|
| 160 |
out = {item["token_str"]: item["score"] for item in res}
|
| 161 |
return out
|
|
@@ -164,21 +170,20 @@ def unmask_single(text, temp=1):
|
|
| 164 |
def unmask(text, temp, rounds):
|
| 165 |
sampling = 'multi'
|
| 166 |
for _ in range(rounds):
|
| 167 |
-
|
| 168 |
-
masked_text, masked = tp[0], tp[1]
|
| 169 |
split_text = masked_text.split()
|
| 170 |
res = scrambler(masked_text, temp=temp, top_k=15)
|
| 171 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
| 172 |
out = {item["token_str"]: item["score"] for item in res}
|
| 173 |
new_token = sample_output(out, sampling)
|
| 174 |
unsuccessful_iters = 0
|
| 175 |
-
while new_token == masked
|
| 176 |
if unsuccessful_iters > 5:
|
| 177 |
break
|
| 178 |
print('skipped', new_token)
|
| 179 |
new_token = sample_output(out, sampling=sampling)
|
| 180 |
unsuccessful_iters += 1
|
| 181 |
-
if new_token == masked
|
| 182 |
split_text[mask_pos] = new_token
|
| 183 |
else:
|
| 184 |
split_text[mask_pos] = '*' + new_token + '*'
|
|
@@ -188,6 +193,7 @@ def unmask(text, temp, rounds):
|
|
| 188 |
text[0] = text[0].upper()
|
| 189 |
return ''.join(text)
|
| 190 |
|
|
|
|
| 191 |
textbox1 = gr.Textbox(label="Input Sentence", lines=5)
|
| 192 |
output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
|
| 193 |
|
|
@@ -197,13 +203,15 @@ temp_slider2 = gr.Slider(1.0, 3.0, value=1.0, label='Creativity')
|
|
| 197 |
edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
|
| 198 |
|
| 199 |
title1 = "Patent-BERT Sentence Remix-er: Single Edit"
|
| 200 |
-
description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
|
|
|
|
| 201 |
<br/>
|
| 202 |
<p/>"""
|
| 203 |
title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
|
| 204 |
description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
|
| 205 |
the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
|
| 206 |
-
the model to attempt to make.
|
|
|
|
| 207 |
|
| 208 |
demo1 = gr.Interface(
|
| 209 |
fn=unmask_single,
|
|
|
|
| 28 |
]
|
| 29 |
|
| 30 |
|
| 31 |
+
def add_mask(text):
|
| 32 |
split_text = text.split()
|
| 33 |
|
| 34 |
# If the user supplies a mask, don't add more
|
|
|
|
| 36 |
u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
|
| 37 |
split_text[u_pos] = '[MASK]'
|
| 38 |
return ' '.join(split_text), '[MASK]'
|
| 39 |
+
|
| 40 |
+
idx = np.random.randint(len(split_text), size=1).astype(int)[0]
|
| 41 |
+
# Don't mask certain words
|
| 42 |
+
num_iters = 0
|
| 43 |
+
while split_text[idx].lower() in ['a', 'an', 'the', 'is', 'and', 'or']:
|
| 44 |
+
num_iters += 1
|
| 45 |
+
idx = np.random.randint(len(split_text), size=1).astype(int)[0]
|
| 46 |
+
if num_iters > 10:
|
| 47 |
+
break
|
| 48 |
+
|
| 49 |
+
masked_string = split_text[idx]
|
| 50 |
+
split_text[idx] = '[MASK]'
|
| 51 |
masked_output = ' '.join(split_text)
|
| 52 |
+
return masked_output, masked_string
|
| 53 |
|
| 54 |
|
| 55 |
class TempScalePipe(FillMaskPipeline):
|
|
|
|
| 161 |
|
| 162 |
|
| 163 |
def unmask_single(text, temp=1):
|
| 164 |
+
masked_text, _ = add_mask(text)
|
|
|
|
| 165 |
res = scrambler(masked_text, temp=temp, top_k=10)
|
| 166 |
out = {item["token_str"]: item["score"] for item in res}
|
| 167 |
return out
|
|
|
|
| 170 |
def unmask(text, temp, rounds):
|
| 171 |
sampling = 'multi'
|
| 172 |
for _ in range(rounds):
|
| 173 |
+
masked_text, masked = add_mask(text)
|
|
|
|
| 174 |
split_text = masked_text.split()
|
| 175 |
res = scrambler(masked_text, temp=temp, top_k=15)
|
| 176 |
mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
|
| 177 |
out = {item["token_str"]: item["score"] for item in res}
|
| 178 |
new_token = sample_output(out, sampling)
|
| 179 |
unsuccessful_iters = 0
|
| 180 |
+
while new_token == masked:
|
| 181 |
if unsuccessful_iters > 5:
|
| 182 |
break
|
| 183 |
print('skipped', new_token)
|
| 184 |
new_token = sample_output(out, sampling=sampling)
|
| 185 |
unsuccessful_iters += 1
|
| 186 |
+
if new_token == masked:
|
| 187 |
split_text[mask_pos] = new_token
|
| 188 |
else:
|
| 189 |
split_text[mask_pos] = '*' + new_token + '*'
|
|
|
|
| 193 |
text[0] = text[0].upper()
|
| 194 |
return ''.join(text)
|
| 195 |
|
| 196 |
+
|
| 197 |
textbox1 = gr.Textbox(label="Input Sentence", lines=5)
|
| 198 |
output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
|
| 199 |
|
|
|
|
| 203 |
edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
|
| 204 |
|
| 205 |
title1 = "Patent-BERT Sentence Remix-er: Single Edit"
|
| 206 |
+
description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
|
| 207 |
+
<strong>Note:</strong> You can only add one '_' per submission.
|
| 208 |
<br/>
|
| 209 |
<p/>"""
|
| 210 |
title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
|
| 211 |
description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
|
| 212 |
the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
|
| 213 |
+
the model to attempt to make. The words substituted in the output sentence will be enclosed in asterisks (e.g., *word*).
|
| 214 |
+
<br/> <p/> """
|
| 215 |
|
| 216 |
demo1 = gr.Interface(
|
| 217 |
fn=unmask_single,
|