Spaces:
Running
on
T4
Running
on
T4
da03
committed on
Commit
·
1f1d7aa
1
Parent(s):
1d42781
utils.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys, os
|
| 2 |
+
import uuid
|
| 3 |
+
import shutil
|
| 4 |
+
import subprocess
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def normalize_formula(formula):
    """Normalize a LaTeX formula string via perl and ``preprocess_latex.js``.

    The formula goes through three stages:

    1. ``perl`` rewrites every ``hskip<amount><unit>`` (unit one of
       cm/in/pt/mm/em) into ``hspace{<amount><unit>}``.
    2. Each resulting line is stripped of surrounding whitespace (any
       carriage return is turned into a space first).
    3. The text is piped into ``node preprocess_latex.js normalize``; the
       script path is relative, so it must be present in the current
       working directory. Its output is re-joined with single spaces
       between whitespace-separated tokens.

    All data is exchanged with the child processes through pipes, so no
    temporary files are created and nothing is left behind on failure.

    Args:
        formula: The LaTeX source as a ``str``.

    Returns:
        The normalized formula with leading/trailing whitespace removed.

    Raises:
        AssertionError: If ``perl`` or ``node`` cannot be started or exits
            with a non-zero status. The exception type is kept for
            backward compatibility with callers of the earlier
            ``assert``-based implementation, but it is raised explicitly
            so it still fires under ``python -O``.
    """
    # '|' is the s||| delimiter, so perl strips the backslash from each
    # '\|' and the regex engine sees a plain alternation of units.
    hskip_script = r's|hskip(.*?)(cm\|in\|pt\|mm\|em)|hspace{\1\2}|g'
    rewritten = _pipe_through(['perl', '-pe', hskip_script], formula)

    cleaned = ''.join(
        line.replace('\r', ' ').strip() + '\n'
        for line in _split_lines(rewritten)
    )

    tokenized = _pipe_through(
        ['node', 'preprocess_latex.js', 'normalize'], cleaned
    )

    # Every token is kept; the original per-token filter was
    # `True or is_ascii(token)`, i.e. always true.
    return '\n'.join(
        ' '.join(line.split()) for line in _split_lines(tokenized)
    ).strip()


def _pipe_through(argv, text):
    """Run *argv* with *text* on stdin and return what it wrote to stdout.

    Text mode is used so newline translation and encoding match what the
    default-mode ``open()`` calls of a file-based pipeline would do.
    stderr is left attached to the parent so tool diagnostics stay visible.

    Raises:
        AssertionError: If the program cannot be started or exits non-zero.
    """
    try:
        proc = subprocess.run(
            argv,
            input=text,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        # A shell would have reported a missing program as a non-zero exit
        # status; surface it the same way as any other failed step.
        raise AssertionError(
            'could not run %s: %s' % (argv[0], err)
        ) from err
    if proc.returncode != 0:
        raise AssertionError(
            '%s exited with status %d' % (argv[0], proc.returncode)
        )
    return proc.stdout


def _split_lines(text):
    """Split *text* on '\\n' only, the way iterating a text file would."""
    pieces = text.split('\n')
    # A trailing newline (or empty input) yields one empty trailing piece
    # that file iteration would not produce.
    if pieces[-1] == '':
        pieces.pop()
    return pieces
|