Spaces:
Build error
Build error
| import ctranslate2 | |
| from subword_nmt.apply_bpe import BPE | |
| import codecs | |
| import re | |
def apply_subwording(sample_text, model_code_path):
    """Segment every line of *sample_text* with subword-nmt BPE.

    Args:
        sample_text: Text to segment; it is processed line by line.
        model_code_path: Path of the UTF-8 BPE codes file handed to ``BPE``.

    Returns:
        The segmented lines, each terminated by a newline character. An
        empty *sample_text* yields an empty string.
    """
    # Open the codes file in a context manager so the handle is released
    # once the BPE object has been built (the original never closed it).
    # NOTE(review): relies on BPE loading the codes in its constructor --
    # confirm against the installed subword-nmt version.
    with codecs.open(model_code_path, encoding='utf-8') as codes_file:
        bpe = BPE(codes_file)

    # Build the result in one pass instead of repeated string concatenation.
    return "".join(
        bpe.process_line(line) + "\n" for line in sample_text.splitlines()
    )
def remove_subwording_marks(translated_text):
    """Strip the ``@@`` BPE continuation markers from *translated_text*.

    A marker followed by a space (``"wor@@ ld"``) is removed together with
    that space so the two pieces are rejoined. A marker sitting at the very
    end of a line or of the text (``"wor@@\\n"``) is removed as well; the
    original pattern ``"@@ "`` left such trailing markers in the output.

    Args:
        translated_text: Text that may contain ``@@`` continuation markers.

    Returns:
        The text with the markers removed; line breaks are preserved.
    """
    # MULTILINE lets ``$`` match before every newline, not only at the end
    # of the whole string, so each line's trailing marker is covered.
    return re.sub(r"@@( |$)", "", translated_text, flags=re.MULTILINE)
def translate_nos(sample_text, model):
    """Translate *sample_text* line by line with a CTranslate2 model.

    Args:
        sample_text: Input text; each line is translated separately.
        model: Indexable pair. ``model[0]`` is forwarded to
            ``apply_subwording`` as the BPE codes path and ``model[1]`` is
            given to ``ctranslate2.Translator`` as the model location.

    Returns:
        The translated lines, each terminated by a newline, after the
        subword markers have been removed by ``remove_subwording_marks``.
    """
    bpe_codes_path = model[0]
    ct2_model_path = model[1]

    # Step 1: BPE-segment the input.
    segmented = apply_subwording(sample_text, bpe_codes_path)

    # Step 2: translate each segmented line on CPU, one line per call.
    engine = ctranslate2.Translator(ct2_model_path, device="cpu")
    translated_lines = []
    for segmented_line in segmented.splitlines():
        tokens = segmented_line.strip().split()
        batch_result = engine.translate_batch(
            [tokens], replace_unknowns=True, beam_size=5, batch_type='examples'
        )
        # Keep only the first hypothesis of the single batch entry.
        best_tokens = batch_result[0].hypotheses[0]
        translated_lines.append(' '.join(best_tokens) + "\n")

    # Step 3: undo the subword segmentation on the joined output.
    return remove_subwording_marks("".join(translated_lines))