|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
|
|
class GrammarCorrector:
    """Correct grammar in text using a pretrained sequence-to-sequence model.

    The tokenizer and model are loaded once at construction time with
    ``from_pretrained`` and reused for every call to :meth:`correct`.
    """

    def __init__(
        self,
        model_name="vennify/t5-base-grammar-correction",
        prefix="gec: ",
    ):
        """Load the tokenizer and the seq2seq model for ``model_name``.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the model.
            prefix: Task prefix prepended to every input before tokenizing.
                Defaults to ``"gec: "``, the value this class has always used.

        NOTE(review): the prefix has to match what the checkpoint was trained
        with. The model card of the default checkpoint appears to document
        ``"grammar: "`` as its prefix -- confirm, and pass
        ``prefix="grammar: "`` if so.
        """
        self.prefix = prefix
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    def correct(self, text: str, *, max_length: int = 128, num_beams: int = 5) -> str:
        """Return a grammar-corrected version of ``text``.

        Args:
            text: The text to correct.
            max_length: ``max_length`` forwarded to ``model.generate``.
            num_beams: ``num_beams`` forwarded to ``model.generate``.

        Returns:
            The decoded first sequence produced by the model, with special
            tokens removed. Empty or whitespace-only input is returned
            unchanged without invoking the model.

        Raises:
            TypeError: If ``text`` is not a ``str``.
        """
        if not isinstance(text, str):
            raise TypeError(f"text must be a str, got {type(text).__name__}")

        # Nothing to correct: feeding the bare prefix to the model would only
        # make it generate arbitrary text.
        if not text.strip():
            return text

        input_ids = self.tokenizer.encode(self.prefix + text, return_tensors="pt")
        outputs = self.model.generate(
            input_ids,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
        )
        # Only the first returned sequence is decoded.
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|