Update app.py
app.py
@@ -56,6 +56,7 @@ class Translators:
         self.sl, self.tl = sl, tl
         self.input_text = input_text
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.max_new_tokens = 512

     def google(self):
         # for rep in ('\r\n', '\r', '\n', ' '):

@@ -242,6 +243,35 @@ class Translators:
         except Exception as error:
             return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error

+    def rosetta(self):
+        model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
+            dtype=torch.bfloat16,
+            device_map="auto",
+            max_memory={0: "16GB"},
+        )
+        tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        context = {"tone": "Formal and precise"}
+        system = f"Translate the user's text to {self.tl}. Provide the final translation in a formal tone immediately, without any other text."
+        messages = [
+            {"role": "system", "content": system},
+            {"role": "user", "content": self.input_text},
+        ]
+
+        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        print(prompt)
+        inputs = tokenizer(prompt, return_tensors="pt").to(self.device)
+        input_length = inputs["input_ids"].shape[1]
+        with torch.inference_mode():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=self.max_new_tokens,
+            )
+
+        generated_tokens = outputs[0][input_length:]
+        translation = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+        return translation
+
     def salamandratapipe(self):
         pipe = pipeline("text-generation", model=self.model_name)
         messages = [{"role": "user", "content": f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text} \n{self.tl}:"}]
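For reference, the new rosetta() method follows the standard transformers chat-template generation flow: build system/user messages, render them with apply_chat_template, generate, and decode only the tokens produced after the prompt. Below is a minimal standalone sketch of that flow, assuming a generic instruction-tuned causal LM; the model id is a placeholder (not a checkpoint named in this commit), and max_new_tokens mirrors the value set in __init__ above.

# Standalone sketch of the chat-template flow used by rosetta().
# "your-org/your-model" is a placeholder, not a model referenced by this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "your-org/your-model"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

messages = [
    {"role": "system", "content": "Translate the user's text to German."},
    {"role": "user", "content": "How are you today?"},
]

# Render the chat template to a prompt string, then tokenize it for generation.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(device)
input_length = inputs["input_ids"].shape[1]

with torch.inference_mode():
    outputs = model.generate(**inputs, max_new_tokens=512)

# Slice off the prompt tokens so only the generated translation is decoded.
translation = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
print(translation)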
@@ -631,7 +661,7 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
         translated_text = Translators(model_name, s_language, t_language, input_text).salamandratapipe()

     elif "rosetta" in model_name.lower():
-        translated_text = Translators(model_name, s_language, t_language, input_text).
+        translated_text = Translators(model_name, s_language, t_language, input_text).rosetta()

     elif "small100" in model_name.lower():
         translated_text = Translators(model_name, sl, tl, input_text).smallonehundred()
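As a usage sketch: after this change, any model whose lowercased name contains "rosetta" is dispatched to the new method. The call below is hypothetical; the model id is made up for illustration, and the argument order follows the Translators constructor shown above.

# Hypothetical direct call; "example/rosetta-demo" is a made-up id whose name
# contains "rosetta", which is all the dispatch in translate_text checks for.
text = Translators("example/rosetta-demo", "English", "German", "Good morning!").rosetta()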