TiberiuCristianLeon committed
Commit 94663ef · verified · 1 Parent(s): f4602df

Update app.py

Files changed (1): app.py (+31, -1)
app.py CHANGED

@@ -56,6 +56,7 @@ class Translators:
        self.sl, self.tl = sl, tl
        self.input_text = input_text
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+       self.max_new_tokens = 512

    def google(self):
        # for rep in ('\r\n', '\r', '\n', ' '):
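The new self.max_new_tokens attribute is read by the rosetta() generation call added in the next hunk; it caps each translation at 512 newly generated tokens.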
@@ -242,6 +243,35 @@ class Translators:
        except Exception as error:
            return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error

+   def rosetta(self):
+       model = AutoModelForCausalLM.from_pretrained(
+           self.model_name,
+           dtype=torch.bfloat16,
+           device_map="auto",
+           max_memory={0: "16GB"},
+       )
+       tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+       context = {"tone": "Formal and precise"}
+       system = f"Translate the user's text to {self.tl}. Provide the final translation in a formal tone immediately, without any other text."
+       messages = [
+           {"role": "system", "content": system},
+           {"role": "user", "content": self.input_text},
+       ]
+
+       prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+       print(prompt)
+       inputs = tokenizer(prompt, return_tensors="pt").to(self.device)
+       input_length = inputs["input_ids"].shape[1]
+       with torch.inference_mode():
+           outputs = model.generate(
+               **inputs,
+               max_new_tokens=self.max_new_tokens,
+           )
+
+       generated_tokens = outputs[0][input_length:]
+       translation = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+       return translation
+
    def salamandratapipe(self):
        pipe = pipeline("text-generation", model=self.model_name)
        messages = [{"role": "user", "content": f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text} \n{self.tl}:"}]
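A minimal usage sketch of the new method, assuming the Translators constructor signature visible in the translate_text() dispatch below; the model id is a placeholder, not a repository named in this commit:

    # Hypothetical repo id -- any model whose name contains "rosetta" is routed here.
    translator = Translators("some-org/rosetta-translator", "English", "German", "Good morning!")
    print(translator.rosetta())

Note that rosetta() loads the model and tokenizer on every call; a caller translating many strings may want to cache them.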
@@ -631,7 +661,7 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
        translated_text = Translators(model_name, s_language, t_language, input_text).salamandratapipe()

    elif "rosetta" in model_name.lower():
-       translated_text = Translators(model_name, s_language, t_language, input_text).simplepipe()
+       translated_text = Translators(model_name, s_language, t_language, input_text).rosetta()

    elif "small100" in model_name.lower():
        translated_text = Translators(model_name, sl, tl, input_text).smallonehundred()
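With this change, models whose name contains "rosetta" no longer fall through to the generic simplepipe() helper; they use the dedicated rosetta() method, so the formal system prompt and prompt-token slicing above now apply to that branch.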
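For reference, the slicing step in rosetta() is what keeps the echoed prompt out of the returned translation: generate() on a causal LM returns the prompt tokens and the continuation in a single tensor, so only outputs[0][input_length:] is decoded. A self-contained sketch of that pattern, with a placeholder model id (an assumption, not a repository named in this commit):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "some-org/chat-model"  # placeholder; any chat-tuned causal LM works
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    messages = [
        {"role": "system", "content": "Translate the user's text to German."},
        {"role": "user", "content": "Good morning!"},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_length = inputs["input_ids"].shape[1]

    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=64)

    # outputs[0] holds prompt + continuation; drop the prompt before decoding.
    print(tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True))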