chironjit45 commited on
Commit
1304fd0
·
1 Parent(s): 089243c

add all needed files

Browse files
Files changed (5) hide show
  1. Dockerfile +27 -0
  2. TextAPI.py +39 -0
  3. app.py +50 -0
  4. mt_model.pth +3 -0
  5. requirements.txt +11 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.9-slim

# System build tools needed to compile native Python wheels (e.g. sentencepiece)
RUN apt-get update && \
    apt-get install -y build-essential libffi-dev git && \
    rm -rf /var/lib/apt/lists/*

# Create and switch to a non-root user (required by Hugging Face Spaces)
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install Python dependencies first so this layer is cached across code edits
COPY --chown=user requirements.txt requirements.txt
RUN python -m pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code and the model weights
COPY --chown=user . /app

# Gradio serves on this port
EXPOSE 7860

# BUG FIX: app.py is a Gradio script with no FastAPI/ASGI `app` object, so
# `uvicorn app:app` cannot resolve the application. Run the script directly;
# its launch() call already binds 0.0.0.0:7860.
CMD ["python", "app.py"]
TextAPI.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Base architecture that the fine-tuned weights were trained from.
MT_PRETRAINED_MODEL_NAME = "shhossain/opus-mt-en-to-bn"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Token budget used for both the padded input and the generated output.
MAX_LENGTH = 128

tokenizer = AutoTokenizer.from_pretrained(MT_PRETRAINED_MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MT_PRETRAINED_MODEL_NAME)
# BUG FIX: this commit ships the fine-tuned weights as "mt_model.pth"
# (see app.py and the committed file list); the previous filename
# "mt_model_weights.pt" does not exist here and raised FileNotFoundError
# at import time.
state_dict = torch.load("mt_model.pth", map_location=DEVICE)
# strict=False tolerates missing/unexpected keys. NOTE(review): this can
# silently skip weights — confirm the state dict matches the architecture.
model.load_state_dict(state_dict, strict=False)
model.to(DEVICE)
model.eval()
def call_translate_api(text: str) -> dict:
    """Translate English *text* to Bangla.

    Returns {"status": "success", "translation": ..., "raw_response": ...}
    on success, or {"status": "error", "error": <message>} if anything in
    tokenization/generation raises.
    """
    try:
        # Encode to a fixed-length tensor and move it to the model's device.
        encoded = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=MAX_LENGTH,
        )
        input_ids = encoded.input_ids.to(DEVICE)

        # Beam-search decode without tracking gradients.
        with torch.no_grad():
            generated = model.generate(
                input_ids,
                max_length=MAX_LENGTH,
                num_beams=4,
                early_stopping=True,
            )

        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
        return {
            "status": "success",
            "translation": decoded,
            "raw_response": generated.tolist(),
        }
    except Exception as e:
        # Surface the failure to the caller instead of raising.
        return {"status": "error", "error": str(e)}
if __name__ == "__main__":
    # Simple interactive REPL over the translation helper; type "exit" to quit.
    print("===== English → Bangla Translator =====")
    while True:
        user_text = input("\nEnter English text: ")
        if user_text.strip().lower() == "exit":
            print("Exiting translator. Goodbye!")
            break

        outcome = call_translate_api(user_text)
        if outcome["status"] == "success":
            print(f"Bangla Translation: {outcome['translation']}")
        else:
            print(f"Error: {outcome['error']}")
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Configuration: base checkpoint, compute device, and token budget.
mt_pretrained_model_name = "shhossain/opus-mt-en-to-bn"  # base architecture
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_LENGTH = 128

# Tokenizer for the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(mt_pretrained_model_name)

# Base model, then overlay the fine-tuned weights shipped next to app.py.
model = AutoModelForSeq2SeqLM.from_pretrained(mt_pretrained_model_name)

# strict=False skips keys that don't line up with the architecture.
state_dict = torch.load("mt_model.pth", map_location=device)
model.load_state_dict(state_dict, strict=False)
model.to(device)
model.eval()
# ---------------- TRANSLATION FUNCTION ---------------- #
def translate_english_to_bangla(sentence: str) -> str:
    """Translate a single English sentence into Bangla text."""
    # Fixed-length encoding, moved to the model's device.
    batch = tokenizer(
        sentence,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH,
    )
    input_ids = batch.input_ids.to(device)

    # Beam search (4 beams), no gradient tracking needed for inference.
    with torch.no_grad():
        generated = model.generate(
            input_ids,
            max_length=MAX_LENGTH,
            num_beams=4,
            early_stopping=True,
        )

    return tokenizer.decode(generated[0], skip_special_tokens=True)
# ---------------- GRADIO INTERFACE ---------------- #
# Build the UI and serve it on all interfaces so the container port mapping works.
demo = gr.Interface(
    fn=translate_english_to_bangla,
    inputs=gr.Textbox(lines=3, placeholder="Enter your English sentence...", label="English Text"),
    outputs=gr.Textbox(label="Bangla Translation"),
    title="English to Bangla Translator(Fine Tuning)",
    description="",
)
demo.launch(server_name="0.0.0.0", server_port=7860)
mt_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e664f12133a2750bdaa0075dcb39ce2a710a3f8fde1f7e15341bd758e16d3995
3
+ size 305476421
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio
4
+ pydantic
5
+ sentencepiece
6
+ numpy
7
+ fastapi
8
+ uvicorn
9
+ sacremoses
10
+
11
+