Spaces:

aziac
/

csaf-captcha

Running

App Files Community

aziac committed 25 days ago

Commit

caa7998

1 Parent(s): 805ac97

pretrained method

Browse files

Files changed (8) hide show

app/main.py +64 -91
app/vocab.txt +20 -0
model/.DS_Store +0 -3
model/fingerprint.pb +0 -3
model/saved_model.pb +0 -3
model/variables/variables.data-00000-of-00001 +0 -3
model/variables/variables.index +0 -3
requirements.txt +4 -2

app/main.py CHANGED Viewed

@@ -3,51 +3,57 @@ import random
 from pathlib import Path
 import numpy as np
 import tensorflow as tf
-import keras
-from PIL import Image
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from contextlib import asynccontextmanager
 # --- Pydantic Models for Request Body ---
 class CaptchaRequest(BaseModel):
     filename: str
 # --- Global Variables ---
-# This will hold our loaded prediction model
 prediction_model = None
-# --- Configuration based on your Training Notebook ---
-# 1. CHARACTER SET
-data_dir = Path("./static/images/")
-images = sorted(list(map(str, list(data_dir.glob("*.png")))))
-labels = [img.split(os.path.sep)[-1].split(".png")[0] for img in images]
-characters = set(char for label in labels for char in label)
-CHARACTERS = sorted(list(characters))
-# 2. IMAGE DIMENSIONS
-# These dimensions are taken directly from your notebook.
 IMG_WIDTH = 200
 IMG_HEIGHT = 50
 # --- App Lifespan Management (Model Loading) ---
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    # Code to run on startup
-    print("INFO:     Loading TensorFlow prediction model...")
-    global prediction_model
     try:
-        # NOTE: Ensure you save the `prediction_model` from your notebook,
-        # not the multi-input training `model`.
-        prediction_model = keras.layers.TFSMLayer('model', call_endpoint='serving_default')
-        print("INFO:     TensorFlow model loaded successfully.")
     except Exception as e:
-        print(f"ERROR:    Failed to load model: {e}")
         prediction_model = None
     yield
-    # Code to run on shutdown
     print("INFO:     Application shutting down.")
@@ -55,67 +61,23 @@ async def lifespan(app: FastAPI):
 app = FastAPI(lifespan=lifespan)
 # --- CORS Middleware ---
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
 # --- Constants ---
 IMAGE_DIR = Path("static/images")
-# --- Helper Functions based on your Notebook ---
-def preprocess_image(image_path):
-    """
-    Loads and preprocesses an image for model prediction based on the notebook's
-    `encode_single_sample` function.
-    """
-    try:
-        # 1. Read image, convert to grayscale
-        img = Image.open(image_path).convert('L') #
-        # 2. Resize to the desired size (width, height)
-        img = img.resize((IMG_WIDTH, IMG_HEIGHT)) #
-        # 3. Convert to numpy array of float32 in [0, 1] range
-        img = np.array(img, dtype=np.float32) / 255.0 #
-        # 4. Transpose the image because the RNN part of the model expects the time
-        # dimension to correspond to the width of the image.
-        # The notebook does this with `ops.transpose(img, axes=[1, 0, 2])`.
-        # Here, a numpy array of shape (height, width) becomes (width, height).
-        img = img.T
-        # 5. Add channel and batch dimensions
-        img = np.expand_dims(img, axis=-1) # Add channel -> (width, height, 1)
-        img = np.expand_dims(img, axis=0)  # Add batch -> (1, width, height, 1)
-        return img
-    except Exception as e:
-        print(f"Error preprocessing image {image_path}: {e}")
-        return None
-def decode_prediction(pred):
-    """
-    Decodes the raw model output into a human-readable string using CTC decoding,
-    mirroring the notebook's `decode_batch_predictions` function.
-    """
-    # 1. Get the input length (number of timesteps)
     input_len = np.ones(pred.shape[0]) * pred.shape[1]
-    # 2. Use Keras's CTC decoder (greedy search is sufficient and fast)
-    # This is equivalent to `tf.nn.ctc_greedy_decoder` used in the notebook.
-    results = tf.keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0]
-    # 3. Iterate over the results and convert back to text
-    output_text = ""
-    for res in results.numpy():
-        # The `CHARACTERS` list maps indices to characters.
-        # -1 is the default padding value from ctc_decode.
-        if res != -1 and res < len(CHARACTERS):
-            output_text += CHARACTERS[res]
     return output_text
 # --- API Endpoints ---
@@ -134,27 +96,38 @@ async def get_captcha():
 @app.post("/solve_captcha")
 async def solve_captcha(request: CaptchaRequest):
-    if prediction_model is None:
-        raise HTTPException(status_code=503, detail="Model is not loaded or failed to load.")
     image_path = IMAGE_DIR / request.filename
     if not image_path.is_file():
         raise HTTPException(status_code=404, detail=f"File '{request.filename}' not found.")
-    # Preprocess the image according to the notebook's logic
-    processed_image = preprocess_image(image_path)
-    if processed_image is None:
-        raise HTTPException(status_code=500, detail="Failed to process the image.")
     try:
-        # A TFSMLayer is a callable Keras layer.
-        # We can call it directly with our input numpy array.
-        preds = prediction_model(processed_image)
-        # Decode the prediction
-        predicted_label = decode_prediction(preds)
-        return {"prediction": predicted_label}
     except Exception as e:
         print(f"Error during prediction: {e}")
         raise HTTPException(status_code=500, detail=f"An error occurred during model inference: {e}")

 from pathlib import Path
 import numpy as np
 import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from contextlib import asynccontextmanager
+# New import for the pre-trained model
+from huggingface_hub import from_pretrained_keras
 # --- Pydantic Models for Request Body ---
 class CaptchaRequest(BaseModel):
     filename: str  # name of the image file, looked up under IMAGE_DIR by /solve_captcha
 # --- Global Variables ---
 prediction_model = None  # inference model; assigned by lifespan() at startup
+num_to_char = None  # index-to-character lookup layer; assigned by lifespan() at startup
+max_length = 5 # From your Gradio script; caps the decoded label length in decode_batch_predictions
+# --- Configuration for the pre-trained "keras-io/ocr-for-captcha" model ---
 IMG_WIDTH = 200  # target width passed to tf.image.resize in solve_captcha
 IMG_HEIGHT = 50  # target height passed to tf.image.resize in solve_captcha
 # --- App Lifespan Management (Model Loading) ---
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    global prediction_model, num_to_char
     try:
+        print("INFO:     Loading pre-trained Keras model and vocab...")
+        # 1. Load the base model from Hugging Face Hub
+        base_model = from_pretrained_keras("keras-io/ocr-for-captcha", compile=False)
+        # 2. Create the inference-only prediction_model (from your Gradio script)
+        prediction_model = keras.models.Model(
+            base_model.get_layer(name="image").input, base_model.get_layer(name="dense2").output
+        )
+        # 3. Load the vocabulary from the file
+        with open("vocab.txt", "r") as f:
+            vocab = f.read().splitlines()
+        # 4. Create the character mapping layer (from your Gradio script)
+        num_to_char = layers.StringLookup(vocabulary=vocab, mask_token=None, invert=True)
+        print("INFO:     Model and vocab loaded successfully.")
     except Exception as e:
+        print(f"ERROR:    Failed to load pre-trained model or vocab: {e}")
         prediction_model = None
     yield
     print("INFO:     Application shutting down.")
 app = FastAPI(lifespan=lifespan)
 # --- CORS Middleware ---
+# NOTE(review): every origin, method and header is allowed while credentials are
+# enabled; confirm this wide-open policy is intended, or list explicit origins.
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
 # --- Constants ---
 IMAGE_DIR = Path("static/images")  # relative path: resolved against the process working directory
+# --- Helper Functions (from your Gradio script) ---
+def decode_batch_predictions(pred):
+    # This function is directly from your Gradio script
     input_len = np.ones(pred.shape[0]) * pred.shape[1]
+    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
+        :, :max_length
+    ]
+    output_text = []
+    for res in results:
+        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
+        output_text.append(res)
     return output_text
 # --- API Endpoints ---
 @app.post("/solve_captcha")
 async def solve_captcha(request: CaptchaRequest):
+    if prediction_model is None or num_to_char is None:
+        raise HTTPException(status_code=503, detail="Model or vocab is not loaded.")
     image_path = IMAGE_DIR / request.filename
     if not image_path.is_file():
         raise HTTPException(status_code=404, detail=f"File '{request.filename}' not found.")
     try:
+        # This core logic is taken directly from your `classify_image` function
+        # 1. Read image
+        img = tf.io.read_file(str(image_path)) # Convert Path object to string for tf.io
+        # 2. Decode and convert to grayscale
+        img = tf.io.decode_png(img, channels=1)
+        # 3. Convert to float32 in [0, 1] range
+        img = tf.image.convert_image_dtype(img, tf.float32)
+        # 4. Resize to the desired size
+        img = tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
+        # 5. Transpose the image
+        img = tf.transpose(img, perm=[1, 0, 2])
+        # 6. Add a batch dimension
+        img = tf.expand_dims(img, axis=0)
+        # 7. Get predictions
+        preds = prediction_model.predict(img)
+        # 8. Decode the predictions
+        pred_text = decode_batch_predictions(preds)
+        # Return the first (and only) prediction
+        return {"prediction": pred_text[0]}
     except Exception as e:
         print(f"Error during prediction: {e}")
         raise HTTPException(status_code=500, detail=f"An error occurred during model inference: {e}")

app/vocab.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+[UNK]
+8
+6
+m
+x
+d
+y
+w
+2
+7
+n
+g
+5
+c
+f
+p
+e
+3
+4
+b

model/.DS_Store DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ce35a183b313defdf28e0e5a7cfb29468a17bb0d9b42f1ef75f4e366851478f7
-size 6148

model/fingerprint.pb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:97e29e2ce27e4c2d1d1273f0cdb069a094ecdbea21a6559d82fbd34ed9c17b4b
-size 78

model/saved_model.pb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cd3fd69880e1b68390152c8236dd3000a7abdde05867d6f2074d120dbfdd6c17
-size 269319

model/variables/variables.data-00000-of-00001 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b5bd622ed42679af9c2142e8c79621c1fe209608c3061414e1869c207df6b609
-size 3467858

model/variables/variables.index DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a326c821608827e2a07f3ccb56d4b74ac4b172245d5b97a9d23832a6fd87ea37
-size 2907

requirements.txt CHANGED Viewed

@@ -1,6 +1,8 @@
 fastapi
 uvicorn[standard]
 python-multipart
-tensorflow
-numpy
 Pillow

 fastapi
 uvicorn[standard]
 python-multipart
+tensorflow>=2.6,<2.15
+keras<3.0.0
+huggingface_hub
+numpy<2
 Pillow