Spaces:

aziac
/

csaf-captcha

Sleeping

App Files Files Community

aziac committed on Oct 7

Commit

6bc7bcc

1 Parent(s): 7017186

added solve captcha endpoint

Browse files

Files changed (9) hide show

.DS_Store +0 -0
.gitattributes +3 -0
app/main.py +127 -14
model/.DS_Store +3 -0
model/fingerprint.pb +3 -0
model/saved_model.pb +3 -0
model/variables/variables.data-00000-of-00001 +3 -0
model/variables/variables.index +3 -0
requirements.txt +4 -1

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model/saved_model.pb filter=lfs diff=lfs merge=lfs -text
+model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+model/** filter=lfs diff=lfs merge=lfs -text

app/main.py CHANGED Viewed

@@ -1,11 +1,57 @@
 import os
 import random
 from pathlib import Path
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-# Initialize the FastAPI app
-app = FastAPI()
 # --- CORS Middleware ---
 app.add_middleware(
@@ -17,30 +63,97 @@ app.add_middleware(
 )
 # --- Constants ---
-# Define the path to the directory containing captcha images
 IMAGE_DIR = Path("static/images")
 # --- API Endpoints ---
 @app.get("/")
 async def read_root():
-    """A simple root endpoint to check if the API is running."""
     return {"message": "Welcome to the Captcha Solver API!"}
 @app.get("/get_captcha")
 async def get_captcha():
-    """
-    Returns the filename of a random captcha image from the static/images directory.
-    """
     if not IMAGE_DIR.is_dir():
-        raise HTTPException(status_code=500, detail="Image directory not found on server.")
-    image_files = [f for f in os.listdir(IMAGE_DIR) if f.endswith(('.png'))]
     if not image_files:
         raise HTTPException(status_code=404, detail="No captcha images found.")
-    random_image_filename = random.choice(image_files)
-    return {"filename": random_image_filename}

 import os
 import random
 from pathlib import Path
+import numpy as np
+import tensorflow as tf
+from PIL import Image
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from contextlib import asynccontextmanager
+# --- Pydantic Models for Request Body ---
+class CaptchaRequest(BaseModel):
+    """Request body accepted by the POST /solve_captcha endpoint."""
+    # Name of the captcha image to solve; the endpoint joins it onto IMAGE_DIR.
+    filename: str
+# --- Global Variables ---
+# This will hold our loaded prediction model
+prediction_model = None
+# --- Configuration based on your Training Notebook ---
+# 1. CHARACTER SET
+# Build the character vocabulary from the PNG file names under static/images:
+# each file name (minus ".png") is treated as a label, and CHARACTERS is the
+# sorted list of distinct characters across all labels.
+data_dir = Path("./static/images/")
+images = sorted(str(png_path) for png_path in data_dir.glob("*.png"))
+labels = [img.rsplit(os.path.sep, 1)[-1].split(".png")[0] for img in images]
+characters = {char for label in labels for char in label}
+CHARACTERS = sorted(characters)
+# 2. IMAGE DIMENSIONS
+# These dimensions are taken directly from your notebook.
+IMG_WIDTH = 200
+IMG_HEIGHT = 50
+# --- App Lifespan Management (Model Loading) ---
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """
+    Application lifespan hook: load the TensorFlow SavedModel on startup.
+
+    The loaded object is stored in the module-level `prediction_model`. A load
+    failure is printed and leaves `prediction_model` as None rather than
+    aborting startup, so the app still comes up without a model.
+    """
+    global prediction_model
+    print("INFO:     Loading TensorFlow prediction model...")
+    try:
+        # NOTE: the directory must contain the exported `prediction_model`
+        # from the notebook, not the multi-input training `model`.
+        loaded_model = tf.saved_model.load('model')
+    except Exception as e:
+        print(f"ERROR:    Failed to load model: {e}")
+        prediction_model = None
+    else:
+        prediction_model = loaded_model
+        print("INFO:     TensorFlow model loaded successfully.")
+    # Hand control to the running application.
+    yield
+    # Everything after the yield runs on shutdown.
+    print("INFO:     Application shutting down.")
+# Initialize the FastAPI app with the lifespan manager
+app = FastAPI(lifespan=lifespan)
 # --- CORS Middleware ---
 app.add_middleware(
 )
 # --- Constants ---
 IMAGE_DIR = Path("static/images")
+# --- Helper Functions based on your Notebook ---
+def preprocess_image(image_path):
+    """
+    Load an image file and convert it into a single-sample input batch,
+    following the notebook's `encode_single_sample` function.
+
+    Pipeline: grayscale -> resize to (IMG_WIDTH, IMG_HEIGHT) -> float32 scaled
+    to [0, 1] -> transpose so width is the leading axis -> add channel and
+    batch axes.
+
+    Args:
+        image_path: Path (or path string) of the image file to load.
+
+    Returns:
+        A float32 numpy array of shape (1, IMG_WIDTH, IMG_HEIGHT, 1), or None
+        if the image could not be opened or processed (the error is printed).
+    """
+    try:
+        # Open inside a context manager so the file handle is released
+        # deterministically; `convert` returns an independent in-memory image
+        # that stays valid after the source file is closed.
+        with Image.open(image_path) as source:
+            gray = source.convert('L')
+        # PIL takes the target size as (width, height).
+        resized = gray.resize((IMG_WIDTH, IMG_HEIGHT))
+        # Pixel values 0..255 -> float32 in [0, 1]; array shape is (height, width).
+        pixels = np.array(resized, dtype=np.float32) / 255.0
+        # Transpose to (width, height): the RNN part of the model expects the
+        # time dimension to correspond to the width of the image.
+        pixels = pixels.T
+        # Add channel (last) and batch (first) axes -> (1, width, height, 1).
+        pixels = np.expand_dims(pixels, axis=-1)
+        pixels = np.expand_dims(pixels, axis=0)
+        return pixels
+    except Exception as e:
+        # Best-effort by design: the caller treats None as "could not process".
+        print(f"Error preprocessing image {image_path}: {e}")
+        return None
+def decode_prediction(pred):
+    """
+    Decode raw model output into a string using greedy CTC decoding,
+    mirroring the notebook's `decode_batch_predictions` function.
+
+    Args:
+        pred: Model output whose first two dimensions are (batch, timesteps).
+
+    Returns:
+        The decoded text of the first sample in the batch, or "" when the
+        batch is empty.
+    """
+    # Every sample uses the full number of timesteps as its input length.
+    input_len = np.ones(pred.shape[0]) * pred.shape[1]
+    # Greedy search is sufficient and fast. `[0][0]` selects the single dense
+    # tensor of decoded sequences.
+    decoded = tf.keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0]
+    decoded_rows = decoded.numpy()
+    # The decoded tensor is 2-D (batch, max_decoded_length). Iterating it
+    # directly yields whole rows, and comparing a row with `!= -1` inside an
+    # `and` raises "truth value of an array is ambiguous". Decode the first
+    # (only) sample element by element instead.
+    if decoded_rows.shape[0] == 0:
+        return ""
+    first_sequence = decoded_rows[0]
+    # -1 is the padding value emitted by ctc_decode; skip it and any index
+    # outside the vocabulary.
+    # NOTE(review): if the training notebook encoded labels with a lookup layer
+    # that reserves index 0 (e.g. for an OOV token), CHARACTERS[idx] is off by
+    # one -- confirm against the notebook's char<->index mapping.
+    return "".join(
+        CHARACTERS[idx] for idx in first_sequence if 0 <= idx < len(CHARACTERS)
+    )
 # --- API Endpoints ---
 @app.get("/")
 async def read_root():
+    """Return a static welcome message."""
     return {"message": "Welcome to the Captcha Solver API!"}
 @app.get("/get_captcha")
 async def get_captcha():
+    """
+    Return the file name of a randomly chosen captcha image from IMAGE_DIR.
+
+    Raises:
+        HTTPException: 500 if IMAGE_DIR is not a directory; 404 if it holds
+            no .png/.jpg/.jpeg files.
+    """
     if not IMAGE_DIR.is_dir():
+        raise HTTPException(status_code=500, detail="Image directory not found.")
+    allowed_suffixes = ('.png', '.jpg', '.jpeg')
+    candidates = [
+        entry for entry in os.listdir(IMAGE_DIR) if entry.endswith(allowed_suffixes)
+    ]
+    if not candidates:
         raise HTTPException(status_code=404, detail="No captcha images found.")
+    chosen = random.choice(candidates)
+    return {"filename": chosen}
+@app.post("/solve_captcha")
+async def solve_captcha(request: CaptchaRequest):
+    """
+    Run the loaded model on one captcha image and return the decoded text.
+
+    Args:
+        request: Body carrying the `filename` of an image inside IMAGE_DIR.
+
+    Returns:
+        {"prediction": <decoded text>}
+
+    Raises:
+        HTTPException: 503 if the model is not loaded; 400 if `filename`
+            points outside IMAGE_DIR or is malformed; 404 if the file does not
+            exist; 500 if preprocessing or inference fails.
+    """
+    if prediction_model is None:
+        raise HTTPException(status_code=503, detail="Model is not loaded or failed to load.")
+    # Security: `filename` is client-supplied. Joining it blindly lets "../"
+    # segments or an absolute path reach files outside the image directory,
+    # so resolve the result and require it to stay inside IMAGE_DIR.
+    base_dir = IMAGE_DIR.resolve()
+    try:
+        image_path = (base_dir / request.filename).resolve()
+    except (OSError, ValueError):
+        # e.g. embedded NUL byte or an unresolvable path
+        raise HTTPException(status_code=400, detail="Invalid filename.") from None
+    if base_dir not in image_path.parents:
+        raise HTTPException(status_code=400, detail="Invalid filename.")
+    if not image_path.is_file():
+        raise HTTPException(status_code=404, detail=f"File '{request.filename}' not found.")
+    # Preprocess the image according to the notebook's logic; None signals failure.
+    processed_image = preprocess_image(image_path)
+    if processed_image is None:
+        raise HTTPException(status_code=500, detail="Failed to process the image.")
+    try:
+        # The loaded prediction model is called directly with the image batch
+        # as its only input.
+        preds = prediction_model(tf.constant(processed_image))
+        predicted_label = decode_prediction(preds)
+        return {"prediction": predicted_label}
+    except Exception as e:
+        # Endpoint boundary: log, then surface as a 500 to the client.
+        print(f"Error during prediction: {e}")
+        raise HTTPException(status_code=500, detail=f"An error occurred during model inference: {e}")

model/.DS_Store ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce35a183b313defdf28e0e5a7cfb29468a17bb0d9b42f1ef75f4e366851478f7
+size 6148

model/fingerprint.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97e29e2ce27e4c2d1d1273f0cdb069a094ecdbea21a6559d82fbd34ed9c17b4b
+size 78

model/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd3fd69880e1b68390152c8236dd3000a7abdde05867d6f2074d120dbfdd6c17
+size 269319

model/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5bd622ed42679af9c2142e8c79621c1fe209608c3061414e1869c207df6b609
+size 3467858

model/variables/variables.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a326c821608827e2a07f3ccb56d4b74ac4b172245d5b97a9d23832a6fd87ea37
+size 2907

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
 fastapi
 uvicorn[standard]
-python-multipart

 fastapi
 uvicorn[standard]
+python-multipart
+tensorflow
+numpy
+Pillow