LiquidAI
/

LeapBundles

Model card · Files and versions

xet

Community

ykhrustalev committed 11 days ago

Commit

747e7be

verified ·

1 Parent(s): d5c949e

Update inference configuration schema

Browse files

Files changed (1) hide show

schema.json +186 -0

schema.json ADDED Viewed

	@@ -0,0 +1,186 @@

+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Inference Configuration",
+  "description": "Configuration schema for model inference entrypoint",
+  "type": "object",
+  "required": ["schema_version", "inference_type", "load_time_parameters"],
+  "definitions": {
+    "ggufFile": {
+      "oneOf": [
+        {
+          "type": "string",
+          "pattern": "^(?!/)(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
+          "description": "Relative path to .gguf file (no leading slash, subdirectories allowed)"
+        },
+        {
+          "type": "string",
+          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
+          "description": "Absolute path to .gguf file (leading slash required)"
+        },
+        {
+          "type": "string",
+          "pattern": "^https?://[^\\s]+\\.gguf(\\?[^\\s]*)?$",
+          "description": "HTTPS/HTTP URL to .gguf file (query parameters allowed)"
+        }
+      ]
+    },
+    "safetensorsFile": {
+      "oneOf": [
+        {
+          "type": "string",
+          "pattern": "^(?!/)(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
+          "description": "Relative path to .safetensors file (no leading slash, subdirectories allowed)"
+        },
+        {
+          "type": "string",
+          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
+          "description": "Absolute path to .safetensors file (leading slash required)"
+        },
+        {
+          "type": "string",
+          "pattern": "^https?://[^\\s]+\\.safetensors(\\?[^\\s]*)?$",
+          "description": "HTTPS/HTTP URL to .safetensors file (query parameters allowed)"
+        }
+      ]
+    },
+    "samplingParameters": {
+      "type": "object",
+      "description": "Sampling configuration for text generation",
+      "properties": {
+        "temperature": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 2.0,
+          "description": "Sampling temperature (0.0 = deterministic, higher = more random)"
+        },
+        "top_p": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 1.0,
+          "description": "Nucleus sampling probability (cumulative probability threshold)"
+        },
+        "min_p": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 1.0,
+          "description": "Minimum probability threshold for token consideration"
+        },
+        "repetition_penalty": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 2.0,
+          "description": "Penalty for token repetition (1.0 = no penalty, higher = more penalty)"
+        }
+      },
+      "additionalProperties": false
+    }
+  },
+  "properties": {
+    "schema_version": {
+      "type": "string",
+      "enum": ["1.0.0"],
+      "description": "Schema version for compatibility checking and migration"
+    },
+    "inference_type": {"type": "string", "description": "Combined inference backend and type in format: backend/type"},
+    "load_time_parameters": {"type": "object", "description": "Parameters required at model load time"},
+    "generation_time_parameters": {"type": "object", "description": "Optional parameters used during generation"}
+  },
+  "allOf": [
+    {
+      "if": {"properties": {"schema_version": {"const": "1.0.0"}}},
+      "then": {
+        "properties": {
+          "inference_type": {"enum": ["llama.cpp/text-to-text", "llama.cpp/image-to-text", "llama.cpp/lfm2-audio-v1"]}
+        },
+        "allOf": [
+          {
+            "if": {"properties": {"inference_type": {"const": "llama.cpp/text-to-text"}}},
+            "then": {
+              "properties": {
+                "load_time_parameters": {
+                  "required": ["model"],
+                  "properties": {
+                    "chat_template": {"type": "string", "description": "Optional chat template override"},
+                    "model": {"$ref": "#/definitions/ggufFile", "description": "Text model file (local path or URL)"}
+                  },
+                  "additionalProperties": false
+                },
+                "generation_time_parameters": {
+                  "properties": {"sampling_parameters": {"$ref": "#/definitions/samplingParameters"}},
+                  "additionalProperties": false
+                }
+              }
+            }
+          },
+          {
+            "if": {"properties": {"inference_type": {"const": "llama.cpp/image-to-text"}}},
+            "then": {
+              "properties": {
+                "load_time_parameters": {
+                  "required": ["model", "multimodal_projector"],
+                  "properties": {
+                    "chat_template": {"type": "string", "description": "Optional chat template override"},
+                    "model": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Backbone model file (local path or URL)"
+                    },
+                    "multimodal_projector": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Multimodal projector file (local path or URL)"
+                    }
+                  },
+                  "additionalProperties": false
+                },
+                "generation_time_parameters": {
+                  "properties": {"sampling_parameters": {"$ref": "#/definitions/samplingParameters"}},
+                  "additionalProperties": false
+                }
+              }
+            }
+          },
+          {
+            "if": {"properties": {"inference_type": {"const": "llama.cpp/lfm2-audio-v1"}}},
+            "then": {
+              "properties": {
+                "load_time_parameters": {
+                  "required": ["model", "multimodal_projector", "audio_decoder", "audio_tokenizer"],
+                  "properties": {
+                    "chat_template": {"type": "string", "description": "Optional chat template override"},
+                    "model": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Backbone model file (local path or URL)"
+                    },
+                    "multimodal_projector": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Audio encoder file (local path or URL)"
+                    },
+                    "audio_decoder": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Audio decoder file (local path or URL)"
+                    },
+                    "audio_tokenizer": {
+                      "$ref": "#/definitions/safetensorsFile",
+                      "description": "Audio tokenizer file (local path or URL)"
+                    }
+                  },
+                  "additionalProperties": false
+                },
+                "generation_time_parameters": {
+                  "properties": {
+                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"},
+                    "number_of_decoding_threads": {
+                      "type": "integer",
+                      "description": "Number of threads for audio decoding",
+                      "minimum": 1
+                    }
+                  },
+                  "additionalProperties": false
+                }
+              }
+            }
+          }
+        ]
+      }
+    }
+  ]
+}