{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Inference Configuration",
  "description": "Configuration schema for model inference entrypoint",
  "type": "object",
  "required": ["schema_version", "inference_type", "load_time_parameters"],
  "definitions": {
    "ggufFile": {
      "description": "Location of a .gguf file: relative path, absolute path, or HTTP(S) URL",
      "oneOf": [
        {
          "type": "string",
          "pattern": "^(?!/)(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
          "description": "Relative path to .gguf file (no leading slash, subdirectories allowed)"
        },
        {
          "type": "string",
          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
          "description": "Absolute path to .gguf file (leading slash required)"
        },
        {
          "type": "string",
          "pattern": "^https?://[^\\s]+\\.gguf(\\?[^\\s]*)?$",
          "description": "HTTPS/HTTP URL to .gguf file (query parameters allowed)"
        }
      ]
    },
    "safetensorsFile": {
      "description": "Location of a .safetensors file: relative path, absolute path, or HTTP(S) URL",
      "oneOf": [
        {
          "type": "string",
          "pattern": "^(?!/)(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
          "description": "Relative path to .safetensors file (no leading slash, subdirectories allowed)"
        },
        {
          "type": "string",
          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
          "description": "Absolute path to .safetensors file (leading slash required)"
        },
        {
          "type": "string",
          "pattern": "^https?://[^\\s]+\\.safetensors(\\?[^\\s]*)?$",
          "description": "HTTPS/HTTP URL to .safetensors file (query parameters allowed)"
        }
      ]
    },
    "samplingParameters": {
      "type": "object",
      "description": "Sampling configuration for text generation",
      "properties": {
        "temperature": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 2.0,
          "description": "Sampling temperature (0.0 = deterministic, higher = more random)"
        },
        "top_p": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "description": "Nucleus sampling probability (cumulative probability threshold)"
        },
        "min_p": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "description": "Minimum probability threshold for token consideration"
        },
        "repetition_penalty": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 2.0,
          "description": "Penalty for token repetition (1.0 = no penalty, higher = more penalty)"
        }
      },
      "additionalProperties": false
    }
  },
  "properties": {
    "schema_version": {
      "type": "string",
      "enum": ["1.0.0"],
      "description": "Schema version for compatibility checking and migration"
    },
    "inference_type": {
      "type": "string",
      "description": "Combined inference backend and type in format: backend/type"
    },
    "load_time_parameters": {
      "type": "object",
      "description": "Parameters required at model load time"
    },
    "generation_time_parameters": {
      "type": "object",
      "description": "Optional parameters used during generation"
    }
  },
  "allOf": [
    {
      "if": {
        "properties": {"schema_version": {"const": "1.0.0"}},
        "required": ["schema_version"]
      },
      "then": {
        "$comment": "Each branch below is selected by inference_type. 'required' inside every 'if' keeps the condition from matching vacuously when the discriminator property is absent.",
        "properties": {
          "inference_type": {
            "enum": ["llama.cpp/text-to-text", "llama.cpp/image-to-text", "llama.cpp/lfm2-audio-v1"]
          }
        },
        "allOf": [
          {
            "if": {
              "properties": {"inference_type": {"const": "llama.cpp/text-to-text"}},
              "required": ["inference_type"]
            },
            "then": {
              "properties": {
                "load_time_parameters": {
                  "required": ["model"],
                  "properties": {
                    "chat_template": {
                      "type": "string",
                      "description": "Optional chat template override"
                    },
                    "model": {
                      "description": "Text model file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    }
                  },
                  "additionalProperties": false
                },
                "generation_time_parameters": {
                  "properties": {
                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"}
                  },
                  "additionalProperties": false
                }
              }
            }
          },
          {
            "if": {
              "properties": {"inference_type": {"const": "llama.cpp/image-to-text"}},
              "required": ["inference_type"]
            },
            "then": {
              "properties": {
                "load_time_parameters": {
                  "required": ["model", "multimodal_projector"],
                  "properties": {
                    "chat_template": {
                      "type": "string",
                      "description": "Optional chat template override"
                    },
                    "model": {
                      "description": "Backbone model file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "multimodal_projector": {
                      "description": "Multimodal projector file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    }
                  },
                  "additionalProperties": false
                },
                "generation_time_parameters": {
                  "properties": {
                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"}
                  },
                  "additionalProperties": false
                }
              }
            }
          },
          {
            "if": {
              "properties": {"inference_type": {"const": "llama.cpp/lfm2-audio-v1"}},
              "required": ["inference_type"]
            },
            "then": {
              "properties": {
                "load_time_parameters": {
                  "required": ["model", "multimodal_projector", "audio_decoder", "audio_tokenizer"],
                  "properties": {
                    "chat_template": {
                      "type": "string",
                      "description": "Optional chat template override"
                    },
                    "model": {
                      "description": "Backbone model file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "multimodal_projector": {
                      "description": "Audio encoder file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "audio_decoder": {
                      "description": "Audio decoder file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "audio_tokenizer": {
                      "description": "Audio tokenizer file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/safetensorsFile"}]
                    }
                  },
                  "additionalProperties": false
                },
                "generation_time_parameters": {
                  "properties": {
                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"},
                    "number_of_decoding_threads": {
                      "type": "integer",
                      "description": "Number of threads for audio decoding",
                      "minimum": 1
                    }
                  },
                  "additionalProperties": false
                }
              }
            }
          }
        ]
      }
    }
  ]
}