{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Inference Configuration",
"description": "Configuration schema for model inference entrypoint",
"type": "object",
"required": ["schema_version", "inference_type", "load_time_parameters"],
"definitions": {
"ggufFile": {
"oneOf": [
{
"type": "string",
"pattern": "^(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
"description": "Relative path to .gguf file (no leading slash, subdirectories allowed)"
},
{
"type": "string",
"pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
"description": "Absolute path to .gguf file (leading slash required)"
},
{
"type": "string",
"pattern": "^https?://[^\\s]+\\.gguf(\\?[^\\s]*)?$",
"description": "HTTPS/HTTP URL to .gguf file (query parameters allowed)"
}
]
},
"safetensorsFile": {
"oneOf": [
{
"type": "string",
"pattern": "^(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
"description": "Relative path to .safetensors file (no leading slash, subdirectories allowed)"
},
{
"type": "string",
"pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
"description": "Absolute path to .safetensors file (leading slash required)"
},
{
"type": "string",
"pattern": "^https?://[^\\s]+\\.safetensors(\\?[^\\s]*)?$",
"description": "HTTPS/HTTP URL to .safetensors file (query parameters allowed)"
}
]
},
"samplingParameters": {
"type": "object",
"description": "Sampling configuration for text generation",
"properties": {
"temperature": {
"type": "number",
"minimum": 0.0,
"maximum": 2.0,
"description": "Sampling temperature (0.0 = deterministic, higher = more random)"
},
"top_p": {
"type": "number",
"minimum": 0.0,
"maximum": 1.0,
"description": "Nucleus sampling probability (cumulative probability threshold)"
},
"min_p": {
"type": "number",
"minimum": 0.0,
"maximum": 1.0,
"description": "Minimum probability threshold for token consideration"
},
"repetition_penalty": {
"type": "number",
"minimum": 0.0,
"maximum": 2.0,
"description": "Penalty for token repetition (1.0 = no penalty, higher = more penalty)"
}
},
"additionalProperties": false
}
},
"properties": {
"schema_version": {
"type": "string",
"enum": ["1.0.0"],
"description": "Schema version for compatibility checking and migration"
},
"inference_type": {"type": "string", "description": "Combined inference backend and type in format: backend/type"},
"load_time_parameters": {"type": "object", "description": "Parameters required at model load time"},
"generation_time_parameters": {"type": "object", "description": "Optional parameters used during generation"}
},
"allOf": [
{
"if": {"properties": {"schema_version": {"const": "1.0.0"}}},
"then": {
"properties": {
"inference_type": {"enum": ["llama.cpp/text-to-text", "llama.cpp/image-to-text", "llama.cpp/lfm2-audio-v1"]}
},
"allOf": [
{
"if": {"properties": {"inference_type": {"const": "llama.cpp/text-to-text"}}},
"then": {
"properties": {
"load_time_parameters": {
"required": ["model"],
"properties": {
"chat_template": {"type": "string", "description": "Optional chat template override"},
"model": {"allOf": [{"$ref": "#/definitions/ggufFile"}], "description": "Text model file (local path or URL)"}
},
"additionalProperties": false
},
"generation_time_parameters": {
"properties": {"sampling_parameters": {"$ref": "#/definitions/samplingParameters"}},
"additionalProperties": false
}
}
}
},
{
"if": {"properties": {"inference_type": {"const": "llama.cpp/image-to-text"}}},
"then": {
"properties": {
"load_time_parameters": {
"required": ["model", "multimodal_projector"],
"properties": {
"chat_template": {"type": "string", "description": "Optional chat template override"},
"model": {
"allOf": [{"$ref": "#/definitions/ggufFile"}],
"description": "Backbone model file (local path or URL)"
},
"multimodal_projector": {
"allOf": [{"$ref": "#/definitions/ggufFile"}],
"description": "Multimodal projector file (local path or URL)"
}
},
"additionalProperties": false
},
"generation_time_parameters": {
"properties": {"sampling_parameters": {"$ref": "#/definitions/samplingParameters"}},
"additionalProperties": false
}
}
}
},
{
"if": {"properties": {"inference_type": {"const": "llama.cpp/lfm2-audio-v1"}}},
"then": {
"properties": {
"load_time_parameters": {
"required": ["model", "multimodal_projector", "audio_decoder", "audio_tokenizer"],
"properties": {
"chat_template": {"type": "string", "description": "Optional chat template override"},
"model": {
"allOf": [{"$ref": "#/definitions/ggufFile"}],
"description": "Backbone model file (local path or URL)"
},
"multimodal_projector": {
"allOf": [{"$ref": "#/definitions/ggufFile"}],
"description": "Audio encoder file (local path or URL)"
},
"audio_decoder": {
"allOf": [{"$ref": "#/definitions/ggufFile"}],
"description": "Audio decoder file (local path or URL)"
},
"audio_tokenizer": {
"allOf": [{"$ref": "#/definitions/safetensorsFile"}],
"description": "Audio tokenizer file (local path or URL)"
}
},
"additionalProperties": false
},
"generation_time_parameters": {
"properties": {
"sampling_parameters": {"$ref": "#/definitions/samplingParameters"},
"number_of_decoding_threads": {
"type": "integer",
"description": "Number of threads for audio decoding",
"minimum": 1
}
},
"additionalProperties": false
}
}
}
}
]
}
}
]
}