ykhrustalev committed on
Commit 747e7be · verified · 1 Parent(s): d5c949e

Update inference configuration schema

Files changed (1)
  1. schema.json +186 -0
schema.json ADDED
@@ -0,0 +1,186 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Inference Configuration",
+  "description": "Configuration schema for model inference entrypoint",
+  "type": "object",
+  "required": ["schema_version", "inference_type", "load_time_parameters"],
+  "definitions": {
+    "ggufFile": {
+      "oneOf": [
+        {
+          "type": "string",
+          "pattern": "^(?!/)(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
+          "description": "Relative path to .gguf file (no leading slash, subdirectories allowed)"
+        },
+        {
+          "type": "string",
+          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
+          "description": "Absolute path to .gguf file (leading slash required)"
+        },
+        {
+          "type": "string",
+          "pattern": "^https?://[^\\s]+\\.gguf(\\?[^\\s]*)?$",
+          "description": "HTTPS/HTTP URL to .gguf file (query parameters allowed)"
+        }
+      ]
+    },
+    "safetensorsFile": {
+      "oneOf": [
+        {
+          "type": "string",
+          "pattern": "^(?!/)(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
+          "description": "Relative path to .safetensors file (no leading slash, subdirectories allowed)"
+        },
+        {
+          "type": "string",
+          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
+          "description": "Absolute path to .safetensors file (leading slash required)"
+        },
+        {
+          "type": "string",
+          "pattern": "^https?://[^\\s]+\\.safetensors(\\?[^\\s]*)?$",
+          "description": "HTTPS/HTTP URL to .safetensors file (query parameters allowed)"
+        }
+      ]
+    },
+    "samplingParameters": {
+      "type": "object",
+      "description": "Sampling configuration for text generation",
+      "properties": {
+        "temperature": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 2.0,
+          "description": "Sampling temperature (0.0 = deterministic, higher = more random)"
+        },
+        "top_p": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 1.0,
+          "description": "Nucleus sampling probability (cumulative probability threshold)"
+        },
+        "min_p": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 1.0,
+          "description": "Minimum probability threshold for token consideration"
+        },
+        "repetition_penalty": {
+          "type": "number",
+          "minimum": 0.0,
+          "maximum": 2.0,
+          "description": "Penalty for token repetition (1.0 = no penalty, higher = more penalty)"
+        }
+      },
+      "additionalProperties": false
+    }
+  },
+  "properties": {
+    "schema_version": {
+      "type": "string",
+      "enum": ["1.0.0"],
+      "description": "Schema version for compatibility checking and migration"
+    },
+    "inference_type": {"type": "string", "description": "Combined inference backend and type in format: backend/type"},
+    "load_time_parameters": {"type": "object", "description": "Parameters required at model load time"},
+    "generation_time_parameters": {"type": "object", "description": "Optional parameters used during generation"}
+  },
+  "allOf": [
+    {
+      "if": {"properties": {"schema_version": {"const": "1.0.0"}}},
+      "then": {
+        "properties": {
+          "inference_type": {"enum": ["llama.cpp/text-to-text", "llama.cpp/image-to-text", "llama.cpp/lfm2-audio-v1"]}
+        },
+        "allOf": [
+          {
+            "if": {"properties": {"inference_type": {"const": "llama.cpp/text-to-text"}}},
+            "then": {
+              "properties": {
+                "load_time_parameters": {
+                  "required": ["model"],
+                  "properties": {
+                    "chat_template": {"type": "string", "description": "Optional chat template override"},
+                    "model": {"$ref": "#/definitions/ggufFile", "description": "Text model file (local path or URL)"}
+                  },
+                  "additionalProperties": false
+                },
+                "generation_time_parameters": {
+                  "properties": {"sampling_parameters": {"$ref": "#/definitions/samplingParameters"}},
+                  "additionalProperties": false
+                }
+              }
+            }
+          },
+          {
+            "if": {"properties": {"inference_type": {"const": "llama.cpp/image-to-text"}}},
+            "then": {
+              "properties": {
+                "load_time_parameters": {
+                  "required": ["model", "multimodal_projector"],
+                  "properties": {
+                    "chat_template": {"type": "string", "description": "Optional chat template override"},
+                    "model": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Backbone model file (local path or URL)"
+                    },
+                    "multimodal_projector": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Multimodal projector file (local path or URL)"
+                    }
+                  },
+                  "additionalProperties": false
+                },
+                "generation_time_parameters": {
+                  "properties": {"sampling_parameters": {"$ref": "#/definitions/samplingParameters"}},
+                  "additionalProperties": false
+                }
+              }
+            }
+          },
+          {
+            "if": {"properties": {"inference_type": {"const": "llama.cpp/lfm2-audio-v1"}}},
+            "then": {
+              "properties": {
+                "load_time_parameters": {
+                  "required": ["model", "multimodal_projector", "audio_decoder", "audio_tokenizer"],
+                  "properties": {
+                    "chat_template": {"type": "string", "description": "Optional chat template override"},
+                    "model": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Backbone model file (local path or URL)"
+                    },
+                    "multimodal_projector": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Audio encoder file (local path or URL)"
+                    },
+                    "audio_decoder": {
+                      "$ref": "#/definitions/ggufFile",
+                      "description": "Audio decoder file (local path or URL)"
+                    },
+                    "audio_tokenizer": {
+                      "$ref": "#/definitions/safetensorsFile",
+                      "description": "Audio tokenizer file (local path or URL)"
+                    }
+                  },
+                  "additionalProperties": false
+                },
+                "generation_time_parameters": {
+                  "properties": {
+                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"},
+                    "number_of_decoding_threads": {
+                      "type": "integer",
+                      "description": "Number of threads for audio decoding",
+                      "minimum": 1
+                    }
+                  },
+                  "additionalProperties": false
+                }
+              }
+            }
+          }
+        ]
+      }
+    }
+  ]
+}
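
For illustration, a minimal config that this schema accepts for the simplest variant, llama.cpp/text-to-text, looks roughly like the following. The file name model.gguf and the sampling values are placeholders for this sketch, not anything mandated by the schema (JSON has no comments, so the hedging lives here in the prose):

{
  "schema_version": "1.0.0",
  "inference_type": "llama.cpp/text-to-text",
  "load_time_parameters": {
    "model": "model.gguf"
  },
  "generation_time_parameters": {
    "sampling_parameters": {
      "temperature": 0.7,
      "top_p": 0.9
    }
  }
}

And a sketch of the richer llama.cpp/lfm2-audio-v1 case, which requires all four model files; again, the file names are hypothetical:

{
  "schema_version": "1.0.0",
  "inference_type": "llama.cpp/lfm2-audio-v1",
  "load_time_parameters": {
    "model": "backbone.gguf",
    "multimodal_projector": "audio-encoder.gguf",
    "audio_decoder": "audio-decoder.gguf",
    "audio_tokenizer": "audio-tokenizer.safetensors"
  },
  "generation_time_parameters": {
    "sampling_parameters": {"temperature": 0.8},
    "number_of_decoding_threads": 4
  }
}

The schema dispatches on inference_type through the nested allOf/if/then blocks: the top-level properties only pin down the general shape, while each conditional branch tightens load_time_parameters and generation_time_parameters (with "additionalProperties": false) for its backend/type pair.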