1use super::BaseMetadata;
4use chat_prompts::PromptTemplateType;
5use serde::{Deserialize, Serialize};
6use std::path::{Path, PathBuf};
7
/// Builder for [`GgmlMetadata`]: seed required fields via `new`, chain the
/// `with_*`/`enable_*` setters, then call `build`.
#[derive(Debug)]
pub struct GgmlMetadataBuilder {
    // Accumulated configuration, handed out by `build`.
    metadata: GgmlMetadata,
}
13impl GgmlMetadataBuilder {
14 pub fn new<S: Into<String>>(model_name: S, model_alias: S, pt: PromptTemplateType) -> Self {
15 let metadata = GgmlMetadata {
16 model_name: model_name.into(),
17 model_alias: model_alias.into(),
18 prompt_template: pt,
19 ..Default::default()
20 };
21
22 Self { metadata }
23 }
24
25 pub fn with_prompt_template(mut self, template: PromptTemplateType) -> Self {
26 self.metadata.prompt_template = template;
27 self
28 }
29
30 pub fn enable_plugin_log(mut self, enable: bool) -> Self {
31 self.metadata.log_enable = enable;
32 self
33 }
34
35 pub fn enable_debug_log(mut self, enable: bool) -> Self {
36 self.metadata.debug_log = enable;
37 self
38 }
39
40 pub fn enable_prompts_log(mut self, enable: bool) -> Self {
41 self.metadata.log_prompts = enable;
42 self
43 }
44
45 pub fn enable_embeddings(mut self, enable: bool) -> Self {
46 self.metadata.embeddings = enable;
47 self
48 }
49
50 pub fn with_n_predict(mut self, n: i32) -> Self {
51 self.metadata.n_predict = n;
52 self
53 }
54
55 pub fn with_main_gpu(mut self, gpu: Option<u64>) -> Self {
56 self.metadata.main_gpu = gpu;
57 self
58 }
59
60 pub fn with_tensor_split(mut self, split: Option<String>) -> Self {
61 self.metadata.tensor_split = split;
62 self
63 }
64
65 pub fn with_threads(mut self, threads: u64) -> Self {
66 self.metadata.threads = threads;
67 self
68 }
69
70 pub fn with_reverse_prompt(mut self, prompt: Option<String>) -> Self {
71 self.metadata.reverse_prompt = prompt;
72 self
73 }
74
75 pub fn with_mmproj(mut self, path: Option<String>) -> Self {
76 self.metadata.mmproj = path;
77 self
78 }
79
80 pub fn with_image(mut self, path: impl Into<String>) -> Self {
81 self.metadata.image = Some(path.into());
82 self
83 }
84
85 pub fn with_n_gpu_layers(mut self, n: u64) -> Self {
86 self.metadata.n_gpu_layers = n;
87 self
88 }
89
90 pub fn disable_mmap(mut self, disable: Option<bool>) -> Self {
91 self.metadata.use_mmap = disable.map(|v| !v);
92 self
93 }
94
95 pub fn with_split_mode(mut self, mode: String) -> Self {
96 self.metadata.split_mode = mode;
97 self
98 }
99
100 pub fn with_ctx_size(mut self, size: u64) -> Self {
101 self.metadata.ctx_size = size;
102 self
103 }
104
105 pub fn with_batch_size(mut self, size: u64) -> Self {
106 self.metadata.batch_size = size;
107 self
108 }
109
110 pub fn with_ubatch_size(mut self, size: u64) -> Self {
111 self.metadata.ubatch_size = size;
112 self
113 }
114
115 pub fn with_temperature(mut self, temp: f64) -> Self {
116 self.metadata.temperature = temp;
117 self
118 }
119
120 pub fn with_top_p(mut self, top_p: f64) -> Self {
121 self.metadata.top_p = top_p;
122 self
123 }
124
125 pub fn with_repeat_penalty(mut self, penalty: f64) -> Self {
126 self.metadata.repeat_penalty = penalty;
127 self
128 }
129
130 pub fn with_presence_penalty(mut self, penalty: f64) -> Self {
131 self.metadata.presence_penalty = penalty;
132 self
133 }
134
135 pub fn with_frequency_penalty(mut self, penalty: f64) -> Self {
136 self.metadata.frequency_penalty = penalty;
137 self
138 }
139
140 pub fn with_grammar(mut self, grammar: impl Into<String>) -> Self {
141 self.metadata.grammar = grammar.into();
142 self
143 }
144
145 pub fn with_json_schema(mut self, schema: Option<String>) -> Self {
146 self.metadata.json_schema = schema;
147 self
148 }
149
150 pub fn include_usage(mut self, include: bool) -> Self {
151 self.metadata.include_usage = include;
152 self
153 }
154
155 pub fn build(self) -> GgmlMetadata {
156 self.metadata
157 }
158}
159
/// Configuration for a ggml chat/completions model.
///
/// Fields marked `skip_serializing` are consumed by this crate only; the
/// remaining fields are serialized (mostly under kebab-case names via
/// `serde(rename = ...)`) into the backend config.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GgmlMetadata {
    /// Model name; not serialized.
    #[serde(skip_serializing)]
    pub model_name: String,
    /// Model alias; not serialized.
    #[serde(skip_serializing)]
    pub model_alias: String,
    /// Whether to log prompts; consumed locally, not serialized.
    #[serde(skip_serializing)]
    pub log_prompts: bool,
    /// Template used to render chat messages into a prompt; not serialized.
    #[serde(skip_serializing)]
    pub prompt_template: PromptTemplateType,

    /// Serialized as `enable-log`.
    #[serde(rename = "enable-log")]
    pub log_enable: bool,
    /// Serialized as `enable-debug-log`.
    #[serde(rename = "enable-debug-log")]
    pub debug_log: bool,
    /// Embedding mode; serialized as `embedding`.
    #[serde(rename = "embedding")]
    pub embeddings: bool,
    /// Number of tokens to predict; serialized as `n-predict`.
    #[serde(rename = "n-predict")]
    pub n_predict: i32,
    /// Optional reverse prompt; omitted from the config when `None`.
    #[serde(skip_serializing_if = "Option::is_none", rename = "reverse-prompt")]
    pub reverse_prompt: Option<String>,
    /// Optional `mmproj` path (presumably a multimodal projector file —
    /// confirm against the backend docs); omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mmproj: Option<String>,
    /// Optional image path; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image: Option<String>,

    /// Serialized as `n-gpu-layers`.
    #[serde(rename = "n-gpu-layers")]
    pub n_gpu_layers: u64,
    /// Serialized as `main-gpu`; omitted when `None`.
    #[serde(rename = "main-gpu")]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub main_gpu: Option<u64>,
    /// Serialized as `tensor-split`; omitted when `None`.
    #[serde(rename = "tensor-split")]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tensor_split: Option<String>,
    /// Serialized as `use-mmap`; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", rename = "use-mmap")]
    pub use_mmap: Option<bool>,
    /// Serialized as `split-mode`; defaults to "layer".
    #[serde(rename = "split-mode")]
    pub split_mode: String,

    /// Context window size; serialized as `ctx-size`.
    #[serde(rename = "ctx-size")]
    pub ctx_size: u64,
    /// Serialized as `batch-size`.
    #[serde(rename = "batch-size")]
    pub batch_size: u64,
    /// Serialized as `ubatch-size`.
    #[serde(rename = "ubatch-size")]
    pub ubatch_size: u64,
    /// Compute-thread count; serialized as `threads`.
    #[serde(rename = "threads")]
    pub threads: u64,

    /// Sampling temperature; serialized as `temp`.
    #[serde(rename = "temp")]
    pub temperature: f64,
    /// Serialized as `top-p`.
    #[serde(rename = "top-p")]
    pub top_p: f64,
    /// Serialized as `repeat-penalty`.
    #[serde(rename = "repeat-penalty")]
    pub repeat_penalty: f64,
    /// Serialized as `presence-penalty`.
    #[serde(rename = "presence-penalty")]
    pub presence_penalty: f64,
    /// Serialized as `frequency-penalty`.
    #[serde(rename = "frequency-penalty")]
    pub frequency_penalty: f64,

    /// Grammar constraint text; serialized under its own name, `grammar`.
    pub grammar: String,
    /// Optional JSON schema; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub json_schema: Option<String>,

    // NOTE(review): serialized as snake_case `include_usage`, unlike the
    // kebab-case keys above — confirm the consumer expects this exact key.
    pub include_usage: bool,
}
260impl Default for GgmlMetadata {
261 fn default() -> Self {
262 Self {
263 model_name: String::new(),
264 model_alias: String::new(),
265 log_prompts: false,
266 debug_log: false,
267 prompt_template: PromptTemplateType::Llama2Chat,
268 log_enable: false,
269 embeddings: false,
270 n_predict: -1,
271 reverse_prompt: None,
272 mmproj: None,
273 image: None,
274 n_gpu_layers: 100,
275 main_gpu: None,
276 tensor_split: None,
277 use_mmap: Some(true),
278 split_mode: "layer".to_string(),
279 ctx_size: 4096,
280 batch_size: 2048,
281 ubatch_size: 512,
282 threads: 2,
283 temperature: 0.8,
284 top_p: 0.9,
285 repeat_penalty: 1.0,
286 presence_penalty: 0.0,
287 frequency_penalty: 0.0,
288 grammar: String::new(),
289 json_schema: None,
290 include_usage: false,
291 }
292 }
293}
294impl BaseMetadata for GgmlMetadata {
295 fn model_name(&self) -> &str {
296 &self.model_name
297 }
298
299 fn model_alias(&self) -> &str {
300 &self.model_alias
301 }
302}
impl GgmlMetadata {
    /// Returns the prompt template configured for this model.
    pub fn prompt_template(&self) -> PromptTemplateType {
        self.prompt_template
    }
}
308
/// Builder for [`GgmlTtsMetadata`]: seed required fields via `new`, chain the
/// setters, then call `build`.
#[derive(Debug)]
pub struct GgmlTtsMetadataBuilder {
    // Accumulated configuration, handed out by `build`.
    metadata: GgmlTtsMetadata,
}
314impl GgmlTtsMetadataBuilder {
315 pub fn new<S: Into<String>, P: AsRef<Path>>(
316 model_name: S,
317 model_alias: S,
318 codec_model: P,
319 ) -> Self {
320 let metadata = GgmlTtsMetadata {
321 model_name: model_name.into(),
322 model_alias: model_alias.into(),
323 codec_model: codec_model.as_ref().to_path_buf(),
324 ..Default::default()
325 };
326
327 Self { metadata }
328 }
329
330 pub fn enable_tts(mut self, enable: bool) -> Self {
331 self.metadata.enable_tts = enable;
332 self
333 }
334
335 pub fn with_speaker_file(mut self, speaker_file: Option<PathBuf>) -> Self {
336 self.metadata.speaker_file = speaker_file;
337 self
338 }
339
340 pub fn with_ctx_size(mut self, size: u64) -> Self {
341 self.metadata.ctx_size = size;
342 self
343 }
344
345 pub fn with_batch_size(mut self, size: u64) -> Self {
346 self.metadata.batch_size = size;
347 self
348 }
349
350 pub fn with_ubatch_size(mut self, size: u64) -> Self {
351 self.metadata.ubatch_size = size;
352 self
353 }
354
355 pub fn with_n_predict(mut self, n: i32) -> Self {
356 self.metadata.n_predict = n;
357 self
358 }
359
360 pub fn with_n_gpu_layers(mut self, n: u64) -> Self {
361 self.metadata.n_gpu_layers = n;
362 self
363 }
364
365 pub fn with_temperature(mut self, temp: f64) -> Self {
366 self.metadata.temperature = temp;
367 self
368 }
369
370 pub fn enable_plugin_log(mut self, enable: bool) -> Self {
371 self.metadata.log_enable = enable;
372 self
373 }
374
375 pub fn enable_debug_log(mut self, enable: bool) -> Self {
376 self.metadata.debug_log = enable;
377 self
378 }
379
380 pub fn build(self) -> GgmlTtsMetadata {
381 self.metadata
382 }
383}
384
/// Configuration for a ggml text-to-speech model.
///
/// NOTE(review): unlike [`GgmlMetadata`], `model_name`/`model_alias` are
/// serialized here, and `n_predict`/`n_gpu_layers` keep their snake_case
/// names rather than the kebab-case used elsewhere — confirm the consumer
/// expects these exact keys.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GgmlTtsMetadata {
    /// Model name (serialized under its own name).
    pub model_name: String,
    /// Model alias (serialized under its own name).
    pub model_alias: String,
    /// Enables TTS mode; serialized as `tts`.
    #[serde(rename = "tts")]
    pub enable_tts: bool,
    /// Path to the codec/vocoder model; serialized as `model-vocoder`.
    #[serde(rename = "model-vocoder")]
    pub codec_model: PathBuf,
    /// Optional speaker profile file; serialized as `tts-speaker-file`,
    /// omitted when `None`.
    #[serde(rename = "tts-speaker-file", skip_serializing_if = "Option::is_none")]
    pub speaker_file: Option<PathBuf>,
    /// Context window size; serialized as `ctx-size`.
    #[serde(rename = "ctx-size")]
    pub ctx_size: u64,
    /// Serialized as `batch-size`.
    #[serde(rename = "batch-size")]
    pub batch_size: u64,
    /// Serialized as `ubatch-size`.
    #[serde(rename = "ubatch-size")]
    pub ubatch_size: u64,
    /// Number of tokens to predict (serialized as snake_case `n_predict`).
    pub n_predict: i32,
    /// GPU layer count (serialized as snake_case `n_gpu_layers`).
    pub n_gpu_layers: u64,
    /// Sampling temperature; serialized as `temp`.
    #[serde(rename = "temp")]
    pub temperature: f64,
    /// Serialized as `enable-log`.
    #[serde(rename = "enable-log")]
    pub log_enable: bool,
    /// Serialized as `enable-debug-log`.
    #[serde(rename = "enable-debug-log")]
    pub debug_log: bool,
}
411impl Default for GgmlTtsMetadata {
412 fn default() -> Self {
413 Self {
414 model_name: "tts".to_string(),
415 model_alias: "tts".to_string(),
416 enable_tts: false,
417 codec_model: PathBuf::from(""),
418 speaker_file: None,
419 ctx_size: 8192,
420 batch_size: 8192,
421 ubatch_size: 8192,
422 n_predict: 4096,
423 n_gpu_layers: 100,
424 temperature: 0.8,
425 log_enable: false,
426 debug_log: false,
427 }
428 }
429}
430impl BaseMetadata for GgmlTtsMetadata {
431 fn model_name(&self) -> &str {
432 &self.model_name
433 }
434
435 fn model_alias(&self) -> &str {
436 &self.model_alias
437 }
438}
impl GgmlTtsMetadata {
    /// TTS models always use the dedicated `Tts` prompt template.
    pub fn prompt_template(&self) -> PromptTemplateType {
        PromptTemplateType::Tts
    }
}