pub struct GgmlMetadata {
pub model_name: String,
pub model_alias: String,
pub log_prompts: bool,
pub prompt_template: PromptTemplateType,
pub log_enable: bool,
pub debug_log: bool,
pub embeddings: bool,
pub n_predict: i32,
pub reverse_prompt: Option<String>,
pub mmproj: Option<String>,
pub image: Option<String>,
pub n_gpu_layers: u64,
pub main_gpu: Option<u64>,
pub tensor_split: Option<String>,
pub use_mmap: Option<bool>,
pub split_mode: String,
pub ctx_size: u64,
pub batch_size: u64,
pub ubatch_size: u64,
pub threads: u64,
pub temperature: f64,
pub top_p: f64,
pub repeat_penalty: f64,
pub presence_penalty: f64,
pub frequency_penalty: f64,
pub grammar: String,
pub json_schema: Option<String>,
pub include_usage: bool,
}
Expand description
Metadata for chat and embeddings models
Fields§
§model_name: String
§model_alias: String
§log_prompts: bool
§prompt_template: PromptTemplateType
§log_enable: bool
§debug_log: bool
§embeddings: bool
§n_predict: i32
Number of tokens to predict, -1 = infinity, -2 = until context filled. Defaults to -1.
reverse_prompt: Option<String>
Halt generation at PROMPT, return control in interactive mode.
mmproj: Option<String>
path to the multimodal projector file for llava
image: Option<String>
Path to the image file for llava
n_gpu_layers: u64
§main_gpu: Option<u64>
The main GPU to use. Defaults to None.
tensor_split: Option<String>
How split tensors should be distributed across GPUs. If None the model is not split; otherwise, a comma-separated list of non-negative values, e.g., “3,2” assigns 60% of the data to GPU 0 and 40% to GPU 1. Defaults to None.
use_mmap: Option<bool>
Whether to use memory-mapped files for the model. Defaults to true.
split_mode: String
How to split the model across multiple GPUs. Possible values:
none: use one GPU only
layer: split layers and KV across GPUs (default)
row: split rows across GPUs
ctx_size: u64
Size of the prompt context. 0 means loaded from model. Defaults to 4096.
batch_size: u64
Logical maximum batch size. Defaults to 2048.
ubatch_size: u64
Physical maximum batch size. Defaults to 512.
threads: u64
Number of threads to use during generation. Defaults to 2.
temperature: f64
Adjust the randomness of the generated text. Between 0.0 and 2.0. Defaults to 0.8.
top_p: f64
Top-p sampling. Between 0.0 and 1.0. Defaults to 0.9.
repeat_penalty: f64
Penalize repeat sequence of tokens. Defaults to 1.0.
presence_penalty: f64
Repeat alpha presence penalty. Defaults to 0.0.
frequency_penalty: f64
Repeat alpha frequency penalty. Defaults to 0.0.
grammar: String
BNF-like grammar to constrain generations (see samples in grammars/ dir). Defaults to empty string.
json_schema: Option<String>
JSON schema to constrain generations (https://json-schema.org/), e.g. {} for any JSON object. For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead.
include_usage: bool
Whether to include usage in the stream response. Defaults to false.
Implementations§
Source§impl GgmlMetadata
impl GgmlMetadata
pub fn prompt_template(&self) -> PromptTemplateType
Trait Implementations§
Source§impl BaseMetadata for GgmlMetadata
impl BaseMetadata for GgmlMetadata
fn model_name(&self) -> &str
fn model_alias(&self) -> &str
Source§impl Clone for GgmlMetadata
impl Clone for GgmlMetadata
Source§fn clone(&self) -> GgmlMetadata
fn clone(&self) -> GgmlMetadata
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for GgmlMetadata
impl Debug for GgmlMetadata
Source§impl Default for GgmlMetadata
impl Default for GgmlMetadata
Source§impl<'de> Deserialize<'de> for GgmlMetadata
impl<'de> Deserialize<'de> for GgmlMetadata
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for GgmlMetadata
impl RefUnwindSafe for GgmlMetadata
impl Send for GgmlMetadata
impl Sync for GgmlMetadata
impl Unpin for GgmlMetadata
impl UnwindSafe for GgmlMetadata
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<R, P> ReadPrimitive<R> for P
impl<R, P> ReadPrimitive<R> for P
Source§fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
Same as ReadEndian::read_from_little_endian().