diff --git a/CODEOWNERS b/CODEOWNERS index adeba5395831a..69c2e9d73b90e 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -3,3 +3,9 @@ /ci/ @ggerganov /.devops/ @ngxson /examples/server/ @ngxson +/ggml/src/ggml-cuda/fattn* @JohannesGaessler +/ggml/src/ggml-cuda/mmq.* @JohannesGaessler +/ggml/src/ggml-cuda/mmv.* @JohannesGaessler +/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler +/ggml/src/ggml-opt.cpp @JohannesGaessler +/ggml/src/gguf.cpp @JohannesGaessler diff --git a/common/common.cpp b/common/common.cpp index 4bb140ee2eb15..97f8758fa6531 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -2,6 +2,9 @@ #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING #endif +#include "ggml.h" +#include "gguf.h" + #include "common.h" #include "log.h" // Change JSON_ASSERT from assert() to GGML_ASSERT: diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp index 736035d78cf69..2db43e39e9d99 100644 --- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp @@ -1,4 +1,6 @@ #include "ggml.h" +#include "gguf.h" + #include "llama.h" #include "common.h" #include "log.h" diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp index 7c9f502285057..e899c107819f3 100644 --- a/examples/cvector-generator/cvector-generator.cpp +++ b/examples/cvector-generator/cvector-generator.cpp @@ -1,7 +1,9 @@ +#include "ggml.h" +#include "gguf.h" + #include "arg.h" #include "common.h" #include "llama.h" -#include "ggml.h" #include "pca.hpp" #include "mean.hpp" diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index 058b5cc860213..d5dcd20a0ae4a 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -1,7 +1,9 @@ -#include "arg.h" -#include "common.h" #include "ggml.h" #include "ggml-alloc.h" +#include "gguf.h" + 
+#include "arg.h" +#include "common.h" #include #include diff --git a/examples/gguf-hash/gguf-hash.cpp b/examples/gguf-hash/gguf-hash.cpp index e96c75117f533..9523ec122f573 100644 --- a/examples/gguf-hash/gguf-hash.cpp +++ b/examples/gguf-hash/gguf-hash.cpp @@ -1,4 +1,5 @@ #include "ggml.h" +#include "gguf.h" #include /* abort() */ #include diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp index 9e3d44984a06f..ef3ceb686f697 100644 --- a/examples/gguf-split/gguf-split.cpp +++ b/examples/gguf-split/gguf-split.cpp @@ -1,16 +1,18 @@ +#include "ggml.h" +#include "gguf.h" #include "llama.h" #include "common.h" #include +#include +#include +#include #include +#include +#include #include #include #include -#include - -#include -#include -#include #if defined(_WIN32) #include @@ -296,7 +298,7 @@ struct split_strategy { total_size += ggml_nbytes(t); } total_size = total_size / 1000 / 1000; // convert to megabytes - printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size); + printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size); i_split++; } } diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index 7498f85efc4f9..f31989c8c55c6 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -1,10 +1,9 @@ #include "ggml.h" +#include "gguf.h" #include -#include #include #include -#include #include #undef MIN @@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) { for (int i = 0; i < n_tensors; ++i) { const char * name = gguf_get_tensor_name (ctx, i); + const size_t size = gguf_get_tensor_size (ctx, i); const size_t offset = gguf_get_tensor_offset(ctx, i); - printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); + printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset); } } @@ -182,9 +182,10 @@ static bool 
gguf_ex_read_1(const std::string & fname, bool check_data) { for (int i = 0; i < n_tensors; ++i) { const char * name = gguf_get_tensor_name (ctx, i); + const size_t size = gguf_get_tensor_size (ctx, i); const size_t offset = gguf_get_tensor_offset(ctx, i); - printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); + printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset); } } @@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) { struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name); - printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data); + printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n", + __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data); // print first 10 elements const float * data = (const float *) cur->data; @@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) { const float * data = (const float *) cur->data; for (int j = 0; j < ggml_nelements(cur); ++j) { if (data[j] != 100 + i) { - fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]); + fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i)); gguf_free(ctx); return false; } @@ -245,6 +247,8 @@ int main(int argc, char ** argv) { check_data = false; } + srand(123456); + const std::string fname(argv[1]); const std::string mode (argv[2]); diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 3cd0d2fa8590c..7a8a3156bfdef 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -7,6 +7,7 @@ #include "ggml-cpu.h" #include "ggml-alloc.h" #include "ggml-backend.h" +#include "gguf.h" //#ifdef GGML_USE_CUDA //#include "ggml-cuda.h" @@ -262,7 +263,7 @@ static std::string gguf_kv_to_str(const struct 
gguf_context * ctx_gguf, int i) { { const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); int arr_n = gguf_get_arr_n(ctx_gguf, i); - const void * data = gguf_get_arr_data(ctx_gguf, i); + const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i); std::stringstream ss; ss << "["; for (int j = 0; j < arr_n; j++) { @@ -2734,7 +2735,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i total_size_org += orig_size; total_size_new += new_size; gguf_set_tensor_type(ctx_out, name.c_str(), new_type); - gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size); + GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size); + gguf_set_tensor_data(ctx_out, name.c_str(), new_data); fout.write((const char *)new_data, new_size); size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size; for (size_t j = 0; j < pad; ++j) { diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index e33d974827cbe..724ff16a4e054 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -243,7 +243,8 @@ set(GGML_PUBLIC_HEADERS include/ggml-metal.h include/ggml-rpc.h include/ggml-sycl.h - include/ggml-vulkan.h) + include/ggml-vulkan.h + include/gguf.h) set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") #if (GGML_METAL) diff --git a/ggml/include/ggml-cpp.h b/ggml/include/ggml-cpp.h index 219361af43e06..a12342c25debe 100644 --- a/ggml/include/ggml-cpp.h +++ b/ggml/include/ggml-cpp.h @@ -7,6 +7,7 @@ #include "ggml.h" #include "ggml-alloc.h" #include "ggml-backend.h" +#include "gguf.h" #include // Smart pointers for ggml types diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index c714fc8c837bb..8630d92c5c6a4 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -241,12 +241,6 @@ #define GGML_ROPE_TYPE_MROPE 8 #define GGML_ROPE_TYPE_VISION 24 -#define GGUF_MAGIC "GGUF" - -#define GGUF_VERSION 3 - -#define GGUF_DEFAULT_ALIGNMENT 32 - 
#define GGML_UNUSED(x) (void)(x) #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) @@ -403,12 +397,6 @@ extern "C" { GGML_PREC_F32, }; - enum ggml_backend_type { - GGML_BACKEND_TYPE_CPU = 0, - GGML_BACKEND_TYPE_GPU = 10, - GGML_BACKEND_TYPE_GPU_SPLIT = 20, - }; - // model file types enum ggml_ftype { GGML_FTYPE_UNKNOWN = -1, @@ -587,8 +575,6 @@ extern "C" { struct ggml_tensor { enum ggml_type type; - GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor"); - struct ggml_backend_buffer * buffer; int64_t ne[GGML_MAX_DIMS]; // number of elements @@ -2111,132 +2097,6 @@ extern "C" { int64_t n_per_row, const float * imatrix); - // - // gguf - // - - enum gguf_type { - GGUF_TYPE_UINT8 = 0, - GGUF_TYPE_INT8 = 1, - GGUF_TYPE_UINT16 = 2, - GGUF_TYPE_INT16 = 3, - GGUF_TYPE_UINT32 = 4, - GGUF_TYPE_INT32 = 5, - GGUF_TYPE_FLOAT32 = 6, - GGUF_TYPE_BOOL = 7, - GGUF_TYPE_STRING = 8, - GGUF_TYPE_ARRAY = 9, - GGUF_TYPE_UINT64 = 10, - GGUF_TYPE_INT64 = 11, - GGUF_TYPE_FLOAT64 = 12, - GGUF_TYPE_COUNT, // marks the end of the enum - }; - - struct gguf_context; - - struct gguf_init_params { - bool no_alloc; - - // if not NULL, create a ggml_context and allocate the tensor data in it - struct ggml_context ** ctx; - }; - - GGML_API struct gguf_context * gguf_init_empty(void); - GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); - //GGML_API struct gguf_context * gguf_init_from_buffer(..); - - GGML_API void gguf_free(struct gguf_context * ctx); - - GGML_API const char * gguf_type_name(enum gguf_type type); - - GGML_API int gguf_get_version (const struct gguf_context * ctx); - GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx); - GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); - GGML_API void * gguf_get_data (const struct gguf_context * ctx); - - GGML_API int gguf_get_n_kv(const struct gguf_context * ctx); - GGML_API int 
gguf_find_key(const struct gguf_context * ctx, const char * key); - GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id); - - GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id); - GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id); - - // will abort if the wrong type is used for the key - GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id); - GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id); - GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id); - GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id); - GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id); - GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id); - GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id); - GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id); - GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id); - GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id); - GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id); - GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id); - GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id); - GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id); - GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id); - GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i); - - GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx); - GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name); - GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i); - GGML_API char * 
gguf_get_tensor_name (const struct gguf_context * ctx, int i); - GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i); - - // removes key if it exists - GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key); - - // overrides existing values or adds a new one - GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val); - GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val); - GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val); - GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val); - GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val); - GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val); - GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val); - GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val); - GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val); - GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val); - GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val); - GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); - GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n); - GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n); - - // set or add KV pairs from another context - GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src); - - // manage tensor info - GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor); - GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, 
enum ggml_type type); - GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size); - - // writing gguf files can be done in 2 ways: - // - // - write the entire gguf_context to a binary file in a single pass: - // - // gguf_write_to_file(ctx, fname); - // - // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: - // - // FILE * f = fopen(fname, "wb"); - // fseek(f, gguf_get_meta_size(ctx), SEEK_SET); - // fwrite(f, ...); - // void * data = gguf_meta_get_meta_data(ctx); - // fseek(f, 0, SEEK_SET); - // fwrite(f, data, gguf_get_meta_size(ctx)); - // free(data); - // fclose(f); - // - - // write the entire context to a binary file - GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); - - // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding - GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); - GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data); - #ifdef __cplusplus // restrict not standard in C++ # if defined(__GNUC__) diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h new file mode 100644 index 0000000000000..79ee202062b01 --- /dev/null +++ b/ggml/include/gguf.h @@ -0,0 +1,202 @@ +// This file contains functionality related to "GGUF" files, the binary file format used by ggml. +// GGUF files have the following structure: +// +// 1. File magic "GGUF" (4 bytes). +// 2. File version (uint32_t). +// 3. Number of ggml tensors in file (int64_t). +// 4. Number of key-value-pairs in file (int64_t). +// 5. For each KV pair: +// 1. The key (string). +// 2. The value type (gguf_type). +// 3a. If the value type is GGUF_TYPE_ARRAY: +// 1. The type of the array (gguf_type). +// 2. The number of elements in the array (uint64_t). +// 3. The binary representation of each element in the array. +// 3b. Otherwise: +// 1. 
The binary representation of the value. +// 6. For each ggml tensor: +// 1. The tensor name (string). +// 2. The number of dimensions of the tensor (uint32_t). +// 3. For each dimension: +// 1. The size of the tensor in the dimension (int64_t). +// 4. The tensor data type (ggml_type). +// 5. The tensor data offset in the tensor data binary blob (uint64_t). +// 7. The tensor data binary blob (optional, aligned). +// +// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator. +// All enums are stored as int32_t. +// All bool values are stored as int8_t. +// If the special key "general.alignment" (uint32_t) is defined it is used for alignment, +// otherwise GGUF_DEFAULT_ALIGNMENT is used. +// +// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) + +#pragma once + +#include "ggml.h" + +#include +#include + +#define GGUF_MAGIC "GGUF" +#define GGUF_VERSION 3 + +#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment" + +#define GGUF_DEFAULT_ALIGNMENT 32 + +#ifdef __cplusplus +extern "C" { +#endif + + // types that can be stored as GGUF KV data + enum gguf_type { + GGUF_TYPE_UINT8 = 0, + GGUF_TYPE_INT8 = 1, + GGUF_TYPE_UINT16 = 2, + GGUF_TYPE_INT16 = 3, + GGUF_TYPE_UINT32 = 4, + GGUF_TYPE_INT32 = 5, + GGUF_TYPE_FLOAT32 = 6, + GGUF_TYPE_BOOL = 7, + GGUF_TYPE_STRING = 8, + GGUF_TYPE_ARRAY = 9, + GGUF_TYPE_UINT64 = 10, + GGUF_TYPE_INT64 = 11, + GGUF_TYPE_FLOAT64 = 12, + GGUF_TYPE_COUNT, // marks the end of the enum + }; + + struct gguf_context; + + struct gguf_init_params { + bool no_alloc; + + // if not NULL, create a ggml_context and allocate the tensor data in it + struct ggml_context ** ctx; + }; + + GGML_API struct gguf_context * gguf_init_empty(void); + GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); + //GGML_API struct gguf_context * gguf_init_from_buffer(..); + + GGML_API void gguf_free(struct gguf_context * ctx); + + GGML_API const char * 
gguf_type_name(enum gguf_type type); + + GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx); + GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx); + GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); + + GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx); + GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found + GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id); + + GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id); + GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id); + + // will abort if the wrong type is used for the key + GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int64_t key_id); + GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int64_t key_id); + GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id); + GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id); + GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id); + GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id); + GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id); + GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id); + GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id); + GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id); + GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id); + GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id); + GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id); + GGML_API size_t gguf_get_arr_n (const struct gguf_context * ctx, int64_t key_id); + 
+ // get raw pointer to the first element of the array with the given key_id + // for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference) + GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id); + + // get ith C string from array with given key_id + GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i); + + GGML_API int64_t gguf_get_n_tensors (const struct gguf_context * ctx); + GGML_API int64_t gguf_find_tensor (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found + GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id); + GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int64_t tensor_id); + GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int64_t tensor_id); + GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int64_t tensor_id); + + // removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist) + GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key); + + // overrides an existing KV pair or adds a new one, the new KV pair is always at the back + GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val); + GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val); + GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val); + GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val); + GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val); + GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val); + GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val); + GGML_API void gguf_set_val_u64 (struct 
gguf_context * ctx, const char * key, uint64_t val); + GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val); + GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val); + GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val); + GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); + + // creates a new array with n elements of the given type and copies the corresponding number of bytes from data + GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n); + + // creates a new array with n strings and copies the corresponding strings from data + GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n); + + // set or add KV pairs from another context + GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src); + + // add tensor to GGUF context, tensor name must be unique + GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor); + + // after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated + // in such a way that the tensor data remains as one contiguous block (except for padding) + GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type); + + // assumes that at least gguf_get_tensor_size bytes can be read from data + GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data); + + // writing gguf files can be done in 3 ways: + // + // - write the entire gguf_context to a binary file in a single pass: + // + // gguf_write_to_file(ctx, fname, /*only_meta =*/ false); + // + // - write only the meta data to a file, then re-open the file and append the tensor data: + // + // gguf_write_to_file(ctx, fname, 
/*only_meta =*/ true); + // FILE * f = fopen(fname, "ab"); + // fwrite(f, ...); // write tensor data + // fclose(f); + // + // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: + // + // FILE * f = fopen(fname, "wb"); + // const size_t size_meta = gguf_get_meta_size(ctx); + // fseek(f, size_meta, SEEK_SET); + // fwrite(f, ...); // write tensor data + // void * data = malloc(size_meta); + // gguf_get_meta_data(ctx, data); + // rewind(f); + // fwrite(data, 1, size_meta, f); + // free(data); + // fclose(f); + // + + // write the entire context to a binary file + GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); + + // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding + GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); + + // writes the meta data to pointer "data" + GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data); + +#ifdef __cplusplus +} +#endif diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 84101c32c2b50..ae1cd23376ccf 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -208,6 +208,7 @@ add_library(ggml-base ../include/ggml-backend.h ../include/ggml-cpp.h ../include/ggml-opt.h + ../include/gguf.h ggml.c ggml-alloc.c ggml-backend.cpp @@ -215,7 +216,8 @@ add_library(ggml-base ggml-threading.cpp ggml-threading.h ggml-quants.c - ggml-quants.h) + ggml-quants.h + gguf.cpp) target_include_directories(ggml-base PRIVATE .) 
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 549772c57c90a..eab017889c919 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -3,6 +3,8 @@ // GGML internal header #include "ggml.h" +#include "gguf.h" + #include #include #include // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/ @@ -551,22 +553,15 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) { #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x) #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x) -// expose GGUF internals for test code - -GGML_API size_t gguf_type_size(enum gguf_type type); - -GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); - -struct gguf_buf { - void * data; - size_t size; - size_t offset; -}; -GGML_API struct gguf_buf gguf_buf_init(size_t size); -GGML_API void gguf_buf_free(struct gguf_buf buf); - -GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta); - #ifdef __cplusplus } #endif + +#ifdef __cplusplus +#include + +// expose GGUF internals for test code +GGML_API size_t gguf_type_size(enum gguf_type type); +GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); +GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector & buf, bool only_meta); +#endif // __cplusplus diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 2bbe5f48257b2..90abc6ad45233 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1588,15 +1588,8 @@ static struct ggml_tensor * ggml_new_tensor_impl( struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs); -#ifdef __clang__ - // temporary until ggml_tensor::backend is removed - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - *result = (struct ggml_tensor) { /*.type =*/ type, - /*.backend =*/ 
GGML_BACKEND_TYPE_CPU, /*.buffer =*/ NULL, /*.ne =*/ { 1, 1, 1, 1 }, /*.nb =*/ { 0, 0, 0, 0 }, @@ -1612,10 +1605,6 @@ static struct ggml_tensor * ggml_new_tensor_impl( /*.padding =*/ { 0 }, }; -#ifdef __clang__ - #pragma clang diagnostic pop -#endif - // TODO: this should not be needed as long as we don't rely on aligned SIMD loads //GGML_ASSERT_ALIGNED(result->data); @@ -6417,1271 +6406,6 @@ size_t ggml_quantize_chunk( //////////////////////////////////////////////////////////////////////////////// -struct gguf_str { - uint64_t n; // GGUFv2 - char * data; -}; - -static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = { - [GGUF_TYPE_UINT8] = sizeof(uint8_t), - [GGUF_TYPE_INT8] = sizeof(int8_t), - [GGUF_TYPE_UINT16] = sizeof(uint16_t), - [GGUF_TYPE_INT16] = sizeof(int16_t), - [GGUF_TYPE_UINT32] = sizeof(uint32_t), - [GGUF_TYPE_INT32] = sizeof(int32_t), - [GGUF_TYPE_FLOAT32] = sizeof(float), - [GGUF_TYPE_BOOL] = sizeof(bool), - [GGUF_TYPE_STRING] = sizeof(struct gguf_str), - [GGUF_TYPE_UINT64] = sizeof(uint64_t), - [GGUF_TYPE_INT64] = sizeof(int64_t), - [GGUF_TYPE_FLOAT64] = sizeof(double), - [GGUF_TYPE_ARRAY] = 0, // undefined -}; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); - -static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = { - [GGUF_TYPE_UINT8] = "u8", - [GGUF_TYPE_INT8] = "i8", - [GGUF_TYPE_UINT16] = "u16", - [GGUF_TYPE_INT16] = "i16", - [GGUF_TYPE_UINT32] = "u32", - [GGUF_TYPE_INT32] = "i32", - [GGUF_TYPE_FLOAT32] = "f32", - [GGUF_TYPE_BOOL] = "bool", - [GGUF_TYPE_STRING] = "str", - [GGUF_TYPE_ARRAY] = "arr", - [GGUF_TYPE_UINT64] = "u64", - [GGUF_TYPE_INT64] = "i64", - [GGUF_TYPE_FLOAT64] = "f64", -}; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); - -union gguf_value { - uint8_t uint8; - int8_t int8; - uint16_t uint16; - int16_t int16; - uint32_t uint32; - int32_t int32; - float float32; - uint64_t uint64; - int64_t int64; - double float64; - bool bool_; - - struct gguf_str str; - - struct { - enum gguf_type type; - - 
uint64_t n; // GGUFv2 - void * data; - } arr; -}; - -struct gguf_kv { - struct gguf_str key; - - enum gguf_type type; - union gguf_value value; -}; - -struct gguf_header { - char magic[4]; - - uint32_t version; - uint64_t n_tensors; // GGUFv2 - uint64_t n_kv; // GGUFv2 -}; - -struct gguf_tensor_info { - struct gguf_str name; - - uint32_t n_dims; - uint64_t ne[GGML_MAX_DIMS]; - - enum ggml_type type; - - uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT` - - // for writing API - const void * data; - size_t size; -}; - -struct gguf_context { - struct gguf_header header; - - struct gguf_kv * kv; - struct gguf_tensor_info * infos; - - size_t alignment; - size_t offset; // offset of `data` from beginning of file - size_t size; // size of `data` in bytes - - //uint8_t * padding; - void * data; -}; - -size_t gguf_type_size(enum gguf_type type) { - GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT); - return GGUF_TYPE_SIZE[type]; -} - -static bool gguf_tensor_info_sanitize(struct gguf_tensor_info * info) { - if (info->n_dims > GGML_MAX_DIMS) { - fprintf(stderr, "%s: invalid number of dimensions (%" PRIu32 ")\n", __func__, info->n_dims); - return false; - } - - if (info->type < 0 || info->type >= GGML_TYPE_COUNT) { - fprintf(stderr, "%s: invalid type (%d)\n", __func__, info->type); - return false; - } - - if (strlen(info->name.data) >= GGML_MAX_NAME) { - fprintf(stderr, "%s: tensor '%s' name is too long\n", __func__, info->name.data); - return false; - } - - for (uint32_t i = 0; i < info->n_dims; ++i) { - if (info->ne[i] <= 0) { - fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[i]); - return false; - } - } - - // prevent overflow for total number of elements - if (INT64_MAX/info->ne[1] <= info->ne[0]) { - fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[1]); - return false; - } - - if (INT64_MAX/info->ne[2] <= info->ne[0]*info->ne[1]) { - fprintf(stderr, "%s: invalid 
number of elements (%" PRIu64 ")\n", __func__, info->ne[2]); - return false; - } - - if (INT64_MAX/info->ne[3] <= info->ne[0]*info->ne[1]*info->ne[2]) { - fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[3]); - return false; - } - - return true; -} - -static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) { - const size_t n = fread(dst, 1, size, file); - *offset += n; - return n == size; -} - -static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) { - p->n = 0; - p->data = NULL; - - bool ok = true; - - ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); - - // early exit if string length is invalid, prevents from integer overflow - if (p->n == SIZE_MAX) { - fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n); - return false; - } - - p->data = calloc(p->n + 1, 1); - if (!p->data) { - fprintf(stderr, "%s: failed to allocate memory for string of length %" PRIu64 "\n", __func__, p->n); - return false; - } - - ok = ok && gguf_fread_el(file, p->data, p->n, offset); - - return ok; -} - -static void gguf_free_kv(struct gguf_kv * kv) { - if (kv->key.data) { - GGML_FREE(kv->key.data); - } - - if (kv->type == GGUF_TYPE_STRING) { - if (kv->value.str.data) { - GGML_FREE(kv->value.str.data); - } - } - - if (kv->type == GGUF_TYPE_ARRAY) { - if (kv->value.arr.data) { - if (kv->value.arr.type == GGUF_TYPE_STRING) { - for (uint64_t j = 0; j < kv->value.arr.n; ++j) { - struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j]; - if (str->data) { - GGML_FREE(str->data); - } - } - } - GGML_FREE(kv->value.arr.data); - } - } -} - -struct gguf_context * gguf_init_empty(void) { - struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context)); - if (!ctx) { - fprintf(stderr, "%s: failed to allocate memory for context\n", __func__); - return NULL; - } - - memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic)); - ctx->header.version = GGUF_VERSION; - 
ctx->header.n_tensors = 0; - ctx->header.n_kv = 0; - - ctx->kv = NULL; - ctx->infos = NULL; - - ctx->alignment = GGUF_DEFAULT_ALIGNMENT; - ctx->offset = 0; - ctx->size = 0; - - ctx->data = NULL; - - return ctx; -} - -struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) { - // offset from start of file - size_t offset = 0; - - char magic[4]; - - // check the magic before making allocations - { - gguf_fread_el(file, &magic, sizeof(magic), &offset); - - for (uint32_t i = 0; i < sizeof(magic); i++) { - if (magic[i] != GGUF_MAGIC[i]) { - fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]); - return NULL; - } - } - } - - bool ok = true; - - struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context)); - if (!ctx) { - fprintf(stderr, "%s: failed to allocate memory for context\n", __func__); - return NULL; - } - - // read the header - { - strncpy(ctx->header.magic, magic, 4); - - ctx->kv = NULL; - ctx->infos = NULL; - ctx->data = NULL; - - ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset); - ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset); - ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset); - - if (ctx->header.version == 1) { - fprintf(stderr, "%s: GGUFv1 is no longer supported. 
please use a more up-to-date version\n", __func__); - gguf_free(ctx); - return NULL; - } - - // sanity-checks to prevent from integer/buffer overflows - - ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct gguf_tensor_info)); - ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/ggml_tensor_overhead()); - ok = ok && (ctx->header.n_kv < (SIZE_MAX/2)/sizeof(struct gguf_kv)); - - if (!ok) { - fprintf(stderr, "%s: failed to read header\n", __func__); - gguf_free(ctx); - return NULL; - } - } - - // read the kv pairs - { - const uint64_t n_kv = ctx->header.n_kv; - - if (n_kv > 0) { - ctx->kv = calloc(n_kv, sizeof(struct gguf_kv)); - if (!ctx->kv) { - fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__); - gguf_free(ctx); - return NULL; - } - } - - for (uint64_t i = 0; i < n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; - - //fprintf(stderr, "%s: reading kv %d\n", __func__, i); - - ok = ok && gguf_fread_str(file, &kv->key, &offset); - ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset); - - //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data); - - switch (kv->type) { - case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break; - case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break; - case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break; - case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break; - case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break; - case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break; - case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break; - case GGUF_TYPE_UINT64: ok = 
ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break; - case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break; - case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; - case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; - case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break; - case GGUF_TYPE_ARRAY: - { - ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); - ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset); - - switch (kv->value.arr.type) { - case GGUF_TYPE_UINT8: - case GGUF_TYPE_INT8: - case GGUF_TYPE_UINT16: - case GGUF_TYPE_INT16: - case GGUF_TYPE_UINT32: - case GGUF_TYPE_INT32: - case GGUF_TYPE_FLOAT32: - case GGUF_TYPE_UINT64: - case GGUF_TYPE_INT64: - case GGUF_TYPE_FLOAT64: - case GGUF_TYPE_BOOL: - { - // prevent from integer overflow in the malloc below - if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) { - fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n); - gguf_free(ctx); - return NULL; - } - - kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type)); - if (!kv->value.arr.data) { - fprintf(stderr, "%s: failed to allocate memory for array\n", __func__); - gguf_free(ctx); - return NULL; - } - - ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset); - } break; - case GGUF_TYPE_STRING: - { - // prevent from integer overflow in the malloc below - if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) { - fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n); - gguf_free(ctx); - return NULL; - } - - kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct 
gguf_str)); - if (!kv->value.arr.data) { - fprintf(stderr, "%s: failed to allocate memory for array\n", __func__); - gguf_free(ctx); - return NULL; - } - - for (uint64_t j = 0; j < kv->value.arr.n; ++j) { - ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset); - } - } break; - case GGUF_TYPE_ARRAY: - default: - { - fprintf(stderr, "%s: invalid array type %d\n", __func__, kv->value.arr.type); - ok = false; - } break; - } - } break; - default: - { - fprintf(stderr, "%s: invalid type %d\n", __func__, kv->type); - ok = false; - } break; - } - - if (!ok) { - break; - } - } - - if (!ok) { - fprintf(stderr, "%s: failed to read key-value pairs\n", __func__); - gguf_free(ctx); - return NULL; - } - } - - // read the tensor infos - if (ctx->header.n_tensors > 0) { - ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info)); - if (!ctx->infos) { - fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__); - gguf_free(ctx); - return NULL; - } - - for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; - - for (int j = 0; j < GGML_MAX_DIMS; ++j) { - info->ne[j] = 1; - } - - ok = ok && gguf_fread_str(file, &info->name, &offset); - ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset); - - ok = ok && (info->n_dims <= GGML_MAX_DIMS); - - for (uint32_t j = 0; j < info->n_dims; ++j) { - ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset); - } - - ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset); - ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset); - - ok = ok && gguf_tensor_info_sanitize(info); - - // make sure there is no duplicated tensor names - for (uint64_t j = 0; j < i && ok; ++j) { - if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) { - fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data); - ok = false; - } - } - - if (!ok) { 
- fprintf(stderr, "%s: failed to read tensor info\n", __func__); - gguf_free(ctx); - return NULL; - } - } - } - - ctx->alignment = GGUF_DEFAULT_ALIGNMENT; - - int alignment_idx = gguf_find_key(ctx, "general.alignment"); - if (alignment_idx != -1) { - ctx->alignment = gguf_get_val_u32(ctx, alignment_idx); - } - - // we require the data section to be aligned, so take into account any padding - { - const size_t offset_pad = offset % ctx->alignment; - - if (offset_pad != 0) { - offset += ctx->alignment - offset_pad; - fseek(file, offset, SEEK_SET); - } - } - - // store the current file offset - this is where the data section starts - ctx->offset = offset; - - // compute the total size of the data section, taking into account the alignment - { - ctx->size = 0; - for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; - - const int64_t ne = - (int64_t) info->ne[0] * - (int64_t) info->ne[1] * - (int64_t) info->ne[2] * - (int64_t) info->ne[3]; - - if (ggml_blck_size(info->type) == 0 ) { - // this tensor type support have been removed: - fprintf(stderr, "%s: tensor '%s' of type %d: %s\n", - __func__, info->name.data, (int) info->type, ggml_type_name(info->type)); - gguf_free(ctx); - return NULL; - } - - if (ne % ggml_blck_size(info->type) != 0) { - fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n", - __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type)); - gguf_free(ctx); - return NULL; - } - - const size_t size_cur = ggml_row_size(info->type, ne); - - ctx->size += GGML_PAD(size_cur, ctx->alignment); - } - } - - // load the tensor data only if requested - if (params.ctx != NULL) { - // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob - // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members 
of - // the ggml_tensor structs to the appropriate locations in the binary blob - - // compute the exact size needed for the new ggml_context - const size_t mem_size = - params.no_alloc ? - (ctx->header.n_tensors )*ggml_tensor_overhead() : - (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size; - - struct ggml_init_params pdata = { - .mem_size = mem_size, - .mem_buffer = NULL, - .no_alloc = params.no_alloc, - }; - - *params.ctx = ggml_init(pdata); - if (*params.ctx == NULL) { - fprintf(stderr, "%s: failed to initialize context\n", __func__); - gguf_free(ctx); - return NULL; - } - - struct ggml_context * ctx_data = *params.ctx; - - struct ggml_tensor * data = NULL; - - if (!params.no_alloc) { - data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size); - - ok = ok && data != NULL; - - // read the binary blob with the tensor data - ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset); - - if (!ok) { - fprintf(stderr, "%s: failed to read tensor data\n", __func__); - ggml_free(ctx_data); - gguf_free(ctx); - return NULL; - } - - ctx->data = data->data; - } - - ggml_set_no_alloc(ctx_data, true); - - // create the tensors - for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) { - const int64_t ne[GGML_MAX_DIMS] = { - ctx->infos[i].ne[0], - ctx->infos[i].ne[1], - ctx->infos[i].ne[2], - ctx->infos[i].ne[3], - }; - - struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne); - - ok = ok && cur != NULL; - - if (!ok) { - break; - } - - ggml_set_name(cur, ctx->infos[i].name.data); - - // point the data member to the appropriate location in the binary blob using the tensor infos - if (!params.no_alloc) { - //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file - cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data - } - } - - if (!ok) { - fprintf(stderr, "%s: failed to read the tensor data\n", __func__); - ggml_free(ctx_data); - gguf_free(ctx); - 
return NULL; - } - - ggml_set_no_alloc(ctx_data, params.no_alloc); - } - - return ctx; -} - -struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { - FILE * file = ggml_fopen(fname, "rb"); - if (!file) { - fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno)); - return NULL; - } - - struct gguf_context * result = gguf_init_from_file_impl(file, params); - fclose(file); - return result; -} - -void gguf_free(struct gguf_context * ctx) { - if (ctx == NULL) { - return; - } - - if (ctx->kv) { - // free string memory - not great.. - for (uint64_t i = 0; i < ctx->header.n_kv; ++i) { - gguf_free_kv(&ctx->kv[i]); - } - - GGML_FREE(ctx->kv); - } - - if (ctx->infos) { - for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; - - if (info->name.data) { - GGML_FREE(info->name.data); - } - } - - GGML_FREE(ctx->infos); - } - - GGML_FREE(ctx); -} - -const char * gguf_type_name(enum gguf_type type) { - return GGUF_TYPE_NAME[type]; -} - -int gguf_get_version(const struct gguf_context * ctx) { - return ctx->header.version; -} - -size_t gguf_get_alignment(const struct gguf_context * ctx) { - return ctx->alignment; -} - -size_t gguf_get_data_offset(const struct gguf_context * ctx) { - return ctx->offset; -} - -void * gguf_get_data(const struct gguf_context * ctx) { - return ctx->data; -} - -int gguf_get_n_kv(const struct gguf_context * ctx) { - return ctx->header.n_kv; -} - -int gguf_find_key(const struct gguf_context * ctx, const char * key) { - // return -1 if key not found - int keyfound = -1; - - const int n_kv = gguf_get_n_kv(ctx); - - for (int i = 0; i < n_kv; ++i) { - if (strcmp(key, gguf_get_key(ctx, i)) == 0) { - keyfound = i; - break; - } - } - - return keyfound; -} - -const char * gguf_get_key(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - return ctx->kv[key_id].key.data; -} - -enum gguf_type 
gguf_get_kv_type(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - return ctx->kv[key_id].type; -} - -enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); - return ctx->kv[key_id].value.arr.type; -} - -const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); - return ctx->kv[key_id].value.arr.data; -} - -const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); - struct gguf_kv * kv = &ctx->kv[key_id]; - struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; - return str->data; -} - -int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); - return ctx->kv[key_id].value.arr.n; -} - -uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8); - return ctx->kv[key_id].value.uint8; -} - -int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8); - return ctx->kv[key_id].value.int8; -} - -uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16); - return ctx->kv[key_id].value.uint16; -} - -int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && 
key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16); - return ctx->kv[key_id].value.int16; -} - -uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32); - return ctx->kv[key_id].value.uint32; -} - -int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32); - return ctx->kv[key_id].value.int32; -} - -float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32); - return ctx->kv[key_id].value.float32; -} - -uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64); - return ctx->kv[key_id].value.uint64; -} - -int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64); - return ctx->kv[key_id].value.int64; -} - -double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64); - return ctx->kv[key_id].value.float64; -} - -bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL); - return ctx->kv[key_id].value.bool_; -} - -const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING); - return ctx->kv[key_id].value.str.data; -} - 
-const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) { - GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY); - GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING); - return &ctx->kv[key_id].value; -} - -int gguf_get_n_tensors(const struct gguf_context * ctx) { - return ctx->header.n_tensors; -} - -int gguf_find_tensor(const struct gguf_context * ctx, const char * name) { - // return -1 if tensor not found - int tensorfound = -1; - - const int n_tensors = gguf_get_n_tensors(ctx); - - for (int i = 0; i < n_tensors; ++i) { - if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) { - tensorfound = i; - break; - } - } - - return tensorfound; -} - -size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) { - return ctx->infos[i].offset; -} - -char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) { - return ctx->infos[i].name.data; -} - -enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int i) { - return ctx->infos[i].type; -} - -// returns the index -static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) { - const int idx = gguf_find_key(ctx, key); - if (idx >= 0) { - return idx; - } - - const int n_kv = gguf_get_n_kv(ctx); - - ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv)); - ctx->kv[n_kv].key.n = strlen(key); - ctx->kv[n_kv].key.data = strdup(key); - ctx->header.n_kv++; - - return n_kv; -} - -void gguf_remove_key(struct gguf_context * ctx, const char * key) { - const int idx = gguf_find_key(ctx, key); - if (idx >= 0) { - const int n_kv = gguf_get_n_kv(ctx); - gguf_free_kv(&ctx->kv[idx]); - for (int i = idx; i < n_kv-1; ++i) { - ctx->kv[i] = ctx->kv[i+1]; - } - ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv)); - ctx->header.n_kv--; - } -} - -void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - 
ctx->kv[idx].type = GGUF_TYPE_UINT8; - ctx->kv[idx].value.uint8 = val; -} - -void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_INT8; - ctx->kv[idx].value.int8 = val; -} - -void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_UINT16; - ctx->kv[idx].value.uint16 = val; -} - -void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_INT16; - ctx->kv[idx].value.int16 = val; -} - -void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_UINT32; - ctx->kv[idx].value.uint32 = val; -} - -void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_INT32; - ctx->kv[idx].value.int32 = val; -} - -void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_FLOAT32; - ctx->kv[idx].value.float32 = val; -} - -void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_UINT64; - ctx->kv[idx].value.uint64 = val; -} - -void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_INT64; - ctx->kv[idx].value.int64 = val; -} - -void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_FLOAT64; - ctx->kv[idx].value.float64 = val; -} - -void 
gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_BOOL; - ctx->kv[idx].value.bool_ = val; -} - -void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_STRING; - ctx->kv[idx].value.str.n = strlen(val); - ctx->kv[idx].value.str.data = strdup(val); -} - -void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_ARRAY; - ctx->kv[idx].value.arr.type = type; - ctx->kv[idx].value.arr.n = n; - ctx->kv[idx].value.arr.data = GGML_CALLOC(n, gguf_type_size(type)); - memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type)); -} - -void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) { - const int idx = gguf_get_or_add_key(ctx, key); - - ctx->kv[idx].type = GGUF_TYPE_ARRAY; - ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING; - ctx->kv[idx].value.arr.n = n; - ctx->kv[idx].value.arr.data = GGML_CALLOC(n, sizeof(struct gguf_str)); - for (int i = 0; i < n; i++) { - struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i]; - str->n = strlen(data[i]); - str->data = strdup(data[i]); - } -} - -// set or add KV pairs from another context -void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { - for (uint32_t i = 0; i < src->header.n_kv; i++) { - switch (src->kv[i].type) { - case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break; - case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break; - case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break; - case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, 
src->kv[i].value.int16); break; - case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break; - case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break; - case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break; - case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break; - case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break; - case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; - case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; - case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; - case GGUF_TYPE_ARRAY: - { - if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) { - const char ** data = GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *)); - for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) { - data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data; - } - gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); - GGML_FREE((void *)data); - } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) { - GGML_ABORT("nested arrays not supported"); - } else { - gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); - } - } break; - default: GGML_ABORT("invalid type"); - } - } -} - -void gguf_add_tensor( - struct gguf_context * ctx, - const struct ggml_tensor * tensor) { - GGML_ASSERT(tensor); - if (gguf_find_tensor(ctx, tensor->name) != -1) { - GGML_ABORT("duplicated tensor name"); - } - - const int idx = ctx->header.n_tensors; - ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info)); - - ctx->infos[idx].name.n = strlen(tensor->name); - ctx->infos[idx].name.data = strdup(tensor->name); - - for (int i = 
0; i < GGML_MAX_DIMS; ++i) { - ctx->infos[idx].ne[i] = 1; - } - - ctx->infos[idx].n_dims = ggml_n_dims(tensor); - for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) { - ctx->infos[idx].ne[i] = tensor->ne[i]; - } - - ctx->infos[idx].type = tensor->type; - ctx->infos[idx].offset = 0; - ctx->infos[idx].data = tensor->data; - ctx->infos[idx].size = ggml_nbytes(tensor); - - if (ctx->header.n_tensors > 0) { - ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment); - } - - ctx->header.n_tensors++; -} - -void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) { - const int idx = gguf_find_tensor(ctx, name); - if (idx < 0) { - GGML_ABORT("tensor not found"); - } - - ctx->infos[idx].type = type; -} - -void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) { - const int idx = gguf_find_tensor(ctx, name); - if (idx < 0) { - GGML_ABORT("tensor not found"); - } - - ctx->infos[idx].data = data; - ctx->infos[idx].size = size; - - // update offsets - for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) { - ctx->infos[i].offset = ctx->infos[i - 1].offset + GGML_PAD(ctx->infos[i - 1].size, ctx->alignment); - } -} - -//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) { -// fwrite(&val->n, sizeof(val->n), 1, file); -// fwrite(val->data, sizeof(char), val->n, file); -//} -// -//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) { -// fwrite(val, sizeof(char), size, file); -//} - -struct gguf_buf gguf_buf_init(size_t size) { - struct gguf_buf buf = { - /*buf.data =*/ size == 0 ? 
NULL : GGML_CALLOC(1, size), - /*buf.size =*/ size, - /*buf.offset =*/ 0, - }; - - return buf; -} - -void gguf_buf_free(struct gguf_buf buf) { - if (buf.data) { - GGML_FREE(buf.data); - } -} - -static void gguf_buf_grow(struct gguf_buf * buf, size_t size) { - if (buf->offset + size > buf->size) { - buf->size = 1.5*(buf->offset + size); - if (buf->data) { - buf->data = realloc(buf->data, buf->size); - } - } -} - -static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) { - gguf_buf_grow(buf, sizeof(val->n) + val->n); - - if (buf->data) { - memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n)); - } - buf->offset += sizeof(val->n); - - if (buf->data) { - memcpy((char *) buf->data + buf->offset, val->data, val->n); - } - buf->offset += val->n; -} - -static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) { - gguf_buf_grow(buf, el_size); - - if (buf->data) { - memcpy((char *) buf->data + buf->offset, val, el_size); - } - buf->offset += el_size; -} - -void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) { - // write header - gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic)); - gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version)); - gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); - gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); - - // write key-value pairs - for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; - - gguf_bwrite_str(buf, &kv->key); - gguf_bwrite_el (buf, &kv->type, sizeof(kv->type)); - - switch (kv->type) { - case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break; - case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break; - case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break; - case GGUF_TYPE_INT16: gguf_bwrite_el (buf, 
&kv->value.int16, sizeof(kv->value.int16) ); break; - case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break; - case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break; - case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break; - case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break; - case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break; - case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; - case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; - case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break; - case GGUF_TYPE_ARRAY: - { - gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); - gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) ); - - switch (kv->value.arr.type) { - case GGUF_TYPE_UINT8: - case GGUF_TYPE_INT8: - case GGUF_TYPE_UINT16: - case GGUF_TYPE_INT16: - case GGUF_TYPE_UINT32: - case GGUF_TYPE_INT32: - case GGUF_TYPE_FLOAT32: - case GGUF_TYPE_UINT64: - case GGUF_TYPE_INT64: - case GGUF_TYPE_FLOAT64: - case GGUF_TYPE_BOOL: - { - gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type)); - } break; - case GGUF_TYPE_STRING: - { - for (uint32_t j = 0; j < kv->value.arr.n; ++j) { - gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]); - } - } break; - case GGUF_TYPE_ARRAY: - default: GGML_ABORT("invalid type"); - } - } break; - default: GGML_ABORT("invalid type"); - } - } - - // write tensor infos - for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; - - gguf_bwrite_str(buf, &info->name); - gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims)); - for (uint32_t j = 0; j < info->n_dims; ++j) { - gguf_bwrite_el(buf, 
&info->ne[j], sizeof(info->ne[j])); - } - gguf_bwrite_el(buf, &info->type, sizeof(info->type)); - gguf_bwrite_el(buf, &info->offset, sizeof(info->offset)); - } - - // we require the data section to be aligned, so take into account any padding - { - const size_t offset = buf->offset; - const size_t offset_pad = GGML_PAD(offset, ctx->alignment); - - if (offset_pad != offset) { - uint8_t pad = 0; - for (size_t i = 0; i < offset_pad - offset; ++i) { - gguf_bwrite_el(buf, &pad, sizeof(pad)); - } - } - } - - if (only_meta) { - return; - } - - size_t offset = 0; - - // write tensor data - for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; - - const size_t size = info->size; - const size_t size_pad = GGML_PAD(size, ctx->alignment); - - gguf_bwrite_el(buf, info->data, size); - - if (size_pad != size) { - uint8_t pad = 0; - for (size_t j = 0; j < size_pad - size; ++j) { - gguf_bwrite_el(buf, &pad, sizeof(pad)); - } - } - - GGML_ASSERT(offset == info->offset); - - offset += size_pad; - } -} - -void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { - FILE * file = ggml_fopen(fname, "wb"); - if (!file) { - GGML_ABORT("failed to open file for writing"); - } - - struct gguf_buf buf = gguf_buf_init(16*1024); - - gguf_write_to_buf(ctx, &buf, only_meta); - - fwrite(buf.data, 1, buf.offset, file); - - gguf_buf_free(buf); - - fclose(file); -} - -size_t gguf_get_meta_size(const struct gguf_context * ctx) { - // no allocs - only compute size - struct gguf_buf buf = gguf_buf_init(0); - - gguf_write_to_buf(ctx, &buf, true); - - return buf.offset; -} - -void gguf_get_meta_data(const struct gguf_context * ctx, void * data) { - struct gguf_buf buf = gguf_buf_init(16*1024); - - gguf_write_to_buf(ctx, &buf, true); - - memcpy(data, buf.data, buf.offset); - - gguf_buf_free(buf); -} - void ggml_log_set(ggml_log_callback log_callback, void * user_data) { g_logger_state.log_callback = log_callback ? 
log_callback : ggml_log_callback_default; g_logger_state.log_callback_user_data = user_data; diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp new file mode 100644 index 0000000000000..e8d751693219d --- /dev/null +++ b/ggml/src/gguf.cpp @@ -0,0 +1,1325 @@ +#include "ggml.h" +#include "ggml-backend.h" +#include "ggml-impl.h" +#include "gguf.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template +struct type_to_gguf_type; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_UINT8; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_INT8; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_UINT16; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_INT16; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_UINT32; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_INT32; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_FLOAT32; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_BOOL; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_STRING; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_UINT64; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_INT64; +}; + +template <> +struct type_to_gguf_type { + static constexpr enum gguf_type value = GGUF_TYPE_FLOAT64; +}; + +static const std::map GGUF_TYPE_SIZE = { + {GGUF_TYPE_UINT8, sizeof(uint8_t)}, + {GGUF_TYPE_INT8, sizeof(int8_t)}, + {GGUF_TYPE_UINT16, sizeof(uint16_t)}, + {GGUF_TYPE_INT16, sizeof(int16_t)}, + {GGUF_TYPE_UINT32, 
sizeof(uint32_t)}, + {GGUF_TYPE_INT32, sizeof(int32_t)}, + {GGUF_TYPE_FLOAT32, sizeof(float)}, + {GGUF_TYPE_BOOL, sizeof(int8_t)}, + {GGUF_TYPE_STRING, 0}, // undefined + {GGUF_TYPE_ARRAY, 0}, // undefined + {GGUF_TYPE_UINT64, sizeof(uint64_t)}, + {GGUF_TYPE_INT64, sizeof(int64_t)}, + {GGUF_TYPE_FLOAT64, sizeof(double)}, +}; +static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); + +static const std::map GGUF_TYPE_NAME = { + {GGUF_TYPE_UINT8, "u8"}, + {GGUF_TYPE_INT8, "i8"}, + {GGUF_TYPE_UINT16, "u16"}, + {GGUF_TYPE_INT16, "i16"}, + {GGUF_TYPE_UINT32, "u32"}, + {GGUF_TYPE_INT32, "i32"}, + {GGUF_TYPE_FLOAT32, "f32"}, + {GGUF_TYPE_BOOL, "bool"}, + {GGUF_TYPE_STRING, "str"}, + {GGUF_TYPE_ARRAY, "arr"}, + {GGUF_TYPE_UINT64, "u64"}, + {GGUF_TYPE_INT64, "i64"}, + {GGUF_TYPE_FLOAT64, "f64"}, +}; +static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); + +size_t gguf_type_size(enum gguf_type type) { + auto it = GGUF_TYPE_SIZE.find(type); + return it == GGUF_TYPE_SIZE.end() ? 0 : it->second; +} + +struct gguf_kv { + std::string key; + + bool is_array; + enum gguf_type type; + + std::vector data; + std::vector data_string; + + template + gguf_kv(const std::string & key, const T value) + : key(key), is_array(false), type(type_to_gguf_type::value) { + GGML_ASSERT(!key.empty()); + data.resize(sizeof(T)); + memcpy(data.data(), &value, sizeof(T)); + } + + template + gguf_kv(const std::string & key, const std::vector & value) + : key(key), is_array(true), type(type_to_gguf_type::value) { + GGML_ASSERT(!key.empty()); + data.resize(value.size()*sizeof(T)); + for (size_t i = 0; i < value.size(); ++i) { + const T tmp = value[i]; + memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T)); + } + } + + gguf_kv(const std::string & key, const std::string & value) + : key(key), is_array(false), type(GGUF_TYPE_STRING) { + GGML_ASSERT(!key.empty()); + data_string.push_back(value); + } + + gguf_kv(const std::string & key, const std::vector & value) + : key(key), is_array(true), 
type(GGUF_TYPE_STRING) { + GGML_ASSERT(!key.empty()); + data_string = value; + } + + const std::string & get_key() const { + return key; + } + + const enum gguf_type & get_type() const { + return type; + } + + size_t get_ne() const { + if (type == GGUF_TYPE_STRING) { + const size_t ne = data_string.size(); + GGML_ASSERT(is_array || ne == 1); + return ne; + } + const size_t type_size = gguf_type_size(type); + GGML_ASSERT(data.size() % type_size == 0); + const size_t ne = data.size() / type_size; + GGML_ASSERT(is_array || ne == 1); + return ne; + } + + template + const T & get_val(const size_t i = 0) const { + GGML_ASSERT(type_to_gguf_type::value == type); + if constexpr (std::is_same::value) { + GGML_ASSERT(data_string.size() >= i+1); + return data_string[i]; + } + const size_t type_size = gguf_type_size(type); + GGML_ASSERT(data.size() % type_size == 0); + GGML_ASSERT(data.size() >= (i+1)*type_size); + return reinterpret_cast(data.data())[i]; + } + + void cast(const enum gguf_type new_type) { + const size_t new_type_size = gguf_type_size(new_type); + GGML_ASSERT(data.size() % new_type_size == 0); + type = new_type; + } +}; + +struct gguf_tensor_info { + struct ggml_tensor t; // for holding the equivalent info + uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT` +}; + +struct gguf_context { + uint32_t version = GGUF_VERSION; + + std::vector kv; + std::vector info; + + size_t alignment = GGUF_DEFAULT_ALIGNMENT; + size_t offset = 0; // offset of `data` from beginning of file + size_t size = 0; // size of `data` in bytes + + void * data = nullptr; +}; + +struct gguf_reader { + FILE * file; + + gguf_reader(FILE * file) : file(file) {} + + template + bool read(T & dst) { + return fread(&dst, 1, sizeof(dst), file) == sizeof(dst); + } + + template + bool read(std::vector & dst, const size_t n) { + dst.resize(n); + for (size_t i = 0; i < dst.size(); ++i) { + if constexpr (std::is_same::value) { + bool tmp; + if (!read(tmp)) { + return false; 
+ } + dst[i] = tmp; + } else { + if (!read(dst[i])) { + return false; + } + } + } + return true; + } + + bool read(bool & dst) { + int8_t tmp = -1; + if (!read(tmp)) { + return false; + } + dst = tmp != 0; + return true; + } + + bool read(enum ggml_type & dst) { + int32_t tmp = -1; + if (!read(tmp)) { + return false; + } + dst = ggml_type(tmp); + return true; + } + + bool read(enum gguf_type & dst) { + int32_t tmp = -1; + if (!read(tmp)) { + return false; + } + dst = gguf_type(tmp); + return true; + } + + bool read(std::string & dst) { + uint64_t size = -1; + if (!read(size)) { + return false; + } + dst.resize(size); + return fread(dst.data(), 1, dst.length(), file) == dst.length(); + } + + bool read(void * dst, const size_t size) { + return fread(dst, 1, size, file) == size; + } +}; + +struct gguf_context * gguf_init_empty(void) { + return new gguf_context; +} + +template +bool gguf_read_emplace_helper(struct gguf_reader & gr, std::vector & kv, const std::string & key, const bool is_array, const size_t n) { + if (is_array) { + std::vector value; + try { + if (!gr.read(value, n)) { + return false; + } + } catch (std::length_error &) { + fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str()); + return false; + } catch (std::bad_alloc &) { + fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str()); + return false; + } + kv.emplace_back(key, value); + } else { + T value; + if (!gr.read(value)) { + return false; + } + kv.emplace_back(key, value); + } + return true; +} + +struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) { + struct gguf_reader gr(file); + struct gguf_context * ctx = new gguf_context; + + bool ok = true; + + // file magic + { + std::vector magic; + ok = ok && gr.read(magic, 4); + + if (!ok) { + fprintf(stderr, "%s: failed to read magic\n", __func__); + gguf_free(ctx); + return nullptr; + } + + for (uint32_t i = 
0; i < magic.size(); i++) { + if (magic[i] != GGUF_MAGIC[i]) { + fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]); + gguf_free(ctx); + return nullptr; + } + } + } + + // header + int64_t n_kv = 0; + int64_t n_tensors = 0; + + if (ok && gr.read(ctx->version)) { + if (ctx->version == 1) { + fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__); + ok = false; + } + if (ctx->version > GGUF_VERSION) { + fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n", + __func__, ctx->version, GGUF_VERSION); + ok = false; + } + } else { + ok = false; + } + + if (ok && gr.read(n_tensors)) { + static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing"); + if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) { + fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n", + __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info)); + ok = false; + } + } else { + ok = false; + } + + if (ok && gr.read(n_kv)) { + static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing"); + if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) { + fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n", + __func__, n_kv, SIZE_MAX/sizeof(gguf_kv)); + ok = false; + } + } else { + ok = false; + } + + if (!ok) { + fprintf(stderr, "%s: failed to read header\n", __func__); + gguf_free(ctx); + return nullptr; + } + + // KV pairs + { + for (int64_t i = 0; ok && i < n_kv; ++i) { + std::string key; + gguf_type type = gguf_type(-1); + bool is_array = false; + uint64_t n = 1; + + try { + ok = ok && gr.read(key); + } catch (std::length_error &) { + fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i); + ok = false; + } catch 
(std::bad_alloc &) { + fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i); + ok = false; + } + for (size_t j = 0; ok && j < ctx->kv.size(); ++j) { + if (key == ctx->kv[j].key) { + fprintf(stderr, "%s: duplicate key '%s' for tensors %" PRIi64 " and %" PRIi64 " \n", __func__, key.c_str(), j, i); + ok = false; + } + } + if (!ok) { + break; + } + + ok = ok && gr.read(type); + if (type == GGUF_TYPE_ARRAY) { + is_array = true; + ok = ok && gr.read(type); + ok = ok && gr.read(n); + } + if (!ok) { + break; + } + + switch (type) { + case GGUF_TYPE_UINT8: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_INT8: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_UINT16: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_INT16: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_UINT32: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_INT32: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_FLOAT32: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_BOOL: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_STRING: ok = ok && gguf_read_emplace_helper(gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_UINT64: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_INT64: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_FLOAT64: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; + case GGUF_TYPE_ARRAY: + default: + { + fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type); + ok = false; + } break; + } + } + + if (!ok) { + fprintf(stderr, "%s: failed to read 
key-value pairs\n", __func__); + gguf_free(ctx); + return nullptr; + } + GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv); + + const int alignment_idx = gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT); + ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx); + + if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) { + fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment); + gguf_free(ctx); + return nullptr; + } + } + + // read the tensor info + for (int64_t i = 0; ok && i < n_tensors; ++i) { + struct gguf_tensor_info info; + + // tensor name + { + std::string name; + try { + ok = ok && gr.read(name); + } catch (std::length_error &) { + fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i); + ok = false; + } catch (std::bad_alloc &) { + fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i); + ok = false; + } + if (name.length() >= GGML_MAX_NAME) { + fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME); + ok = false; + break; + } + ggml_set_name(&info.t, name.c_str()); + + // make sure there are no duplicate tensor names + for (int64_t j = 0; ok && j < i; ++j) { + if (strcmp(info.t.name, ctx->info[j].t.name) == 0) { + fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i); + ok = false; + break; + } + } + } + if (!ok) { + break; + } + + // tensor shape + { + uint32_t n_dims = -1; + ok = ok && gr.read(n_dims); + if (n_dims > GGML_MAX_DIMS) { + fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n", + __func__, info.t.name, n_dims, GGML_MAX_DIMS); + ok = false; + break; + } + for (uint32_t j = 0; ok && j < GGML_MAX_DIMS; ++j) { + info.t.ne[j] = 1; + if (j < n_dims) { + ok = ok && gr.read(info.t.ne[j]); + } + + // 
check that all ne are non-negative + if (info.t.ne[j] < 0) { + fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n", + __func__, info.t.name, j, info.t.ne[j]); + ok = false; + break; + } + } + + // check that the total number of elements is representable + if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) || + (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) || + (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) { + + fprintf(stderr, "%s: total number of elements in tensor '%s' with shape " + "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n", + __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX); + ok = false; + break; + } + } + if (!ok) { + break; + } + + // tensor type + { + ok = ok && gr.read(info.t.type); + + // check that tensor type is within defined range + if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) { + fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n", + __func__, info.t.name, info.t.type, ggml_type_name(info.t.type)); + ok = false; + break; + } + const size_t type_size = ggml_type_size(info.t.type); + const int64_t blck_size = ggml_blck_size(info.t.type); + + // check that row size is divisible by block size + if (blck_size == 0 || info.t.ne[0] % blck_size != 0) { + fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, " + "not a multiple of block size (%" PRId64 ")\n", + __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size); + ok = false; + break; + } + + // calculate byte offsets given the tensor shape and type + info.t.nb[0] = type_size; + info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size); + for (int j = 2; j < GGML_MAX_DIMS; ++j) { + info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1]; + } + } + if (!ok) { + break; + } + + // tensor data offset within buffer + ok = ok && gr.read(info.offset); + + 
ctx->info.push_back(info); + } + + if (!ok) { + fprintf(stderr, "%s: failed to read tensor info\n", __func__); + gguf_free(ctx); + return nullptr; + } + GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors); + + // we require the data section to be aligned, so take into account any padding + if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) { + fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__); + gguf_free(ctx); + return nullptr; + } + + // store the current file offset - this is where the data section starts + ctx->offset = ftell(file); + + // compute the total size of the data section, taking into account the alignment + { + ctx->size = 0; + for (size_t i = 0; i < ctx->info.size(); ++i) { + const gguf_tensor_info & ti = ctx->info[i]; + if (ti.offset != ctx->size) { + fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n", + __func__, ti.t.name, ti.offset, ctx->size); + fprintf(stderr, "%s: failed to read tensor data\n", __func__); + gguf_free(ctx); + return nullptr; + } + ctx->size += GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment); + } + } + + // load the tensor data only if requested + if (params.ctx != nullptr) { + // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob + // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of + // the ggml_tensor structs to the appropriate locations in the binary blob + + // compute the exact size needed for the new ggml_context + const size_t mem_size = + params.no_alloc ? 
+ (n_tensors )*ggml_tensor_overhead() : + (n_tensors + 1)*ggml_tensor_overhead() + ctx->size; + + struct ggml_init_params pdata = { + /*mem_size =*/ mem_size, + /*mem_buffer =*/ nullptr, + /*no_alloc =*/ params.no_alloc, + }; + + *params.ctx = ggml_init(pdata); + if (*params.ctx == nullptr) { + fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__); + gguf_free(ctx); + return nullptr; + } + + struct ggml_context * ctx_data = *params.ctx; + + struct ggml_tensor * data = nullptr; + + if (!params.no_alloc) { + data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size); + + ok = ok && data != nullptr; + + // read the binary blob with the tensor data + ok = ok && gr.read(data->data, ctx->size); + + if (!ok) { + fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__); + ggml_free(ctx_data); + *params.ctx = nullptr; + gguf_free(ctx); + return nullptr; + } + + ctx->data = data->data; + } + + ggml_set_no_alloc(ctx_data, true); + + // create the tensors + for (size_t i = 0; i < ctx->info.size(); ++i) { + const struct gguf_tensor_info & info = ctx->info[i]; + + struct ggml_tensor * cur = ggml_new_tensor(ctx_data, info.t.type, GGML_MAX_DIMS, info.t.ne); + + ok = ok && cur != nullptr; + + if (!ok) { + break; + } + + ggml_set_name(cur, info.t.name); + + // point the data member to the appropriate location in the binary blob using the tensor info + if (!params.no_alloc) { + cur->data = (char *) data->data + info.offset; + } + } + + if (!ok) { + fprintf(stderr, "%s: failed to create tensors\n", __func__); + ggml_free(ctx_data); + *params.ctx = nullptr; + gguf_free(ctx); + return nullptr; + } + + ggml_set_no_alloc(ctx_data, params.no_alloc); + } + + return ctx; +} + +struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { + FILE * file = ggml_fopen(fname, "rb"); + + if (!file) { + fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname); + return nullptr; + } + + struct 
gguf_context * result = gguf_init_from_file_impl(file, params); + fclose(file); + return result; +} + +void gguf_free(struct gguf_context * ctx) { + if (ctx == nullptr) { + return; + } + delete ctx; +} + +const char * gguf_type_name(enum gguf_type type) { + auto it = GGUF_TYPE_NAME.find(type); + return it == GGUF_TYPE_NAME.end() ? nullptr : it->second; +} + +uint32_t gguf_get_version(const struct gguf_context * ctx) { + return ctx->version; +} + +size_t gguf_get_alignment(const struct gguf_context * ctx) { + return ctx->alignment; +} + +size_t gguf_get_data_offset(const struct gguf_context * ctx) { + return ctx->offset; +} + +int64_t gguf_get_n_kv(const struct gguf_context * ctx) { + return ctx->kv.size(); +} + +int64_t gguf_find_key(const struct gguf_context * ctx, const char * key) { + // return -1 if key not found + int64_t keyfound = -1; + + const int64_t n_kv = gguf_get_n_kv(ctx); + + for (int64_t i = 0; i < n_kv; ++i) { + if (strcmp(key, gguf_get_key(ctx, i)) == 0) { + keyfound = i; + break; + } + } + + return keyfound; +} + +const char * gguf_get_key(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + return ctx->kv[key_id].get_key().c_str(); +} + +enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + return ctx->kv[key_id].is_array ? 
GGUF_TYPE_ARRAY : ctx->kv[key_id].get_type(); +} + +enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].is_array); + return ctx->kv[key_id].get_type(); +} + +const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING); + return ctx->kv[key_id].data.data(); +} + +const char * gguf_get_arr_str(const struct gguf_context * ctx, int64_t key_id, size_t i) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_type() == GGUF_TYPE_STRING); + return ctx->kv[key_id].data_string[i].c_str(); +} + +size_t gguf_get_arr_n(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + + if (ctx->kv[key_id].type == GGUF_TYPE_STRING) { + return ctx->kv[key_id].data_string.size(); + } + + const size_t type_size = gguf_type_size(ctx->kv[key_id].type); + GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0); + return ctx->kv[key_id].data.size() / type_size; +} + +uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +int8_t gguf_get_val_i8(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +int16_t gguf_get_val_i16(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < 
gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +int32_t gguf_get_val_i32(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +float gguf_get_val_f32(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +int64_t gguf_get_val_i64(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +double gguf_get_val_f64(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val(); +} + +const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + return ctx->kv[key_id].get_val().c_str(); +} + +const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id) { + 
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].get_ne() == 1); + GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING); + return ctx->kv[key_id].data.data(); +} + +int64_t gguf_get_n_tensors(const struct gguf_context * ctx) { + return ctx->info.size(); +} + +int64_t gguf_find_tensor(const struct gguf_context * ctx, const char * name) { + // return -1 if tensor not found + int64_t tensor_id = -1; + + const int64_t n_tensors = gguf_get_n_tensors(ctx); + + for (int64_t i = 0; i < n_tensors; ++i) { + if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) { + tensor_id = i; + break; + } + } + + return tensor_id; +} + +size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id) { + GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx)); + return ctx->info[tensor_id].offset; +} + +const char * gguf_get_tensor_name(const struct gguf_context * ctx, int64_t tensor_id) { + GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx)); + return ctx->info[tensor_id].t.name; +} + +enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int64_t tensor_id) { + GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx)); + return ctx->info[tensor_id].t.type; +} + +size_t gguf_get_tensor_size(const struct gguf_context * ctx, int64_t tensor_id) { + GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx)); + return ggml_nbytes(&ctx->info[tensor_id].t); +} + +int64_t gguf_remove_key(struct gguf_context * ctx, const char * key) { + const int64_t key_id = gguf_find_key(ctx, key); + if (key_id >= 0) { + ctx->kv.erase(ctx->kv.begin() + key_id); + } + return key_id; +} + +template +static void gguf_check_reserved_keys(const std::string & key, const T val) { + if (key == GGUF_KEY_GENERAL_ALIGNMENT) { + if constexpr (std::is_same::value) { + GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2"); + } else { + 
GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32"); + } + } +} + +void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) { + gguf_check_reserved_keys(key, val); + gguf_remove_key(ctx, key); + ctx->kv.emplace_back(key, val); +} + +void gguf_set_val_bool(struct 
gguf_context * ctx, const char * key, bool val) {
+    gguf_check_reserved_keys(key, val);
+    gguf_remove_key(ctx, key);
+    ctx->kv.emplace_back(key, val);
+}
+
+// Set the string value for `key`: any existing pair with that key is removed first,
+// then a new pair holding a copy of `val` is appended.
+void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
+    gguf_check_reserved_keys(key, val);
+    gguf_remove_key(ctx, key);
+    ctx->kv.emplace_back(key, std::string(val));
+}
+
+// Set an array value for `key` from a raw buffer.
+// Copies n*gguf_type_size(type) bytes from `data`; any existing pair with that key is removed first.
+void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n) {
+    gguf_check_reserved_keys(key, data);
+    gguf_remove_key(ctx, key);
+
+    // stage the payload as plain bytes, then re-tag the new pair with the real element type
+    const size_t nbytes = n*gguf_type_size(type);
+    std::vector<int8_t> tmp(nbytes);
+    if (!tmp.empty()) {
+        memcpy(tmp.data(), data, nbytes);
+    }
+    ctx->kv.emplace_back(key, tmp);
+    ctx->kv.back().cast(type);
+}
+
+// Set an array of `n` strings for `key`; each data[i] is copied into the context.
+// Any existing pair with that key is removed first.
+void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, size_t n) {
+    gguf_check_reserved_keys(key, data);
+    gguf_remove_key(ctx, key);
+
+    std::vector<std::string> tmp(n);
+    for (size_t i = 0; i < n; ++i) {
+        tmp[i] = data[i];
+    }
+    ctx->kv.emplace_back(key, tmp);
+}
+
+// set or add KV pairs from another context
+// Every pair of `src` is written to `ctx` through the typed setters, so a key that
+// already exists in `ctx` is replaced and a new key is appended.
+void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src) {
+    // the loop indexes src->kv, so the pair count has to come from `src`, not from `ctx`
+    const int64_t n_kv = gguf_get_n_kv(src);
+    for (int64_t i = 0; i < n_kv; ++i) {
+        const struct gguf_kv & kv = src->kv[i];
+
+        // scalar values: dispatch on the stored type
+        if (!kv.is_array) {
+            switch (kv.get_type()) {
+                case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>());             break;
+                case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, kv.get_key().c_str(), kv.get_val<int8_t>());              break;
+                case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>());            break;
+                case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>());             break;
+                case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>());            break;
+                case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>());             break;
+                case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>());               break;
+                case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>());            break;
+                case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>());             break;
+                case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>());              break;
+                case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>());                break;
+                case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
+                case GGUF_TYPE_ARRAY:
+                default: GGML_ABORT("invalid type");
+            }
+            continue;
+        }
+
+        // array values: numeric/bool arrays are copied as a raw byte buffer, string arrays element by element
+        const size_t ne = kv.get_ne();
+
+        switch (kv.get_type()) {
+            case GGUF_TYPE_UINT8:
+            case GGUF_TYPE_INT8:
+            case GGUF_TYPE_UINT16:
+            case GGUF_TYPE_INT16:
+            case GGUF_TYPE_UINT32:
+            case GGUF_TYPE_INT32:
+            case GGUF_TYPE_FLOAT32:
+            case GGUF_TYPE_UINT64:
+            case GGUF_TYPE_INT64:
+            case GGUF_TYPE_FLOAT64:
+            case GGUF_TYPE_BOOL: {
+                gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
+            } break;
+            case GGUF_TYPE_STRING: {
+                // gguf_set_arr_str takes C strings, so collect pointers into the source strings
+                std::vector<const char *> tmp(ne);
+                for (size_t j = 0; j < ne; ++j) {
+                    tmp[j] = kv.data_string[j].c_str();
+                }
+                gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
+            } break;
+            case GGUF_TYPE_ARRAY:
+            default: GGML_ABORT("invalid type");
+        }
+    }
+}
+
+// Append the metadata of `tensor` to the context; aborts if a tensor with the same name is already registered.
+void gguf_add_tensor(
+    struct gguf_context * ctx,
+    const struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor);
+    if (gguf_find_tensor(ctx, tensor->name) != -1) {
+        GGML_ABORT("duplicate tensor name: %s", tensor->name);
+    }
+
+    struct gguf_tensor_info ti;
+    ti.t = *tensor;
+    ti.offset = ctx->info.empty() ?
0 : + ctx->info.back().offset + GGML_PAD(ggml_nbytes(&ctx->info.back().t), ctx->alignment); + ctx->info.push_back(ti); +} + +void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) { + const int64_t tensor_id = gguf_find_tensor(ctx, name); + if (tensor_id < 0) { + GGML_ABORT("tensor not found: %s", name); + } + struct ggml_tensor * tensor = &ctx->info[tensor_id].t; + const size_t type_size = ggml_type_size(type); + const int64_t blck_size = ggml_blck_size(type); + + tensor->type = type; + GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type"); + + tensor->nb[0] = type_size; + tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size); + for (int i = 2; i < GGML_MAX_DIMS; i++) { + tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1]; + } + + // update offsets + const int64_t n_tensors = gguf_get_n_tensors(ctx); + for (int64_t i = tensor_id + 1; i < n_tensors; ++i) { + ctx->info[i].offset = ctx->info[i - 1].offset + GGML_PAD(ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment); + } +} + +void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data) { + const int64_t tensor_id = gguf_find_tensor(ctx, name); + if (tensor_id < 0) { + GGML_ABORT("tensor not found: %s", name); + } + + ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const +} + +struct gguf_writer { + std::vector & buf; + + gguf_writer(std::vector & buf) : buf(buf) {} + + template + void write(const T & val) { + for (size_t i = 0; i < sizeof(val); ++i) { + buf.push_back(reinterpret_cast(&val)[i]); + } + } + + void write(const std::vector & val) { + buf.insert(buf.end(), val.begin(), val.end()); + } + + void write(const bool & val) { + const int8_t val8 = val ? 
1 : 0; + write(val8); + } + + void write(const std::string & val) { + { + const uint64_t n = val.length(); + write(n); + } + for (size_t i = 0; i < val.length(); ++i) { + buf.push_back(reinterpret_cast(val.data())[i]); + } + } + + void write(const char * val) { + write(std::string(val)); + } + + void write(const enum ggml_type & val) { + write(int32_t(val)); + } + + void write(const enum gguf_type & val) { + write(int32_t(val)); + } + + void write(const struct gguf_kv & kv) { + const uint64_t ne = kv.get_ne(); + + write(kv.get_key()); + + if (kv.is_array) { + write(GGUF_TYPE_ARRAY); + write(kv.get_type()); + write(ne); + } else { + write(kv.get_type()); + } + + switch (kv.get_type()) { + case GGUF_TYPE_UINT8: + case GGUF_TYPE_INT8: + case GGUF_TYPE_UINT16: + case GGUF_TYPE_INT16: + case GGUF_TYPE_UINT32: + case GGUF_TYPE_INT32: + case GGUF_TYPE_FLOAT32: + case GGUF_TYPE_UINT64: + case GGUF_TYPE_INT64: + case GGUF_TYPE_FLOAT64: { + write(kv.data); + } break; + case GGUF_TYPE_BOOL: { + for (size_t i = 0; i < ne; ++i) { + write(kv.get_val(i)); + } + } break; + case GGUF_TYPE_STRING: { + for (size_t i = 0; i < ne; ++i) { + write(kv.get_val(i)); + } + } break; + case GGUF_TYPE_ARRAY: + default: GGML_ABORT("invalid type"); + } + } + + void write_tensor_meta(const struct gguf_tensor_info & info) { + write(info.t.name); + + const uint32_t n_dims = ggml_n_dims(&info.t); + write(n_dims); + + for (uint32_t j = 0; j < n_dims; ++j) { + write(info.t.ne[j]); + } + write(info.t.type); + write(info.offset); + } + + void pad(const size_t alignment) { + while (buf.size() % alignment != 0) { + const int8_t zero = 0; + write(zero); + } + } + + void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) { + GGML_ASSERT(buf.size() - offset_data == info.offset); + + GGML_ASSERT(ggml_is_contiguous(&info.t)); + const size_t offset = buf.size(); + const size_t nbytes = ggml_nbytes(&info.t); + + buf.resize(offset + nbytes); + if 
(info.t.buffer) { + ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes); + } else { + GGML_ASSERT(info.t.data); + memcpy(buf.data() + offset, info.t.data, nbytes); + } + + pad(alignment); + } +}; + +void gguf_write_to_buf(const struct gguf_context * ctx, std::vector & buf, bool only_meta) { + struct gguf_writer gw(buf); + + const int64_t n_kv = gguf_get_n_kv(ctx); + const int64_t n_tensors = gguf_get_n_tensors(ctx); + + // write header + gw.write(GGUF_MAGIC[0]); + gw.write(GGUF_MAGIC[1]); + gw.write(GGUF_MAGIC[2]); + gw.write(GGUF_MAGIC[3]); + gw.write(ctx->version); + gw.write(n_tensors); + gw.write(n_kv); + + // write key-value pairs + for (int64_t i = 0; i < n_kv; ++i) { + gw.write(ctx->kv[i]); + } + + // write tensor info + for (int64_t i = 0; i < n_tensors; ++i) { + gw.write_tensor_meta(ctx->info[i]); + } + + // we require the data section to be aligned + gw.pad(ctx->alignment); + + if (only_meta) { + return; + } + + const size_t offset_data = gw.buf.size(); + + // write tensor data + for (int64_t i = 0; i < n_tensors; ++i) { + gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment); + } +} + +bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { + FILE * file = ggml_fopen(fname, "wb"); + + if (!file) { + fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname); + return false; + } + + std::vector buf; + gguf_write_to_buf(ctx, buf, only_meta); + const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size(); + fclose(file); + return ok; +} + +size_t gguf_get_meta_size(const struct gguf_context * ctx) { + // only return size + std::vector buf; + gguf_write_to_buf(ctx, buf, /*only_meta =*/ true); + return buf.size(); +} + +void gguf_get_meta_data(const struct gguf_context * ctx, void * data) { + std::vector buf; + gguf_write_to_buf(ctx, buf, /*only_meta =*/ true); + memcpy(data, buf.data(), buf.size()); +} diff --git a/src/llama-impl.cpp b/src/llama-impl.cpp 
index a05ba4f635c01..6ec709dd323a6 100644 --- a/src/llama-impl.cpp +++ b/src/llama-impl.cpp @@ -1,5 +1,6 @@ #include "llama-impl.h" +#include "gguf.h" #include "llama.h" #include @@ -138,7 +139,7 @@ std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { { const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); int arr_n = gguf_get_arr_n(ctx_gguf, i); - const void * data = gguf_get_arr_data(ctx_gguf, i); + const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i); std::stringstream ss; ss << "["; for (int j = 0; j < arr_n; j++) { diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp index 7743b46522ce5..33e4ae1373a1a 100644 --- a/src/llama-model-loader.cpp +++ b/src/llama-model-loader.cpp @@ -18,7 +18,7 @@ const char * llama_file_version_name(llama_fver version) { } namespace GGUFMeta { - template + template struct GKV_Base_Type { static constexpr gguf_type gt = gt_; @@ -60,10 +60,11 @@ namespace GGUFMeta { public: static constexpr gguf_type gt = GGUF_TYPE_ARRAY; static ArrayInfo getter(const gguf_context *ctx, const int k) { + const enum gguf_type arr_type = gguf_get_arr_type(ctx, k); return ArrayInfo { - gguf_get_arr_type(ctx, k), + arr_type, size_t(gguf_get_arr_n(ctx, k)), - gguf_get_arr_data(ctx, k), + arr_type == GGUF_TYPE_STRING ? 
nullptr : gguf_get_arr_data(ctx, k), }; } }; diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 42974f8f13a36..20ead0829e230 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -875,7 +875,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // update the gguf meta data as we go gguf_set_tensor_type(ctx_outs[cur_split].get(), name.c_str(), new_type); - gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data, new_size); + GGML_ASSERT(gguf_get_tensor_size(ctx_outs[cur_split].get(), gguf_find_tensor(ctx_outs[cur_split].get(), name.c_str())) == new_size); + gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data); // write tensor data + padding fout.write((const char *) new_data, new_size); diff --git a/tests/test-gguf.cpp b/tests/test-gguf.cpp index 1bb5fb47c4317..452bd55768163 100644 --- a/tests/test-gguf.cpp +++ b/tests/test-gguf.cpp @@ -15,66 +15,71 @@ constexpr int offset_has_tensors = 2000; constexpr int offset_has_data = 3000; enum handcrafted_file_type { - HANDCRAFTED_HEADER_BAD_MAGIC = 10, - HANDCRAFTED_HEADER_BAD_VERSION_1 = 20, - HANDCRAFTED_HEADER_BAD_VERSION_FUTURE = 30, - HANDCRAFTED_HEADER_BAD_N_TENSORS = 40, - HANDCRAFTED_HEADER_BAD_N_KV = 50, - HANDCRAFTED_HEADER_EMPTY = 800, - - HANDCRAFTED_KV_BAD_KEY_SIZE = 10 + offset_has_kv, - HANDCRAFTED_KV_BAD_TYPE = 20 + offset_has_kv, - HANDCRAFTED_KV_BAD_VALUE_SIZE = 30 + offset_has_kv, - HANDCRAFTED_KV_DUPLICATE_KEY = 40 + offset_has_kv, - HANDCRAFTED_KV_SUCCESS = 800 + offset_has_kv, - - HANDCRAFTED_TENSORS_BAD_NAME_SIZE = 10 + offset_has_tensors, - HANDCRAFTED_TENSORS_BAD_N_DIMS = 20 + offset_has_tensors, - HANDCRAFTED_TENSORS_BAD_SHAPE = 30 + offset_has_tensors, - HANDCRAFTED_TENSORS_NE_TOO_BIG = 40 + offset_has_tensors, - HANDCRAFTED_TENSORS_BAD_TYPE = 50 + offset_has_tensors, - HANDCRAFTED_TENSORS_BAD_OFFSET = 60 + offset_has_tensors, - HANDCRAFTED_TENSORS_DUPLICATE_NAME = 70 + offset_has_tensors, - 
HANDCRAFTED_TENSORS_BAD_ALIGNMENT = 80 + offset_has_tensors, - HANDCRAFTED_TENSORS_SUCCESS = 800 + offset_has_tensors, - HANDCRAFTED_TENSORS_CUSTOM_ALIGN = 810 + offset_has_tensors, - - HANDCRAFTED_DATA_NOT_ENOUGH_DATA = 10 + offset_has_data, - HANDCRAFTED_DATA_BAD_ALIGNMENT = 20 + offset_has_data, - HANDCRAFTED_DATA_SUCCESS = 800 + offset_has_data, - HANDCRAFTED_DATA_CUSTOM_ALIGN = 810 + offset_has_data, + HANDCRAFTED_HEADER_BAD_MAGIC = 10, + HANDCRAFTED_HEADER_BAD_VERSION_1 = 20, + HANDCRAFTED_HEADER_BAD_VERSION_FUTURE = 30, + HANDCRAFTED_HEADER_BAD_N_TENSORS = 40, + HANDCRAFTED_HEADER_BAD_N_KV = 50, + HANDCRAFTED_HEADER_EMPTY = 800, + + HANDCRAFTED_KV_BAD_KEY_SIZE = 10 + offset_has_kv, + HANDCRAFTED_KV_BAD_TYPE = 20 + offset_has_kv, + // HANDCRAFTED_KV_BAD_VALUE_SIZE = 30 + offset_has_kv, // removed because it can result in allocations > 1 TB (default sanitizer limit) + HANDCRAFTED_KV_DUPLICATE_KEY = 40 + offset_has_kv, + HANDCRAFTED_KV_BAD_ALIGN = 50 + offset_has_kv, + HANDCRAFTED_KV_SUCCESS = 800 + offset_has_kv, + + HANDCRAFTED_TENSORS_BAD_NAME_SIZE = 10 + offset_has_tensors, + HANDCRAFTED_TENSORS_BAD_N_DIMS = 20 + offset_has_tensors, + HANDCRAFTED_TENSORS_BAD_SHAPE = 30 + offset_has_tensors, + HANDCRAFTED_TENSORS_NE_TOO_BIG = 40 + offset_has_tensors, + HANDCRAFTED_TENSORS_BAD_TYPE = 50 + offset_has_tensors, + HANDCRAFTED_TENSORS_BAD_OFFSET = 60 + offset_has_tensors, + HANDCRAFTED_TENSORS_DUPLICATE_NAME = 70 + offset_has_tensors, + HANDCRAFTED_TENSORS_BAD_ALIGN = 75 + offset_has_tensors, + HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN = 80 + offset_has_tensors, + HANDCRAFTED_TENSORS_SUCCESS = 800 + offset_has_tensors, + HANDCRAFTED_TENSORS_CUSTOM_ALIGN = 810 + offset_has_tensors, + + HANDCRAFTED_DATA_NOT_ENOUGH_DATA = 10 + offset_has_data, + HANDCRAFTED_DATA_BAD_ALIGN = 15 + offset_has_data, + HANDCRAFTED_DATA_INCONSISTENT_ALIGN = 20 + offset_has_data, + HANDCRAFTED_DATA_SUCCESS = 800 + offset_has_data, + HANDCRAFTED_DATA_CUSTOM_ALIGN = 810 + offset_has_data, }; 
std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) { switch (hft) { - case HANDCRAFTED_HEADER_BAD_MAGIC: return "HEADER_BAD_MAGIC"; - case HANDCRAFTED_HEADER_BAD_VERSION_1: return "HEADER_BAD_VERSION_1"; - case HANDCRAFTED_HEADER_BAD_VERSION_FUTURE: return "HEADER_BAD_VERSION_FUTURE"; - case HANDCRAFTED_HEADER_BAD_N_KV: return "HEADER_BAD_N_KV"; - case HANDCRAFTED_HEADER_BAD_N_TENSORS: return "HEADER_BAD_N_TENSORS"; - case HANDCRAFTED_HEADER_EMPTY: return "HEADER_EMPTY"; - - case HANDCRAFTED_KV_BAD_KEY_SIZE: return "KV_BAD_KEY_SIZE"; - case HANDCRAFTED_KV_BAD_TYPE: return "KV_BAD_TYPE"; - case HANDCRAFTED_KV_BAD_VALUE_SIZE: return "KV_BAD_VALUE_SIZE"; - case HANDCRAFTED_KV_DUPLICATE_KEY: return "KV_DUPLICATE_KEY"; - case HANDCRAFTED_KV_SUCCESS: return "KV_RANDOM_KV"; - - case HANDCRAFTED_TENSORS_BAD_NAME_SIZE: return "TENSORS_BAD_NAME_SIZE"; - case HANDCRAFTED_TENSORS_BAD_N_DIMS: return "TENSORS_BAD_N_DIMS"; - case HANDCRAFTED_TENSORS_BAD_SHAPE: return "TENSORS_BAD_SHAPE"; - case HANDCRAFTED_TENSORS_NE_TOO_BIG: return "TENSORS_NE_TOO_BIG"; - case HANDCRAFTED_TENSORS_BAD_TYPE: return "TENSORS_BAD_TYPE"; - case HANDCRAFTED_TENSORS_BAD_OFFSET: return "TENSORS_BAD_OFFSET"; - case HANDCRAFTED_TENSORS_DUPLICATE_NAME: return "TENSORS_DUPLICATE_NAME"; - case HANDCRAFTED_TENSORS_BAD_ALIGNMENT: return "TENSORS_BAD_ALIGNMENT"; - case HANDCRAFTED_TENSORS_SUCCESS: return "TENSORS_SUCCESS"; - case HANDCRAFTED_TENSORS_CUSTOM_ALIGN: return "TENSORS_CUSTOM_ALIGN"; - - case HANDCRAFTED_DATA_NOT_ENOUGH_DATA: return "DATA_NOT_ENOUGH_DATA"; - case HANDCRAFTED_DATA_BAD_ALIGNMENT: return "DATA_BAD_ALIGNMENT"; - case HANDCRAFTED_DATA_SUCCESS: return "DATA_SUCCESS"; - case HANDCRAFTED_DATA_CUSTOM_ALIGN: return "DATA_CUSTOM_ALIGN"; + case HANDCRAFTED_HEADER_BAD_MAGIC: return "HEADER_BAD_MAGIC"; + case HANDCRAFTED_HEADER_BAD_VERSION_1: return "HEADER_BAD_VERSION_1"; + case HANDCRAFTED_HEADER_BAD_VERSION_FUTURE: return "HEADER_BAD_VERSION_FUTURE"; + case 
HANDCRAFTED_HEADER_BAD_N_KV: return "HEADER_BAD_N_KV"; + case HANDCRAFTED_HEADER_BAD_N_TENSORS: return "HEADER_BAD_N_TENSORS"; + case HANDCRAFTED_HEADER_EMPTY: return "HEADER_EMPTY"; + + case HANDCRAFTED_KV_BAD_KEY_SIZE: return "KV_BAD_KEY_SIZE"; + case HANDCRAFTED_KV_BAD_TYPE: return "KV_BAD_TYPE"; + case HANDCRAFTED_KV_DUPLICATE_KEY: return "KV_DUPLICATE_KEY"; + case HANDCRAFTED_KV_BAD_ALIGN: return "KV_BAD_ALIGN"; + case HANDCRAFTED_KV_SUCCESS: return "KV_RANDOM_KV"; + + case HANDCRAFTED_TENSORS_BAD_NAME_SIZE: return "TENSORS_BAD_NAME_SIZE"; + case HANDCRAFTED_TENSORS_BAD_N_DIMS: return "TENSORS_BAD_N_DIMS"; + case HANDCRAFTED_TENSORS_BAD_SHAPE: return "TENSORS_BAD_SHAPE"; + case HANDCRAFTED_TENSORS_NE_TOO_BIG: return "TENSORS_NE_TOO_BIG"; + case HANDCRAFTED_TENSORS_BAD_TYPE: return "TENSORS_BAD_TYPE"; + case HANDCRAFTED_TENSORS_BAD_OFFSET: return "TENSORS_BAD_OFFSET"; + case HANDCRAFTED_TENSORS_DUPLICATE_NAME: return "TENSORS_DUPLICATE_NAME"; + case HANDCRAFTED_TENSORS_BAD_ALIGN: return "TENSORS_BAD_ALIGN"; + case HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN: return "TENSORS_INCONSISTENT_ALIGN"; + case HANDCRAFTED_TENSORS_SUCCESS: return "TENSORS_SUCCESS"; + case HANDCRAFTED_TENSORS_CUSTOM_ALIGN: return "TENSORS_CUSTOM_ALIGN"; + + case HANDCRAFTED_DATA_NOT_ENOUGH_DATA: return "DATA_NOT_ENOUGH_DATA"; + case HANDCRAFTED_DATA_BAD_ALIGN: return "DATA_BAD_ALIGN"; + case HANDCRAFTED_DATA_INCONSISTENT_ALIGN: return "DATA_INCONSISTENT_ALIGN"; + case HANDCRAFTED_DATA_SUCCESS: return "DATA_SUCCESS"; + case HANDCRAFTED_DATA_CUSTOM_ALIGN: return "DATA_CUSTOM_ALIGN"; } GGML_ABORT("fatal error"); } @@ -140,31 +145,41 @@ std::vector> get_kv_types(std::mt19937 return kv_types; } -static void helper_write(const void * data, const size_t nbytes, FILE * file) { +template +static void helper_write(FILE * file, const T & val) { + GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val)); +} + +static void helper_write(FILE * file, const void * data, const size_t nbytes) { 
GGML_ASSERT(fwrite(data, 1, nbytes, file) == nbytes); } static FILE * get_handcrafted_file(const unsigned int seed, const enum handcrafted_file_type hft, const int extra_bytes = 0) { FILE * file = tmpfile(); + if (!file) { + return file; + } + std::mt19937 rng(seed); + uint32_t alignment = GGUF_DEFAULT_ALIGNMENT; if (hft == HANDCRAFTED_HEADER_BAD_MAGIC) { const char bad_magic[4] = {'F', 'U', 'G', 'G'}; - helper_write(bad_magic, sizeof(bad_magic), file); + helper_write(file, bad_magic, sizeof(bad_magic)); } else { - helper_write(GGUF_MAGIC, 4, file); + helper_write(file, GGUF_MAGIC, 4); } if (hft == HANDCRAFTED_HEADER_BAD_VERSION_1) { const uint32_t version = 1; - helper_write(&version, sizeof(version), file); + helper_write(file, version); } else if (hft == HANDCRAFTED_HEADER_BAD_VERSION_FUTURE) { const uint32_t version = GGUF_VERSION + 1; - helper_write(&version, sizeof(version), file); + helper_write(file, version); } else { const uint32_t version = GGUF_VERSION; - helper_write(&version, sizeof(version), file); + helper_write(file, version); } std::vector tensor_configs; @@ -174,10 +189,10 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft if (hft == HANDCRAFTED_HEADER_BAD_N_TENSORS) { const uint64_t n_tensors = -1; - helper_write(&n_tensors, sizeof(n_tensors), file); + helper_write(file, n_tensors); } else { const uint64_t n_tensors = tensor_configs.size(); - helper_write(&n_tensors, sizeof(n_tensors), file); + helper_write(file, n_tensors); } std::vector> kv_types; @@ -186,41 +201,49 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft } { uint64_t n_kv = kv_types.size(); - if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) { + if (hft == HANDCRAFTED_KV_BAD_ALIGN || + hft == HANDCRAFTED_TENSORS_BAD_ALIGN || hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || + hft == HANDCRAFTED_DATA_BAD_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) { + n_kv += 1; } else if (hft == 
HANDCRAFTED_HEADER_BAD_N_KV) { n_kv = -1; } - helper_write(&n_kv, sizeof(n_kv), file); + helper_write(file, n_kv); } if (hft < offset_has_kv) { + while (ftell(file) % alignment != 0) { + const char pad = 0; + helper_write(file, pad); + } + for (int i = 0; i < extra_bytes; ++i) { const char tmp = 0; - helper_write(&tmp, sizeof(tmp), file); + helper_write(file, tmp); } rewind(file); return file; } for (int i = 0; i < int(kv_types.size()); ++i) { - const enum gguf_type type = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? -1 : kv_types[i].first); - const enum gguf_type type_arr = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? -1 : kv_types[i].second); + const enum gguf_type type = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types[i].first); + const enum gguf_type type_arr = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types[i].second); const std::string key = "my_key_" + std::to_string((hft == HANDCRAFTED_KV_DUPLICATE_KEY ? i/2 : i)); if (hft == HANDCRAFTED_KV_BAD_KEY_SIZE) { const uint64_t n = -1; - helper_write(&n, sizeof(n), file); + helper_write(file, n); } else { const uint64_t n = key.length(); - helper_write(&n, sizeof(n), file); + helper_write(file, n); } - helper_write(key.data(), key.length(), file); + helper_write(file, key.data(), key.length()); { const int32_t type32 = int32_t(type); - helper_write(&type32, sizeof(type32), file); + helper_write(file, type32); } uint32_t data[16]; @@ -233,69 +256,67 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft if (type == GGUF_TYPE_STRING) { const uint64_t n = rng() % sizeof(data); - helper_write(&n, sizeof(n), file); - helper_write(data, n, file); + helper_write(file, n); + helper_write(file, data, n); continue; } if (type == GGUF_TYPE_ARRAY) { { const int32_t type32 = int32_t(type_arr); - helper_write(&type32, sizeof(type32), file); + helper_write(file, type32); } if (type_arr == GGUF_TYPE_STRING) { const uint64_t nstr = rng() % (16 + 1); - 
helper_write(&nstr, sizeof(nstr), file); + helper_write(file, nstr); for (uint64_t istr = 0; istr < nstr; ++istr) { const uint64_t n = rng() % (sizeof(uint32_t) + 1); - helper_write(&n, sizeof(n), file); - helper_write(&data[istr], n, file); + helper_write(file, n); + helper_write(file, &data[istr], n); } continue; } const size_t type_size = gguf_type_size(type_arr); const uint64_t n = (rng() % sizeof(data)) / type_size; - helper_write(&n, sizeof(n), file); - helper_write(&data, n*type_size, file); + helper_write(file, n); + helper_write(file, &data, n*type_size); continue; } - size_t type_size = hft == HANDCRAFTED_KV_BAD_TYPE ? 1 : gguf_type_size(type); - if (hft == HANDCRAFTED_KV_BAD_VALUE_SIZE) { - type_size += rng() % 3; - } - helper_write(data, type_size, file); + helper_write(file, data, hft == HANDCRAFTED_KV_BAD_TYPE ? 1 : gguf_type_size(type)); } - if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) { - const std::string key = "general.alignment"; - { - const uint64_t n = key.length(); - helper_write(&n, sizeof(n), file); - } - helper_write(key.data(), key.length(), file); + if (hft == HANDCRAFTED_KV_BAD_ALIGN || + hft == HANDCRAFTED_TENSORS_BAD_ALIGN || hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || + hft == HANDCRAFTED_DATA_BAD_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) { + + const uint64_t n = strlen(GGUF_KEY_GENERAL_ALIGNMENT); + helper_write(file, n); + helper_write(file, GGUF_KEY_GENERAL_ALIGNMENT, n); const int32_t type = gguf_type(GGUF_TYPE_UINT32); - helper_write(&type, sizeof(type), file); + helper_write(file, type); - const uint32_t alignment = GGUF_DEFAULT_ALIGNMENT + 1; - helper_write(&alignment, sizeof(alignment), file); + alignment = expect_context_not_null(hft) ? 
1 : 13; + helper_write(file, alignment); } if (hft < offset_has_tensors) { + while (ftell(file) % alignment != 0) { + const char pad = 0; + helper_write(file, pad); + } + for (int i = 0; i < extra_bytes; ++i) { const char tmp = 0; - helper_write(&tmp, sizeof(tmp), file); + helper_write(file, tmp); } rewind(file); return file; } - uint32_t alignment = GGUF_DEFAULT_ALIGNMENT; - if (hft == HANDCRAFTED_TENSORS_BAD_ALIGNMENT || hft == HANDCRAFTED_DATA_BAD_ALIGNMENT) { - alignment -= 1; - } else if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) { - alignment += 1; + if (hft == HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN || hft == HANDCRAFTED_DATA_INCONSISTENT_ALIGN) { + alignment = 1; } uint64_t offset = 0; @@ -313,9 +334,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft } { const uint64_t n = name.length(); - helper_write(&n, sizeof(n), file); + helper_write(file, n); } - helper_write(name.data(), name.length(), file); + helper_write(file, name.data(), name.length()); uint32_t n_dims = hft == HANDCRAFTED_TENSORS_NE_TOO_BIG ? 
2 : 1; for (int i = GGML_MAX_DIMS-1; i >= 1; --i) { @@ -326,35 +347,35 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft } if (hft == HANDCRAFTED_TENSORS_BAD_N_DIMS) { const uint32_t n_dims_bad = GGML_MAX_DIMS + 1; - helper_write(&n_dims_bad, sizeof(n_dims_bad), file); + helper_write(file, n_dims_bad); } else { - helper_write(&n_dims, sizeof(n_dims), file); + helper_write(file, n_dims); } if (hft == HANDCRAFTED_TENSORS_BAD_SHAPE) { for (uint32_t j = 0; j < n_dims; ++j) { const int64_t bad_dim = -1; - helper_write(&bad_dim, sizeof(bad_dim), file); + helper_write(file, bad_dim); } } else if (hft == HANDCRAFTED_TENSORS_NE_TOO_BIG){ for (uint32_t j = 0; j < n_dims; ++j) { const int64_t big_dim = 4*int64_t(INT32_MAX); - helper_write(&big_dim, sizeof(big_dim), file); + helper_write(file, big_dim); } } else { - helper_write(shape.data(), n_dims*sizeof(int64_t), file); + helper_write(file, shape.data(), n_dims*sizeof(int64_t)); } { - const int32_t type32 = hft == HANDCRAFTED_TENSORS_BAD_TYPE ? -1 : int32_t(type); - helper_write(&type32, sizeof(type32), file); + const int32_t type32 = hft == HANDCRAFTED_TENSORS_BAD_TYPE ? 
GGML_TYPE_COUNT : int32_t(type); + helper_write(file, type32); } if (hft == HANDCRAFTED_TENSORS_BAD_OFFSET) { const uint64_t bad_offset = -1; - helper_write(&bad_offset, sizeof(bad_offset), file); + helper_write(file, bad_offset); } else { - helper_write(&offset, sizeof(offset), file); + helper_write(file, offset); } int64_t ne = shape[0]; @@ -364,12 +385,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft offset += GGML_PAD(ggml_row_size(type, ne), alignment); } - const uint32_t alignment_overshoot = ftell(file) % alignment; - if (alignment_overshoot != 0) { - for (size_t i = alignment_overshoot; i < alignment; ++i) { - const char pad = 0; - helper_write(&pad, sizeof(pad), file); - } + while (ftell(file) % alignment != 0) { + const char pad = 0; + helper_write(file, pad); } if (hft >= offset_has_data) { @@ -380,13 +398,13 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft } for (uint64_t i = 0; i < nbytes; ++i) { const uint8_t random_byte = i % 256; - helper_write(&random_byte, sizeof(random_byte), file); + helper_write(file, random_byte); } } for (int i = 0; i < extra_bytes; ++i) { const char tmp = 0; - helper_write(&tmp, sizeof(tmp), file); + helper_write(file, tmp); } rewind(file); return file; @@ -505,6 +523,16 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i } const char * data_gguf = reinterpret_cast(gguf_get_arr_data(gguf_ctx, id)); + + if (type_arr == GGUF_TYPE_BOOL) { + for (size_t arr_i = 0; arr_i < arr_n; ++arr_i) { + if (bool(data8[arr_i]) != bool(data_gguf[arr_i])) { + ok = false; + } + } + continue; + } + if (!std::equal(data8, data8 + arr_n*type_size, data_gguf)) { ok = false; } @@ -512,12 +540,20 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i } const char * data_gguf = reinterpret_cast(gguf_get_val_data(gguf_ctx, id)); + + if (type == GGUF_TYPE_BOOL) { + if (bool(*data8) != bool(*data_gguf)) { + ok = false; + } 
+ continue; + } + if (!std::equal(data8, data8 + gguf_type_size(type), data_gguf)) { ok = false; } } - const uint32_t expected_alignment = alignment_defined ? GGUF_DEFAULT_ALIGNMENT + 1 : GGUF_DEFAULT_ALIGNMENT; + const uint32_t expected_alignment = alignment_defined ? 1 : GGUF_DEFAULT_ALIGNMENT; if (gguf_get_alignment(gguf_ctx) != expected_alignment) { ok = false; } @@ -539,7 +575,7 @@ static bool handcrafted_check_tensors(const gguf_context * gguf_ctx, const unsig bool ok = true; - const int id_alignment = gguf_find_key(gguf_ctx, "general.alignment"); + const int id_alignment = gguf_find_key(gguf_ctx, GGUF_KEY_GENERAL_ALIGNMENT); const uint32_t alignment = id_alignment >= 0 ? gguf_get_val_u32(gguf_ctx, id_alignment) : GGUF_DEFAULT_ALIGNMENT; uint64_t expected_offset = 0; @@ -607,7 +643,7 @@ static bool handcrafted_check_tensor_data(const gguf_context * gguf_ctx, const u std::vector data(size); GGML_ASSERT(fseek(file, gguf_get_data_offset(gguf_ctx) + offset, SEEK_SET) == 0); - GGML_ASSERT(fread(data.data(), 1, size, file) == size); + GGML_ASSERT(fread(data.data(), 1, data.size(), file) == data.size()); for (size_t j = 0; j < size; ++j) { const uint8_t expected_byte = (j + offset) % 256; @@ -627,15 +663,15 @@ static std::pair test_handcrafted_file(const unsigned int seed) { const std::vector hfts = { HANDCRAFTED_HEADER_BAD_MAGIC, HANDCRAFTED_HEADER_BAD_VERSION_1, - // HANDCRAFTED_FILE_TYPE_BAD_VERSION_FUTURE, // FIXME + HANDCRAFTED_HEADER_BAD_VERSION_FUTURE, HANDCRAFTED_HEADER_BAD_N_KV, HANDCRAFTED_HEADER_BAD_N_TENSORS, HANDCRAFTED_HEADER_EMPTY, HANDCRAFTED_KV_BAD_KEY_SIZE, HANDCRAFTED_KV_BAD_TYPE, - // HANDCRAFTED_KV_BAD_VALUE_SIZE, // FIXME sanitizer limit - // HANDCRAFTED_FILE_TYPE_DUPLICATE_KEY, // FIXME + HANDCRAFTED_KV_DUPLICATE_KEY, + HANDCRAFTED_KV_BAD_ALIGN, HANDCRAFTED_KV_SUCCESS, HANDCRAFTED_TENSORS_BAD_NAME_SIZE, @@ -643,14 +679,16 @@ static std::pair test_handcrafted_file(const unsigned int seed) { HANDCRAFTED_TENSORS_BAD_SHAPE, 
HANDCRAFTED_TENSORS_NE_TOO_BIG, HANDCRAFTED_TENSORS_BAD_TYPE, - // HANDCRAFTED_TENSORS_BAD_OFFSET, // FIXME + HANDCRAFTED_TENSORS_BAD_OFFSET, HANDCRAFTED_TENSORS_DUPLICATE_NAME, - // HANDCRAFTED_TENSORS_BAD_ALIGNMENT, // FIXME + HANDCRAFTED_TENSORS_BAD_ALIGN, + HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN, HANDCRAFTED_TENSORS_SUCCESS, HANDCRAFTED_TENSORS_CUSTOM_ALIGN, HANDCRAFTED_DATA_NOT_ENOUGH_DATA, - // HANDCRAFTED_DATA_BAD_ALIGNMENT, // FIXME + HANDCRAFTED_DATA_BAD_ALIGN, + HANDCRAFTED_DATA_INCONSISTENT_ALIGN, HANDCRAFTED_DATA_SUCCESS, HANDCRAFTED_DATA_CUSTOM_ALIGN, }; @@ -674,6 +712,7 @@ static std::pair test_handcrafted_file(const unsigned int seed) { /*no_alloc =*/ false, /*ctx =*/ hft >= offset_has_data ? &ctx : nullptr, }; + struct gguf_context * gguf_ctx = gguf_init_from_file_impl(file, gguf_params); if (expect_context_not_null(hft)) { @@ -689,7 +728,7 @@ static std::pair test_handcrafted_file(const unsigned int seed) { } ntest++; - if (false && hft >= offset_has_data && !expect_context_not_null(hft)) { // FIXME + if (hft >= offset_has_data && !expect_context_not_null(hft)) { printf("%s: - no_dangling_ggml_context_pointer: ", __func__); if (ctx) { printf("\033[1;31mFAIL\033[0m\n"); @@ -700,23 +739,6 @@ static std::pair test_handcrafted_file(const unsigned int seed) { ntest++; } - if (false && expect_context_not_null(hft)) { // FIXME - FILE * file_eb = get_handcrafted_file(seed, hft, /*extra_bytes =*/ 1); - struct gguf_context * gguf_ctx_eb = gguf_init_from_file_impl(file_eb, gguf_params); - - printf("%s: - context_null_with_extra_bytes: ", __func__); - if (gguf_ctx_eb) { - printf("\033[1;31mFAIL\033[0m\n"); - } else { - printf("\033[1;32mOK\033[0m\n"); - npass++; - } - ntest++; - - gguf_free(gguf_ctx_eb); - fclose(file_eb); - } - const bool alignment_defined = hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN; if (expect_context_not_null(hft)) { @@ -763,14 +785,15 @@ static std::pair test_handcrafted_file(const unsigned int seed) { 
ntest++; } + fclose(file); if (gguf_ctx) { ggml_free(ctx); gguf_free(gguf_ctx); } - fclose(file); printf("\n"); } + return std::make_pair(npass, ntest); } @@ -789,10 +812,6 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t const std::string key = "my_key_" + std::to_string(rng() % 1024); const enum gguf_type type = gguf_type(rng() % GGUF_TYPE_COUNT); - if (type == GGUF_TYPE_STRING || type == GGUF_TYPE_ARRAY) { - continue; // FIXME memory leak - } - switch (type) { case GGUF_TYPE_UINT8: gguf_set_val_u8 (gguf_ctx, key.c_str(), rng() % (1 << 7)); break; case GGUF_TYPE_INT8: gguf_set_val_i8 (gguf_ctx, key.c_str(), rng() % (1 << 7) - (1 << 6)); break; @@ -826,6 +845,9 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t std::vector random_data((nbytes + sizeof(uint32_t) - 1) / sizeof(uint32_t)); for (size_t j = 0; j < random_data.size(); ++j) { random_data[j] = rng(); + if (type_arr == GGUF_TYPE_BOOL) { + random_data[j] &= 0x01010101; // the sanitizer complains if booleans are not 0 or 1 + } } gguf_set_arr_data(gguf_ctx, key.c_str(), type_arr, random_data.data(), ne); } break; @@ -928,6 +950,17 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other continue; } + if (type_arr == GGUF_TYPE_BOOL) { + const int8_t * data = reinterpret_cast(gguf_get_arr_data(ctx, id)); + const int8_t * data_other = reinterpret_cast(gguf_get_arr_data(other, idx_other)); + for (int arr_i = 0; arr_i < arr_n; ++arr_i) { + if (bool(data[arr_i]) != bool(data_other[arr_i])) { + ok = false; + } + } + continue; + } + if (type_arr == GGUF_TYPE_STRING) { for (int arr_i = 0; arr_i < arr_n; ++arr_i) { const std::string str = gguf_get_arr_str(ctx, id, arr_i); @@ -939,8 +972,8 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other continue; } - const char * data = reinterpret_cast(gguf_get_arr_data(ctx, id)); - const char * data_other = reinterpret_cast(gguf_get_arr_data(other, 
idx_other)); + const int8_t * data = reinterpret_cast(gguf_get_arr_data(ctx, id)); + const int8_t * data_other = reinterpret_cast(gguf_get_arr_data(other, idx_other)); if (!std::equal(data, data + arr_n*gguf_type_size(type_arr), data_other)) { ok = false; } @@ -1028,21 +1061,6 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml } static std::pair test_roundtrip(ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta) { - FILE * file = tmpfile(); -#ifdef _WIN32 - if (!file) { - printf("%s: failed to create tmpfile(), needs elevated privileges on Windows"); - printf("%s: skipping tests"); - return std::make_pair(0, 0); - } -#else - GGML_ASSERT(file); -#endif // _WIN32 - - if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) { - return std::make_pair(0, 0); // FIXME - } - ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr); printf("%s: device=%s, backend=%s, only_meta=%s\n", __func__, ggml_backend_dev_description(dev), ggml_backend_name(backend), only_meta ? 
"yes" : "no"); @@ -1060,10 +1078,24 @@ static std::pair test_roundtrip(ggml_backend_dev_t dev, const unsigned bbuf = result.buffer; } - struct gguf_buf gbuf = gguf_buf_init(16 * 1024); - gguf_write_to_buf(gguf_ctx_0, &gbuf, only_meta); - helper_write(gbuf.data, gbuf.offset, file); - rewind(file); + FILE * file = tmpfile(); + +#ifdef _WIN32 + if (!file) { + printf("%s: failed to create tmpfile(), needs elevated privileges on Windows"); + printf("%s: skipping tests"); + return; + } +#else + GGML_ASSERT(file); +#endif // _WIN32 + + { + std::vector buf; + gguf_write_to_buf(gguf_ctx_0, buf, only_meta); + GGML_ASSERT(fwrite(buf.data(), 1, buf.size(), file) == buf.size()); + rewind(file); + } struct ggml_context * ctx_1 = nullptr; struct gguf_init_params gguf_params = { @@ -1151,9 +1183,8 @@ static std::pair test_roundtrip(ggml_backend_dev_t dev, const unsigned ggml_free(ctx_1); gguf_free(gguf_ctx_0); gguf_free(gguf_ctx_1); - gguf_buf_free(gbuf); ggml_backend_free(backend); - GGML_ASSERT(fclose(file) == 0); + fclose(file); printf("\n"); return std::make_pair(npass, ntest);