GGUF: C++ refactor, backend support, misc fixes
remove ggml_tensor.backend

update CODEOWNERS [no ci]

remove gguf_get_data from API

revise GGUF API data types
JohannesGaessler committed Jan 3, 2025
1 parent 4b0c638 commit b87784d
Showing 21 changed files with 1,800 additions and 1,632 deletions.
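
The common thread in the diffs below: the GGUF API moves out of ggml.h into a new public gguf.h header, and several functions change signature (64-bit tensor counts, gguf_set_tensor_data without a size parameter). As orientation, here is a minimal sketch of a caller after this commit — the file name and error handling are illustrative; only the includes and the gguf_* calls are taken from the diff:

#include "ggml.h"
#include "gguf.h"

#include <cinttypes>
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    // metadata only: do not allocate a ggml_context or load tensor data
    struct gguf_init_params params = {
        /*no_alloc =*/ true,
        /*ctx      =*/ nullptr,
    };

    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (ctx == nullptr) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    // the tensor count is 64-bit after this refactor, hence PRIi64
    printf("n_tensors = %" PRIi64 "\n", gguf_get_n_tensors(ctx));

    gguf_free(ctx);
    return 0;
}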
6 changes: 6 additions & 0 deletions CODEOWNERS
@@ -3,3 +3,9 @@
 /ci/ @ggerganov
 /.devops/ @ngxson
 /examples/server/ @ngxson
+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
+/ggml/src/ggml-opt.cpp @JohannesGaessler
+/ggml/src/gguf.cpp @JohannesGaessler
3 changes: 3 additions & 0 deletions common/common.cpp
@@ -2,6 +2,9 @@
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
 
+#include "ggml.h"
+#include "gguf.h"
+
 #include "common.h"
 #include "log.h"
 // Change JSON_ASSERT from assert() to GGML_ASSERT:
2 changes: 2 additions & 0 deletions examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -1,4 +1,6 @@
 #include "ggml.h"
+#include "gguf.h"
+
 #include "llama.h"
 #include "common.h"
 #include "log.h"
4 changes: 3 additions & 1 deletion examples/cvector-generator/cvector-generator.cpp
@@ -1,7 +1,9 @@
+#include "ggml.h"
+#include "gguf.h"
+
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
-#include "ggml.h"
 #include "pca.hpp"
 #include "mean.hpp"
 
6 changes: 4 additions & 2 deletions examples/export-lora/export-lora.cpp
@@ -1,7 +1,9 @@
-#include "arg.h"
-#include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
+#include "gguf.h"
+
+#include "arg.h"
+#include "common.h"
 
 #include <map>
 #include <vector>
1 change: 1 addition & 0 deletions examples/gguf-hash/gguf-hash.cpp
@@ -1,4 +1,5 @@
 #include "ggml.h"
+#include "gguf.h"
 
 #include <cstdlib> /* abort() */
 #include <cstddef>
14 changes: 8 additions & 6 deletions examples/gguf-split/gguf-split.cpp
@@ -1,16 +1,18 @@
+#include "ggml.h"
+#include "gguf.h"
 #include "llama.h"
 #include "common.h"
 
 #include <algorithm>
+#include <cinttypes>
+#include <climits>
+#include <cstdio>
 #include <cstdlib>
+#include <stdexcept>
+#include <cstring>
 #include <fstream>
 #include <string>
 #include <vector>
-#include <climits>
-
-#include <cstdio>
-#include <cstring>
-#include <stdexcept>
 
 #if defined(_WIN32)
 #include <windows.h>
@@ -296,7 +298,7 @@ struct split_strategy {
                 total_size += ggml_nbytes(t);
             }
             total_size = total_size / 1000 / 1000; // convert to megabytes
-            printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
+            printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
             i_split++;
         }
     }
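The printf fix above follows from one of the revised data types: gguf_get_n_tensors now returns int64_t, so %d is replaced by the PRIi64 macro from <cinttypes> (a mismatched printf conversion is undefined behavior). A hedged sketch of the pattern in isolation, assuming a valid gguf_context:

#include "gguf.h"

#include <cinttypes> // PRIi64
#include <cstdint>
#include <cstdio>

// print the 64-bit tensor count portably; PRIi64 always matches
// the int64_t returned by the revised API, unlike %d
static void print_tensor_count(const struct gguf_context * ctx) {
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
    printf("n_tensors = %" PRIi64 "\n", n_tensors);
}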
16 changes: 10 additions & 6 deletions examples/gguf/gguf.cpp
@@ -1,10 +1,9 @@
 #include "ggml.h"
+#include "gguf.h"
 
 #include <cstdio>
-#include <cinttypes>
 #include <string>
 #include <sstream>
-#include <fstream>
 #include <vector>
 
 #undef MIN
@@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {

         for (int i = 0; i < n_tensors; ++i) {
             const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t size   = gguf_get_tensor_size  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
         }
     }

@@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {

         for (int i = 0; i < n_tensors; ++i) {
             const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t size   = gguf_get_tensor_size  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
         }
     }

@@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {

            struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
 
-            printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
+            printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
+                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
 
             // print first 10 elements
             const float * data = (const float *) cur->data;
@@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
             const float * data = (const float *) cur->data;
             for (int j = 0; j < ggml_nelements(cur); ++j) {
                 if (data[j] != 100 + i) {
-                    fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
+                    fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
                     gguf_free(ctx);
                     return false;
                 }
@@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
         check_data = false;
     }
 
+    srand(123456);
+
     const std::string fname(argv[1]);
     const std::string mode (argv[2]);

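Both readers above now report tensor sizes via the new gguf_get_tensor_size getter. Extracted as a standalone hedged sketch (the function name is illustrative; the getters are the ones used in the diff):

#include "gguf.h"

#include <cstddef>
#include <cstdio>

// walk the tensor table and report name, size, and offset,
// mirroring the loop that both example readers share
static void print_tensor_infos(const struct gguf_context * ctx) {
    const int n_tensors = (int) gguf_get_n_tensors(ctx); // int64_t in the revised API
    for (int i = 0; i < n_tensors; ++i) {
        const char * name   = gguf_get_tensor_name  (ctx, i);
        const size_t size   = gguf_get_tensor_size  (ctx, i);
        const size_t offset = gguf_get_tensor_offset(ctx, i);

        printf("tensor[%d]: name = %s, size = %zu, offset = %zu\n", i, name, size, offset);
    }
}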
6 changes: 4 additions & 2 deletions examples/llava/clip.cpp
@@ -7,6 +7,7 @@
 #include "ggml-cpu.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 
 //#ifdef GGML_USE_CUDA
 //#include "ggml-cuda.h"
@@ -262,7 +263,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
             {
                 const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
                 int arr_n = gguf_get_arr_n(ctx_gguf, i);
-                const void * data = gguf_get_arr_data(ctx_gguf, i);
+                const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
                 std::stringstream ss;
                 ss << "[";
                 for (int j = 0; j < arr_n; j++) {
@@ -2734,7 +2735,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
         total_size_org += orig_size;
         total_size_new += new_size;
         gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
-        gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
+        GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
+        gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
         fout.write((const char *)new_data, new_size);
         size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
         for (size_t j = 0; j < pad; ++j) {
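Two API changes are visible in the clip.cpp hunks: gguf_get_arr_data is no longer valid for string-typed arrays (hence the nullptr guard), and gguf_set_tensor_data drops its size parameter — the size is now derived from the tensor info registered earlier, which the added GGML_ASSERT cross-checks. A hedged sketch of the write-side sequence (the function name, tensor names, and the Q8_0 target type are illustrative, not from the diff):

#include "ggml.h"
#include "gguf.h"

// register the tensor's metadata first, then retype it and attach the
// converted payload; the size is implied by type and shape, so the assert
// only cross-checks it against the independently computed new_size
static void emit_quantized(struct gguf_context * ctx_out, const struct ggml_tensor * t,
                           const void * new_data, size_t new_size) {
    gguf_add_tensor     (ctx_out, t);
    gguf_set_tensor_type(ctx_out, t->name, GGML_TYPE_Q8_0); // illustrative target type
    GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, t->name)) == new_size);
    gguf_set_tensor_data(ctx_out, t->name, new_data); // no size argument anymore
}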
3 changes: 2 additions & 1 deletion ggml/CMakeLists.txt
@@ -243,7 +243,8 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-metal.h
     include/ggml-rpc.h
     include/ggml-sycl.h
-    include/ggml-vulkan.h)
+    include/ggml-vulkan.h
+    include/gguf.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
 #if (GGML_METAL)
1 change: 1 addition & 0 deletions ggml/include/ggml-cpp.h
@@ -7,6 +7,7 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 #include <memory>
 
 // Smart pointers for ggml types
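ggml-cpp.h wraps the C API in smart pointers, so pulling gguf.h in here suggests the same RAII treatment for gguf_context. A hedged usage sketch — gguf_context_ptr is assumed to be a std::unique_ptr alias whose deleter calls gguf_free; the alias name is an assumption, not shown in this hunk:

#include "ggml-cpp.h"

// gguf_context_ptr: hypothetical unique_ptr alias with a gguf_free deleter
static bool probe_metadata(const char * fname) {
    gguf_context_ptr ctx { gguf_init_from_file(fname, {/*no_alloc =*/ true, /*ctx =*/ nullptr}) };
    if (!ctx) {
        return false; // gguf_init_from_file failed
    }
    // ctx.get() hands the raw gguf_context * to any C API call;
    // gguf_free runs automatically when ctx goes out of scope
    return gguf_get_n_tensors(ctx.get()) > 0;
}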
140 changes: 0 additions & 140 deletions ggml/include/ggml.h
@@ -241,12 +241,6 @@
 #define GGML_ROPE_TYPE_MROPE 8
 #define GGML_ROPE_TYPE_VISION 24
 
-#define GGUF_MAGIC "GGUF"
-
-#define GGUF_VERSION 3
-
-#define GGUF_DEFAULT_ALIGNMENT 32
-
 #define GGML_UNUSED(x) (void)(x)
 
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -403,12 +397,6 @@ extern "C" {
         GGML_PREC_F32,
     };
 
-    enum ggml_backend_type {
-        GGML_BACKEND_TYPE_CPU = 0,
-        GGML_BACKEND_TYPE_GPU = 10,
-        GGML_BACKEND_TYPE_GPU_SPLIT = 20,
-    };
-
     // model file types
     enum ggml_ftype {
         GGML_FTYPE_UNKNOWN = -1,
@@ -587,8 +575,6 @@ extern "C" {
     struct ggml_tensor {
         enum ggml_type type;
 
-        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
-
         struct ggml_backend_buffer * buffer;
 
         int64_t ne[GGML_MAX_DIMS]; // number of elements
@@ -2111,132 +2097,6 @@ extern "C" {
             int64_t n_per_row,
             const float * imatrix);
 
-    //
-    // gguf
-    //
-
-    enum gguf_type {
-        GGUF_TYPE_UINT8   = 0,
-        GGUF_TYPE_INT8    = 1,
-        GGUF_TYPE_UINT16  = 2,
-        GGUF_TYPE_INT16   = 3,
-        GGUF_TYPE_UINT32  = 4,
-        GGUF_TYPE_INT32   = 5,
-        GGUF_TYPE_FLOAT32 = 6,
-        GGUF_TYPE_BOOL    = 7,
-        GGUF_TYPE_STRING  = 8,
-        GGUF_TYPE_ARRAY   = 9,
-        GGUF_TYPE_UINT64  = 10,
-        GGUF_TYPE_INT64   = 11,
-        GGUF_TYPE_FLOAT64 = 12,
-        GGUF_TYPE_COUNT, // marks the end of the enum
-    };
-
-    struct gguf_context;
-
-    struct gguf_init_params {
-        bool no_alloc;
-
-        // if not NULL, create a ggml_context and allocate the tensor data in it
-        struct ggml_context ** ctx;
-    };
-
-    GGML_API struct gguf_context * gguf_init_empty(void);
-    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
-    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
-
-    GGML_API void gguf_free(struct gguf_context * ctx);
-
-    GGML_API const char * gguf_type_name(enum gguf_type type);
-
-    GGML_API int    gguf_get_version    (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_alignment  (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
-    GGML_API void * gguf_get_data       (const struct gguf_context * ctx);
-
-    GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx);
-    GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key);
-    GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
-
-    GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
-    GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
-
-    // will abort if the wrong type is used for the key
-    GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int key_id);
-    GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int key_id);
-    GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
-    GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
-    GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
-    GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
-    GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
-    GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
-    GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
-    GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
-
-    GGML_API int            gguf_get_n_tensors    (const struct gguf_context * ctx);
-    GGML_API int            gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
-    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
-    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
-    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
-
-    // removes key if it exists
-    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
-
-    // overrides existing values or adds a new one
-    GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
-    GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
-    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
-    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t  val);
-    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
-    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t  val);
-    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float    val);
-    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
-    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t  val);
-    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double   val);
-    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool     val);
-    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
-    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
-    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
-
-    // set or add KV pairs from another context
-    GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
-
-    // manage tensor info
-    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
-    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
-    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
-
-    // writing gguf files can be done in 2 ways:
-    //
-    // - write the entire gguf_context to a binary file in a single pass:
-    //
-    //   gguf_write_to_file(ctx, fname);
-    //
-    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
-    //
-    //   FILE * f = fopen(fname, "wb");
-    //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
-    //   fwrite(f, ...);
-    //   void * data = gguf_meta_get_meta_data(ctx);
-    //   fseek(f, 0, SEEK_SET);
-    //   fwrite(f, data, gguf_get_meta_size(ctx));
-    //   free(data);
-    //   fclose(f);
-    //
-
-    // write the entire context to a binary file
-    GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
-
-    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
-    GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
-    GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
-
 #ifdef __cplusplus
 // restrict not standard in C++
 #    if defined(__GNUC__)
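Everything removed above reappears in the new gguf.h, including the usage notes on the two ways to write a GGUF file (a single gguf_write_to_file call, or a two-pass scheme that reserves gguf_get_meta_size bytes for the metadata). A minimal single-pass sketch, using only calls from the removed block (key names and file name are illustrative):

#include "ggml.h"
#include "gguf.h"

int main(void) {
    // build a context with a couple of KV pairs, then write in a single pass
    struct gguf_context * ctx = gguf_init_empty();

    gguf_set_val_u32(ctx, "example.answer", 42);
    gguf_set_val_str(ctx, "example.name", "single-pass demo");

    gguf_write_to_file(ctx, "out.gguf", /*only_meta =*/ false);

    gguf_free(ctx);
    return 0;
}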