From 19f2ff13629a09f5c0f8efe8ddb7936008c09163 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 5 Feb 2025 15:11:09 +0100 Subject: [PATCH 1/4] common : add default embeddings presets This commit adds default embeddings presets for the following models: - bge-small-en-v1.5 - e5-small-v2 - gte-small These can be used with llama-embedding and llama-server. For example, with llama-embedding: ```console ./build/bin/llama-embedding --embd-gte-small-default -p "Hello, how are you?" ``` And with llama-server: ```console ./build/bin/llama-server --embd-gte-small-default ``` And the embeddings endpoint can then be called with a POST request: ```console curl --request POST \ --url http://localhost:8080/embeddings \ --header "Content-Type: application/json" \ --data '{"input": "Hello, how are you?"}' ``` I'm not sure if these are the most common embedding models but hopefully this can be a good starting point for discussion and further improvements. Refs: https://github.com/ggerganov/llama.cpp/issues/10932 --- common/arg.cpp | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/common/arg.cpp b/common/arg.cpp index 76b8988819cda..86226b2ec024f 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2324,5 +2324,47 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_examples({LLAMA_EXAMPLE_TTS})); + add_opt(common_arg( + {"--embd-bge-small-en-default"}, + string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "CompendiumLabs/bge-small-en-v1.5-gguf"; + params.hf_file = "bge-small-en-v1.5-q4_k_m.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--embd-e5-small-en-default"}, + string_format("use default e5-small-v2 model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "ChristianAzinn/e5-small-v2-gguf"; + params.hf_file = "e5-small-v2.Q4_K_M.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--embd-gte-small-default"}, + string_format("use default gte-small model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "ChristianAzinn/gte-small-gguf"; + params.hf_file = "gte-small.Q4_K_M.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + return ctx_arg; } From e07000b525d7193240ace18400a326504644e305 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 6 Feb 2025 08:55:11 +0100 Subject: [PATCH 2/4] squash! common : add default embeddings presets [no ci] Default to Q8_0 quantization. --- common/arg.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 86226b2ec024f..2b3581e71f5aa 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2329,7 +2329,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"), [](common_params & params) { params.hf_repo = "CompendiumLabs/bge-small-en-v1.5-gguf"; - params.hf_file = "bge-small-en-v1.5-q4_k_m.gguf"; + params.hf_file = "bge-small-en-v1.5-q8_0.gguf"; params.pooling_type = LLAMA_POOLING_TYPE_NONE; params.embd_normalize = 2; params.n_ctx = 512; @@ -2343,7 +2343,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("use default e5-small-v2 model (note: can download weights from the internet)"), [](common_params & params) { params.hf_repo = "ChristianAzinn/e5-small-v2-gguf"; - params.hf_file = "e5-small-v2.Q4_K_M.gguf"; + params.hf_file = "e5-small-v2.Q8_0.gguf"; params.pooling_type = LLAMA_POOLING_TYPE_NONE; params.embd_normalize = 2; params.n_ctx = 512; @@ -2357,7 +2357,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("use default gte-small model (note: can download weights from the internet)"), [](common_params & params) { params.hf_repo = "ChristianAzinn/gte-small-gguf"; - params.hf_file = "gte-small.Q4_K_M.gguf"; + params.hf_file = "gte-small.Q8_0.gguf"; params.pooling_type = LLAMA_POOLING_TYPE_NONE; params.embd_normalize = 2; params.n_ctx = 512; From 61f410f9e43913d3f0bb96c47c5d7c3ebfa2a991 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 6 Feb 2025 09:50:22 +0100 Subject: [PATCH 3/4] squash! common : add default embeddings presets Update gte-small model to ggml-org/gte-small-Q8_0-GGUF. --- common/arg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 2b3581e71f5aa..fee12ec9b7c6b 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2356,8 +2356,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--embd-gte-small-default"}, string_format("use default gte-small model (note: can download weights from the internet)"), [](common_params & params) { - params.hf_repo = "ChristianAzinn/gte-small-gguf"; - params.hf_file = "gte-small.Q8_0.gguf"; + params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF"; + params.hf_file = "gte-small-q8_0.gguf"; params.pooling_type = LLAMA_POOLING_TYPE_NONE; params.embd_normalize = 2; params.n_ctx = 512; From d1d0a61b004225299f507acd807a7318a6fb8c97 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 6 Feb 2025 10:43:16 +0100 Subject: [PATCH 4/4] squash! common : add default embeddings presets Update the remaining presets to use the models from ggml-org. --- common/arg.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index fee12ec9b7c6b..152f671ab738e 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2328,7 +2328,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--embd-bge-small-en-default"}, string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"), [](common_params & params) { - params.hf_repo = "CompendiumLabs/bge-small-en-v1.5-gguf"; + params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF"; params.hf_file = "bge-small-en-v1.5-q8_0.gguf"; params.pooling_type = LLAMA_POOLING_TYPE_NONE; params.embd_normalize = 2; @@ -2342,8 +2342,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--embd-e5-small-en-default"}, string_format("use default e5-small-v2 model (note: can download weights from the internet)"), [](common_params & params) { - params.hf_repo = "ChristianAzinn/e5-small-v2-gguf"; - params.hf_file = "e5-small-v2.Q8_0.gguf"; + params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF"; + params.hf_file = "e5-small-v2-q8_0.gguf"; params.pooling_type = LLAMA_POOLING_TYPE_NONE; params.embd_normalize = 2; params.n_ctx = 512;