From 19f2ff13629a09f5c0f8efe8ddb7936008c09163 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Wed, 5 Feb 2025 15:11:09 +0100
Subject: [PATCH 1/4] common : add default embeddings presets

This commit adds default embeddings presets for the following models:
- bge-small-en-v1.5
- e5-small-v2
- gte-small

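Each preset has its own option (--embd-bge-small-en-default,
--embd-e5-small-en-default and --embd-gte-small-default, in the order
listed above) and can be used with llama-embedding and llama-server.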

For example, with llama-embedding:
```console
./build/bin/llama-embedding --embd-gte-small-default -p "Hello, how are you?"
```

And with llama-server:
```console
./build/bin/llama-server --embd-gte-small-default
```
And the embeddings endpoint can then be called with a POST request:
```console
curl --request POST \
    --url http://localhost:8080/embeddings \
    --header "Content-Type: application/json" \
    --data '{"input": "Hello, how are you?"}'
```

I'm not sure whether these are the most commonly used embedding models,
but hopefully they are a good starting point for discussion and further
improvements.

Refs: https://github.com/ggerganov/llama.cpp/issues/10932
---
 common/arg.cpp | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/common/arg.cpp b/common/arg.cpp
index 76b8988819cda..86226b2ec024f 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2324,5 +2324,47 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_TTS}));
 
+    add_opt(common_arg(
+        {"--embd-bge-small-en-default"},
+        string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "CompendiumLabs/bge-small-en-v1.5-gguf";
+            params.hf_file = "bge-small-en-v1.5-q4_k_m.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-e5-small-en-default"},
+        string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ChristianAzinn/e5-small-v2-gguf";
+            params.hf_file = "e5-small-v2.Q4_K_M.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-gte-small-default"},
+        string_format("use default gte-small model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ChristianAzinn/gte-small-gguf";
+            params.hf_file = "gte-small.Q4_K_M.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
     return ctx_arg;
 }

From e07000b525d7193240ace18400a326504644e305 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Thu, 6 Feb 2025 08:55:11 +0100
Subject: [PATCH 2/4] squash! common : add default embeddings presets [no ci]

Default to Q8_0 quantization instead of Q4_K_M for all three presets.
---
 common/arg.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 86226b2ec024f..2b3581e71f5aa 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2329,7 +2329,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
         [](common_params & params) {
             params.hf_repo = "CompendiumLabs/bge-small-en-v1.5-gguf";
-            params.hf_file = "bge-small-en-v1.5-q4_k_m.gguf";
+            params.hf_file = "bge-small-en-v1.5-q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;
@@ -2343,7 +2343,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
         [](common_params & params) {
             params.hf_repo = "ChristianAzinn/e5-small-v2-gguf";
-            params.hf_file = "e5-small-v2.Q4_K_M.gguf";
+            params.hf_file = "e5-small-v2.Q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;
@@ -2357,7 +2357,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         string_format("use default gte-small model (note: can download weights from the internet)"),
         [](common_params & params) {
             params.hf_repo = "ChristianAzinn/gte-small-gguf";
-            params.hf_file = "gte-small.Q4_K_M.gguf";
+            params.hf_file = "gte-small.Q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;

From 61f410f9e43913d3f0bb96c47c5d7c3ebfa2a991 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Thu, 6 Feb 2025 09:50:22 +0100
Subject: [PATCH 3/4] squash! common : add default embeddings presets

Update gte-small model to ggml-org/gte-small-Q8_0-GGUF.
---
 common/arg.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 2b3581e71f5aa..fee12ec9b7c6b 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2356,8 +2356,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--embd-gte-small-default"},
         string_format("use default gte-small model (note: can download weights from the internet)"),
         [](common_params & params) {
-            params.hf_repo = "ChristianAzinn/gte-small-gguf";
-            params.hf_file = "gte-small.Q8_0.gguf";
+            params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
+            params.hf_file = "gte-small-q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;

From d1d0a61b004225299f507acd807a7318a6fb8c97 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Thu, 6 Feb 2025 10:43:16 +0100
Subject: [PATCH 4/4] squash! common : add default embeddings presets

Update the remaining presets (bge-small-en-v1.5 and e5-small-v2) to use
the models from ggml-org.
---
 common/arg.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index fee12ec9b7c6b..152f671ab738e 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2328,7 +2328,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--embd-bge-small-en-default"},
         string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
         [](common_params & params) {
-            params.hf_repo = "CompendiumLabs/bge-small-en-v1.5-gguf";
+            params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
             params.hf_file = "bge-small-en-v1.5-q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
@@ -2342,8 +2342,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--embd-e5-small-en-default"},
         string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
         [](common_params & params) {
-            params.hf_repo = "ChristianAzinn/e5-small-v2-gguf";
-            params.hf_file = "e5-small-v2.Q8_0.gguf";
+            params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
+            params.hf_file = "e5-small-v2-q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;