From ede11f697aa1bb5fc8893bf8ab6d1fbdcdf4e22c Mon Sep 17 00:00:00 2001 From: Shay Rojansky Date: Fri, 29 Mar 2024 06:54:46 +0100 Subject: [PATCH] Support Milvus 2.4 GPU indexes Closes #74 --- Milvus.Client.Tests/CollectionTests.cs | 2 +- Milvus.Client.Tests/IndexTests.cs | 19 ++++++++++++ Milvus.Client.Tests/Utils.cs | 22 ++++++++++++++ Milvus.Client/IndexType.cs | 40 +++++++++++++++++++++++++ Milvus.Client/MilvusCollection.Index.cs | 6 ++++ 5 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 Milvus.Client.Tests/Utils.cs diff --git a/Milvus.Client.Tests/CollectionTests.cs b/Milvus.Client.Tests/CollectionTests.cs index 5bfd71a..0b97168 100644 --- a/Milvus.Client.Tests/CollectionTests.cs +++ b/Milvus.Client.Tests/CollectionTests.cs @@ -287,7 +287,7 @@ public async Task Compact() }); long compactionId = await collection.CompactAsync(); - if ((await Client.GetVersionAsync()).StartsWith("v2.4.", StringComparison.Ordinal)) + if (await Client.GetParsedMilvusVersion() >= new Version(2, 4)) { // Milvus 2.4 returns -1 here as the compaction ID return; diff --git a/Milvus.Client.Tests/IndexTests.cs b/Milvus.Client.Tests/IndexTests.cs index 7136b82..5dbe0ed 100644 --- a/Milvus.Client.Tests/IndexTests.cs +++ b/Milvus.Client.Tests/IndexTests.cs @@ -57,6 +57,25 @@ await Collection.CreateIndexAsync( await Collection.WaitForIndexBuildAsync("float_vector"); } + [Theory] + [InlineData(IndexType.GpuCagra, """{ "nlist": "8" }""")] + [InlineData(IndexType.GpuIvfFlat, """{ "nlist": "8" }""")] + [InlineData(IndexType.GpuIvfPq, """{ "nlist": "8", "m": "4" }""")] + [InlineData(IndexType.GpuBruteForce, """{ "nlist": "8" }""")] + public async Task Index_types_float_gpu(IndexType indexType, string extraParamsString) + { + if (await Client.GetParsedMilvusVersion() < new Version(2, 4)) + { + // GPU indexes were introduced in Milvus 2.4 + return; + } + + await Collection.CreateIndexAsync( + "float_vector", indexType, SimilarityMetricType.L2, + extraParams: 
JsonSerializer.Deserialize>(extraParamsString)); + await Collection.WaitForIndexBuildAsync("float_vector"); + } + [Theory] [InlineData(IndexType.BinFlat, """{ "n_trees": "10" }""")] [InlineData(IndexType.BinIvfFlat, """{ "n_trees": "8", "nlist": "8" }""")] diff --git a/Milvus.Client.Tests/Utils.cs b/Milvus.Client.Tests/Utils.cs new file mode 100644 index 0000000..4c1ee99 --- /dev/null +++ b/Milvus.Client.Tests/Utils.cs @@ -0,0 +1,22 @@ +namespace Milvus.Client.Tests; + +public static class Utils +{ + public static async Task GetParsedMilvusVersion(this MilvusClient client) + { + string version = await client.GetVersionAsync(); + + if (version.StartsWith("v", StringComparison.Ordinal)) + { + version = version[1..]; + } + + int dash = version.IndexOf('-'); + if (dash != -1) + { + version = version[..dash]; + } + + return Version.Parse(version); + } +} diff --git a/Milvus.Client/IndexType.cs b/Milvus.Client/IndexType.cs index 9bf09de..24d3cc6 100644 --- a/Milvus.Client/IndexType.cs +++ b/Milvus.Client/IndexType.cs @@ -104,6 +104,46 @@ public enum IndexType /// DiskANN, + /// + /// A graph-based index optimized for GPUs, GPU_CAGRA performs well on inference GPUs. It's best suited for + /// situations with a small number of queries, where training GPUs with lower memory frequency may not yield optimal + /// results. + /// + /// + /// + /// + GpuCagra, + + /// + /// This quantization-based index organizes vector data into clusters and employs product quantization for efficient + /// search. It is ideal for scenarios requiring fast queries and can manage limited memory resources while balancing + /// accuracy and speed. + /// + /// + /// + /// + GpuIvfFlat, + + /// + /// This quantization-based index organizes vector data into clusters and employs product quantization for efficient + /// search. It is ideal for scenarios requiring fast queries and can manage limited memory resources while balancing + /// accuracy and speed. 
+ /// + /// + /// + /// + GpuIvfPq, + + /// + /// This index is tailored for cases where extremely high recall is crucial, guaranteeing a recall of 1 by comparing + /// each query with all vectors in the dataset. It only requires the metric type (metric_type) and top-k (limit) as + /// index building and search parameters. + /// + /// + /// + /// + GpuBruteForce, + /// /// ANNOY (Approximate Nearest Neighbors Oh Yeah) is an index that uses a hyperplane to divide a high-dimensional /// space into multiple subspaces, and then stores them in a tree structure. diff --git a/Milvus.Client/MilvusCollection.Index.cs b/Milvus.Client/MilvusCollection.Index.cs index 3a29b82..3ce276b 100644 --- a/Milvus.Client/MilvusCollection.Index.cs +++ b/Milvus.Client/MilvusCollection.Index.cs @@ -76,6 +76,12 @@ static string GetGrpcIndexType(IndexType indexType) IndexType.IvfSq8 => "IVF_SQ8", IndexType.Hnsw => "HNSW", IndexType.DiskANN => "DISKANN", + + IndexType.GpuCagra => "GPU_CAGRA", + IndexType.GpuIvfFlat => "GPU_IVF_FLAT", + IndexType.GpuIvfPq => "GPU_IVF_PQ", + IndexType.GpuBruteForce => "GPU_BRUTE_FORCE", + IndexType.RhnswFlat => "RHNSW_FLAT", IndexType.RhnswPq => "RHNSW_PQ", IndexType.RhnswSq => "RHNSW_SQ",