跳到主要内容

Dynamic Field

Collection schema 中定义的所有字段都必须包含在要插入的实体中。如果您希望某些字段是可选的,请考虑启用 dynamic field。本主题描述如何启用和使用 dynamic field。

概述

在 Milvus 中,您可以通过为 collection 中的每个字段设置名称和数据类型来创建 collection schema。当您向 schema 添加字段时,请确保此字段包含在您要插入的实体中。如果您希望某些字段是可选的,启用 dynamic field 是一个选择。

Dynamic field 是一个名为 $meta 的保留字段,其类型为 JavaScript Object Notation (JSON)。实体中未在 schema 中定义的任何字段都将作为键值对存储在此保留的 JSON 字段中。

对于启用了 dynamic field 的 collection,您可以使用 dynamic field 中的键进行标量过滤,就像使用在 schema 中显式定义的字段一样。

启用 dynamic field

在创建具有自定义设置的 collection 时,您可以手动启用 dynamic field。

from pymilvus import MilvusClient

# Connect to the Milvus server.
client = MilvusClient(uri="http://localhost:19530")

# Create the collection with the dynamic field enabled.
client.create_collection(
    collection_name="my_collection",
    dimension=5,
    enable_dynamic_field=True,
)
import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.service.collection.request.CreateCollectionReq;

MilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()
.uri("http://localhost:19530")
.build());

CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
.collectionName("my_collection")
.dimension(5)
.enableDynamicField(true)
.build()
client.createCollection(createCollectionReq);
import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";

// Connect to the Milvus server using the class imported above.
const client = new MilvusClient({
  address: "http://localhost:19530",
});

// Create the collection with the dynamic field enabled. The name and
// dimension match the other language tabs of this example.
await client.createCollection({
  collection_name: "my_collection",
  dimension: 5,
  enable_dynamic_field: true,
});
import (
"context"
"fmt"

"github.com/milvus-io/milvus/client/v2/column"
"github.com/milvus-io/milvus/client/v2/entity"
"github.com/milvus-io/milvus/client/v2/index"
"github.com/milvus-io/milvus/client/v2/milvusclient"
)

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

// Connect to the Milvus server. The handle is named `client` because every
// later call in the Go examples refers to it by that name.
client, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
	Address: "localhost:19530",
})
if err != nil {
	fmt.Println(err.Error())
	// handle err
}
defer client.Close(ctx)

// Create the collection with the dynamic field enabled.
err = client.CreateCollection(ctx, milvusclient.SimpleCreateCollectionOptions("my_collection", 5).
	WithAutoID(false).
	WithDynamicSchema(true))
if err != nil {
	fmt.Println(err.Error())
	// handle err
}
# Endpoint and credentials referenced by the request below.
export CLUSTER_ENDPOINT="http://localhost:19530"
export TOKEN="root:Milvus"

# Create the collection with the dynamic field enabled.
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d '{
    "collectionName": "my_collection",
    "dimension": 5,
    "enableDynamicField": true
}'

使用 dynamic field

当您的 collection 中启用了 dynamic field 时,所有未在 schema 中定义的字段及其值都将作为键值对存储在 dynamic field 中。

例如,假设您的 collection schema 只定义了两个字段,名为 id 和 vector,并启用了 dynamic field。现在,将以下数据集插入到此 collection 中。

[
{id: 0, vector: [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], color: "pink_8682"},
{id: 1, vector: [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], color: "red_7025"},
{id: 2, vector: [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], color: "orange_6781"},
{id: 3, vector: [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], color: "pink_9298"},
{id: 4, vector: [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], color: "red_4794"},
{id: 5, vector: [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], color: "yellow_4222"},
{id: 6, vector: [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], color: "red_9392"},
{id: 7, vector: [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], color: "grey_8510"},
{id: 8, vector: [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], color: "white_9381"},
{id: 9, vector: [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], color: "purple_4976"}
]

上述数据集包含 10 个实体,每个实体都包含字段 id、vector 和 color。这里,color 字段未在 schema 中定义。由于 collection 启用了 dynamic field,字段 color 将作为键值对存储在 dynamic field 中。

插入数据

以下代码演示了如何将此数据集插入到 collection 中。

data=[
{"id": 0, "vector": [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], "color": "pink_8682"},
{"id": 1, "vector": [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], "color": "red_7025"},
{"id": 2, "vector": [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], "color": "orange_6781"},
{"id": 3, "vector": [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], "color": "pink_9298"},
{"id": 4, "vector": [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], "color": "red_4794"},
{"id": 5, "vector": [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], "color": "yellow_4222"},
{"id": 6, "vector": [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], "color": "red_9392"},
{"id": 7, "vector": [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], "color": "grey_8510"},
{"id": 8, "vector": [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], "color": "white_9381"},
{"id": 9, "vector": [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], "color": "purple_4976"}
]

# Insert the dataset; `color` is not in the schema, so it is stored in the
# dynamic field.
insert_result = client.insert(collection_name="my_collection", data=data)

print(insert_result)

# Output
# {'insert_count': 10, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}
import com.google.gson.Gson;
import com.google.gson.JsonObject;

import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.response.InsertResp;

Gson gson = new Gson();
List<JsonObject> data = Arrays.asList(
gson.fromJson("{\"id\": 0, \"vector\": [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], \"color\": \"pink_8682\"}", JsonObject.class),
gson.fromJson("{\"id\": 1, \"vector\": [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], \"color\": \"red_7025\"}", JsonObject.class),
gson.fromJson("{\"id\": 2, \"vector\": [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], \"color\": \"orange_6781\"}", JsonObject.class),
gson.fromJson("{\"id\": 3, \"vector\": [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], \"color\": \"pink_9298\"}", JsonObject.class),
gson.fromJson("{\"id\": 4, \"vector\": [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], \"color\": \"red_4794\"}", JsonObject.class),
gson.fromJson("{\"id\": 5, \"vector\": [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], \"color\": \"yellow_4222\"}", JsonObject.class),
gson.fromJson("{\"id\": 6, \"vector\": [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], \"color\": \"red_9392\"}", JsonObject.class),
gson.fromJson("{\"id\": 7, \"vector\": [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], \"color\": \"grey_8510\"}", JsonObject.class),
gson.fromJson("{\"id\": 8, \"vector\": [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], \"color\": \"white_9381\"}", JsonObject.class),
gson.fromJson("{\"id\": 9, \"vector\": [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], \"color\": \"purple_4976\"}", JsonObject.class)
);

// Insert the rows built above into the collection and print the response.
InsertResp insertResp = client.insert(InsertReq.builder()
        .collectionName("my_collection")
        .data(data)
        .build());
System.out.println(insertResp);

// Output:
//
// InsertResp(InsertCnt=10, primaryKeys=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
const { DataType } = require("@zilliz/milvus2-sdk-node")

// 3. Insert some data

const data = [
{id: 0, vector: [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], color: "pink_8682"},
{id: 1, vector: [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], color: "red_7025"},
{id: 2, vector: [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], color: "orange_6781"},
{id: 3, vector: [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], color: "pink_9298"},
{id: 4, vector: [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], color: "red_4794"},
{id: 5, vector: [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], color: "yellow_4222"},
{id: 6, vector: [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], color: "red_9392"},
{id: 7, vector: [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], color: "grey_8510"},
{id: 8, vector: [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], color: "white_9381"},
{id: 9, vector: [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], color: "purple_4976"}
]

// Insert the dataset into the collection used throughout this example.
const res = await client.insert({
  collection_name: "my_collection",
  data,
});

console.log(res.insert_cnt);

// Output
//
// 10
//
// Values for the "color" key, one per entity. "color" is not defined in the
// schema, so it is passed as an extra column alongside the schema fields.
dynamicColumn := column.NewColumnString("color", []string{
"pink_8682", "red_7025", "orange_6781", "pink_9298", "red_4794", "yellow_4222", "red_9392", "grey_8510", "white_9381", "purple_4976",
})

// Column-based insert: the schema fields "id" and "vector" plus the extra
// "color" column attached through WithColumns.
_, err = client.Insert(ctx, milvusclient.NewColumnBasedInsertOption("my_collection").
WithInt64Column("id", []int64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}).
WithFloatVectorColumn("vector", 5, [][]float32{
{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592},
{0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104},
{0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592},
{0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345},
{0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106},
{0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955},
{0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987},
{-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052},
{0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336},
{0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608},
}).
WithColumns(dynamicColumn),
)
if err != nil {
fmt.Println(err.Error())
// handle err
}
# Endpoint and credentials referenced by the request below.
export CLUSTER_ENDPOINT="http://localhost:19530"
export TOKEN="root:Milvus"

# Insert ten entities into my_collection. Each row carries a "color" key in
# addition to "id" and "vector".
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d '{
"data": [
{"id": 0, "vector": [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], "color": "pink_8682"},
{"id": 1, "vector": [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], "color": "red_7025"},
{"id": 2, "vector": [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], "color": "orange_6781"},
{"id": 3, "vector": [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], "color": "pink_9298"},
{"id": 4, "vector": [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], "color": "red_4794"},
{"id": 5, "vector": [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], "color": "yellow_4222"},
{"id": 6, "vector": [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], "color": "red_9392"},
{"id": 7, "vector": [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], "color": "grey_8510"},
{"id": 8, "vector": [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], "color": "white_9381"},
{"id": 9, "vector": [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], "color": "purple_4976"}
],
"collectionName": "my_collection"
}'

# {
# "code": 0,
# "data": {
# "insertCount": 10,
# "insertIds": [
# 0,
# 1,
# 2,
# 3,
# 4,
# 5,
# 6,
# 7,
# 8,
# 9
# ]
# }
# }

为 dynamic field 中的标量字段建立索引

当您启用 dynamic field 时,任何未定义的标量字段都将以 JSON 格式作为键值对存储。Milvus 支持为这样的未定义标量字段创建索引,实际上是通过构建 JSON 路径索引来实现的。以下是它的工作原理:

  1. 选择要索引的 dynamic field 键。例如,上面示例中的 "color"

  2. 为该键处的值决定转换类型。Milvus 将解析 dynamic field,提取指定键下的值,并将它们转换为您配置的类型。

    • 支持的 json_cast_type 值为 bool(或 BOOL)、double(或 DOUBLE)和 varchar(或 VARCHAR)。

    • 如果解析或转换失败(例如,尝试将字符串解析为 double),这些行将在索引中被跳过。

  3. 指定该键的 JSON 路径(json_path)。由于 dynamic field 存储为 JSON,您可以指定类似 "color" 的内容,或者如果您有嵌套结构,您可以指定更深的路径(例如 my_json["field"]["subfield"])。

  4. 创建 INVERTED 索引。目前,JSON 路径索引仅支持 INVERTED 类型。

有关参数和注意事项的详细信息,请参阅“为 JSON 字段建立索引”。

以下是如何为 "color" 字段创建索引的示例:

# JSON path index settings for the dynamic key.
json_index_settings = {
    "json_path": "color",  # JSON path to the key you want to index.
    "json_cast_type": "varchar",  # Type to which Milvus will cast the extracted values.
}

# Prepare index parameters
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="color",  # Name of the "column" you see in queries (the dynamic key).
    index_type="INVERTED",  # Currently only "INVERTED" is supported for indexing JSON fields.
    index_name="color_index",  # Assign a name to this index.
    params=json_index_settings,
)

# Create the index
client.create_index(collection_name="my_collection", index_params=index_params)
import io.milvus.v2.common.IndexParam;

// JSON path index settings for the dynamic key "color".
Map<String, Object> jsonIndexSettings = new HashMap<>();
jsonIndexSettings.put("json_path", "color");
jsonIndexSettings.put("json_cast_type", "varchar");

// INVERTED index definition for that key.
IndexParam colorIndex = IndexParam.builder()
        .fieldName("color")
        .indexName("color_index")
        .indexType(IndexParam.IndexType.INVERTED)
        .extraParams(jsonIndexSettings)
        .build();

List<IndexParam> indexes = new ArrayList<>();
indexes.add(colorIndex);

// Create the index on the collection.
client.createIndex(CreateIndexReq.builder()
        .collectionName("my_collection")
        .indexParams(indexes)
        .build());
// Request an INVERTED JSON path index on the dynamic key "color" of
// my_collection, with extracted values cast to varchar.
indexTask, err := client.CreateIndex(ctx, milvusclient.NewCreateIndexOption("my_collection", "color",
index.NewJSONPathIndex(index.Inverted, "varchar", "color")))
if err != nil {
fmt.Println(err.Error())
// handle error
}

// Wait on the task returned by CreateIndex before continuing.
err = indexTask.Await(ctx)
if err != nil {
fmt.Println(err.Error())
// handle err
}
const index_params = {
  field_name: "color", // Name of the "column" you see in queries (the dynamic key).
  index_type: "INVERTED", // Currently only "INVERTED" is supported for indexing JSON fields.
  index_name: "color_index", // Assign a name to this index.
  params: {
    json_path: "color", // JSON path to the key you want to index.
    json_cast_type: "varchar", // Type to which Milvus will cast the extracted values.
  },
};

// Create the index. The Node SDK method is `createIndex`, and it takes the
// index settings at the top level of the request next to `collection_name`.
await client.createIndex({
  collection_name: "my_collection",
  ...index_params,
});
# RESTful request: create an INVERTED JSON path index named color_index on
# the dynamic key "color" of my_collection, casting values to varchar.
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/indexes/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d '{
"collectionName": "my_collection",
"indexParams": [
{
"fieldName": "color",
"indexName": "color_index",
"indexType": "INVERTED",
"params": {
"json_path": "color",
"json_cast_type": "varchar"
}
}
]
}'

使用 dynamic field 查询和搜索

Milvus 支持在查询和搜索过程中使用过滤表达式,允许您指定要在结果中包含的字段。以下示例演示了如何通过使用 dynamic field 来使用未在 schema 中定义的 color 字段执行查询和搜索。

query_vector = [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]

# Keep only entities whose dynamic key `color` starts with "red".
color_filter = 'color like "red%"'

res = client.search(
    collection_name="my_collection",
    data=[query_vector],
    filter=color_filter,
    output_fields=["color"],
    limit=5,
)

print(res)

# Output
# data: ["[{'id': 1, 'distance': 0.6290165185928345, 'entity': {'color': 'red_7025'}}, {'id': 4, 'distance': 0.5975797176361084, 'entity': {'color': 'red_4794'}}, {'id': 6, 'distance': -0.24996188282966614, 'entity': {'color': 'red_9392'}}]"]

import java.util.Collections;

import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.service.vector.request.SearchReq;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.response.SearchResp;

// Search the "vector" field, keeping only entities whose dynamic key
// "color" starts with "red", and return that key with each hit.
FloatVec queryVector = new FloatVec(new float[]{0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f});
SearchResp resp = client.search(SearchReq.builder()
        .collectionName("my_collection")
        .annsField("vector")
        .data(Collections.singletonList(queryVector))
        .outputFields(Collections.singletonList("color"))
        .filter("color like \"red%\"")
        .topK(5)
        .consistencyLevel(ConsistencyLevel.STRONG)
        .build());

System.out.println(resp.getSearchResults());

// Output
//
// [[
// SearchResp.SearchResult(entity={color=red_7025}, score=0.6290165, id=1),
// SearchResp.SearchResult(entity={color=red_4794}, score=0.5975797, id=4),
// SearchResp.SearchResult(entity={color=red_9392}, score=-0.24996188, id=6)
//]]

const query_vector = [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592];

// Search the collection used throughout this example, keeping only entities
// whose dynamic key `color` starts with "red".
const res = await client.search({
  collection_name: "my_collection",
  data: [query_vector],
  limit: 5,
  // highlight-start
  filter: 'color like "red%"',
  output_fields: ["color"],
  // highlight-end
});
queryVector := []float32{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592}

// Assemble the search request first: filter on the dynamic key "color",
// search the "vector" field, and return "color" with each hit.
searchOption := milvusclient.NewSearchOption(
	"my_collection", // collectionName
	5,               // limit
	[]entity.Vector{entity.FloatVector(queryVector)},
).
	WithFilter("color like \"red%\"").
	WithANNSField("vector").
	WithOutputFields("color")

resultSets, err := client.Search(ctx, searchOption)
if err != nil {
	fmt.Println(err.Error())
	// handle error
}

// Print the IDs, scores and "color" values of every result set.
for _, rs := range resultSets {
	fmt.Println("IDs: ", rs.IDs.FieldData().GetScalars())
	fmt.Println("Scores: ", rs.Scores)
	fmt.Println("color: ", rs.GetColumn("color").FieldData().GetScalars())
}
# Endpoint and credentials referenced by the request below.
export CLUSTER_ENDPOINT="http://localhost:19530"
export TOKEN="root:Milvus"

# Search the "vector" field, keeping only entities whose dynamic key "color"
# starts with "red". The limit matches the SDK examples (5).
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d '{
    "collectionName": "my_collection",
    "data": [
        [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]
    ],
    "annsField": "vector",
    "filter": "color like \"red%\"",
    "limit": 5,
    "outputFields": ["color"]
}'
# {"code":0,"cost":0,"data":[{"color":"red_7025","distance":0.6290165,"id":1},{"color":"red_4794","distance":0.5975797,"id":4},{"color":"red_9392","distance":-0.24996185,"id":6}]}

在上面代码示例中使用的过滤表达式 color like "red%" 中,条件指定 color 字段的值必须以 "red" 开头。在示例数据中,只有三个实体(id 为 1、4 和 6)满足此条件。因此,当 limit(topK)设置为 3 或更大时,这三个实体都将被返回。

[
{
"id": 1,
"distance": 0.6290165,
"entity": {
"color": "red_7025"
}
},
{
"id": 4,
"distance": 0.5975797,
"entity": {
"color": "red_4794"
}
},
{
"id": 6,
"distance": -0.24996188,
"entity": {
"color": "red_9392"
}
}
]