Skip to content

Commit

Permalink
Vectorsearch Efficient Filters and Post-Search Filters (#364)
Browse files Browse the repository at this point in the history
* Efficient filters

Signed-off-by: Finn Roblin <finnrobl@amazon.com>

* Add post (post_filter, bool) filters and update README

Signed-off-by: Finn Roblin <finnrobl@amazon.com>

* Add script score workload + explicitly specify attributes in index mapping

Signed-off-by: Finn Roblin <finnrobl@amazon.com>

* Update README

Signed-off-by: Finn Roblin <finnrobl@amazon.com>

---------

Signed-off-by: Finn Roblin <finnrobl@amazon.com>
  • Loading branch information
finnroblin authored Sep 3, 2024
1 parent 274d84d commit 3798b08
Show file tree
Hide file tree
Showing 17 changed files with 923 additions and 4 deletions.
1 change: 1 addition & 0 deletions vectorsearch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ This workload allows the following parameters to be specified using `--workload-
| query_count | Number of queries for search operation |
| query_body | Json properties that will be merged with search body |
| search_clients | Number of clients to use for running queries |
| target_dataset_filter_attributes | Used in filter benchmarks. List of the attribute field names in the dataset, e.g. `["color", "taste", "age"]`. |

#### Sample Outputs

Expand Down
62 changes: 62 additions & 0 deletions vectorsearch/indices/filters/faiss-index-attributes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{#- Index body template for filtered vector search on the faiss engine.
    Renders to JSON: a knn-enabled index whose mapping holds the vector field
    "target_field" plus the attribute fields "color", "taste" and "age".
    Optional template variables only emit their entry when they are set. #}
{
  "settings": {
    "index": {
      "knn": true
      {#- "knn" is always present, so each optional setting carries its own leading comma. #}
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      ,"number_of_shards": {{ target_index_primary_shards }}
      {%- endif %}
      {%- if target_index_replica_shards is defined %}
      ,"number_of_replicas": {{ target_index_replica_shards }}
      {%- endif %}
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      {% if id_field_name is defined and id_field_name != "_id" %}
      "{{id_field_name}}": {
        "type": "keyword"
      },
      {%- endif %}
      "target_field": {
        "type": "knn_vector",
        "dimension": {{ target_index_dimension }},
        {#- Either reference a trained model or define the HNSW method inline. #}
        {%- if train_model_id is defined %}
        "model_id": "{{ train_model_id }}"
        {%- else %}
        "method": {
          "name": "hnsw",
          "space_type": "{{ target_index_space_type }}",
          "engine": "faiss",
          "parameters": {
            {#- All three parameters are optional. A separating comma is emitted
                before an entry only when some earlier entry was rendered, so every
                combination of set and unset variables yields valid JSON. #}
            {%- if hnsw_ef_search is defined and hnsw_ef_search %}
            "ef_search": {{ hnsw_ef_search }}
            {%- endif %}
            {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
            {%- if hnsw_ef_search is defined and hnsw_ef_search %}
            ,
            {%- endif %}
            "ef_construction": {{ hnsw_ef_construction }}
            {%- endif %}
            {%- if hnsw_m is defined and hnsw_m %}
            {#- "m" may follow ef_search directly when ef_construction is unset,
                so the comma depends on either of the two earlier entries. #}
            {%- if (hnsw_ef_search is defined and hnsw_ef_search) or (hnsw_ef_construction is defined and hnsw_ef_construction) %}
            ,
            {%- endif %}
            "m": {{ hnsw_m }}
            {%- endif %}
          }
        }
        {%- endif %}
      },
      "color": {
        "type": "text"
      },
      "taste": {
        "type": "text"
      },
      "age": {
        "type": "integer"
      }
    }
  }
}
55 changes: 55 additions & 0 deletions vectorsearch/indices/filters/lucene-index-attributes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{#- Index body template for filtered vector search on the Lucene engine.
    Renders to JSON: a knn-enabled index whose mapping holds the HNSW vector
    field "target_field" plus the attribute fields "color", "taste" and "age". #}
{
  "settings": {
    "index": {
      "knn": true
      {#- Optional index settings; each one is appended after "knn" with a leading comma. #}
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      , "number_of_shards": {{ target_index_primary_shards }}
      {%- endif %}
      {%- if target_index_replica_shards is defined %}
      , "number_of_replicas": {{ target_index_replica_shards }}
      {%- endif %}
      {%- if hnsw_ef_search is defined and hnsw_ef_search %}
      , "knn.algo_param.ef_search": {{ hnsw_ef_search }}
      {%- endif %}
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      {% if id_field_name is defined and id_field_name != "_id" %}
      "{{id_field_name}}": { "type": "keyword" },
      {%- endif %}
      "target_field": {
        "type": "knn_vector",
        "dimension": {{ target_index_dimension }},
        "method": {
          "name": "hnsw",
          "space_type": "{{ target_index_space_type }}",
          "engine": "lucene",
          "parameters": {
            {#- Both parameters are optional; the comma after ef_construction is
                rendered only when "m" follows it. #}
            {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
            "ef_construction": {{ hnsw_ef_construction }}{% if hnsw_m is defined and hnsw_m %},{% endif %}
            {%- endif %}
            {%- if hnsw_m is defined and hnsw_m %}
            "m": {{ hnsw_m }}
            {%- endif %}
          }
        }
      },
      "color": { "type": "text" },
      "taste": { "type": "text" },
      "age": { "type": "integer" }
    }
  }
}
55 changes: 55 additions & 0 deletions vectorsearch/indices/filters/nmslib-index-attributes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{#- Index body template for filtered vector search on the nmslib engine.
    Renders to JSON: a knn-enabled index whose mapping holds the HNSW vector
    field "target_field" plus the attribute fields "color", "taste" and "age". #}
{
  "settings": {
    "index": {
      "knn": true
      {#- Optional index settings; each one is appended after "knn" with a leading comma. #}
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      , "number_of_shards": {{ target_index_primary_shards }}
      {%- endif %}
      {%- if target_index_replica_shards is defined %}
      , "number_of_replicas": {{ target_index_replica_shards }}
      {%- endif %}
      {%- if hnsw_ef_search is defined and hnsw_ef_search %}
      , "knn.algo_param.ef_search": {{ hnsw_ef_search }}
      {%- endif %}
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      {% if id_field_name is defined and id_field_name != "_id" %}
      "{{id_field_name}}": { "type": "keyword" },
      {%- endif %}
      "target_field": {
        "type": "knn_vector",
        "dimension": {{ target_index_dimension }},
        "method": {
          "name": "hnsw",
          "space_type": "{{ target_index_space_type }}",
          "engine": "nmslib",
          "parameters": {
            {#- Both parameters are optional; the comma after ef_construction is
                rendered only when "m" follows it. #}
            {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
            "ef_construction": {{ hnsw_ef_construction }}{% if hnsw_m is defined and hnsw_m %},{% endif %}
            {%- endif %}
            {%- if hnsw_m is defined and hnsw_m %}
            "m": {{ hnsw_m }}
            {%- endif %}
          }
        }
      },
      "color": { "type": "text" },
      "taste": { "type": "text" },
      "age": { "type": "integer" }
    }
  }
}
35 changes: 35 additions & 0 deletions vectorsearch/indices/filters/script-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{#- Index body template for the script-score filter workload.
    Renders to JSON: an index whose mapping holds the vector field
    "target_field" (type and dimension only, no method block) plus the
    attribute fields "color", "taste" and "age". #}
{
  "settings": {
    "index": {
      {#- Both settings are optional and nothing precedes them, so the comma
          before number_of_replicas is emitted only when number_of_shards was
          rendered. Any combination then yields valid JSON. #}
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      "number_of_shards": {{ target_index_primary_shards }}
      {%- endif %}
      {%- if target_index_replica_shards is defined %}
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      ,
      {%- endif %}
      "number_of_replicas": {{ target_index_replica_shards }}
      {%- endif %}
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      {% if id_field_name is defined and id_field_name != "_id" %}
      "{{id_field_name}}": {
        "type": "keyword"
      },
      {%- endif %}
      "target_field": {
        "type": "knn_vector",
        "dimension": {{ target_index_dimension }}
      },
      "color": {
        "type": "text"
      },
      "taste": {
        "type": "text"
      },
      "age": {
        "type": "integer"
      }
    }
  }
}
76 changes: 76 additions & 0 deletions vectorsearch/params/filters/efficient/faiss-hnsw-relaxed.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
  "target_index_name": "target_index",
  "target_field_name": "target_field",
  "target_index_body": "indices/filters/faiss-index-attributes.json",

  "target_index_primary_shards": 1,
  "target_index_dimension": 128,
  "target_index_space_type": "l2",

  "target_index_bulk_size": 100,
  "target_index_bulk_index_data_set_format": "hdf5",
  "target_index_bulk_index_data_set_path": "/tmp/filter_relaxed.hdf5",
  "target_index_bulk_indexing_clients": 10,
  "target_dataset_filter_attributes": ["color", "taste", "age"],

  "target_index_max_num_segments": 1,
  "target_index_force_merge_timeout": 300,
  "hnsw_ef_search": 100,
  "hnsw_ef_construction": 100,

  "query_k": 100,
  "query_body": {
    "docvalue_fields": ["_id"],
    "stored_fields": "_none_"
  },
  "filter_type": "efficient",
  "filter_body": {
    "bool": {
      "should": [
        { "range": { "age": { "gte": 30, "lte": 70 } } },
        { "term": { "color": "green" } },
        { "term": { "color": "blue" } },
        { "term": { "color": "yellow" } },
        { "term": { "taste": "sweet" } }
      ]
    }
  },

  "query_data_set_format": "hdf5",
  "query_data_set_path": "/tmp/filter_relaxed.hdf5",
  "query_count": 100
}
78 changes: 78 additions & 0 deletions vectorsearch/params/filters/efficient/faiss-hnsw-restrictive.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
{
  "target_index_name": "target_index",
  "target_field_name": "target_field",
  "target_index_body": "indices/filters/faiss-index-attributes.json",

  "target_index_primary_shards": 1,
  "target_index_dimension": 128,
  "target_index_space_type": "l2",

  "target_index_bulk_size": 100,
  "target_index_bulk_index_data_set_format": "hdf5",
  "target_index_bulk_index_data_set_path": "/tmp/filter_restrictive.hdf5",
  "target_index_bulk_indexing_clients": 10,
  "target_dataset_filter_attributes": ["color", "taste", "age"],

  "target_index_max_num_segments": 1,
  "target_index_force_merge_timeout": 300,
  "hnsw_ef_search": 100,
  "hnsw_ef_construction": 100,

  "query_k": 100,
  "query_body": {
    "docvalue_fields": ["_id"],
    "stored_fields": "_none_"
  },
  "filter_type": "efficient",
  "filter_body": {
    "bool": {
      "must": [
        { "range": { "age": { "gte": 30, "lte": 60 } } },
        { "term": { "taste": "bitter" } },
        {
          "bool": {
            "should": [
              { "term": { "color": "blue" } },
              { "term": { "color": "green" } }
            ]
          }
        }
      ]
    }
  },

  "query_data_set_format": "hdf5",
  "query_data_set_path": "/tmp/filter_restrictive.hdf5",
  "query_count": 100
}
Loading

0 comments on commit 3798b08

Please sign in to comment.