-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Vectorsearch Efficient Filters and Post-Search Filters (#364)
* Efficient filters Signed-off-by: Finn Roblin <finnrobl@amazon.com> * Add post (post_filter, bool) filters and update README) Signed-off-by: Finn Roblin <finnrobl@amazon.com> * Add script score workload + explicitly specify attributes in index mapping Signed-off-by: Finn Roblin <finnrobl@amazon.com> * Update README Signed-off-by: Finn Roblin <finnrobl@amazon.com> --------- Signed-off-by: Finn Roblin <finnrobl@amazon.com>
- Loading branch information
1 parent
274d84d
commit 3798b08
Showing
17 changed files
with
923 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
{ | ||
"settings": { | ||
"index": { | ||
"knn": true | ||
{%- if target_index_primary_shards is defined and target_index_primary_shards %} | ||
,"number_of_shards": {{ target_index_primary_shards }} | ||
{%- endif %} | ||
{%- if target_index_replica_shards is defined %} | ||
,"number_of_replicas": {{ target_index_replica_shards }} | ||
{%- endif %} | ||
} | ||
}, | ||
"mappings": { | ||
"dynamic": "strict", | ||
"properties": { | ||
{% if id_field_name is defined and id_field_name != "_id" %} | ||
"{{id_field_name}}": { | ||
"type": "keyword" | ||
}, | ||
{%- endif %} | ||
"target_field": { | ||
"type": "knn_vector", | ||
"dimension": {{ target_index_dimension }}, | ||
{%- if train_model_id is defined %} | ||
"model_id": "{{ train_model_id }}" | ||
{%- else %} | ||
"method": { | ||
"name": "hnsw", | ||
"space_type": "{{ target_index_space_type }}", | ||
"engine": "faiss", | ||
"parameters": { | ||
{#- Each HNSW parameter is optional. A comma is rendered before an entry only when an earlier entry was rendered, so any subset of the three parameters yields valid JSON. #} | ||
{%- if hnsw_ef_search is defined and hnsw_ef_search %} | ||
"ef_search": {{ hnsw_ef_search }} | ||
{%- endif %} | ||
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %} | ||
{%- if hnsw_ef_search is defined and hnsw_ef_search %} | ||
, | ||
{%- endif %} | ||
"ef_construction": {{ hnsw_ef_construction }} | ||
{%- endif %} | ||
{%- if hnsw_m is defined and hnsw_m %} | ||
{#- "m" may follow "ef_search" directly when "ef_construction" is absent, so both earlier entries are checked. #} | ||
{%- if (hnsw_ef_search is defined and hnsw_ef_search) or (hnsw_ef_construction is defined and hnsw_ef_construction) %} | ||
, | ||
{%- endif %} | ||
"m": {{ hnsw_m }} | ||
{%- endif %} | ||
} | ||
} | ||
{%- endif %} | ||
}, | ||
"color": { | ||
"type": "text" | ||
}, | ||
"taste": { | ||
"type": "text" | ||
}, | ||
"age": { | ||
"type": "integer" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
{ | ||
"settings": { | ||
"index": { | ||
"knn": true | ||
{%- if target_index_primary_shards is defined and target_index_primary_shards %} | ||
,"number_of_shards": {{ target_index_primary_shards }} | ||
{%- endif %} | ||
{%- if target_index_replica_shards is defined %} | ||
,"number_of_replicas": {{ target_index_replica_shards }} | ||
{%- endif %} | ||
{%- if hnsw_ef_search is defined and hnsw_ef_search %} | ||
,"knn.algo_param.ef_search": {{ hnsw_ef_search }} | ||
{%- endif %} | ||
} | ||
}, | ||
"mappings": { | ||
"dynamic": "strict", | ||
"properties": { | ||
{% if id_field_name is defined and id_field_name != "_id" %} | ||
"{{id_field_name}}": { | ||
"type": "keyword" | ||
}, | ||
{%- endif %} | ||
"target_field": { | ||
"type": "knn_vector", | ||
"dimension": {{ target_index_dimension }}, | ||
"method": { | ||
"name": "hnsw", | ||
"space_type": "{{ target_index_space_type }}", | ||
"engine": "lucene", | ||
"parameters": { | ||
{#- Both HNSW parameters are optional. The comma before "m" is rendered only when "ef_construction" was rendered, keeping the object valid JSON for any combination. #} | ||
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %} | ||
"ef_construction": {{ hnsw_ef_construction }} | ||
{%- endif %} | ||
{%- if hnsw_m is defined and hnsw_m %} | ||
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %} | ||
, | ||
{%- endif %} | ||
"m": {{ hnsw_m }} | ||
{%- endif %} | ||
} | ||
} | ||
}, | ||
"color": { | ||
"type": "text" | ||
}, | ||
"taste": { | ||
"type": "text" | ||
}, | ||
"age": { | ||
"type": "integer" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
{ | ||
"settings": { | ||
"index": { | ||
"knn": true | ||
{%- if target_index_primary_shards is defined and target_index_primary_shards %} | ||
,"number_of_shards": {{ target_index_primary_shards }} | ||
{%- endif %} | ||
{%- if target_index_replica_shards is defined %} | ||
,"number_of_replicas": {{ target_index_replica_shards }} | ||
{%- endif %} | ||
{%- if hnsw_ef_search is defined and hnsw_ef_search %} | ||
,"knn.algo_param.ef_search": {{ hnsw_ef_search }} | ||
{%- endif %} | ||
} | ||
}, | ||
"mappings": { | ||
"dynamic": "strict", | ||
"properties": { | ||
{% if id_field_name is defined and id_field_name != "_id" %} | ||
"{{id_field_name}}": { | ||
"type": "keyword" | ||
}, | ||
{%- endif %} | ||
"target_field": { | ||
"type": "knn_vector", | ||
"dimension": {{ target_index_dimension }}, | ||
"method": { | ||
"name": "hnsw", | ||
"space_type": "{{ target_index_space_type }}", | ||
"engine": "nmslib", | ||
"parameters": { | ||
{#- Both HNSW parameters are optional. The comma before "m" is rendered only when "ef_construction" was rendered, keeping the object valid JSON for any combination. #} | ||
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %} | ||
"ef_construction": {{ hnsw_ef_construction }} | ||
{%- endif %} | ||
{%- if hnsw_m is defined and hnsw_m %} | ||
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %} | ||
, | ||
{%- endif %} | ||
"m": {{ hnsw_m }} | ||
{%- endif %} | ||
} | ||
} | ||
}, | ||
"color": { | ||
"type": "text" | ||
}, | ||
"taste": { | ||
"type": "text" | ||
}, | ||
"age": { | ||
"type": "integer" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{ | ||
"settings": { | ||
"index": { | ||
{#- This settings object has no fixed first entry, so "number_of_shards" may be absent. The comma before "number_of_replicas" is rendered only when "number_of_shards" was rendered. #} | ||
{%- if target_index_primary_shards is defined and target_index_primary_shards %} | ||
"number_of_shards": {{ target_index_primary_shards }} | ||
{%- endif %} | ||
{%- if target_index_replica_shards is defined %} | ||
{%- if target_index_primary_shards is defined and target_index_primary_shards %} | ||
, | ||
{%- endif %} | ||
"number_of_replicas": {{ target_index_replica_shards }} | ||
{%- endif %} | ||
} | ||
}, | ||
"mappings": { | ||
"dynamic": "strict", | ||
"properties": { | ||
{% if id_field_name is defined and id_field_name != "_id" %} | ||
"{{id_field_name}}": { | ||
"type": "keyword" | ||
}, | ||
{%- endif %} | ||
"target_field": { | ||
"type": "knn_vector", | ||
"dimension": {{ target_index_dimension }} | ||
}, | ||
"color": { | ||
"type": "text" | ||
}, | ||
"taste": { | ||
"type": "text" | ||
}, | ||
"age": { | ||
"type": "integer" | ||
} | ||
} | ||
} | ||
} |
76 changes: 76 additions & 0 deletions
76
vectorsearch/params/filters/efficient/faiss-hnsw-relaxed.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
{ | ||
"target_index_name": "target_index", | ||
"target_field_name": "target_field", | ||
"target_index_body": "indices/filters/faiss-index-attributes.json", | ||
|
||
"target_index_primary_shards": 1, | ||
"target_index_dimension": 128, | ||
"target_index_space_type": "l2", | ||
|
||
|
||
"target_index_bulk_size": 100, | ||
"target_index_bulk_index_data_set_format": "hdf5", | ||
"target_index_bulk_index_data_set_path": "/tmp/filter_relaxed.hdf5", | ||
"target_index_bulk_indexing_clients": 10, | ||
"target_dataset_filter_attributes": ["color", "taste", "age"], | ||
|
||
"target_index_max_num_segments": 1, | ||
"target_index_force_merge_timeout": 300, | ||
"hnsw_ef_search": 100, | ||
"hnsw_ef_construction": 100, | ||
|
||
"query_k": 100, | ||
"query_body": { | ||
"docvalue_fields" : ["_id"], | ||
"stored_fields" : "_none_" | ||
}, | ||
"filter_type": "efficient", | ||
"filter_body": { | ||
"bool": | ||
{ | ||
"should": | ||
[ | ||
{ | ||
"range": | ||
{ | ||
"age": | ||
{ | ||
"gte": 30, | ||
"lte": 70 | ||
} | ||
} | ||
}, | ||
{ | ||
"term": | ||
{ | ||
"color": "green" | ||
} | ||
}, | ||
{ | ||
"term": | ||
{ | ||
"color": "blue" | ||
} | ||
}, | ||
{ | ||
"term": | ||
{ | ||
"color": "yellow" | ||
} | ||
}, | ||
{ | ||
"term": | ||
{ | ||
"taste": "sweet" | ||
} | ||
} | ||
] | ||
} | ||
}, | ||
|
||
|
||
|
||
"query_data_set_format": "hdf5", | ||
"query_data_set_path":"/tmp/filter_relaxed.hdf5", | ||
"query_count": 100 | ||
} |
78 changes: 78 additions & 0 deletions
78
vectorsearch/params/filters/efficient/faiss-hnsw-restrictive.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
{ | ||
"target_index_name": "target_index", | ||
"target_field_name": "target_field", | ||
"target_index_body": "indices/filters/faiss-index-attributes.json", | ||
|
||
"target_index_primary_shards": 1, | ||
"target_index_dimension": 128, | ||
"target_index_space_type": "l2", | ||
|
||
|
||
"target_index_bulk_size": 100, | ||
"target_index_bulk_index_data_set_format": "hdf5", | ||
"target_index_bulk_index_data_set_path": "/tmp/filter_restrictive.hdf5", | ||
"target_index_bulk_indexing_clients": 10, | ||
"target_dataset_filter_attributes": ["color", "taste", "age"], | ||
|
||
"target_index_max_num_segments": 1, | ||
"target_index_force_merge_timeout": 300, | ||
"hnsw_ef_search": 100, | ||
"hnsw_ef_construction": 100, | ||
|
||
"query_k": 100, | ||
"query_body": { | ||
"docvalue_fields" : ["_id"], | ||
"stored_fields" : "_none_" | ||
}, | ||
"filter_type": "efficient", | ||
"filter_body": { | ||
"bool": | ||
{ | ||
"must": | ||
[ | ||
{ | ||
"range": | ||
{ | ||
"age": | ||
{ | ||
"gte": 30, | ||
"lte": 60 | ||
} | ||
} | ||
}, | ||
{ | ||
"term": | ||
{ | ||
"taste": "bitter" | ||
} | ||
}, | ||
{ | ||
"bool": | ||
{ | ||
"should": | ||
[ | ||
{ | ||
"term": | ||
{ | ||
"color": "blue" | ||
} | ||
}, | ||
{ | ||
"term": | ||
{ | ||
"color": "green" | ||
} | ||
} | ||
] | ||
} | ||
} | ||
] | ||
} | ||
}, | ||
|
||
|
||
|
||
"query_data_set_format": "hdf5", | ||
"query_data_set_path":"/tmp/filter_restrictive.hdf5", | ||
"query_count": 100 | ||
} |
Oops, something went wrong.