Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Image search example #5

Merged
merged 12 commits into from
Jul 8, 2023
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
build/
.vscode
.cache/
*.swp
models/*.bin
12 changes: 10 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,21 @@ endif()

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(CLIP_STANDALONE ON)
else()
set(CLIP_STANDALONE OFF)
endif()

#
# Option list
#

# general
option(CLIP_STATIC "CLIP: static link libraries" OFF)
option(CLIP_NO_EXAMPLES "CLIP: do not build examples" OFF)
option(CLIP_NO_TESTS "CLIP: do not build tests" OFF)
option(CLIP_BUILD_TEST "CLIP: build tests" ${CLIP_STANDALONE})
option(CLIP_BUILD_EXAMPLES "CLIP: build examples" ${CLIP_STANDALONE})
option(CLIP_BUILD_IMAGE_SEARCH "CLIP: build image-search" OFF)
option(CLIP_NATIVE "CLIP: enable -march=native flag" ON)
option(CLIP_LTO "CLIP: enable link time optimization" OFF)

Expand Down Expand Up @@ -48,6 +55,7 @@ endif()
option(CLIP_ACCELERATE "CLIP: enable Accelerate framework" ON)
option(CLIP_OPENBLAS "CLIP: use OpenBLAS" OFF)


#
# Compile flags
#
Expand Down
5 changes: 4 additions & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@

add_library(common-clip STATIC common-clip.cpp)
target_link_libraries(common-clip PRIVATE ggml)
target_include_directories(common-clip PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

if (CLIP_BUILD_IMAGE_SEARCH)
add_subdirectory(./image-search)
endif()

add_executable(main main.cpp)
target_link_libraries(main PRIVATE clip common-clip ggml)

Expand Down
25 changes: 25 additions & 0 deletions examples/image-search/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
project(image-search)

# Fix: the variable CMake actually reads is CMAKE_CXX_STANDARD_REQUIRED;
# plain CXX_STANDARD_REQUIRED is not consulted anywhere, so the original
# line had no effect.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Fetch the usearch vector-search library at configure time.
include(FetchContent)
FetchContent_Declare(usearch
    GIT_REPOSITORY https://github.com/unum-cloud/usearch.git
    GIT_TAG v0.19.3
)
FetchContent_MakeAvailable(usearch)

# Indexer: scans image directories and writes images.usearch / images.paths.
add_executable(image-search-build
    build.cpp
)

target_link_libraries(image-search-build PRIVATE clip ggml usearch)
target_compile_features(image-search-build PUBLIC cxx_std_17)

# Query tool: searches a pre-built index by text.
add_executable(image-search
    search.cpp
)

target_link_libraries(image-search PRIVATE clip ggml usearch)
target_compile_features(image-search PUBLIC cxx_std_11)

56 changes: 56 additions & 0 deletions examples/image-search/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Image search

This example implements basic semantic image search using [usearch](https://github.com/unum-cloud/usearch) as a vector database for accelerated similarity search.

Use `image-search-build` to build the database of images and their embeddings beforehand. Currently it does not support updating.

Use `image-search` to search for indexed images by semantic similarity.

### examples

#### build db

help:
```sh
./image-search-build -h
Usage: ./image-search-build [options] dir/with/pictures [more/dirs]

Options: -h, --help: Show this message and exit
-m <path>, --model <path>: path to model. Default: ../models/ggml-model-f16.bin
-t N, --threads N: Number of threads to use for inference. Default: 4
-v <level>, --verbose <level>: Control the level of verbosity. 0 = minimum, 2 = maximum. Default: 1
```

creating db for `tests/`:
```sh
./image-search-build -m models/openai_clip-vit-base-patch32.ggmlv0.f16.bin ./tests/
```

#### search by text

help:
```sh
./image-search -h
Usage: ./image-search [options] <search string>

Options: -h, --help: Show this message and exit
-m <path>, --model <path>: overwrite path to model. Read from images.paths by default.
-t N, --threads N: Number of threads to use for inference. Default: 4
-v <level>, --verbose <level>: Control the level of verbosity. 0 = minimum, 2 = maximum. Default: 1
-n N, --results N: Number of results to display. Default: 5
```

searching for `apple` in the db in the current directory:
```sh
./image-search apple
clip_model_load: loading model from 'models/openai_clip-vit-base-patch32.ggmlv0.f16.bin' - please wait....................................................clip_model_load: model size = 288.93 MB / num tensors = 397
clip_model_load: model loaded

search results:
distance path
0.674587 /home/xxxx/tests/red_apple.jpg
0.785591 /home/xxxx/tests/white.jpg
```

Note: a lower score is better — the score is a distance between embeddings, not a similarity.

167 changes: 167 additions & 0 deletions examples/image-search/build.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#include "clip.h"
#include "usearch/index.hpp"

#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fstream>
#include <string>
#include <string_view>
#include <vector>

// Command-line options for the index-building tool.
struct my_app_params {
    int32_t n_threads {4};                               // number of threads used for inference
    std::string model {"../models/ggml-model-f16.bin"};  // path to the CLIP model file
    int32_t verbose {1};                                 // verbosity level: 0 = minimum, 2 = maximum
    std::vector<std::string> image_directories;          // directories to scan recursively for images
};

// Prints usage/help text to stdout. Defaults are read from `params`.
// (argc is unused; the signature mirrors main for convenience.)
void my_print_help(int argc, char **argv, my_app_params &params) {
    (void)argc; // intentionally unused
    printf("Usage: %s [options] dir/with/pictures [more/dirs]\n", argv[0]);
    // fix: "Options:" previously lacked a trailing '\n', so the first option
    // was printed on the same line as the "Options:" label
    printf("\nOptions:\n");
    printf(" -h, --help: Show this message and exit\n");
    printf(" -m <path>, --model <path>: path to model. Default: %s\n", params.model.c_str());
    printf(" -t N, --threads N: Number of threads to use for inference. Default: %d\n", params.n_threads);
    printf(" -v <level>, --verbose <level>: Control the level of verbosity. 0 = minimum, 2 = maximum. Default: %d\n", params.verbose);
}

// returns success
bool my_app_params_parse(int argc, char **argv, my_app_params &params) {
bool invalid_param = false;
for (int i = 1; i < argc; i++) {

std::string arg = argv[i];

if (arg == "-m" || arg == "--model") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.model = argv[i];
} else if (arg == "-t" || arg == "--threads") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.n_threads = std::stoi(argv[i]);
} else if (arg == "-v" || arg == "--verbose") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.verbose = std::stoi(argv[i]);
} else if (arg == "-h" || arg == "--help") {
my_print_help(argc, argv, params);
exit(0);
} else if (arg.starts_with('-')) {
if (i != 0) {
printf("%s: unrecognized argument: %s\n", __func__, arg.c_str());
return false;
}
} else {
// assume image directory
params.image_directories.push_back(argv[i]);
}
}

return !(invalid_param || params.image_directories.empty());
}

// Returns true if `ext` (including the leading dot, e.g. ".jpg") is a
// supported image file extension. The comparison is case-insensitive, so
// mixed-case extensions such as ".Jpg" or ".Png" are accepted too — the
// original only matched all-lower or all-upper spellings.
bool is_image_file_extension(std::string_view ext) {
    std::string lower(ext);
    std::transform(lower.begin(), lower.end(), lower.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    // TODO(green-sky): determine if we should add more formats from stbi. tga/hdr/pnm seem kinda niche.
    return lower == ".jpg" || lower == ".jpeg" || lower == ".gif" || lower == ".png";
}

// Builds the image-search database: recursively scans the given directories,
// encodes every supported image with the CLIP vision model, and writes
// - images.usearch: the usearch vector index of image embeddings
// - images.paths:   the model path followed by one canonical image path per line
int main(int argc, char** argv) {
    my_app_params params;
    if (!my_app_params_parse(argc, argv, params)) {
        my_print_help(argc, argv, params);
        return 1;
    }

    // load the CLIP model; verbosity is forwarded so the loader can print progress
    auto clip_ctx = clip_model_load(params.model.c_str(), params.verbose);
    if (!clip_ctx) {
        printf("%s: Unable to load model from %s\n", __func__, params.model.c_str());
        return 1;
    }

    // parallel structures: index key N in embd_index corresponds to
    // image_file_index[N] (insertions below happen in lock-step)
    std::vector<std::string> image_file_index;
    unum::usearch::index_gt<unum::usearch::cos_gt<float>> embd_index; // cosine-distance index

    // embedding dimensionality comes from the loaded model's projection layer
    const size_t vec_dim = clip_ctx->vision_model.hparams.projection_dim;

    size_t label = 0; // next key to insert into the vector index

    std::vector<float> vec(vec_dim); // reusable buffer for one image embedding

    // search for images in path and write embedding to database
    for (const auto& base_dir : params.image_directories) {
        fprintf(stdout, "%s: starting base dir scan of '%s'\n", __func__, base_dir.c_str());

        for (auto const& dir_entry : std::filesystem::recursive_directory_iterator(base_dir)) {
            if (!dir_entry.is_regular_file()) {
                continue;
            }

            // check for image file (by extension only; contents are not sniffed)
            const auto& ext = dir_entry.path().extension();
            if (ext.empty()) {
                continue;
            }
            if (!is_image_file_extension(ext.c_str())) {
                continue;
            }

            std::string img_path {dir_entry.path()};
            if (params.verbose >= 1) {
                fprintf(stdout, "%s: found image file '%s'\n", __func__, img_path.c_str());
            }

            // load -> preprocess -> encode; a failure skips this file but
            // continues the scan
            clip_image_u8 img0;
            if (!clip_image_load_from_file(img_path, img0)) {
                fprintf(stderr, "%s: failed to load image from '%s'\n", __func__, img_path.c_str());
                continue;
            }

            clip_image_f32 img_res;
            clip_image_preprocess(clip_ctx, &img0, &img_res);

            if (!clip_image_encode(clip_ctx, params.n_threads, img_res, vec.data())) {
                fprintf(stderr, "%s: failed to encode image from '%s'\n", __func__, img_path.c_str());
                continue;
            }

            // grow the index in chunks of 32 when full — presumably
            // usearch's add() does not grow capacity on its own; verify
            // against the usearch API docs
            if (embd_index.capacity() == embd_index.size()) {
                embd_index.reserve(embd_index.size() + 32);
            }

            // add the image to the database
            embd_index.add(label++, {vec.data(), vec.size()});
            image_file_index.push_back(std::filesystem::canonical(dir_entry.path()));
        }
    }

    clip_free(clip_ctx);

    // save to disk

    embd_index.save("images.usearch");

    std::ofstream image_file_index_file("images.paths", std::ios::binary | std::ios::trunc);
    // first line is model
    image_file_index_file << params.model << "\n";
    // then one canonical image path per line, in label order
    for (const auto& i_path : image_file_index) {
        image_file_index_file << i_path << "\n";
    }

    return 0;
}

Loading