Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Image search example #5

Merged
merged 12 commits into from
Jul 8, 2023
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
build/
.vscode
.cache/
*.swp
models/*.bin
12 changes: 10 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,21 @@ endif()

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(CLIP_STANDALONE ON)
else()
set(CLIP_STANDALONE OFF)
endif()

#
# Option list
#

# general
option(CLIP_STATIC "CLIP: static link libraries" OFF)
option(CLIP_NO_EXAMPLES "CLIP: do not build examples" OFF)
option(CLIP_NO_TESTS "CLIP: do not build tests" OFF)
option(CLIP_BUILD_TEST "CLIP: build tests" ${CLIP_STANDALONE})
option(CLIP_BUILD_EXAMPLES "CLIP: build examples" ${CLIP_STANDALONE})
option(CLIP_BUILD_IMAGE_SEARCH "CLIP: build image-search" OFF)
option(CLIP_NATIVE "CLIP: enable -march=native flag" ON)
option(CLIP_LTO "CLIP: enable link time optimization" OFF)

Expand Down Expand Up @@ -48,6 +55,7 @@ endif()
option(CLIP_ACCELERATE "CLIP: enable Accelerate framework" ON)
option(CLIP_OPENBLAS "CLIP: use OpenBLAS" OFF)


#
# Compile flags
#
Expand Down
5 changes: 4 additions & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@

add_library(common-clip STATIC common-clip.cpp)
target_link_libraries(common-clip PRIVATE ggml)
target_include_directories(common-clip PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

if (CLIP_BUILD_IMAGE_SEARCH)
add_subdirectory(./image-search)
endif()

add_executable(main main.cpp)
target_link_libraries(main PRIVATE clip common-clip ggml)

Expand Down
25 changes: 25 additions & 0 deletions examples/image-search/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
project(image-search)

# Fix: the variable CMake actually reads is CMAKE_CXX_STANDARD_REQUIRED;
# plain CXX_STANDARD_REQUIRED is not consulted anywhere, so the original
# line had no effect.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Fetch the usearch vector-search library at configure time.
include(FetchContent)
FetchContent_Declare(usearch
    GIT_REPOSITORY https://github.com/unum-cloud/usearch.git
    GIT_TAG v0.19.3
)
FetchContent_MakeAvailable(usearch)

# Indexer: scans image directories and writes images.usearch / images.paths.
add_executable(image-search-build
    build.cpp
)

target_link_libraries(image-search-build PRIVATE clip ggml usearch)
target_compile_features(image-search-build PUBLIC cxx_std_17)

# Query tool: searches a pre-built index by text.
add_executable(image-search
    search.cpp
)

target_link_libraries(image-search PRIVATE clip ggml usearch)
target_compile_features(image-search PUBLIC cxx_std_11)

56 changes: 56 additions & 0 deletions examples/image-search/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Image search

This example implements basic semantic image search using [usearch](https://github.com/unum-cloud/usearch) as a vector database for accelerated similarity search.

Use `image-search-build` to build the database of images and their embeddings beforehand. Currently it does not support updating.

Use `image-search` to search for indexed images by semantic similarity.

### examples

#### build db

help:
```sh
./image-search-build -h
Usage: ./image-search-build [options] dir/with/pictures [more/dirs]

Options: -h, --help: Show this message and exit
-m <path>, --model <path>: path to model. Default: ../models/ggml-model-f16.bin
-t N, --threads N: Number of threads to use for inference. Default: 4
-v <level>, --verbose <level>: Control the level of verbosity. 0 = minimum, 2 = maximum. Default: 1
```

creating db for `tests/`:
```sh
./image-search-build -m models/openai_clip-vit-base-patch32.ggmlv0.f16.bin ./tests/
```

#### search by text

help:
```sh
./image-search -h
Usage: ./image-search [options] <search string>

Options: -h, --help: Show this message and exit
-m <path>, --model <path>: overwrite path to model. Read from images.paths by default.
-t N, --threads N: Number of threads to use for inference. Default: 4
-v <level>, --verbose <level>: Control the level of verbosity. 0 = minimum, 2 = maximum. Default: 1
-n N, --results N: Number of results to display. Default: 5
```

searching for `apple` in the db in the current directory:
```sh
./image-search apple
clip_model_load: loading model from 'models/openai_clip-vit-base-patch32.ggmlv0.f16.bin' - please wait....................................................clip_model_load: model size = 288.93 MB / num tensors = 397
clip_model_load: model loaded

search results:
distance path
0.674587 /home/xxxx/tests/red_apple.jpg
0.785591 /home/xxxx/tests/white.jpg
```

Note: a lower score is better — the score is a distance between embeddings, not a similarity.

167 changes: 167 additions & 0 deletions examples/image-search/build.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#include "clip.h"
#include "usearch/index.hpp"

#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fstream>
#include <string>
#include <string_view>
#include <vector>

// Command-line options for the index-building tool.
struct my_app_params {
    int32_t n_threads {4};                               // number of threads used for inference
    std::string model {"../models/ggml-model-f16.bin"};  // path to the CLIP model file
    int32_t verbose {1};                                 // verbosity level: 0 = minimum, 2 = maximum
    std::vector<std::string> image_directories;          // directories to scan recursively for images
};

// Prints usage/help text to stdout. Defaults are read from `params`.
// (argc is unused; the signature mirrors main for convenience.)
void my_print_help(int argc, char **argv, my_app_params &params) {
    (void)argc; // intentionally unused
    printf("Usage: %s [options] dir/with/pictures [more/dirs]\n", argv[0]);
    // fix: "Options:" previously lacked a trailing '\n', so the first option
    // was printed on the same line as the "Options:" label
    printf("\nOptions:\n");
    printf(" -h, --help: Show this message and exit\n");
    printf(" -m <path>, --model <path>: path to model. Default: %s\n", params.model.c_str());
    printf(" -t N, --threads N: Number of threads to use for inference. Default: %d\n", params.n_threads);
    printf(" -v <level>, --verbose <level>: Control the level of verbosity. 0 = minimum, 2 = maximum. Default: %d\n", params.verbose);
}

// returns success
bool my_app_params_parse(int argc, char **argv, my_app_params &params) {
bool invalid_param = false;
for (int i = 1; i < argc; i++) {

std::string arg = argv[i];

if (arg == "-m" || arg == "--model") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.model = argv[i];
} else if (arg == "-t" || arg == "--threads") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.n_threads = std::stoi(argv[i]);
} else if (arg == "-v" || arg == "--verbose") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.verbose = std::stoi(argv[i]);
} else if (arg == "-h" || arg == "--help") {
my_print_help(argc, argv, params);
exit(0);
} else if (arg.starts_with('-')) {
if (i != 0) {
printf("%s: unrecognized argument: %s\n", __func__, arg.c_str());
return false;
}
} else {
// assume image directory
params.image_directories.push_back(argv[i]);
}
}

return !(invalid_param || params.image_directories.empty());
}

// Returns true if `ext` (including the leading dot, e.g. ".jpg") is a
// supported image file extension. The comparison is case-insensitive, so
// mixed-case extensions such as ".Jpg" or ".Png" are accepted too — the
// original only matched all-lower or all-upper spellings.
bool is_image_file_extension(std::string_view ext) {
    std::string lower(ext);
    std::transform(lower.begin(), lower.end(), lower.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    // TODO(green-sky): determine if we should add more formats from stbi. tga/hdr/pnm seem kinda niche.
    return lower == ".jpg" || lower == ".jpeg" || lower == ".gif" || lower == ".png";
}

// Builds the image-search database: recursively scans the given directories,
// encodes every supported image with the CLIP vision model, and writes
// - images.usearch: the usearch vector index of image embeddings
// - images.paths:   the model path followed by one canonical image path per line
int main(int argc, char** argv) {
    my_app_params params;
    if (!my_app_params_parse(argc, argv, params)) {
        my_print_help(argc, argv, params);
        return 1;
    }

    // load the CLIP model; verbosity is forwarded so the loader can print progress
    auto clip_ctx = clip_model_load(params.model.c_str(), params.verbose);
    if (!clip_ctx) {
        printf("%s: Unable to load model from %s\n", __func__, params.model.c_str());
        return 1;
    }

    // parallel structures: index key N in embd_index corresponds to
    // image_file_index[N] (insertions below happen in lock-step)
    std::vector<std::string> image_file_index;
    unum::usearch::index_gt<unum::usearch::cos_gt<float>> embd_index; // cosine-distance index

    // embedding dimensionality comes from the loaded model's projection layer
    const size_t vec_dim = clip_ctx->vision_model.hparams.projection_dim;

    size_t label = 0; // next key to insert into the vector index

    std::vector<float> vec(vec_dim); // reusable buffer for one image embedding

    // search for images in path and write embedding to database
    for (const auto& base_dir : params.image_directories) {
        fprintf(stdout, "%s: starting base dir scan of '%s'\n", __func__, base_dir.c_str());

        for (auto const& dir_entry : std::filesystem::recursive_directory_iterator(base_dir)) {
            if (!dir_entry.is_regular_file()) {
                continue;
            }

            // check for image file (by extension only; contents are not sniffed)
            const auto& ext = dir_entry.path().extension();
            if (ext.empty()) {
                continue;
            }
            if (!is_image_file_extension(ext.c_str())) {
                continue;
            }

            std::string img_path {dir_entry.path()};
            if (params.verbose >= 1) {
                fprintf(stdout, "%s: found image file '%s'\n", __func__, img_path.c_str());
            }

            // load -> preprocess -> encode; a failure skips this file but
            // continues the scan
            clip_image_u8 img0;
            if (!clip_image_load_from_file(img_path, img0)) {
                fprintf(stderr, "%s: failed to load image from '%s'\n", __func__, img_path.c_str());
                continue;
            }

            clip_image_f32 img_res;
            clip_image_preprocess(clip_ctx, &img0, &img_res);

            if (!clip_image_encode(clip_ctx, params.n_threads, img_res, vec.data())) {
                fprintf(stderr, "%s: failed to encode image from '%s'\n", __func__, img_path.c_str());
                continue;
            }

            // grow the index in chunks of 32 when full — presumably
            // usearch's add() does not grow capacity on its own; verify
            // against the usearch API docs
            if (embd_index.capacity() == embd_index.size()) {
                embd_index.reserve(embd_index.size() + 32);
            }

            // add the image to the database
            embd_index.add(label++, {vec.data(), vec.size()});
            image_file_index.push_back(std::filesystem::canonical(dir_entry.path()));
        }
    }

    clip_free(clip_ctx);

    // save to disk

    embd_index.save("images.usearch");

    std::ofstream image_file_index_file("images.paths", std::ios::binary | std::ios::trunc);
    // first line is model
    image_file_index_file << params.model << "\n";
    // then one canonical image path per line, in label order
    for (const auto& i_path : image_file_index) {
        image_file_index_file << i_path << "\n";
    }

    return 0;
}

Loading