diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 79fded4..4f27edf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,13 +5,13 @@ Unoptimized: ```sh -cargo build +cargo build --release -p netsaur ``` Optimized: ```sh -cargo build --release +deno run build:cpu ``` ## Building `backends/wasm` @@ -19,11 +19,33 @@ cargo build --release Unoptimized: ```sh -deno run -A https://deno.land/x/wasmbuild@0.11.0/main.ts --out src/backends/wasm/lib --debug +deno -Ar jsr:@deno/wasmbuild@0.17.2 -p netsaur --out src/backends/wasm/lib --debug ``` Optimized: ```sh -deno run -A https://deno.land/x/wasmbuild@0.11.0/main.ts --out src/backends/wasm/lib +deno run build:wasm +``` + +## Building `tokenizers` + +Unoptimized: + +```sh +deno -Ar jsr:@deno/wasmbuild@0.17.2 -p netsaur-tokenizers --out tokenizers/lib --debug +``` + +Optimized: + +```sh +deno run build:tokenizers +``` + +## Building everything + +Optimized: + +```sh +deno run build ``` diff --git a/Cargo.lock b/Cargo.lock index 042786f..3abdc93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,9 +91,9 @@ checksum = "dc4cab390f4a32340211f015292a4551742a63e528e9ade9e0bde0d1a989d2a1" [[package]] name = "darling" -version = "0.14.4" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ "darling_core", "darling_macro", @@ -101,58 +101,58 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.4" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 1.0.109", + "syn", ] [[package]] name = "darling_macro" -version = "0.14.4" +version = "0.20.10" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 1.0.109", + "syn", ] [[package]] name = "derive_builder" -version = "0.12.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" +checksum = "cd33f37ee6a119146a1781d3356a7c26028f83d779b2e04ecd45fdc75c76877b" dependencies = [ "derive_builder_macro", ] [[package]] name = "derive_builder_core" -version = "0.12.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" +checksum = "7431fa049613920234f22c47fdc33e6cf3ee83067091ea4277a3f8c4587aae38" dependencies = [ "darling", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] name = "derive_builder_macro" -version = "0.12.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" +checksum = "4abae7035bf79b9877b779505d8cf3749285b80c43941eda66604841889451dc" dependencies = [ "derive_builder_core", - "syn 1.0.109", + "syn", ] [[package]] @@ -169,12 +169,13 @@ checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" [[package]] name = "fancy-regex" -version = "0.11.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ "bit-set", - "regex", + "regex-automata", + "regex-syntax", ] [[package]] @@ -211,6 +212,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" @@ -278,9 +288,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memoffset" @@ -299,9 +309,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "monostate" -version = "0.1.9" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f370ae88093ec6b11a710dec51321a61d420fafd1bad6e30d01bd9c920e8ee" +checksum = "0d208407d7552cd041d8cdb69a1bc3303e029c598738177a3d87082004dc0e1e" dependencies = [ "monostate-impl", "serde", @@ -309,13 +319,13 @@ dependencies = [ [[package]] name = "monostate-impl" -version = "0.1.9" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce" +checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn", ] [[package]] @@ -344,7 +354,7 @@ dependencies = [ [[package]] name = "netsaur" -version = "0.3.1-patch" +version = "0.4.0" dependencies = [ "getrandom", "js-sys", @@ -358,7 +368,7 @@ dependencies = [ [[package]] name = "netsaur-gpu" -version = "0.3.1-patch" +version = "0.4.0" dependencies = [ "cudarc", "ndarray", @@ -371,7 +381,7 @@ dependencies = [ [[package]] name = "netsaur-tokenizers" -version = "0.3.1-patch" +version = "0.4.0" dependencies = [ "getrandom", "js-sys", @@ -443,18 +453,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 
[[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.32" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -507,9 +517,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -522,15 +532,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" dependencies = [ "either", - "itertools", + "itertools 0.11.0", "rayon", ] [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -538,9 +548,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.3" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" 
dependencies = [ "aho-corasick", "memchr", @@ -550,9 +560,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -561,9 +571,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "ryu" @@ -615,7 +625,7 @@ checksum = "be02f6cb0cd3a5ec20bbcfbcbd749f57daddb1a0882dc2e46a6c236c90b977ed" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn", ] [[package]] @@ -649,26 +659,15 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "syn" -version = "1.0.109" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.28" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -692,21 +691,21 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.28", + "syn", ] [[package]] name = "tokenizers" -version = "0.14.1" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9be88c795d8b9f9c4002b3a8f26a6d0876103a6f523b32ea3bac52d8560c17c" +checksum = "c8a24d7f7d6be5b9d1377418b893ab1808af0074f5d1bb2c64784452ddd2aa70" dependencies = [ "aho-corasick", "derive_builder", "esaxx-rs", "fancy-regex", "getrandom", - "itertools", + "itertools 0.12.1", "lazy_static", "log", "macro_rules_attribute", @@ -743,9 +742,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode_categories" @@ -780,7 +779,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.28", + "syn", "wasm-bindgen-shared", ] @@ -802,7 +801,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index c1f3264..657fd35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,4 @@ [workspace] - -members = [ - "crates/*" -] \ No newline at end of file +package.version = "0.4.0" +members = ["crates/*"] +resolver = "2" diff --git a/README.md b/README.md index 5be11fe..17884d4 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,15 @@ ## Powerful Machine Learning library for Deno +## Installation + +There is no installation step required. You can simply import the library and +you're good to go :) + ## Features - Lightweight and easy-to-use neural network library for - [Deno](https://deno.land). + [Deno](https://deno.com). - Blazingly fast and efficient. 
- Provides a simple API for creating and training neural networks. - Can run on both the CPU and the GPU (WIP). @@ -51,6 +56,7 @@ - Dean Srebnik ([@load1n9](https://github.com/load1n9)) - CarrotzRule ([@carrotzrule123](https://github.com/CarrotzRule123)) +- Pranev ([@retraigo](https://github.com/retraigo)) ### QuickStart @@ -66,9 +72,8 @@ import { Sequential, setupBackend, SigmoidLayer, - tensor1D, tensor2D, -} from "https://deno.land/x/netsaur/mod.ts"; +} from "jsr:@denosaurs/netsaur"; /** * Setup the CPU backend. This backend is fast but doesn't work on the Edge. @@ -111,8 +116,6 @@ const net = new Sequential({ cost: Cost.MSE, }); -const time = performance.now(); - /** * Train the network on the given data. */ @@ -134,8 +137,6 @@ net.train( 10000, ); -console.log(`training time: ${performance.now() - time}ms`); - /** * Predict the output of the XOR function for the given inputs. */ @@ -167,7 +168,7 @@ import { tensor1D, tensor2D, WASM, -} from "https://deno.land/x/netsaur/mod.ts"; +} from "jsr:@denosaurs/netsaur"; /** * Setup the WASM backend. This backend is slower than the CPU backend but works on the Edge. @@ -210,8 +211,6 @@ const net = new Sequential({ cost: Cost.MSE, }); -const time = performance.now(); - /** * Train the network on the given data. */ @@ -233,8 +232,6 @@ net.train( 10000, ); -console.log(`training time: ${performance.now() - time}ms`); - /** * Predict the output of the XOR function for the given inputs. */ @@ -254,7 +251,7 @@ console.log(`1 xor 1 = ${out4[0]} (should be close to 0)`); ### Documentation The full documentation for Netsaur can be found -[here](https://deno.land/x/netsaur@0.3.1-patch/mod.ts). +[here](https://deno.land/x/netsaur/mod.ts). 
### License diff --git a/crates/core-gpu/Cargo.toml b/crates/core-gpu/Cargo.toml index db058c3..028a8c3 100644 --- a/crates/core-gpu/Cargo.toml +++ b/crates/core-gpu/Cargo.toml @@ -1,7 +1,7 @@ [package] edition = "2021" name = "netsaur-gpu" -version = "0.3.1-patch" +version = { workspace = true } [lib] crate-type = ["cdylib"] diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 9da1efe..e5f7cc1 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,7 +1,7 @@ [package] edition = "2021" name = "netsaur" -version = "0.3.1-patch" +version = { workspace = true } [lib] crate-type = ["cdylib"] @@ -9,7 +9,7 @@ crate-type = ["cdylib"] [dependencies] ndarray = "0.15.6" ndarray-rand = "0.14.0" -serde = {version = "1.0", features = ["derive"]} +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" safetensors = "0.4.0" diff --git a/crates/tokenizers/Cargo.toml b/crates/tokenizers/Cargo.toml index 1f8a351..2538ca6 100644 --- a/crates/tokenizers/Cargo.toml +++ b/crates/tokenizers/Cargo.toml @@ -1,7 +1,7 @@ [package] edition = "2021" name = "netsaur-tokenizers" -version = "0.3.1-patch" +version = { workspace = true } [lib] crate-type = ["cdylib"] @@ -12,7 +12,7 @@ ndarray-rand = "0.14.0" serde = {version = "1.0", features = ["derive"]} serde_json = "1.0" serde-wasm-bindgen = "0.6.0" -tokenizers = { version="0.14.1", default-features=false, features = ["unstable_wasm"]} +tokenizers = { version="0.20.0", default-features=false, features = ["unstable_wasm"]} wasm-bindgen = "0.2.92" getrandom = { version = "0.2", features = ["js"] } js-sys = "0.3.69" diff --git a/data/deps.ts b/data/deps.ts deleted file mode 100644 index b236b66..0000000 --- a/data/deps.ts +++ /dev/null @@ -1 +0,0 @@ -export { CsvParseStream } from "jsr:@std/csv@0.214.0"; diff --git a/deno.json b/deno.json deleted file mode 100644 index 69f7422..0000000 --- a/deno.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "@denosaurs/netsaur", - "version": "0.3.2-patch3", - 
"exports": { - ".": "./mod.ts", - "./web": "./web.ts", - "./tokenizers": "./tokenizers/mod.ts", - "./visualizer": "./visualizer/mod.ts", - "./data": "./data/mod.ts" - }, - "tasks": { - "example:xor": "deno run -A --unstable-ffi ./examples/xor_auto.ts", - "example:xor-option": "deno run -A --unstable-ffi ./examples/xor_option.ts", - "example:xor-cpu": "deno run -A --unstable-ffi ./examples/xor_cpu.ts", - "example:xor-gpu": "deno run -A --unstable-ffi ./examples/xor_gpu.ts", - "example:xor-wasm": "deno run -A ./examples/xor_wasm.ts", - "example:autoencoder": "deno run -A --unstable-ffi ./examples/autoencoders/example.ts", - "example:linear": "deno run -A --unstable-ffi ./examples/linear.ts", - "example:multiple-linear": "deno run -A --unstable-ffi ./examples/multiple-linear/student.ts", - "example:binary": "deno run -A --unstable-ffi ./examples/classification/binary_iris.ts", - "example:multiclass": "deno run -A --unstable-ffi ./examples/classification/iris.ts", - "example:text": "deno run -A --unstable-ffi ./examples/classification/spam.ts", - "example:filters": "deno run -A --unstable-ffi examples/filters/conv.ts ", - "example:train": "deno run -A --unstable-ffi examples/model/train.ts ", - "example:run": "deno run -A --unstable-ffi examples/model/run.ts ", - "example:mnist-download": "deno run -A --unstable-ffi examples/mnist/download.ts ", - "example:mnist-train": "deno run -A --unstable-ffi examples/mnist/train.ts ", - "example:mnist-predict": "deno run -A --unstable-ffi examples/mnist/predict.ts ", - "example:tokenizers-basic": "deno run -A examples/tokenizers/basic.ts", - "build": "deno task build:cpu && deno task build:wasm && deno task build:tokenizers", - "build:cpu": "cargo build --release -p netsaur", - "build:gpu": "cargo build --release -p netsaur-gpu", - "build:wasm": "deno run -Ar jsr:@deno/wasmbuild@0.17.1 -p netsaur --out src/backends/wasm/lib", - "build:tokenizers": "deno run -Ar jsr:@deno/wasmbuild@0.17.1 -p netsaur-tokenizers --out 
tokenizers/lib" - } -} diff --git a/deno.jsonc b/deno.jsonc new file mode 100644 index 0000000..76e22be --- /dev/null +++ b/deno.jsonc @@ -0,0 +1,56 @@ +{ + "name": "@denosaurs/netsaur", + "version": "0.3.2-patch3", + "exports": { + ".": "./mod.ts", + "./web": "./web.ts", + // Data + "./data": "./packages/data/mod.ts", + // Core + "./core/engine": "./packages/core/src/core/engine.ts", + "./core": "./packages/core/src/core/mod.ts", + "./core/types": "./packages/core/src/core/types.ts", + "./core/tensor": "./packages/core/src/core/tensor/tensor.ts", + "./core/layers": "./packages/core/src/core/api/layers.ts", + "./core/shape": "./packages/core/src/core/api/shape.ts", + "./core/models": "./packages/core/src/core/api/network.ts", + "./core/optimizers": "./packages/core/src/core/api/optimizer.ts", + "./core/schedulers": "./packages/core/src/core/api/scheduler.ts", + // Utilities + "./utilities": "./packages/utilities/src/mod.ts", + "./utilities/text": "./packages/utilities/src/text/mod.ts", + "./utilities/image": "./packages/utilities/src/image/mod.ts", + "./utilities/metrics": "./packages/utilities/src/metrics/mod.ts", + "./utilities/encoding": "./packages/utilities/src/encoding/mod.ts", + "./utilities/misc": "./packages/utilities/src/utils/mod.ts", + // Tokenizers + "./tokenizers": "./packages/tokenizers/mod.ts", + // Visualizer + "./visualizer": "./packages/visualizer/mod.ts" + }, + "tasks": { + "example:xor": "deno -A ./examples/xor_auto.ts", + "example:xor-option": "deno -A ./examples/xor_option.ts", + "example:xor-cpu": "deno -A ./examples/xor_cpu.ts", + "example:xor-gpu": "deno -A ./examples/xor_gpu.ts", + "example:xor-wasm": "deno -A ./examples/xor_wasm.ts", + "example:autoencoder": "deno -A ./examples/autoencoders/example.ts", + "example:linear": "deno -A ./examples/linear.ts", + "example:multiple-linear": "deno -A ./examples/multiple-linear/student.ts", + "example:binary": "deno -A ./examples/classification/binary_iris.ts", + "example:multiclass": "deno -A 
./examples/classification/iris.ts", + "example:text": "deno -A ./examples/classification/spam.ts", + "example:filters": "deno -A examples/filters/conv.ts ", + "example:train": "deno -A examples/model/train.ts ", + "example:run": "deno -A examples/model/run.ts ", + "example:mnist-download": "deno -A examples/mnist/download.ts", + "example:mnist-train": "deno -A examples/mnist/train.ts", + "example:mnist-predict": "deno -A examples/mnist/predict.ts", + "example:tokenizers-basic": "deno -A examples/tokenizers/basic.ts", + "build": "deno run build:cpu && deno run build:wasm && deno run build:tokenizers", + "build:cpu": "cargo build --release -p netsaur", + "build:gpu": "cargo build --release -p netsaur-gpu", + "build:wasm": "deno -Ar jsr:@deno/wasmbuild@0.17.2 -p netsaur --out src/backends/wasm/lib", + "build:tokenizers": "deno -Ar jsr:@deno/wasmbuild@0.17.2 -p netsaur-tokenizers --out tokenizers/lib" + } +} \ No newline at end of file diff --git a/deno.lock b/deno.lock index f493492..ce3223b 100644 --- a/deno.lock +++ b/deno.lock @@ -1,156 +1,65 @@ { - "version": "3", - "packages": { - "specifiers": { - "jsr:@deno/wasmbuild@0.17.1": "jsr:@deno/wasmbuild@0.17.1", - "jsr:@denosaurs/plug@1.0.3": "jsr:@denosaurs/plug@1.0.3", - "jsr:@std/assert@^0.213.1": "jsr:@std/assert@0.213.1", - "jsr:@std/encoding@0.213.1": "jsr:@std/encoding@0.213.1", - "jsr:@std/fmt@0.213.1": "jsr:@std/fmt@0.213.1", - "jsr:@std/fs@0.213.1": "jsr:@std/fs@0.213.1", - "jsr:@std/path@0.213.1": "jsr:@std/path@0.213.1", - "jsr:@std/path@^0.213.1": "jsr:@std/path@0.213.1" + "version": "4", + "specifiers": { + "jsr:@denosaurs/plug@1.0.3": "1.0.3", + "jsr:@std/assert@~0.213.1": "0.213.1", + "jsr:@std/csv@1.0.3": "1.0.3", + "jsr:@std/encoding@0.213.1": "0.213.1", + "jsr:@std/fmt@0.213.1": "0.213.1", + "jsr:@std/fs@0.213.1": "0.213.1", + "jsr:@std/path@0.213.1": "0.213.1", + "jsr:@std/path@~0.213.1": "0.213.1" + }, + "jsr": { + "@denosaurs/plug@1.0.3": { + "integrity": 
"b010544e386bea0ff3a1d05e0c88f704ea28cbd4d753439c2f1ee021a85d4640", + "dependencies": [ + "jsr:@std/encoding", + "jsr:@std/fmt", + "jsr:@std/fs", + "jsr:@std/path@0.213.1" + ] + }, + "@std/assert@0.213.1": { + "integrity": "24c28178b30c8e0782c18e8e94ea72b16282207569cdd10ffb9d1d26f2edebfe" + }, + "@std/csv@1.0.3": { + "integrity": "623acf0dcb88d62ba727c3611ad005df7f109ede8cac833e3986f540744562e5" + }, + "@std/encoding@0.213.1": { + "integrity": "fcbb6928713dde941a18ca5db88ca1544d0755ec8fb20fe61e2dc8144b390c62" }, - "jsr": { - "@deno/wasmbuild@0.17.1": { - "integrity": "420fc1f1130fd3a6afa60e5ba90b8710b46e380773b1b0eb368fd2688853c248" - }, - "@denosaurs/plug@1.0.3": { - "integrity": "b010544e386bea0ff3a1d05e0c88f704ea28cbd4d753439c2f1ee021a85d4640", - "dependencies": [ - "jsr:@std/encoding@0.213.1", - "jsr:@std/fmt@0.213.1", - "jsr:@std/fs@0.213.1", - "jsr:@std/path@0.213.1" - ] - }, - "@std/assert@0.213.1": { - "integrity": "24c28178b30c8e0782c18e8e94ea72b16282207569cdd10ffb9d1d26f2edebfe" - }, - "@std/encoding@0.213.1": { - "integrity": "fcbb6928713dde941a18ca5db88ca1544d0755ec8fb20fe61e2dc8144b390c62" - }, - "@std/fmt@0.213.1": { - "integrity": "a06d31777566d874b9c856c10244ac3e6b660bdec4c82506cd46be052a1082c3" - }, - "@std/fs@0.213.1": { - "integrity": "fbcaf099f8a85c27ab0712b666262cda8fe6d02e9937bf9313ecaea39a22c501", - "dependencies": [ - "jsr:@std/assert@^0.213.1", - "jsr:@std/path@^0.213.1" - ] - }, - "@std/path@0.213.1": { - "integrity": "f187bf278a172752e02fcbacf6bd78a335ed320d080a7ed3a5a59c3e88abc673", - "dependencies": [ - "jsr:@std/assert@^0.213.1" - ] - } + "@std/fmt@0.213.1": { + "integrity": "a06d31777566d874b9c856c10244ac3e6b660bdec4c82506cd46be052a1082c3" + }, + "@std/fs@0.213.1": { + "integrity": "fbcaf099f8a85c27ab0712b666262cda8fe6d02e9937bf9313ecaea39a22c501", + "dependencies": [ + "jsr:@std/assert", + "jsr:@std/path@~0.213.1" + ] + }, + "@std/path@0.213.1": { + "integrity": "f187bf278a172752e02fcbacf6bd78a335ed320d080a7ed3a5a59c3e88abc673", + 
"dependencies": [ + "jsr:@std/assert" + ] } }, - "redirects": { - "https://deno.land/x/netsaur/mod.ts": "https://deno.land/x/netsaur@0.2.8/mod.ts" - }, "remote": { - "https://deno.land/std@0.184.0/_util/asserts.ts": "178dfc49a464aee693a7e285567b3d0b555dc805ff490505a8aae34f9cfb1462", - "https://deno.land/std@0.184.0/_util/os.ts": "d932f56d41e4f6a6093d56044e29ce637f8dcc43c5a90af43504a889cf1775e3", - "https://deno.land/std@0.184.0/encoding/hex.ts": "b4b1a7cb678745b0bf181ed8cf2498c7be00d121a7de244b752fbf9c7d9c48cd", - "https://deno.land/std@0.184.0/fmt/colors.ts": "d67e3cd9f472535241a8e410d33423980bec45047e343577554d3356e1f0ef4e", - "https://deno.land/std@0.184.0/fs/_util.ts": "579038bebc3bd35c43a6a7766f7d91fbacdf44bc03468e9d3134297bb99ed4f9", - "https://deno.land/std@0.184.0/fs/copy.ts": "14214efd94fc3aa6db1e4af2b4b9578e50f7362b7f3725d5a14ad259a5df26c8", - "https://deno.land/std@0.184.0/fs/empty_dir.ts": "c3d2da4c7352fab1cf144a1ecfef58090769e8af633678e0f3fabaef98594688", - "https://deno.land/std@0.184.0/fs/ensure_dir.ts": "dc64c4c75c64721d4e3fb681f1382f803ff3d2868f08563ff923fdd20d071c40", - "https://deno.land/std@0.184.0/fs/ensure_file.ts": "c38602670bfaf259d86ca824a94e6cb9e5eb73757fefa4ebf43a90dd017d53d9", - "https://deno.land/std@0.184.0/fs/ensure_link.ts": "c0f5b2f0ec094ed52b9128eccb1ee23362a617457aa0f699b145d4883f5b2fb4", - "https://deno.land/std@0.184.0/fs/ensure_symlink.ts": "5006ab2f458159c56d689b53b1e48d57e05eeb1eaf64e677f7f76a30bc4fdba1", - "https://deno.land/std@0.184.0/fs/eol.ts": "f1f2eb348a750c34500741987b21d65607f352cf7205f48f4319d417fff42842", - "https://deno.land/std@0.184.0/fs/exists.ts": "29c26bca8584a22876be7cb8844f1b6c8fc35e9af514576b78f5c6884d7ed02d", - "https://deno.land/std@0.184.0/fs/expand_glob.ts": "e4f56259a0a70fe23f05215b00de3ac5e6ba46646ab2a06ebbe9b010f81c972a", - "https://deno.land/std@0.184.0/fs/mod.ts": "bc3d0acd488cc7b42627044caf47d72019846d459279544e1934418955ba4898", - "https://deno.land/std@0.184.0/fs/move.ts": 
"b4f8f46730b40c32ea3c0bc8eb0fd0e8139249a698883c7b3756424cf19785c9", - "https://deno.land/std@0.184.0/fs/walk.ts": "920be35a7376db6c0b5b1caf1486fb962925e38c9825f90367f8f26b5e5d0897", - "https://deno.land/std@0.184.0/path/_constants.ts": "e49961f6f4f48039c0dfed3c3f93e963ca3d92791c9d478ac5b43183413136e0", - "https://deno.land/std@0.184.0/path/_interface.ts": "6471159dfbbc357e03882c2266d21ef9afdb1e4aa771b0545e90db58a0ba314b", - "https://deno.land/std@0.184.0/path/_util.ts": "d7abb1e0dea065f427b89156e28cdeb32b045870acdf865833ba808a73b576d0", - "https://deno.land/std@0.184.0/path/common.ts": "ee7505ab01fd22de3963b64e46cff31f40de34f9f8de1fff6a1bd2fe79380000", - "https://deno.land/std@0.184.0/path/glob.ts": "d479e0a695621c94d3fd7fe7abd4f9499caf32a8de13f25073451c6ef420a4e1", - "https://deno.land/std@0.184.0/path/mod.ts": "bf718f19a4fdd545aee1b06409ca0805bd1b68ecf876605ce632e932fe54510c", - "https://deno.land/std@0.184.0/path/posix.ts": "8b7c67ac338714b30c816079303d0285dd24af6b284f7ad63da5b27372a2c94d", - "https://deno.land/std@0.184.0/path/separator.ts": "0fb679739d0d1d7bf45b68dacfb4ec7563597a902edbaf3c59b50d5bcadd93b1", - "https://deno.land/std@0.184.0/path/win32.ts": "d186344e5583bcbf8b18af416d13d82b35a317116e6460a5a3953508c3de5bba", - "https://deno.land/std@0.188.0/_util/asserts.ts": "178dfc49a464aee693a7e285567b3d0b555dc805ff490505a8aae34f9cfb1462", - "https://deno.land/std@0.188.0/csv/_io.ts": "0f90b154e0f9c574a025f24d35d7ef617944809a66d277716243e68523a816b2", - "https://deno.land/std@0.188.0/csv/parse.ts": "96f7be0b5b5c0778bbd1c3f6f4ec689aecc406b6ce66d5059da12ca2268d2b44", - "https://deno.land/x/dir@1.5.1/data_local_dir/mod.ts": "91eb1c4bfadfbeda30171007bac6d85aadacd43224a5ed721bbe56bc64e9eb66", - "https://deno.land/x/netsaur@0.2.8/deps.ts": "ecf8d69bb639cea2aeac1c69730b5a0f5f5fd3518090449c027176cd0fdc4415", - "https://deno.land/x/netsaur@0.2.8/mod.ts": "ed16d242a2792677c47d0082b82321090eaf64e262e017e741373789bcc6c11e", - 
"https://deno.land/x/netsaur@0.2.8/src/backend_cpu/backend.ts": "5ef0911e6fcd682b891dc64173cafde76b141490c73cedcf66d9ba152cf87ec0", - "https://deno.land/x/netsaur@0.2.8/src/backend_cpu/mod.ts": "5871a2e5b7f6cf7619294d8761080f19ba5de1a6f55ef6e08b76a1b594b370f8", - "https://deno.land/x/netsaur@0.2.8/src/backend_cpu/util.ts": "fd51f7868ed9d7eddeec01559b60684f2898ff72a2abe11d6a2267f63239c6d0", - "https://deno.land/x/netsaur@0.2.8/src/backend_wasm/backend.ts": "1a8957ecb3219d0b02e7c3144ab6619a0d79ef548001838c7fa09f3cb8e767ac", - "https://deno.land/x/netsaur@0.2.8/src/backend_wasm/lib/netsaur.generated.js": "f91e0dd8528a13f6bf91a98908ece7d9192d2a4918180a448b98a2e967033e54", - "https://deno.land/x/netsaur@0.2.8/src/backend_wasm/mod.ts": "b07ff70c68ce179cb8a3ab7d28135fe7f0ecc727c0dbd34857a376cae17f84d4", - "https://deno.land/x/netsaur@0.2.8/src/backend_wasm/utils.ts": "cbbca54c7fa37f6cf71375833cec37d367fb0bc677af1cfa913e2832d7f1f475", - "https://deno.land/x/netsaur@0.2.8/src/core/api/error.ts": "b58811e114b9ffc9d5fa77ddca7a72e3b21d009efb060440de5b5ea5f630b0ef", - "https://deno.land/x/netsaur@0.2.8/src/core/api/layer.ts": "27c34b83c677ce2e92cf7d31211925b1df301dc19c0c76d04e283eb2e102cd34", - "https://deno.land/x/netsaur@0.2.8/src/core/api/layers.ts": "5120bd93eff110999da895172a9e8a4f77b49fc5aa26b5f88aa55678d300f696", - "https://deno.land/x/netsaur@0.2.8/src/core/api/network.ts": "de3ee840b0b169a394accfd28abcac699c731853198eda920315895a4c8ce538", - "https://deno.land/x/netsaur@0.2.8/src/core/api/optimizer.ts": "30d5f2f6e7a469910e6735dee1bb3e06e2c8ebaed48d6c55d4662bb998b473f9", - "https://deno.land/x/netsaur@0.2.8/src/core/api/shape.ts": "a65b381937751ab5e0016fb5c8afb023aff480f39612f77d200fea85249026d4", - "https://deno.land/x/netsaur@0.2.8/src/core/engine.ts": "d63750b7b6e9d5f4cffe45d47859ad2a8f3279c682fdb4c646a8f263df55c9ec", - "https://deno.land/x/netsaur@0.2.8/src/core/mod.ts": "a12c92154b5f5189eed295f3395fb6048412a88bd298ad4f6c14b2b7490ee534", - 
"https://deno.land/x/netsaur@0.2.8/src/core/tensor/tensor.ts": "2db0c0e5b6cdece8c50b36cc8d683c73fd38cccb101ce12375f1bf2c8859155b", - "https://deno.land/x/netsaur@0.2.8/src/core/tensor/util.ts": "d76a96380354085992731599eaec98f7cc59c2af821e03531b72296277249b34", - "https://deno.land/x/netsaur@0.2.8/src/core/types.ts": "4a2f33769cded405cfb134609cbc906202180f9d87acb3033187d59da4b23b75", - "https://deno.land/x/plug@1.0.2/deps.ts": "36846a478fafaa1d8ca18aafd91584a748aa58de47ad0f45f008881dad82f374", - "https://deno.land/x/plug@1.0.2/download.ts": "b92bc1c1ae35fdb75828847f1ebfc7e51bf462f339729740e1cffe78384e1509", - "https://deno.land/x/plug@1.0.2/mod.ts": "32e0006ed6142e7becdb4103c2aa4e1e9ef28459d7243d6cb404a028f7c4eb7e", - "https://deno.land/x/plug@1.0.2/types.ts": "0490359117c53783138f2b6692a34f85bca9237314ba8cdef8ad682d81218d21", - "https://deno.land/x/plug@1.0.2/util.ts": "ded3db6e9bb16b8003899d9073fb310e13231ca617b35d6b7dfd53f38762cc76", - "https://deno.land/x/vectorizer@v0.3.7/mod.ts": "d6b7ac161b614a7289843406ccae3f82b001ad72f4905bc75ec84a408832aac7", - "https://deno.land/x/vectorizer@v0.3.7/src/constants/stop_words.ts": "08337cc83aec6e203c042e2809af6b54c01c9f896f4518a9bc6c151846692ca7", - "https://deno.land/x/vectorizer@v0.3.7/src/encoding/categorical.ts": "3516569164f81da7353c4d79dceb6d70e23b0546362099bc50067e4cd49e410f", - "https://deno.land/x/vectorizer@v0.3.7/src/encoding/mod.ts": "0bb19870e339200ace9eeb4de9d352af379c64524326764e78577cf8b31d3d58", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/base.ts": "15d51c4c57607a006d7b55fc520a9d4cf6c4265a7bef6a413b5ebaee52a914b7", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/mod.ts": "10ae80b3362c4000099e06a94977118db542226d1be97188efa7257a119f3ca8", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/sparse/count_vectorizer.ts": "1b6ea7bba6e4272f73b7b67b5754ccca400904efa7a2ae90734352526fc8c8d4", - 
"https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/sparse/index_vectorizer.ts": "767f233cbfe6ac318d980a53a8f3c049743ec41bf0c0f6041c9047aafcd2d73a", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/sparse/mod.ts": "567e37d623f07b22977d1fca8950096cb750f161ee905e1cb7486a0662c2dc7a", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/sparse/multi_hot.ts": "48d761f1fd7170655e12c4ea83ea38a5347cfc3268d7b8a9433a757d0ce829f5", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/sparse/tf_idf_transformer.ts": "8e65e0267a2e817579773346581c01f6d5cd211f43cb9d50309dd5d32b88196f", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/conversion/text/types.ts": "b0d11ec71273dec4f47b9eed2ee36253edc141997ac9dca6cdf81d019ad0bd39", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/extraction/image/colors/common.ts": "88d81009371cd53563d6b719644c3d3b11fce18cd111129c127416af534f5dbe", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/extraction/image/colors/histogram.ts": "fc32cf9d374ce80182943fc3af036adf6d486d2b8a02c6aeca83da797d072ac3", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/extraction/image/colors/median_cut.ts": "d299a88041ea010da3f79bd627b8200cb94cfd4492da68d5279a1ce1837358d9", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/extraction/image/colors/mod.ts": "531dc0c7ad315834a9579045b356c7e7842cd9568d5f9ead3a09d0eddac8e2f3", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/extraction/image/patches/patch_2d.ts": "fa0fdf91618c6370acd5a699bee5964c85ee239c1a3cea6b9034da1dacba8972", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/image.ts": "779149bba165f2a135fa29fbfa29733540284553404877fcd7c2941fa9dfd568", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/text.ts": "13ea9d45110a93dac34fad407bee497fea1966dc2c48b60bee80a9a4c333c1a5", - "https://deno.land/x/vectorizer@v0.3.7/src/feature/types.ts": "4da6c4de0d573827f16e02a27419163916126bb0a4f50c81e5633b31ffe7b5e4", - 
"https://deno.land/x/vectorizer@v0.3.7/src/metrics/classification.ts": "1745df3247d477ccee61b9cffdde3cea39b5692a0408e576dc435eb4ab831a86", - "https://deno.land/x/vectorizer@v0.3.7/src/metrics/mod.ts": "0c851b3522c9ace99f4dc37209c8aa66ece538f40089d45a09cb94c9cd2300ec", - "https://deno.land/x/vectorizer@v0.3.7/src/metrics/regression.ts": "803164aac3346268afb76cb2cafa12932350afbbe56475d339273fd77fed6fec", - "https://deno.land/x/vectorizer@v0.3.7/src/mod.ts": "10b549cb251c641c0495309e01b80c795dd20a4cb1c59531ea162d73994264b0", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/array/mod.ts": "a95df5671fcb09123d6e8a9aaa46ff2f935ad43966f3dc4d32051da31577ed0f", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/array/range.ts": "a72d2355492889e739a1987b5a3a6c859ac71f2601eb67138b58e6983a24d9d1", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/array/split.ts": "2d57882320b7fe7257845c3921c65470fb6d2691bbf999c9a78325744ecefd6b", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/array/unique.ts": "d81d591c077ad81123ecf612445bcfc657a409b0e10a1667c184c675d7ee895a", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/common_types.ts": "1d02a8e0a9fc3f3a12ccc4072413630bf7d25a7651f01aecd067f5afc97bfdb0", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/datetime/duration.ts": "ad1704be1a47b75d581e8030a677b881c92d8f4a23348669066c01b1e478ed19", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/datetime/mod.ts": "30f93d011754ff647495d38fa38eb9790ae1c55258df9ba5ee615b96dfd30958", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/datetime/time.ts": "e67edba8d60c682ea87fd6457a6460fbbc92a61e78afa41f0536bab8c26b38ac", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/misc/get_constructor.ts": "8588ace02567b46669de92de172e8958124a05050d5ce080b066b81d8fd87a08", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/misc/image.ts": "acfd832d82ff8983abef350277f244c0f893dd2d2b7bc560893b5230f8383a2b", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/misc/matrix.ts": 
"d1294cbc93f49ba6b009c953003d353684c26df1d18cf899d44ce82a8486e922", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/misc/mod.ts": "f4641debc43d0777e527fa4f289c38574fb10c37ac0e2cf00b50f76860bde896", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/mod.ts": "adcbb62e22a87ca518971d78783b7c5b056085f1f78d01d24fbaaa25bb9b6039", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/random/mod.ts": "8b1d03fd04bad86ece5c1b8b2efa20f56231d9a8bac962cf10571d321d989116", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/random/normal.ts": "c6f0bac64e0e147f8df6f9882eeb7c0bc335bd7a00e06897c6147118333af497", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/random/rearrange.ts": "1c1571664845896663b563029f3f2572fccd38c6d92ec3fd11dc6d7ea866cb4b", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/random/rng.ts": "213dff1b1a39ae4621d35330eb19a3f96170949e8ca429e8028a1fdf86c61f22", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/random/shuffle.ts": "66f966851d5b6dd4c81117d32311742c319ea3042aad22a7874d2b10da4e743f", - "https://deno.land/x/vectorizer@v0.3.7/src/utils/random/weighted.ts": "c4eeb309477ecf431a0b9fd1bf0f04540825194c7ed30ba8c71c9e45e14112e6", - "https://deno.land/x/wasmbuild@0.15.6/loader/cache.ts": "362d03a2ceb3d3e130373102a5de6d54ffda659a0911c2fcc090c29c3555f7ef", - "https://deno.land/x/wasmbuild@0.15.6/loader/fetch.ts": "7d015203a93cbadd05034cbb6e726d35f47e1f51140d91e69aeddc5d71b06ec8" + "https://deno.land/std@0.106.0/encoding/base64.ts": "eecae390f1f1d1cae6f6c6d732ede5276bf4b9cd29b1d281678c054dc5cc009e", + "https://deno.land/x/canvas@v1.4.1/deps.ts": "e956026d98094946166e06d7b799290b732db015813870d84e04e33ab88e98f3", + "https://deno.land/x/canvas@v1.4.1/mod.ts": "a4e16972647ceafef58612a377a218372454c99d2c9da615a132694597114f80", + "https://deno.land/x/canvas@v1.4.1/src/base64.ts": "0928031fdba0c43b617154fbe2eb7578366460c04da1422933ae5e936d3d0349", + "https://deno.land/x/canvas@v1.4.1/src/canvas.ts": 
"58119999b04f68ebeed2627485c5c24c5b0c029707edde0b6568814f9049a3a8", + "https://deno.land/x/canvas@v1.4.1/src/canvaskit.ts": "c3d807472cbb3e1d9fc01bb43ff974ef796c4b010178d1595be5fa793cce5e7d", + "https://deno.land/x/canvas@v1.4.1/src/color_util.ts": "28f1072f0a5acbe7add7fac2f452311a47b44c080806fc4057de2d2e405c6c1c", + "https://deno.land/x/canvas@v1.4.1/src/lib.js": "bb21711589bfbc8997b455cdf53e3150e23289f3b44809188041b1d2fc7924fa", + "https://deno.land/x/canvas@v1.4.1/src/types.ts": "67d5800f8f4b0a407e0251676a03ae91b5f50a3ed53e6b72dc5984113cb93128", + "https://deno.land/x/canvas@v1.4.1/src/wasm.js": "449d72cc14fc4142a5853f944df49a744d852981d09c5515528ede8aebb0afda", + "https://deno.land/x/lz4@v0.1.2/mod.ts": "4decfc1a3569d03fd1813bd39128b71c8f082850fe98ecfdde20025772916582", + "https://deno.land/x/lz4@v0.1.2/wasm.js": "b9c65605327ba273f0c76a6dc596ec534d4cda0f0225d7a94ebc606782319e46", + "https://deno.land/x/pngs@0.1.1/mod.ts": "9dc8a7daed1497b94a77b68c954164a9f0b2a6f40866481bdfdbbaf015b5f764", + "https://deno.land/x/pngs@0.1.1/wasm.js": "e3d4a8f293b267c9859a2164ca7b4603869bc92fe0d5ad4f109925858bce0c4c" } } diff --git a/examples/autoencoders/example.ts b/examples/autoencoders/example.ts index ea8ddb0..352551f 100644 --- a/examples/autoencoders/example.ts +++ b/examples/autoencoders/example.ts @@ -1,24 +1,25 @@ -import { Matrix } from "https://deno.land/x/vectorizer@v0.3.6/mod.ts"; +import { Matrix } from "../../packages/utilities/mod.ts"; import { - Sequential, - setupBackend, + AdamOptimizer, + Cost, CPU, DenseLayer, - AdamOptimizer, - Shape2D, ReluLayer, + Sequential, + setupBackend, + type Shape2D, tensor, - Cost, - OneCycle -} from "../../mod.ts";; +} from "../../packages/core/mod.ts"; -import { parse } from "https://deno.land/std@0.188.0/csv/parse.ts"; +import { parse } from "jsr:@std/csv@1.0.3/parse"; -const data = parse(Deno.readTextFileSync("examples/autoencoders/winequality-red.csv")) -data.shift() +const data = parse( + 
Deno.readTextFileSync("examples/autoencoders/winequality-red.csv"), +); +data.shift(); -const x_data = data.slice(0, 20).map((fl, i) => fl.slice(0, 11).map(Number)); -const X = new Matrix<"f32">(Float32Array.from(x_data.flat()), [x_data.length]) +const x_data = data.slice(0, 20).map((fl) => fl.slice(0, 11).map(Number)); +const X = new Matrix(x_data, "f32"); await setupBackend(CPU); @@ -31,7 +32,7 @@ const net = new Sequential({ ReluLayer(), DenseLayer({ size: [4] }), ReluLayer(), - DenseLayer({size: [2]}), + DenseLayer({ size: [2] }), // Decoder DenseLayer({ size: [4] }), ReluLayer(), @@ -42,38 +43,41 @@ const net = new Sequential({ cost: Cost.MSE, patience: 50, optimizer: AdamOptimizer(), -// scheduler: OneCycle() + // scheduler: OneCycle() }); -const input = tensor(X.data, X.shape) +const input = tensor(X); -const timeStart = performance.now() -net.train([{inputs: input, outputs: input}], 10000, 1, 0.01) -console.log(`Trained in ${performance.now() - timeStart}ms`) +const timeStart = performance.now(); +net.train([{ inputs: input, outputs: input }], 10000, 1, 0.01); +console.log(`Trained in ${performance.now() - timeStart}ms`); function saveTable(name: string, data: Matrix<"f32">) { - Deno.writeTextFileSync(`examples/autoencoders/${name}.html`, data.html) + Deno.writeTextFileSync(`examples/autoencoders/${name}.html`, data.html); } -saveTable("input", X) +saveTable("input", X); -console.log("Running Whole Net") -const output = await net.predict(input) +console.log("Running Whole Net"); +const output = await net.predict(input); -const output_mat = new Matrix<"f32">(output.data, output.shape as Shape2D) +const output_mat = new Matrix<"f32">(output.data, output.shape as Shape2D); -saveTable("output", output_mat) +saveTable("output", output_mat); -console.log("Running Encoder") -const encoded = await net.predict(input, [0, 5]) +console.log("Running Encoder"); +const encoded = await net.predict(input, [0, 5]); -const encoded_mat = new Matrix<"f32">(encoded.data, 
encoded.shape as Shape2D) +const encoded_mat = new Matrix<"f32">(encoded.data, encoded.shape as Shape2D); -saveTable("encoded", encoded_mat) +saveTable("encoded", encoded_mat); -console.log("Running Decoder") -const decoded = await net.predict(tensor(encoded_mat.data, encoded_mat.shape), [5, 10]) +console.log("Running Decoder"); +const decoded = await net.predict(tensor(encoded_mat), [ + 5, + 10, +]); -const decoded_mat = new Matrix<"f32">(decoded.data, decoded.shape as Shape2D) +const decoded_mat = new Matrix<"f32">(decoded.data, decoded.shape as Shape2D); -saveTable("decoded", decoded_mat) \ No newline at end of file +saveTable("decoded", decoded_mat); diff --git a/examples/classification/binary_iris.ts b/examples/classification/binary_iris.ts index 01d647b..7378241 100644 --- a/examples/classification/binary_iris.ts +++ b/examples/classification/binary_iris.ts @@ -6,16 +6,16 @@ import { setupBackend, SigmoidLayer, tensor2D, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; -import { parse } from "https://deno.land/std@0.204.0/csv/parse.ts"; +import { parse } from "jsr:@std/csv@1.0.3/parse"; // Import helpers for metrics import { ClassificationReport, // Split the dataset useSplit, -} from "https://deno.land/x/vectorizer@v0.3.7/mod.ts"; +} from "../../packages/utilities/mod.ts"; // Define classes const classes = ["Setosa", "Versicolor"]; @@ -29,10 +29,7 @@ const x = data.map((fl) => fl.slice(0, 4).map(Number)); const y = data.map((fl) => classes.indexOf(fl[4])); // Split the dataset for training and testing -const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ - [typeof x, typeof y], - [typeof x, typeof y], -]; +const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) // Setup the CPU backend for Netsaur await setupBackend(CPU); diff --git a/examples/classification/iris.ts b/examples/classification/iris.ts index a2e0036..725a314 100644 --- a/examples/classification/iris.ts +++ b/examples/classification/iris.ts @@ 
-1,5 +1,4 @@ import { - AdamOptimizer, Cost, CPU, DenseLayer, @@ -9,22 +8,21 @@ import { Sequential, setupBackend, SoftmaxLayer, - Tensor, + type Tensor, tensor, tensor2D, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; -import { parse } from "https://deno.land/std@0.204.0/csv/parse.ts"; +import { parse } from "jsr:@std/csv@1.0.3/parse"; // Import helpers for metrics import { // One-hot encoding of targets CategoricalEncoder, ClassificationReport, - Matrix, // Split the dataset useSplit, -} from "jsr:@lala/appraisal@0.7.3"; +} from "../../packages/utilities/mod.ts"; // Read the training dataset const _data = Deno.readTextFileSync("examples/classification/iris.csv"); @@ -40,10 +38,7 @@ const y = encoder.fit(y_pre).transform(y_pre, "f32"); // Split the dataset for training and testing // @ts-ignore Matrices can be split -const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ - [typeof x, typeof y], - [typeof x, typeof y] -]; +const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) // Setup the CPU backend for Netsaur await setupBackend(CPU); @@ -89,7 +84,7 @@ net.train( // Train for 300 epochs 400, 1, - 0.02 + 0.02, ); console.log(`training time: ${performance.now() - time}ms`); @@ -97,7 +92,7 @@ console.log(`training time: ${performance.now() - time}ms`); // Calculate metrics const res = await net.predict(tensor2D(test[0])); const y1 = encoder.untransform( - CategoricalEncoder.fromSoftmax(res as Tensor<2>) + CategoricalEncoder.fromSoftmax(res as Tensor<2>), ); const y0 = encoder.untransform(test[1]); @@ -106,5 +101,5 @@ const cMatrix = new ClassificationReport(y0, y1); console.log(cMatrix); console.log( "Total Accuracy: ", - y1.filter((x, i) => x === y0[i]).length / y1.length + y1.filter((x, i) => x === y0[i]).length / y1.length, ); diff --git a/examples/classification/spam.ts b/examples/classification/spam.ts index 04dacf8..16629f2 100644 --- a/examples/classification/spam.ts +++ b/examples/classification/spam.ts @@ 
-1,4 +1,4 @@ -import { parse } from "https://deno.land/std@0.188.0/csv/parse.ts"; +import { parse } from "jsr:@std/csv@1.0.3/parse"; import { Cost, CPU, @@ -7,18 +7,19 @@ import { ReluLayer, Sequential, setupBackend, - SigmoidLayer, tensor, tensor2D, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; // Import helpers for metrics import { ClassificationReport, - TextVectorizer, + CountVectorizer, + SplitTokenizer, + TfIdfTransformer, // Split the dataset useSplit, -} from "https://deno.land/x/vectorizer@v0.3.7/mod.ts"; +} from "../../packages/utilities/mod.ts"; // Define classes const ymap = ["spam", "ham"]; @@ -31,21 +32,25 @@ const data = parse(_data); const x = data.map((msg) => msg[1]); // Get the classes -const y = data.map((msg) => ymap.indexOf(msg[0]) === 0 ? -1 : 1); +const y = data.map((msg) => (ymap.indexOf(msg[0]) === 0 ? -1 : 1)); // Split the dataset for training and testing -const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ - [typeof x, typeof y], - [typeof x, typeof y], -]; +const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y); // Vectorize the text messages -const vec = new TextVectorizer({ - mode: "tfidf", - config: { skipWords: "english", standardize: { lowercase: true } }, + +const tokenizer = new SplitTokenizer({ + skipWords: "english", + standardize: { lowercase: true }, }).fit(train[0]); -const x_vec = vec.transform(train[0], "f32"); +const vec = new CountVectorizer(tokenizer.vocabulary.size); + +const x_vec = vec.transform(tokenizer.transform(train[0]), "f32") + +const tfidf = new TfIdfTransformer(); + +const x_tfidf = tfidf.fit(x_vec).transform(x_vec) // Setup the CPU backend for Netsaur await setupBackend(CPU); @@ -72,10 +77,10 @@ const net = new Sequential({ // We are using Log Loss for finding cost cost: Cost.Hinge, - optimizer: NadamOptimizer() + optimizer: NadamOptimizer(), }); -const inputs = tensor(x_vec.data, x_vec.shape); +const inputs = tensor(x_tfidf); const time = 
performance.now(); // Train the network @@ -89,15 +94,15 @@ net.train( // Train for 20 epochs 20, 2, - 0.01, + 0.01 ); console.log(`training time: ${performance.now() - time}ms`); -const x_vec_test = vec.transform(test[0]); +const x_vec_test = tfidf.transform(vec.transform(tokenizer.transform(test[0]), "f32")); // Calculate metrics -const res = await net.predict(tensor(x_vec_test.data, x_vec_test.shape)); -const y1 = res.data.map((i) => i < 0 ? -1 : 1); +const res = await net.predict(tensor(x_vec_test)); +const y1 = res.data.map((i) => (i < 0 ? -1 : 1)); const cMatrix = new ClassificationReport(test[1], y1); console.log("Confusion Matrix: ", cMatrix); diff --git a/examples/filters/conv.ts b/examples/filters/conv.ts index 47e11d2..4696970 100644 --- a/examples/filters/conv.ts +++ b/examples/filters/conv.ts @@ -13,10 +13,10 @@ import { setupBackend, Tensor, tensor4D, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; import { decode } from "https://deno.land/x/pngs@0.1.1/mod.ts"; import { createCanvas } from "https://deno.land/x/canvas@v1.4.1/mod.ts"; -import type { Layer } from "../../src/core/api/layer.ts"; +import type { Layer } from "../../packages/core/src/core/api/layer.ts"; const canvas = createCanvas(600, 600); const ctx = canvas.getContext("2d"); diff --git a/examples/filters/conv_wasm.ts b/examples/filters/conv_wasm.ts index 4a7291e..1b8aadf 100644 --- a/examples/filters/conv_wasm.ts +++ b/examples/filters/conv_wasm.ts @@ -13,10 +13,10 @@ import { Tensor, tensor4D, WASM, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; import { decode } from "https://deno.land/x/pngs@0.1.1/mod.ts"; import { createCanvas } from "https://deno.land/x/canvas@v1.4.1/mod.ts"; -import type { Layer } from "../../src/core/api/layer.ts"; +import type { Layer } from "../../packages/core/src/core/api/layer.ts"; const canvas = createCanvas(600, 600); const ctx = canvas.getContext("2d"); diff --git a/examples/linear.ts b/examples/linear.ts index 70b8015..ac287f4 
100644 --- a/examples/linear.ts +++ b/examples/linear.ts @@ -9,7 +9,7 @@ import { Sequential, setupBackend, tensor2D, -} from "../mod.ts"; +} from "../packages/core/mod.ts"; /** * The test data used for predicting the output of the function y = 2x + 1 diff --git a/examples/mnist/common.ts b/examples/mnist/common.ts index 236ea6a..41b75a2 100644 --- a/examples/mnist/common.ts +++ b/examples/mnist/common.ts @@ -1,5 +1,4 @@ -import type { DataSet } from "../../src/core/types.ts"; -import { Tensor } from "../../mod.ts"; +import { Tensor, type DataSet } from "../../packages/core/mod.ts"; export function assert(condition: boolean, message?: string) { if (!condition) { diff --git a/examples/mnist/predict.ts b/examples/mnist/predict.ts index e6b8553..321a042 100644 --- a/examples/mnist/predict.ts +++ b/examples/mnist/predict.ts @@ -6,7 +6,7 @@ import { type Shape, type Tensor, tensor, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; import { loadDataset } from "./common.ts"; await setupBackend(CPU); diff --git a/examples/mnist/train.ts b/examples/mnist/train.ts index ae22f79..61013c8 100644 --- a/examples/mnist/train.ts +++ b/examples/mnist/train.ts @@ -10,7 +10,7 @@ import { Sequential, setupBackend, SoftmaxLayer, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; import { loadDataset } from "./common.ts"; diff --git a/examples/mnist/train_batchnorm.ts b/examples/mnist/train_batchnorm.ts index 6461153..0ba5815 100644 --- a/examples/mnist/train_batchnorm.ts +++ b/examples/mnist/train_batchnorm.ts @@ -11,7 +11,7 @@ import { Sequential, setupBackend, SoftmaxLayer, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; import { loadDataset } from "./common.ts"; diff --git a/examples/mnist/train_dropout.ts b/examples/mnist/train_dropout.ts index b1570fc..94ddbc7 100644 --- a/examples/mnist/train_dropout.ts +++ b/examples/mnist/train_dropout.ts @@ -11,7 +11,7 @@ import { Sequential, setupBackend, SoftmaxLayer, -} from "../../mod.ts"; +} from 
"../../packages/core/mod.ts"; import { loadDataset } from "./common.ts"; diff --git a/examples/model/run.ts b/examples/model/run.ts index adbfacc..be9b7c5 100644 --- a/examples/model/run.ts +++ b/examples/model/run.ts @@ -1,4 +1,4 @@ -import { CPU, Sequential, setupBackend, tensor2D } from "../../mod.ts"; +import { CPU, Sequential, setupBackend, tensor2D } from "../../packages/core/mod.ts"; /** * Setup the CPU backend. This backend is fast but doesn't work on the Edge. diff --git a/examples/model/train.ts b/examples/model/train.ts index 49350c6..76f7a72 100644 --- a/examples/model/train.ts +++ b/examples/model/train.ts @@ -6,7 +6,7 @@ import { setupBackend, SigmoidLayer, tensor2D, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; /** * Setup the CPU backend. This backend is fast but doesn't work on the Edge. diff --git a/examples/multiple-linear/student.ts b/examples/multiple-linear/student.ts index 12e88b7..bc6eb13 100644 --- a/examples/multiple-linear/student.ts +++ b/examples/multiple-linear/student.ts @@ -6,14 +6,13 @@ import { OneCycle, Sequential, setupBackend, - tensor1D, tensor2D, -} from "../../mod.ts"; +} from "../../packages/core/mod.ts"; -import { parse } from "https://deno.land/std@0.214.0/csv/parse.ts"; +import { parse } from "jsr:@std/csv@1.0.3/parse"; // Import helpers for splitting dataset -import { useSplit } from "https://deno.land/x/vectorizer@v0.2.1/mod.ts"; +import { useSplit } from "../../packages/utilities/mod.ts"; // Read the training dataset const _data = Deno.readTextFileSync("examples/multiple-linear/student.csv"); @@ -36,7 +35,7 @@ const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ // Setup the CPU backend for Netsaur await setupBackend(CPU); -console.log(train) +console.log(train); // Create a sequential neural network const net = new Sequential({ diff --git a/examples/tokenizers/basic.ts b/examples/tokenizers/basic.ts index 916daad..747f5ae 100644 --- a/examples/tokenizers/basic.ts +++ 
b/examples/tokenizers/basic.ts @@ -1,4 +1,4 @@ -import { init, Tokenizer } from "../../tokenizers/mod.ts"; +import { init, Tokenizer } from "../../packages/tokenizers/mod.ts"; await init(); diff --git a/examples/xor_auto.ts b/examples/xor_auto.ts index 973ca6b..cc072ba 100644 --- a/examples/xor_auto.ts +++ b/examples/xor_auto.ts @@ -10,7 +10,7 @@ import { setupBackend, SigmoidLayer, tensor2D, -} from "../mod.ts"; +} from "../packages/core/mod.ts"; /** * Setup the AUTO backend. This backend is chosen automatically based on the environment. diff --git a/examples/xor_cpu.ts b/examples/xor_cpu.ts index 214b49d..0081393 100644 --- a/examples/xor_cpu.ts +++ b/examples/xor_cpu.ts @@ -10,7 +10,7 @@ import { setupBackend, SigmoidLayer, tensor2D, -} from "../mod.ts"; +} from "../packages/core/mod.ts"; /** * Setup the CPU backend. This backend is fast but doesn't work on the Edge. diff --git a/examples/xor_gpu.ts b/examples/xor_gpu.ts index 81c6ee5..6bdc150 100644 --- a/examples/xor_gpu.ts +++ b/examples/xor_gpu.ts @@ -10,7 +10,7 @@ import { setupBackend, SigmoidLayer, tensor2D, -} from "../mod.ts"; +} from "../packages/core/mod.ts"; /** * Setup the GPU backend. This backend is fast but doesn't work on the Edge. diff --git a/examples/xor_option.ts b/examples/xor_option.ts index caae41c..e838669 100644 --- a/examples/xor_option.ts +++ b/examples/xor_option.ts @@ -12,7 +12,7 @@ import { SigmoidLayer, tensor2D, WASM, -} from "../mod.ts"; +} from "../packages/core/mod.ts"; /** * Setup the backend with the OPTION function. The backend is chosen automatically based on the environment. diff --git a/examples/xor_wasm.ts b/examples/xor_wasm.ts index 6bd87a9..b1d61df 100644 --- a/examples/xor_wasm.ts +++ b/examples/xor_wasm.ts @@ -10,7 +10,7 @@ import { SigmoidLayer, tensor2D, WASM, -} from "../mod.ts"; +} from "../packages/core/mod.ts"; /** * Setup the WASM backend. This backend is slower than the CPU backend but works on the Edge. 
diff --git a/mod.ts b/mod.ts index fad38c3..5b77cd7 100644 --- a/mod.ts +++ b/mod.ts @@ -1,17 +1,17 @@ -export { setupBackend } from "./src/core/engine.ts"; -export * from "./src/core/mod.ts"; -export * from "./src/core/types.ts"; -export * from "./src/core/tensor/tensor.ts"; -export * from "./src/core/api/layers.ts"; -export * from "./src/core/api/shape.ts"; -export * from "./src/core/api/network.ts"; -export * from "./src/core/api/optimizer.ts"; -export * from "./src/core/api/scheduler.ts"; -export { GPU } from "./src/backends/gpu/mod.ts"; +export { setupBackend } from "./packages/core/core/engine.ts"; +export * from "./packages/core/core/mod.ts"; +export * from "./packages/core/core/types.ts"; +export * from "./packages/core/core/tensor/tensor.ts"; +export * from "./packages/core/core/api/layers.ts"; +export * from "./packages/core/core/api/shape.ts"; +export * from "./packages/core/core/api/network.ts"; +export * from "./packages/core/core/api/optimizer.ts"; +export * from "./packages/core/core/api/scheduler.ts"; +export { GPU } from "./packages/core/backends/gpu/mod.ts"; -import { CPU, type CPUBackendLoader } from "./src/backends/cpu/mod.ts"; -import { WASM, type WASMBackendLoader } from "./src/backends/wasm/mod.ts"; -import type { BackendLoader } from "./src/core/engine.ts"; +import { CPU, type CPUBackendLoader } from "./packages/core/backends/cpu/mod.ts"; +import { WASM, type WASMBackendLoader } from "./packages/core/backends/wasm/mod.ts"; +import type { BackendLoader } from "./packages/core/core/engine.ts"; onerror = () => { if (typeof Deno == "undefined") { diff --git a/src/.DS_Store b/packages/core/.DS_Store similarity index 100% rename from src/.DS_Store rename to packages/core/.DS_Store diff --git a/packages/core/mod.ts b/packages/core/mod.ts new file mode 100644 index 0000000..9b52ef0 --- /dev/null +++ b/packages/core/mod.ts @@ -0,0 +1,10 @@ +export { setupBackend } from "./src/core/engine.ts"; +export * from "./src/core/mod.ts"; +export * from 
"./src/core/types.ts"; +export * from "./src/core/tensor/tensor.ts"; +export * from "./src/core/api/layers.ts"; +export * from "./src/core/api/shape.ts"; +export * from "./src/core/api/network.ts"; +export * from "./src/core/api/optimizer.ts"; +export * from "./src/core/api/scheduler.ts"; +export * from "./src/backend_loader.ts" \ No newline at end of file diff --git a/packages/core/src/backend_loader.ts b/packages/core/src/backend_loader.ts new file mode 100644 index 0000000..4ab5c3e --- /dev/null +++ b/packages/core/src/backend_loader.ts @@ -0,0 +1,33 @@ +export { GPU } from "./backends/gpu/mod.ts"; + +import { CPU, type CPUBackendLoader } from "./backends/cpu/mod.ts"; +import { WASM, type WASMBackendLoader } from "./backends/wasm/mod.ts"; +import type { BackendLoader } from "./core/engine.ts"; + +onerror = () => { + if (typeof Deno == "undefined") { + throw new Error( + "Warning: Deno is not defined. Did you mean to import from ./web.ts instead of ./mod.ts?", + ); + } +}; + +/** + * The AUTO backend is chosen automatically based on the environment. + */ +const AUTO: WASMBackendLoader | CPUBackendLoader = Deno.dlopen === undefined + ? WASM + : CPU; + +/** + * The OPTION function is used to choose a backend from a list of options. 
+ */ +export function OPTION(...backends: BackendLoader[]): BackendLoader { + for (const backend of backends) { + if (backend.isSupported()) { + return backend; + } + } + throw new Error("No provided backend is supported"); +} +export { AUTO, CPU, WASM }; diff --git a/src/backends/.DS_Store b/packages/core/src/backends/.DS_Store similarity index 100% rename from src/backends/.DS_Store rename to packages/core/src/backends/.DS_Store diff --git a/src/backends/cpu/backend.ts b/packages/core/src/backends/cpu/backend.ts similarity index 100% rename from src/backends/cpu/backend.ts rename to packages/core/src/backends/cpu/backend.ts diff --git a/src/backends/cpu/mod.ts b/packages/core/src/backends/cpu/mod.ts similarity index 97% rename from src/backends/cpu/mod.ts rename to packages/core/src/backends/cpu/mod.ts index 11bd85a..e640f38 100644 --- a/src/backends/cpu/mod.ts +++ b/packages/core/src/backends/cpu/mod.ts @@ -1,4 +1,4 @@ -import { dlopen, type FetchOptions } from "../../../deps.ts"; +import { dlopen, type FetchOptions } from "../../../../../deps.ts"; import { CPUBackend } from "./backend.ts"; import { NoBackendError } from "../../core/api/error.ts"; import { type BackendLoader, Engine } from "../../core/engine.ts"; diff --git a/src/backends/cpu/util.ts b/packages/core/src/backends/cpu/util.ts similarity index 100% rename from src/backends/cpu/util.ts rename to packages/core/src/backends/cpu/util.ts diff --git a/src/backends/gpu/backend.ts b/packages/core/src/backends/gpu/backend.ts similarity index 100% rename from src/backends/gpu/backend.ts rename to packages/core/src/backends/gpu/backend.ts diff --git a/src/backends/gpu/mod.ts b/packages/core/src/backends/gpu/mod.ts similarity index 97% rename from src/backends/gpu/mod.ts rename to packages/core/src/backends/gpu/mod.ts index ddb1c08..a98b4f3 100644 --- a/src/backends/gpu/mod.ts +++ b/packages/core/src/backends/gpu/mod.ts @@ -1,4 +1,4 @@ -import { dlopen, type FetchOptions } from "../../../deps.ts"; +import { 
dlopen, type FetchOptions } from "../../../../../deps.ts"; import { GPUBackend } from "./backend.ts"; import { NoBackendError } from "../../core/api/error.ts"; import { type BackendLoader, Engine } from "../../core/engine.ts"; diff --git a/src/backends/gpu/util.ts b/packages/core/src/backends/gpu/util.ts similarity index 100% rename from src/backends/gpu/util.ts rename to packages/core/src/backends/gpu/util.ts diff --git a/src/backends/wasm/backend.ts b/packages/core/src/backends/wasm/backend.ts similarity index 100% rename from src/backends/wasm/backend.ts rename to packages/core/src/backends/wasm/backend.ts diff --git a/src/backends/wasm/lib/netsaur.generated.d.ts b/packages/core/src/backends/wasm/lib/netsaur.generated.d.ts similarity index 100% rename from src/backends/wasm/lib/netsaur.generated.d.ts rename to packages/core/src/backends/wasm/lib/netsaur.generated.d.ts diff --git a/src/backends/wasm/lib/netsaur.generated.js b/packages/core/src/backends/wasm/lib/netsaur.generated.js similarity index 99% rename from src/backends/wasm/lib/netsaur.generated.js rename to packages/core/src/backends/wasm/lib/netsaur.generated.js index de1703a..05940c6 100644 --- a/src/backends/wasm/lib/netsaur.generated.js +++ b/packages/core/src/backends/wasm/lib/netsaur.generated.js @@ -4,7 +4,7 @@ // deno-fmt-ignore-file /// -// source-hash: af6cb6fb1e5d51212d81be60c88a3b2eb20dfbc5 +// source-hash: 24bcfd72e3631f0f05524ea722a11a2b75b29185 let wasm; let cachedInt32Memory0; @@ -12,12 +12,21 @@ const heap = new Array(128).fill(undefined); heap.push(undefined, null, true, false); +let heap_next = heap.length; + +function addHeapObject(obj) { + if (heap_next === heap.length) heap.push(heap.length + 1); + const idx = heap_next; + heap_next = heap[idx]; + + heap[idx] = obj; + return idx; +} + function getObject(idx) { return heap[idx]; } -let heap_next = heap.length; - function dropObject(idx) { if (idx < 132) return; heap[idx] = heap_next; @@ -30,15 +39,6 @@ function takeObject(idx) { 
return ret; } -function addHeapObject(obj) { - if (heap_next === heap.length) heap.push(heap.length + 1); - const idx = heap_next; - heap_next = heap[idx]; - - heap[idx] = obj; - return idx; -} - const cachedTextDecoder = typeof TextDecoder !== "undefined" ? new TextDecoder("utf-8", { ignoreBOM: true, fatal: true }) : { @@ -214,16 +214,16 @@ function handleError(f, args) { const imports = { __wbindgen_placeholder__: { - __wbindgen_object_drop_ref: function (arg0) { - takeObject(arg0); - }, - __wbg_log_67e9366454666686: function (arg0, arg1) { + __wbg_log_023d7669e382bddf: function (arg0, arg1) { console.log(getStringFromWasm0(arg0, arg1)); }, __wbindgen_number_new: function (arg0) { const ret = arg0; return addHeapObject(ret); }, + __wbindgen_object_drop_ref: function (arg0) { + takeObject(arg0); + }, __wbg_crypto_c48a774b022d20ac: function (arg0) { const ret = getObject(arg0).crypto; return addHeapObject(ret); @@ -249,10 +249,6 @@ const imports = { const ret = typeof (getObject(arg0)) === "string"; return ret; }, - __wbg_msCrypto_bcb970640f50a1e8: function (arg0) { - const ret = getObject(arg0).msCrypto; - return addHeapObject(ret); - }, __wbg_require_8f08ceecec0f4fee: function () { return handleError(function () { const ret = module.require; @@ -267,6 +263,10 @@ const imports = { const ret = getStringFromWasm0(arg0, arg1); return addHeapObject(ret); }, + __wbg_msCrypto_bcb970640f50a1e8: function (arg0) { + const ret = getObject(arg0).msCrypto; + return addHeapObject(ret); + }, __wbg_randomFillSync_dc1e9a60c158336d: function () { return handleError(function (arg0, arg1) { getObject(arg0).randomFillSync(takeObject(arg1)); diff --git a/packages/core/src/backends/wasm/lib/netsaur_bg.wasm b/packages/core/src/backends/wasm/lib/netsaur_bg.wasm new file mode 100644 index 0000000..46420ab Binary files /dev/null and b/packages/core/src/backends/wasm/lib/netsaur_bg.wasm differ diff --git a/src/backends/wasm/mod.ts b/packages/core/src/backends/wasm/mod.ts similarity index 
100% rename from src/backends/wasm/mod.ts rename to packages/core/src/backends/wasm/mod.ts diff --git a/src/backends/wasm/utils.ts b/packages/core/src/backends/wasm/utils.ts similarity index 100% rename from src/backends/wasm/utils.ts rename to packages/core/src/backends/wasm/utils.ts diff --git a/src/core/api/error.ts b/packages/core/src/core/api/error.ts similarity index 100% rename from src/core/api/error.ts rename to packages/core/src/core/api/error.ts diff --git a/src/core/api/layer.ts b/packages/core/src/core/api/layer.ts similarity index 100% rename from src/core/api/layer.ts rename to packages/core/src/core/api/layer.ts diff --git a/src/core/api/layers.ts b/packages/core/src/core/api/layers.ts similarity index 100% rename from src/core/api/layers.ts rename to packages/core/src/core/api/layers.ts diff --git a/src/core/api/network.ts b/packages/core/src/core/api/network.ts similarity index 100% rename from src/core/api/network.ts rename to packages/core/src/core/api/network.ts diff --git a/src/core/api/optimizer.ts b/packages/core/src/core/api/optimizer.ts similarity index 93% rename from src/core/api/optimizer.ts rename to packages/core/src/core/api/optimizer.ts index 739cfc9..c8ef7b1 100644 --- a/src/core/api/optimizer.ts +++ b/packages/core/src/core/api/optimizer.ts @@ -35,7 +35,9 @@ export function NadamOptimizer(config: AdamOptimizerConfig = {}): Optimizer { return { type: OptimizerType.Nadam, config }; } -export function RMSPropOptimizer(config: RMSPropOptimizerConfig = {}): Optimizer { +export function RMSPropOptimizer( + config: RMSPropOptimizerConfig = {}, +): Optimizer { config.decayRate = config.decayRate || 0.9; config.epsilon = config.epsilon || 1e-8; return { type: OptimizerType.RMSProp, config }; diff --git a/src/core/api/scheduler.ts b/packages/core/src/core/api/scheduler.ts similarity index 100% rename from src/core/api/scheduler.ts rename to packages/core/src/core/api/scheduler.ts diff --git a/src/core/api/shape.ts 
b/packages/core/src/core/api/shape.ts similarity index 100% rename from src/core/api/shape.ts rename to packages/core/src/core/api/shape.ts diff --git a/src/core/engine.ts b/packages/core/src/core/engine.ts similarity index 100% rename from src/core/engine.ts rename to packages/core/src/core/engine.ts diff --git a/src/core/mod.ts b/packages/core/src/core/mod.ts similarity index 100% rename from src/core/mod.ts rename to packages/core/src/core/mod.ts diff --git a/src/core/tensor/tensor.ts b/packages/core/src/core/tensor/tensor.ts similarity index 100% rename from src/core/tensor/tensor.ts rename to packages/core/src/core/tensor/tensor.ts diff --git a/src/core/tensor/util.ts b/packages/core/src/core/tensor/util.ts similarity index 100% rename from src/core/tensor/util.ts rename to packages/core/src/core/tensor/util.ts diff --git a/src/core/types.ts b/packages/core/src/core/types.ts similarity index 98% rename from src/core/types.ts rename to packages/core/src/core/types.ts index 607f34e..7901fd9 100644 --- a/src/core/types.ts +++ b/packages/core/src/core/types.ts @@ -17,7 +17,7 @@ export interface Backend { datasets: DataSet[], epochs: number, batches: number, - rate: number + rate: number, ): void; /** @@ -28,7 +28,7 @@ export interface Backend { predict( input: Tensor, layers?: number[], - outputShape?: Shape + outputShape?: Shape, ): Promise>; /** @@ -161,13 +161,13 @@ export enum Cost { MAE = "mae", /** - * Huber is a cost function for regression and is less sensitive to outliers than the + * Huber is a cost function for regression and is less sensitive to outliers than the * squared error loss */ Huber = "huber", /** - * Smoothed hinge is a variant of the Huber cost function used for binary classification. + * Smoothed hinge is a variant of the Huber cost function used for binary classification. * It is a smoothed version of hinge and is more robust to outliers. 
*/ SmoothHinge = "smoothhinge", diff --git a/src/plugins/mod.ts b/packages/core/src/plugins/mod.ts similarity index 80% rename from src/plugins/mod.ts rename to packages/core/src/plugins/mod.ts index b094f55..9f9e8ff 100644 --- a/src/plugins/mod.ts +++ b/packages/core/src/plugins/mod.ts @@ -1,5 +1,5 @@ import type { Plugin } from "./types.ts"; -import type { NeuralNetwork } from "../../mod.ts"; +import type { NeuralNetwork } from "../../../../mod.ts"; /** * Load a plugin into a NeuralNetwork instance. diff --git a/src/plugins/types.ts b/packages/core/src/plugins/types.ts similarity index 100% rename from src/plugins/types.ts rename to packages/core/src/plugins/types.ts diff --git a/data/data.ts b/packages/data/data.ts similarity index 91% rename from data/data.ts rename to packages/data/data.ts index 0cfb8d3..3db7bd7 100644 --- a/data/data.ts +++ b/packages/data/data.ts @@ -1,4 +1,4 @@ -import type { Rank, Tensor } from "../mod.ts"; +import type { Rank, Tensor } from "../core/mod.ts"; import { type CsvLoaderConfig, loadCsv } from "./datasets/csv.ts"; import type { DataLike } from "./types.ts"; diff --git a/data/datasets/csv.ts b/packages/data/datasets/csv.ts similarity index 97% rename from data/datasets/csv.ts rename to packages/data/datasets/csv.ts index ded3a5b..869b2a0 100644 --- a/data/datasets/csv.ts +++ b/packages/data/datasets/csv.ts @@ -1,4 +1,4 @@ -import { tensor2D } from "../../mod.ts"; +import { tensor2D } from "../../core/mod.ts"; import type { DataLike } from "../types.ts"; import { CsvParseStream } from "../deps.ts"; diff --git a/data/datasets/text.ts b/packages/data/datasets/text.ts similarity index 100% rename from data/datasets/text.ts rename to packages/data/datasets/text.ts diff --git a/packages/data/deps.ts b/packages/data/deps.ts new file mode 100644 index 0000000..3532b0b --- /dev/null +++ b/packages/data/deps.ts @@ -0,0 +1 @@ +export { CsvParseStream } from "jsr:@std/csv@1.0.3"; diff --git a/data/mod.ts b/packages/data/mod.ts similarity 
index 100% rename from data/mod.ts rename to packages/data/mod.ts diff --git a/data/types.ts b/packages/data/types.ts similarity index 85% rename from data/types.ts rename to packages/data/types.ts index 52a951c..ee2f076 100644 --- a/data/types.ts +++ b/packages/data/types.ts @@ -1,4 +1,4 @@ -import type { Rank, Tensor } from "../mod.ts"; +import type { Rank, Tensor } from "../core/mod.ts"; export interface DataLike { /** diff --git a/tokenizers/lib/netsaur_tokenizers.generated.d.ts b/packages/tokenizers/lib/netsaur_tokenizers.generated.d.ts similarity index 100% rename from tokenizers/lib/netsaur_tokenizers.generated.d.ts rename to packages/tokenizers/lib/netsaur_tokenizers.generated.d.ts diff --git a/tokenizers/lib/netsaur_tokenizers.generated.js b/packages/tokenizers/lib/netsaur_tokenizers.generated.js similarity index 99% rename from tokenizers/lib/netsaur_tokenizers.generated.js rename to packages/tokenizers/lib/netsaur_tokenizers.generated.js index 3f7591b..97e299c 100644 --- a/tokenizers/lib/netsaur_tokenizers.generated.js +++ b/packages/tokenizers/lib/netsaur_tokenizers.generated.js @@ -4,7 +4,7 @@ // deno-fmt-ignore-file /// -// source-hash: 1c51c6670646b6ae9e407e9c7104be0bc13ca7ad +// source-hash: 7411764ea128cc65962626e00ed109dfa6e5ba46 let wasm; const heap = new Array(128).fill(undefined); @@ -380,23 +380,23 @@ const imports = { const ret = new Error(getStringFromWasm0(arg0, arg1)); return addHeapObject(ret); }, + __wbindgen_number_new: function (arg0) { + const ret = arg0; + return addHeapObject(ret); + }, + __wbindgen_string_new: function (arg0, arg1) { + const ret = getStringFromWasm0(arg0, arg1); + return addHeapObject(ret); + }, __wbindgen_is_object: function (arg0) { const val = getObject(arg0); const ret = typeof val === "object" && val !== null; return ret; }, - __wbindgen_number_new: function (arg0) { - const ret = arg0; - return addHeapObject(ret); - }, __wbindgen_object_clone_ref: function (arg0) { const ret = getObject(arg0); return 
addHeapObject(ret); }, - __wbindgen_string_new: function (arg0, arg1) { - const ret = getStringFromWasm0(arg0, arg1); - return addHeapObject(ret); - }, __wbg_set_bd72c078edfa51ad: function (arg0, arg1, arg2) { getObject(arg0)[takeObject(arg1)] = takeObject(arg2); }, @@ -416,10 +416,6 @@ const imports = { const ret = getObject(arg0).node; return addHeapObject(ret); }, - __wbg_msCrypto_bcb970640f50a1e8: function (arg0) { - const ret = getObject(arg0).msCrypto; - return addHeapObject(ret); - }, __wbg_require_8f08ceecec0f4fee: function () { return handleError(function () { const ret = module.require; @@ -430,6 +426,10 @@ const imports = { const ret = typeof (getObject(arg0)) === "function"; return ret; }, + __wbg_msCrypto_bcb970640f50a1e8: function (arg0) { + const ret = getObject(arg0).msCrypto; + return addHeapObject(ret); + }, __wbg_randomFillSync_dc1e9a60c158336d: function () { return handleError(function (arg0, arg1) { getObject(arg0).randomFillSync(takeObject(arg1)); diff --git a/packages/tokenizers/lib/netsaur_tokenizers_bg.wasm b/packages/tokenizers/lib/netsaur_tokenizers_bg.wasm new file mode 100644 index 0000000..3181915 Binary files /dev/null and b/packages/tokenizers/lib/netsaur_tokenizers_bg.wasm differ diff --git a/tokenizers/mod.ts b/packages/tokenizers/mod.ts similarity index 100% rename from tokenizers/mod.ts rename to packages/tokenizers/mod.ts diff --git a/packages/utilities/examples/encoding/categorical.ipynb b/packages/utilities/examples/encoding/categorical.ipynb new file mode 100644 index 0000000..0150430 --- /dev/null +++ b/packages/utilities/examples/encoding/categorical.ipynb @@ -0,0 +1,439 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import { CategoricalEncoder } from \"../../src/encoding/categorical.ts\";\n", + "import { parse } from \"jsr:@std/csv@1.0.3/parse\";" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "[\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.9\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.7\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.6\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.6\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.4\"\u001b[39m, \u001b[32m\"3.9\"\u001b[39m, \u001b[32m\"1.7\"\u001b[39m, \u001b[32m\"0.4\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.6\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.3\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.9\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.1\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.4\"\u001b[39m, \u001b[32m\"3.7\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.8\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.8\"\u001b[39m, 
\u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.1\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.3\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"1.1\"\u001b[39m, \u001b[32m\"0.1\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.8\"\u001b[39m, \u001b[32m\"4.0\"\u001b[39m, \u001b[32m\"1.2\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.7\"\u001b[39m, \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.4\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.4\"\u001b[39m, \u001b[32m\"3.9\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"0.4\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.3\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.7\"\u001b[39m, \u001b[32m\"3.8\"\u001b[39m, \u001b[32m\"1.7\"\u001b[39m, \u001b[32m\"0.3\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.8\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.3\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.4\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.7\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.7\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.4\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.6\"\u001b[39m, \u001b[32m\"3.6\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.3\"\u001b[39m, \u001b[32m\"1.7\"\u001b[39m, \u001b[32m\"0.5\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.8\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.9\"\u001b[39m, 
\u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"0.4\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.2\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.2\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.7\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.8\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.4\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.4\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.2\"\u001b[39m, \u001b[32m\"4.1\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.1\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.5\"\u001b[39m, \u001b[32m\"4.2\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.9\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.1\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"1.2\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.5\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ 
\u001b[32m\"4.9\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.1\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"0.3\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"2.3\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"0.3\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"0.6\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.8\"\u001b[39m, \u001b[32m\"1.9\"\u001b[39m, \u001b[32m\"0.4\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.8\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.3\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"3.8\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"4.6\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.3\"\u001b[39m, \u001b[32m\"3.7\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"3.3\"\u001b[39m, 
\u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"0.2\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m ],\n", + " [ \u001b[32m\"7.0\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"4.7\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.4\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.9\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"4.9\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.5\"\u001b[39m, \u001b[32m\"2.3\"\u001b[39m, \u001b[32m\"4.0\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.5\"\u001b[39m, \u001b[32m\"2.8\"\u001b[39m, \u001b[32m\"4.6\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.7\"\u001b[39m, \u001b[32m\"2.8\"\u001b[39m, \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.3\"\u001b[39m, \u001b[32m\"3.3\"\u001b[39m, \u001b[32m\"4.7\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"4.9\"\u001b[39m, \u001b[32m\"2.4\"\u001b[39m, \u001b[32m\"3.3\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.6\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"4.6\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.2\"\u001b[39m, \u001b[32m\"2.7\"\u001b[39m, \u001b[32m\"3.9\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"2.0\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.9\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"4.2\"\u001b[39m, 
\u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.0\"\u001b[39m, \u001b[32m\"2.2\"\u001b[39m, \u001b[32m\"4.0\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.1\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"4.7\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.6\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"3.6\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.7\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.6\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.8\"\u001b[39m, \u001b[32m\"2.7\"\u001b[39m, \u001b[32m\"4.1\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.2\"\u001b[39m, \u001b[32m\"2.2\"\u001b[39m, \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.6\"\u001b[39m, \u001b[32m\"2.5\"\u001b[39m, \u001b[32m\"3.9\"\u001b[39m, \u001b[32m\"1.1\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.9\"\u001b[39m, \u001b[32m\"3.2\"\u001b[39m, \u001b[32m\"4.8\"\u001b[39m, \u001b[32m\"1.8\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.1\"\u001b[39m, \u001b[32m\"2.8\"\u001b[39m, \u001b[32m\"4.0\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.3\"\u001b[39m, \u001b[32m\"2.5\"\u001b[39m, \u001b[32m\"4.9\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.1\"\u001b[39m, \u001b[32m\"2.8\"\u001b[39m, \u001b[32m\"4.7\"\u001b[39m, \u001b[32m\"1.2\"\u001b[39m, 
\u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.4\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"4.3\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.6\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.8\"\u001b[39m, \u001b[32m\"2.8\"\u001b[39m, \u001b[32m\"4.8\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.7\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"1.7\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.0\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.7\"\u001b[39m, \u001b[32m\"2.6\"\u001b[39m, \u001b[32m\"3.5\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.5\"\u001b[39m, \u001b[32m\"2.4\"\u001b[39m, \u001b[32m\"3.8\"\u001b[39m, \u001b[32m\"1.1\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.5\"\u001b[39m, \u001b[32m\"2.4\"\u001b[39m, \u001b[32m\"3.7\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.8\"\u001b[39m, \u001b[32m\"2.7\"\u001b[39m, \u001b[32m\"3.9\"\u001b[39m, \u001b[32m\"1.2\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.0\"\u001b[39m, \u001b[32m\"2.7\"\u001b[39m, \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.4\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.0\"\u001b[39m, \u001b[32m\"3.4\"\u001b[39m, \u001b[32m\"4.5\"\u001b[39m, \u001b[32m\"1.6\"\u001b[39m, 
\u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.7\"\u001b[39m, \u001b[32m\"3.1\"\u001b[39m, \u001b[32m\"4.7\"\u001b[39m, \u001b[32m\"1.5\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.3\"\u001b[39m, \u001b[32m\"2.3\"\u001b[39m, \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.6\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"4.1\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.5\"\u001b[39m, \u001b[32m\"2.5\"\u001b[39m, \u001b[32m\"4.0\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.5\"\u001b[39m, \u001b[32m\"2.6\"\u001b[39m, \u001b[32m\"4.4\"\u001b[39m, \u001b[32m\"1.2\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.1\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"4.6\"\u001b[39m, \u001b[32m\"1.4\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.8\"\u001b[39m, \u001b[32m\"2.6\"\u001b[39m, \u001b[32m\"4.0\"\u001b[39m, \u001b[32m\"1.2\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.0\"\u001b[39m, \u001b[32m\"2.3\"\u001b[39m, \u001b[32m\"3.3\"\u001b[39m, \u001b[32m\"1.0\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.6\"\u001b[39m, \u001b[32m\"2.7\"\u001b[39m, \u001b[32m\"4.2\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.7\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"4.2\"\u001b[39m, \u001b[32m\"1.2\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.7\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"4.2\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"6.2\"\u001b[39m, \u001b[32m\"2.9\"\u001b[39m, \u001b[32m\"4.3\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, 
\u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.1\"\u001b[39m, \u001b[32m\"2.5\"\u001b[39m, \u001b[32m\"3.0\"\u001b[39m, \u001b[32m\"1.1\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " [ \u001b[32m\"5.7\"\u001b[39m, \u001b[32m\"2.8\"\u001b[39m, \u001b[32m\"4.1\"\u001b[39m, \u001b[32m\"1.3\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m ],\n", + " ... 50 more items\n", + "]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// Read the training dataset\n", + "const _data = Deno.readTextFileSync(\"iris.csv\");\n", + "const data = parse(_data);\n", + "data;" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "const cat = new CategoricalEncoder();" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, 
\u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, 
\u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " ... 50 more items\n", + "]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const targets = data.map((x) => x[4]);\n", + "targets;" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "const res = cat.fit(targets).transform(targets, \"f32\");" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
idx012
0100
1100
2100
3100
4100
5100
6100
7100
8100
9100
10100
11100
12100
13100
14100
15100
16100
17100
18100
19100
20100
21100
22100
23100
24100
25100
26100
27100
28100
29100
30100
31100
32100
33100
34100
35100
36100
37100
38100
39100
40100
41100
42100
43100
44100
45100
46100
47100
48100
49100
50010
51010
52010
53010
54010
55010
56010
57010
58010
59010
60010
61010
62010
63010
64010
65010
66010
67010
68010
69010
70010
71010
72010
73010
74010
75010
76010
77010
78010
79010
80010
81010
82010
83010
84010
85010
86010
87010
88010
89010
90010
91010
92010
93010
94010
95010
96010
97010
98010
99010
100001
101001
102001
103001
104001
105001
106001
107001
108001
109001
110001
111001
112001
113001
114001
115001
116001
117001
118001
119001
120001
121001
122001
123001
124001
125001
126001
127001
128001
129001
130001
131001
132001
133001
134001
135001
136001
137001
138001
139001
140001
141001
142001
143001
144001
145001
146001
147001
148001
149001
" + ], + "text/plain": [ + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "1\t0\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t1\t0\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + 
"0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n", + "0\t0\t1\n" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res;" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, 
\u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m, \u001b[32m\"setosa\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " \u001b[32m\"versicolor\"\u001b[39m, 
\u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m, \u001b[32m\"versicolor\"\u001b[39m,\n", + " ... 50 more items\n", + "]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat.untransform(res);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Deno", + "language": "typescript", + "name": "deno" + }, + "language_info": { + "file_extension": ".ts", + "mimetype": "text/x.typescript", + "name": "typescript", + "nb_converter": "script", + "pygments_lexer": "typescript", + "version": "5.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/packages/utilities/examples/encoding/iris.csv b/packages/utilities/examples/encoding/iris.csv new file mode 100644 index 0000000..876131d --- /dev/null +++ b/packages/utilities/examples/encoding/iris.csv @@ -0,0 +1,151 @@ +5.1,3.5,1.4,0.2,setosa +4.9,3.0,1.4,0.2,setosa +4.7,3.2,1.3,0.2,setosa +4.6,3.1,1.5,0.2,setosa +5.0,3.6,1.4,0.2,setosa +5.4,3.9,1.7,0.4,setosa +4.6,3.4,1.4,0.3,setosa +5.0,3.4,1.5,0.2,setosa +4.4,2.9,1.4,0.2,setosa +4.9,3.1,1.5,0.1,setosa +5.4,3.7,1.5,0.2,setosa +4.8,3.4,1.6,0.2,setosa +4.8,3.0,1.4,0.1,setosa +4.3,3.0,1.1,0.1,setosa +5.8,4.0,1.2,0.2,setosa +5.7,4.4,1.5,0.4,setosa +5.4,3.9,1.3,0.4,setosa +5.1,3.5,1.4,0.3,setosa +5.7,3.8,1.7,0.3,setosa +5.1,3.8,1.5,0.3,setosa +5.4,3.4,1.7,0.2,setosa +5.1,3.7,1.5,0.4,setosa +4.6,3.6,1.0,0.2,setosa +5.1,3.3,1.7,0.5,setosa +4.8,3.4,1.9,0.2,setosa +5.0,3.0,1.6,0.2,setosa +5.0,3.4,1.6,0.4,setosa +5.2,3.5,1.5,0.2,setosa +5.2,3.4,1.4,0.2,setosa +4.7,3.2,1.6,0.2,setosa +4.8,3.1,1.6,0.2,setosa +5.4,3.4,1.5,0.4,setosa +5.2,4.1,1.5,0.1,setosa +5.5,4.2,1.4,0.2,setosa +4.9,3.1,1.5,0.1,setosa +5.0,3.2,1.2,0.2,setosa +5.5,3.5,1.3,0.2,setosa +4.9,3.1,1.5,0.1,setosa +4.4,3.0,1.3,0.2,setosa +5.1,3.4,1.5,0.2,setosa 
+5.0,3.5,1.3,0.3,setosa +4.5,2.3,1.3,0.3,setosa +4.4,3.2,1.3,0.2,setosa +5.0,3.5,1.6,0.6,setosa +5.1,3.8,1.9,0.4,setosa +4.8,3.0,1.4,0.3,setosa +5.1,3.8,1.6,0.2,setosa +4.6,3.2,1.4,0.2,setosa +5.3,3.7,1.5,0.2,setosa +5.0,3.3,1.4,0.2,setosa +7.0,3.2,4.7,1.4,versicolor +6.4,3.2,4.5,1.5,versicolor +6.9,3.1,4.9,1.5,versicolor +5.5,2.3,4.0,1.3,versicolor +6.5,2.8,4.6,1.5,versicolor +5.7,2.8,4.5,1.3,versicolor +6.3,3.3,4.7,1.6,versicolor +4.9,2.4,3.3,1.0,versicolor +6.6,2.9,4.6,1.3,versicolor +5.2,2.7,3.9,1.4,versicolor +5.0,2.0,3.5,1.0,versicolor +5.9,3.0,4.2,1.5,versicolor +6.0,2.2,4.0,1.0,versicolor +6.1,2.9,4.7,1.4,versicolor +5.6,2.9,3.6,1.3,versicolor +6.7,3.1,4.4,1.4,versicolor +5.6,3.0,4.5,1.5,versicolor +5.8,2.7,4.1,1.0,versicolor +6.2,2.2,4.5,1.5,versicolor +5.6,2.5,3.9,1.1,versicolor +5.9,3.2,4.8,1.8,versicolor +6.1,2.8,4.0,1.3,versicolor +6.3,2.5,4.9,1.5,versicolor +6.1,2.8,4.7,1.2,versicolor +6.4,2.9,4.3,1.3,versicolor +6.6,3.0,4.4,1.4,versicolor +6.8,2.8,4.8,1.4,versicolor +6.7,3.0,5.0,1.7,versicolor +6.0,2.9,4.5,1.5,versicolor +5.7,2.6,3.5,1.0,versicolor +5.5,2.4,3.8,1.1,versicolor +5.5,2.4,3.7,1.0,versicolor +5.8,2.7,3.9,1.2,versicolor +6.0,2.7,5.1,1.6,versicolor +5.4,3.0,4.5,1.5,versicolor +6.0,3.4,4.5,1.6,versicolor +6.7,3.1,4.7,1.5,versicolor +6.3,2.3,4.4,1.3,versicolor +5.6,3.0,4.1,1.3,versicolor +5.5,2.5,4.0,1.3,versicolor +5.5,2.6,4.4,1.2,versicolor +6.1,3.0,4.6,1.4,versicolor +5.8,2.6,4.0,1.2,versicolor +5.0,2.3,3.3,1.0,versicolor +5.6,2.7,4.2,1.3,versicolor +5.7,3.0,4.2,1.2,versicolor +5.7,2.9,4.2,1.3,versicolor +6.2,2.9,4.3,1.3,versicolor +5.1,2.5,3.0,1.1,versicolor +5.7,2.8,4.1,1.3,versicolor +6.3,3.3,6.0,2.5,virginica +5.8,2.7,5.1,1.9,virginica +7.1,3.0,5.9,2.1,virginica +6.3,2.9,5.6,1.8,virginica +6.5,3.0,5.8,2.2,virginica +7.6,3.0,6.6,2.1,virginica +4.9,2.5,4.5,1.7,virginica +7.3,2.9,6.3,1.8,virginica +6.7,2.5,5.8,1.8,virginica +7.2,3.6,6.1,2.5,virginica +6.5,3.2,5.1,2.0,virginica +6.4,2.7,5.3,1.9,virginica +6.8,3.0,5.5,2.1,virginica 
+5.7,2.5,5.0,2.0,virginica +5.8,2.8,5.1,2.4,virginica +6.4,3.2,5.3,2.3,virginica +6.5,3.0,5.5,1.8,virginica +7.7,3.8,6.7,2.2,virginica +7.7,2.6,6.9,2.3,virginica +6.0,2.2,5.0,1.5,virginica +6.9,3.2,5.7,2.3,virginica +5.6,2.8,4.9,2.0,virginica +7.7,2.8,6.7,2.0,virginica +6.3,2.7,4.9,1.8,virginica +6.7,3.3,5.7,2.1,virginica +7.2,3.2,6.0,1.8,virginica +6.2,2.8,4.8,1.8,virginica +6.1,3.0,4.9,1.8,virginica +6.4,2.8,5.6,2.1,virginica +7.2,3.0,5.8,1.6,virginica +7.4,2.8,6.1,1.9,virginica +7.9,3.8,6.4,2.0,virginica +6.4,2.8,5.6,2.2,virginica +6.3,2.8,5.1,1.5,virginica +6.1,2.6,5.6,1.4,virginica +7.7,3.0,6.1,2.3,virginica +6.3,3.4,5.6,2.4,virginica +6.4,3.1,5.5,1.8,virginica +6.0,3.0,4.8,1.8,virginica +6.9,3.1,5.4,2.1,virginica +6.7,3.1,5.6,2.4,virginica +6.9,3.1,5.1,2.3,virginica +5.8,2.7,5.1,1.9,virginica +6.8,3.2,5.9,2.3,virginica +6.7,3.3,5.7,2.5,virginica +6.7,3.0,5.2,2.3,virginica +6.3,2.5,5.0,1.9,virginica +6.5,3.0,5.2,2.0,virginica +6.2,3.4,5.4,2.3,virginica +5.9,3.0,5.1,1.8,virginica + diff --git a/packages/utilities/examples/extract-colors/extract-colors.ts b/packages/utilities/examples/extract-colors/extract-colors.ts new file mode 100644 index 0000000..20a761a --- /dev/null +++ b/packages/utilities/examples/extract-colors/extract-colors.ts @@ -0,0 +1,33 @@ +import { + createCanvas, + loadImage, +} from "https://deno.land/x/canvas@v1.4.1/mod.ts"; +import { extractColors, Image } from "../../mod.ts"; +import { Color } from "https://deno.land/x/colors@v1.2.0/mod.ts"; + +const image = await loadImage("utilities/examples/extract-colors/kagu.png"); + +const canvas = createCanvas(image.width(), image.height()); + +const ctx = canvas.getContext("2d"); + +ctx.drawImage(image, 0, 0); + +const data = ctx.getImageData(0, 0, canvas.width, canvas.height); + +const img = new Image(data); + +const colors = extractColors(img, 32); + +const newCan = createCanvas(300, colors.length * 100); + +const newCtx = newCan.getContext("2d"); + +colors.forEach((color, i) => { + 
newCtx.fillStyle = new Color(...color).toString(); + newCtx.fillRect(0, i * 100, 300, 100); +}); +Deno.writeFile( + "utilities/examples/extract-colors/out.png", + newCan.toBuffer("image/png"), +); diff --git a/packages/utilities/examples/extract-colors/kagu.png b/packages/utilities/examples/extract-colors/kagu.png new file mode 100644 index 0000000..a5afca4 Binary files /dev/null and b/packages/utilities/examples/extract-colors/kagu.png differ diff --git a/packages/utilities/examples/extract-colors/out.png b/packages/utilities/examples/extract-colors/out.png new file mode 100644 index 0000000..34856ba Binary files /dev/null and b/packages/utilities/examples/extract-colors/out.png differ diff --git a/packages/utilities/examples/metrics/binary_iris.csv b/packages/utilities/examples/metrics/binary_iris.csv new file mode 100644 index 0000000..01be6f2 --- /dev/null +++ b/packages/utilities/examples/metrics/binary_iris.csv @@ -0,0 +1,100 @@ +5.1,3.5,1.4,.2,"Setosa" +4.9,3,1.4,.2,"Setosa" +4.7,3.2,1.3,.2,"Setosa" +4.6,3.1,1.5,.2,"Setosa" +5,3.6,1.4,.2,"Setosa" +5.4,3.9,1.7,.4,"Setosa" +4.6,3.4,1.4,.3,"Setosa" +5,3.4,1.5,.2,"Setosa" +4.4,2.9,1.4,.2,"Setosa" +4.9,3.1,1.5,.1,"Setosa" +5.4,3.7,1.5,.2,"Setosa" +4.8,3.4,1.6,.2,"Setosa" +4.8,3,1.4,.1,"Setosa" +4.3,3,1.1,.1,"Setosa" +5.8,4,1.2,.2,"Setosa" +5.7,4.4,1.5,.4,"Setosa" +5.4,3.9,1.3,.4,"Setosa" +5.1,3.5,1.4,.3,"Setosa" +5.7,3.8,1.7,.3,"Setosa" +5.1,3.8,1.5,.3,"Setosa" +5.4,3.4,1.7,.2,"Setosa" +5.1,3.7,1.5,.4,"Setosa" +4.6,3.6,1,.2,"Setosa" +5.1,3.3,1.7,.5,"Setosa" +4.8,3.4,1.9,.2,"Setosa" +5,3,1.6,.2,"Setosa" +5,3.4,1.6,.4,"Setosa" +5.2,3.5,1.5,.2,"Setosa" +5.2,3.4,1.4,.2,"Setosa" +4.7,3.2,1.6,.2,"Setosa" +4.8,3.1,1.6,.2,"Setosa" +5.4,3.4,1.5,.4,"Setosa" +5.2,4.1,1.5,.1,"Setosa" +5.5,4.2,1.4,.2,"Setosa" +4.9,3.1,1.5,.2,"Setosa" +5,3.2,1.2,.2,"Setosa" +5.5,3.5,1.3,.2,"Setosa" +4.9,3.6,1.4,.1,"Setosa" +4.4,3,1.3,.2,"Setosa" +5.1,3.4,1.5,.2,"Setosa" +5,3.5,1.3,.3,"Setosa" +4.5,2.3,1.3,.3,"Setosa" +4.4,3.2,1.3,.2,"Setosa" 
+5,3.5,1.6,.6,"Setosa" +5.1,3.8,1.9,.4,"Setosa" +4.8,3,1.4,.3,"Setosa" +5.1,3.8,1.6,.2,"Setosa" +4.6,3.2,1.4,.2,"Setosa" +5.3,3.7,1.5,.2,"Setosa" +5,3.3,1.4,.2,"Setosa" +7,3.2,4.7,1.4,"Versicolor" +6.4,3.2,4.5,1.5,"Versicolor" +6.9,3.1,4.9,1.5,"Versicolor" +5.5,2.3,4,1.3,"Versicolor" +6.5,2.8,4.6,1.5,"Versicolor" +5.7,2.8,4.5,1.3,"Versicolor" +6.3,3.3,4.7,1.6,"Versicolor" +4.9,2.4,3.3,1,"Versicolor" +6.6,2.9,4.6,1.3,"Versicolor" +5.2,2.7,3.9,1.4,"Versicolor" +5,2,3.5,1,"Versicolor" +5.9,3,4.2,1.5,"Versicolor" +6,2.2,4,1,"Versicolor" +6.1,2.9,4.7,1.4,"Versicolor" +5.6,2.9,3.6,1.3,"Versicolor" +6.7,3.1,4.4,1.4,"Versicolor" +5.6,3,4.5,1.5,"Versicolor" +5.8,2.7,4.1,1,"Versicolor" +6.2,2.2,4.5,1.5,"Versicolor" +5.6,2.5,3.9,1.1,"Versicolor" +5.9,3.2,4.8,1.8,"Versicolor" +6.1,2.8,4,1.3,"Versicolor" +6.3,2.5,4.9,1.5,"Versicolor" +6.1,2.8,4.7,1.2,"Versicolor" +6.4,2.9,4.3,1.3,"Versicolor" +6.6,3,4.4,1.4,"Versicolor" +6.8,2.8,4.8,1.4,"Versicolor" +6.7,3,5,1.7,"Versicolor" +6,2.9,4.5,1.5,"Versicolor" +5.7,2.6,3.5,1,"Versicolor" +5.5,2.4,3.8,1.1,"Versicolor" +5.5,2.4,3.7,1,"Versicolor" +5.8,2.7,3.9,1.2,"Versicolor" +6,2.7,5.1,1.6,"Versicolor" +5.4,3,4.5,1.5,"Versicolor" +6,3.4,4.5,1.6,"Versicolor" +6.7,3.1,4.7,1.5,"Versicolor" +6.3,2.3,4.4,1.3,"Versicolor" +5.6,3,4.1,1.3,"Versicolor" +5.5,2.5,4,1.3,"Versicolor" +5.5,2.6,4.4,1.2,"Versicolor" +6.1,3,4.6,1.4,"Versicolor" +5.8,2.6,4,1.2,"Versicolor" +5,2.3,3.3,1,"Versicolor" +5.6,2.7,4.2,1.3,"Versicolor" +5.7,3,4.2,1.2,"Versicolor" +5.7,2.9,4.2,1.3,"Versicolor" +6.2,2.9,4.3,1.3,"Versicolor" +5.1,2.5,3,1.1,"Versicolor" +5.7,2.8,4.1,1.3,"Versicolor" \ No newline at end of file diff --git a/packages/utilities/examples/metrics/metrics.ipynb b/packages/utilities/examples/metrics/metrics.ipynb new file mode 100644 index 0000000..166967a --- /dev/null +++ b/packages/utilities/examples/metrics/metrics.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "training time: 140.43580000003567ms\n" + ] + } + ], + "source": [ + "import {\n", + " Cost,\n", + " CPU,\n", + " DenseLayer,\n", + " Sequential,\n", + " setupBackend,\n", + " SigmoidLayer,\n", + " tensor1D,\n", + " tensor2D,\n", + "} from \"https://deno.land/x/netsaur@0.2.14/mod.ts\";\n", + "\n", + "import { parse } from \"jsr:@std/csv@1.0.3/parse\";\n", + "\n", + "// Import helpers for metrics\n", + "import {\n", + " // Metrics\n", + " ClassificationReport,\n", + " // Split the dataset\n", + " useSplit,\n", + "} from \"../../mod.ts\";\n", + "\n", + "// Define classes\n", + "const classes = [\"Setosa\", \"Versicolor\"];\n", + "\n", + "// Read the training dataset\n", + "const _data = Deno.readTextFileSync(\"binary_iris.csv\");\n", + "const data = parse(_data);\n", + "\n", + "// Get the predictors (x) and targets (y)\n", + "const x = data.map((fl) => fl.slice(0, 4).map(Number));\n", + "const y = data.map((fl) => classes.indexOf(fl[4]));\n", + "\n", + "// Split the dataset for training and testing\n", + "const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [\n", + " [typeof x, typeof y],\n", + " [typeof x, typeof y],\n", + "];\n", + "\n", + "// Setup the CPU backend for Netsaur\n", + "await setupBackend(CPU);\n", + "\n", + "// Create a sequential neural network\n", + "const net = new Sequential({\n", + " // Set number of minibatches to 4\n", + " // Set size of output to 4\n", + " size: [4, 4],\n", + "\n", + " // Disable logging during training\n", + " silent: true,\n", + "\n", + " // Define each layer of the network\n", + " layers: [\n", + " // A dense layer with 4 neurons\n", + " DenseLayer({ size: [4] }),\n", + " // A sigmoid activation layer\n", + " SigmoidLayer(),\n", + " // A dense layer with 1 neuron\n", + " DenseLayer({ size: [1] }),\n", + " // Another sigmoid layer\n", + " SigmoidLayer(),\n", + " ],\n", + " // We are using MSE for finding cost\n", + " cost: Cost.MSE,\n", + "});\n", + 
"\n", + "const time = performance.now();\n", + "\n", + "// Train the network\n", + "net.train(\n", + " [\n", + " {\n", + " inputs: tensor2D(train[0]),\n", + " outputs: tensor2D(train[1].map((x) => [x])),\n", + " },\n", + " ],\n", + " // Train for 10000 epochs\n", + " 10000,\n", + ");\n", + "\n", + "console.log(`training time: ${performance.now() - time}ms`);" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Classification Report


Number of classes:\t2\n", + "

Confusion Matrices



Class: 0
PN
P160
N014

Accuracy: 1
Precision: 1
Recall: 1
Specificity: 1
F1 Score: 1

Class: 1
PN
P140
N016

Accuracy: 1
Precision: 1
Recall: 1
Specificity: 1
F1 Score: 1" + ], + "text/plain": [ + "Classification Report\n", + "Number of classes:\t2\n", + "\n", + "==================\n", + "Confusion Matrices\n", + "==================\n", + "\n", + "Class: 0\n", + "\tP\tN\n", + "P\t16\t0\n", + "N\t0\t14\n", + "Accuracy: 1\n", + "Precision: 1\n", + "Recall: 1\n", + "Specificity: 1\n", + "F1 Score: 1\n", + "\n", + "Class: 1\n", + "\tP\tN\n", + "P\t14\t0\n", + "N\t0\t16\n", + "Accuracy: 1\n", + "Precision: 1\n", + "Recall: 1\n", + "Specificity: 1\n", + "F1 Score: 1\n" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const res = await Promise.all(\n", + " test[0].map((input) => net.predict(tensor1D(input))),\n", + ");\n", + "const y1 = res.map((x) => x.data[0] < 0.5 ? 0 : 1);\n", + "const cMatrix = new ClassificationReport(test[1], y1);\n", + "cMatrix;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Deno", + "language": "typescript", + "name": "deno" + }, + "language_info": { + "file_extension": ".ts", + "mimetype": "text/x.typescript", + "name": "typescript", + "nb_converter": "script", + "pygments_lexer": "typescript", + "version": "5.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/packages/utilities/examples/metrics/metrics.ts b/packages/utilities/examples/metrics/metrics.ts new file mode 100644 index 0000000..8c32548 --- /dev/null +++ b/packages/utilities/examples/metrics/metrics.ts @@ -0,0 +1,86 @@ +import { + Cost, + CPU, + DenseLayer, + Sequential, + setupBackend, + SigmoidLayer, + tensor2D, +} from "https://deno.land/x/netsaur@0.3.0/mod.ts"; + +import { parse } from "jsr:@std/csv@1.0.3/parse"; + +// Import helpers for metrics +import { + // Metrics + ClassificationReport, + // Split the dataset + useSplit, +} from "../../mod.ts"; + +// Define classes +const classes = ["Setosa", "Versicolor"]; + +// Read the 
training dataset +const _data = Deno.readTextFileSync("examples/metrics/binary_iris.csv"); +const data = parse(_data); + +// Get the predictors (x) and targets (y) +const x = data.map((fl) => fl.slice(0, 4).map(Number)); +const y = data.map((fl) => classes.indexOf(fl[4])); + +// Split the dataset for training and testing +const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ + [typeof x, typeof y], + [typeof x, typeof y], +]; + +// Setup the CPU backend for Netsaur +await setupBackend(CPU); + +// Create a sequential neural network +const net = new Sequential({ + // Set number of minibatches to 4 + // Set size of output to 4 + size: [4, 4], + + // Disable logging during training + silent: true, + + // Define each layer of the network + layers: [ + // A dense layer with 4 neurons + DenseLayer({ size: [4] }), + // A sigmoid activation layer + SigmoidLayer(), + // A dense layer with 1 neuron + DenseLayer({ size: [1] }), + // Another sigmoid layer + SigmoidLayer(), + ], + // We are using MSE for finding cost + cost: Cost.MSE, +}); + +const time = performance.now(); + +// Train the network +net.train( + [ + { + inputs: tensor2D(train[0]), + outputs: tensor2D(train[1].map((x) => [x])), + }, + ], + // Train for 10000 epochs + 10000, +); + +console.log(`training time: ${performance.now() - time}ms`); + +const res = await net.predict(tensor2D(test[0])); +const y1 = res.data.map( + (x) => (x < 0.5 ? 0 : 1), +); +const cMatrix = new ClassificationReport(test[1], y1); +console.log("Confusion Matrix: ", cMatrix); diff --git a/packages/utilities/mod.ts b/packages/utilities/mod.ts new file mode 100644 index 0000000..8518201 --- /dev/null +++ b/packages/utilities/mod.ts @@ -0,0 +1,37 @@ +/** + * Machine Learning utilities for TypeScript. 
+ * + * @example + * ```ts + * const data = [ + * "twinkle twinkle little star", + * "How I wonder what you are", + * "up above the world so high", + * "like a diamond in the sky", + * ]; + * + * // Clean the text + * const cleaner = new TextCleaner({ + * lowercase: true, + * stripHtml: true, + * stripNewlines: true, + * normalizeWhiteSpaces: true, + * }); + * x = cleaner.clean(x); + * + * // Tokenize the text + * const tokenizer = new SplitTokenizer(); + * tokenizer.fit(x); + * const x_tokens = tokenizer.transform(x); + * + * // Vectorize the tokens + * const vectorizer = new CountVectorizer(tokenizer.vocabulary.size); + * const x_vec = vectorizer.transform(x_tokens, "f32"); + * + * // Apply Tf-Idf transformation + * const transformer = new TfIdfTransformer(); + * console.log(transformer.fit(x_vec).transform(x_vec)); + * ``` + * @module + */ +export * from "./src/mod.ts"; diff --git a/packages/utilities/src/constants/stop_words.ts b/packages/utilities/src/constants/stop_words.ts new file mode 100644 index 0000000..d5ac7b4 --- /dev/null +++ b/packages/utilities/src/constants/stop_words.ts @@ -0,0 +1,182 @@ +/** List obtained from NLTK */ +export const DefaultIgnoreList = [ + "i", + "me", + "my", + "myself", + "we", + "our", + "ours", + "ourselves", + "you", + "you're", + "you've", + "you'll", + "you'd", + "your", + "yours", + "yourself", + "yourselves", + "he", + "him", + "his", + "himself", + "she", + "she's", + "her", + "hers", + "herself", + "it", + "it's", + "its", + "itself", + "they", + "them", + "their", + "theirs", + "themselves", + "what", + "which", + "who", + "whom", + "this", + "that", + "that'll", + "these", + "those", + "am", + "is", + "are", + "was", + "were", + "be", + "been", + "being", + "have", + "has", + "had", + "having", + "do", + "does", + "did", + "doing", + "a", + "an", + "the", + "and", + "but", + "if", + "or", + "because", + "as", + "until", + "while", + "of", + "at", + "by", + "for", + "with", + "about", + "against", + "between", + 
"into", + "through", + "during", + "before", + "after", + "above", + "below", + "to", + "from", + "up", + "down", + "in", + "out", + "on", + "off", + "over", + "under", + "again", + "further", + "then", + "once", + "here", + "there", + "when", + "where", + "why", + "how", + "all", + "any", + "both", + "each", + "few", + "more", + "most", + "other", + "some", + "such", + "no", + "nor", + "not", + "only", + "own", + "same", + "so", + "than", + "too", + "very", + "s", + "t", + "can", + "will", + "just", + "don", + "don't", + "should", + "should've", + "now", + "d", + "ll", + "m", + "o", + "re", + "ve", + "y", + "ain", + "aren", + "aren't", + "couldn", + "couldn't", + "didn", + "didn't", + "doesn", + "doesn't", + "hadn", + "hadn't", + "hasn", + "hasn't", + "haven", + "haven't", + "isn", + "isn't", + "ma", + "mightn", + "mightn't", + "mustn", + "mustn't", + "needn", + "needn't", + "shan", + "shan't", + "shouldn", + "shouldn't", + "wasn", + "wasn't", + "weren", + "weren't", + "won", + "won't", + "wouldn", + "wouldn't", +]; diff --git a/packages/utilities/src/encoding/categorical.ts b/packages/utilities/src/encoding/categorical.ts new file mode 100644 index 0000000..a7d5700 --- /dev/null +++ b/packages/utilities/src/encoding/categorical.ts @@ -0,0 +1,92 @@ +import { Matrix, type MatrixLike } from "../mod.ts"; +import type { DataType, DType, DTypeValue } from "../utils/common_types.ts"; + +/** Convert class labels into categorical variables (one-hot) */ +export class CategoricalEncoder { + /** Map categories to indices */ + mapping: Map; + /** An internal counter for remembering the last index in mapping. */ + #lastToken: Uint32Array; + constructor() { + this.mapping = new Map(); + this.#lastToken = new Uint32Array(1); + } + /** Construct a mapping from a given set of text. 
*/ + fit(targets: T[]): this { + let i = 0; + while (i < targets.length) { + if (!this.mapping.has(targets[i])) { + const token = this.#incrementToken(); + this.mapping.set(targets[i], token); + } + i += 1; + } + return this; + } + /** One-hot encoding of categorical values */ + transform
(targets: T[], dType: DT): Matrix
{ + const res = new Matrix
(dType, [targets.length, this.#lastToken[0]]); + let i = 0; + while (i < targets.length) { + const index = this.mapping.get(targets[i]); + if (index !== 0 && !index) { + i += 1; + continue; + } + res.setCell(i, index, 1); + i += 1; + } + return res; + } + untransform
(data: MatrixLike
): T[] { + const matrix = new Matrix(data); + const res = new Array(matrix.nRows); + for (let i = 0; i < res.length; i += 1) { + const idx = matrix.row(i).findIndex((x) => x === 1); + res[i] = this.getOg(idx) || "__unknown__"; + } + return res; + } + getOg(data: number): T | undefined { + for (const [k, v] of this.mapping.entries()) { + if (v === data) { + return k; + } + } + return undefined; + } + #incrementToken(): number { + return Atomics.add(this.#lastToken, 0, 1); + } + /** + * Convert softmax outputs into categorical outputs + * This method mutates the original matrix. + * @returns The modified matrix. + */ + static fromSoftmax
(data: MatrixLike
): Matrix
{ + const matrix = new Matrix(data); + for (let i = 0; i < matrix.nRows; i += 1) { + const max = matrix + .row(i) + // @ts-ignore It can reduce. + .reduce( + (acc: number, curr: DTypeValue
, i: number, arr: DType
) => + arr[acc] > curr ? acc : i, + 0, + ); + if ( + data.data instanceof BigInt64Array || + data.data instanceof BigUint64Array + ) { + const newR = new Array(matrix.nCols).fill(0n); + newR[max] = 1n; + matrix.setRow(i, newR); + } else { + const newR = new Array(matrix.nCols).fill(0); + newR[max] = 1; + matrix.setRow(i, newR); + } + } + return matrix; + } +} diff --git a/packages/utilities/src/encoding/mod.ts b/packages/utilities/src/encoding/mod.ts new file mode 100644 index 0000000..26408b2 --- /dev/null +++ b/packages/utilities/src/encoding/mod.ts @@ -0,0 +1,6 @@ +/** + * Encoding data into different representations. + * @module + */ + +export * from "./categorical.ts"; diff --git a/packages/utilities/src/image/colors/common.ts b/packages/utilities/src/image/colors/common.ts new file mode 100644 index 0000000..7bb0ecf --- /dev/null +++ b/packages/utilities/src/image/colors/common.ts @@ -0,0 +1,90 @@ +import type { Image } from "../../utils/mod.ts"; +import type { Pixel } from "../../utils/common_types.ts"; +import type { ColorHistogram } from "./histogram.ts"; + +export function getAverageColor( + vbox: ColorRange, + histo: ColorHistogram, +): Pixel { + let total = 0; + let totalR = 0, totalG = 0, totalB = 0; + let ri = vbox.r.min; + while (ri <= vbox.r.max) { + let gi = vbox.g.min; + while (gi <= vbox.g.max) { + let bi = vbox.b.min; + while (bi <= vbox.b.max) { + const count = histo.getQuantized([ri, gi, bi]) || 0; + total += count; + totalR += count * (ri + 0.5) * 8; + totalG += count * (gi + 0.5) * 8; + totalB += count * (bi + 0.5) * 8; + bi += 1; + } + gi += 1; + } + ri += 1; + } + if (total) { + return [ + ~~(totalR / total), + ~~(totalG / total), + ~~(totalB / total), + 255, + ]; + } + // In case box is empty + return [ + Math.trunc(8 * (vbox.r.min + vbox.r.max + 1) / 2), + Math.trunc(8 * (vbox.g.min + vbox.g.max + 1) / 2), + Math.trunc(8 * (vbox.b.min + vbox.b.max + 1) / 2), + 255, + ]; +} + +/** The vbox */ +export interface ColorRange { + r: { min: 
number; max: number }; + g: { min: number; max: number }; + b: { min: number; max: number }; +} + +/** Get the minimum and maximum RGB values. */ +export function getColorRange( + image: Image, + sigBits = 5, +): ColorRange { + const quantizeBy = 8 - sigBits; + const range = { + r: { min: 1000, max: 0 }, + g: { min: 1000, max: 0 }, + b: { min: 1000, max: 0 }, + }; + let i = 0; + while (i < image.pixels) { + const pixel = image.getNthPixel(i).map((x) => x ? x >> quantizeBy : 0); + if (pixel[0] < range.r.min) { + range.r.min = pixel[0]; + } + if (pixel[0] > range.r.max) { + range.r.max = pixel[0]; + } + + if (pixel[1] < range.g.min) { + range.g.min = pixel[1]; + } + if (pixel[1] > range.g.max) { + range.g.max = pixel[1]; + } + + if (pixel[2] < range.b.min) { + range.b.min = pixel[2]; + } + if (pixel[2] > range.b.max) { + range.b.max = pixel[2]; + } + + i += 1; + } + return range; +} diff --git a/packages/utilities/src/image/colors/histogram.ts b/packages/utilities/src/image/colors/histogram.ts new file mode 100644 index 0000000..2f94dd9 --- /dev/null +++ b/packages/utilities/src/image/colors/histogram.ts @@ -0,0 +1,61 @@ +import type { Image } from "../../utils/mod.ts"; +import type { Pixel } from "../../utils/common_types.ts"; + +/** + * Histogram of colors with reduced space + * Effectively quantizes the image into 32768 colors + */ +export class ColorHistogram { + #data: Uint32Array; + #quantizeBy: number; + sigBits: number; + constructor(sigBits: number) { + this.sigBits = sigBits; + this.#quantizeBy = 8 - sigBits; + this.#data = new Uint32Array(1 << (sigBits * 3)); + } + #getIndex([r, g, b]: [number, number, number, number?]) { + // ignore alpha + const index = ((r >> this.#quantizeBy) << (this.sigBits << 1)) + + ((g >> this.#quantizeBy) << this.sigBits) + + (b >> this.#quantizeBy); + return index; + } + get(color: Pixel): number { + const index = this.#getIndex(color); + return this.#data[index]; + } + getQuantized(color: Pixel): number { + const index = 
(color[0] << 10) + (color[1] << 5) + color[2]; + return this.#data[index]; + } + add(color: Pixel, amount: number): number { + const index = this.#getIndex(color); + return Atomics.add(this.#data, index, amount); + } + get raw(): Uint32Array { + return this.#data; + } + get length(): number { + return this.#data.filter((x) => x).length; + } + static getColor(index: number, sigBits: number): Pixel { + const quantizeBy = 8 - sigBits; + const ri = index >> 10; + const gi = (index - (ri << 10)) >> 5; + const bi = index - (ri << 10) - (gi << 5); + return [ri << quantizeBy, gi << quantizeBy, bi << quantizeBy, 255]; + } +} + +/** Get a histogram of frequency of colors. */ +export function getHistogram(image: Image, sigBits = 5): ColorHistogram { + const histo = new ColorHistogram(sigBits); + let i = 0; + while (i < image.pixels) { + const hIndex = image.getNthPixel(i); + histo.add(hIndex, 1); + i += 1; + } + return histo; +} diff --git a/packages/utilities/src/image/colors/median_cut.ts b/packages/utilities/src/image/colors/median_cut.ts new file mode 100644 index 0000000..3dd32e3 --- /dev/null +++ b/packages/utilities/src/image/colors/median_cut.ts @@ -0,0 +1,282 @@ +import { type ColorHistogram, getHistogram } from "./histogram.ts"; +import { getAverageColor, getColorRange } from "./common.ts"; +import type { ColorRange } from "./common.ts"; +import type { Pixel } from "../../utils/common_types.ts"; +import type { Image } from "../../utils/mod.ts"; + +/// Uses Modified Median Cut Algorithm +/// TypeScript port of Leptonica +/// http://www.leptonica.org/ + +export function quantizeByMedianCut( + image: Image, + extractCount: number, + sigBits = 5, +): Pixel[] { + if (sigBits <= 4) console.warn("Setting sigBits less than 5 may not work."); + const vbox = getColorRange(image, sigBits); + const histo = getHistogram(image, sigBits); + return quantize(vbox, histo, extractCount); +} + +function quantize( + vbox: ColorRange, + histo: ColorHistogram, + extractCount: number, +): 
Pixel[] { + const vboxes: ColorRange[] = [vbox]; + + // Avoid an infinite loop + const maxIter = 1000; + let i = 0; + + const firstExtractCount = ~~(extractCount >> 1); + let generated = 1; + + while (i < maxIter) { + const lastBox = vboxes.shift(); + if (!lastBox) break; // This shouldn't happen + if (!vboxSize(lastBox, histo)) { + vboxes.push(lastBox); + i += 1; + continue; + } + const cut = medianCutApply(lastBox, histo); + if (cut) { + vboxes.push(cut[0], cut[1]); + generated += 1; + } else vboxes.push(lastBox); + if (generated >= firstExtractCount) break; + i += 1; + } + + vboxes.sort((a, b) => + (vboxSize(b, histo) * vboxVolume(b)) - (vboxSize(a, histo) * vboxVolume(a)) + ); + const secondExtractCount = extractCount - vboxes.length; + i = 0; + generated = 0; + + while (i < maxIter) { + const lastBox = vboxes.shift(); + if (!lastBox) break; // This shouldn't happen + if (!vboxSize(lastBox, histo)) { + vboxes.push(lastBox); + i += 1; + continue; + } + const cut = medianCutApply(lastBox, histo); + + if (cut) { + vboxes.push(cut[0], cut[1]); + generated += 1; + } else vboxes.push(lastBox); + if (generated >= secondExtractCount) break; + i += 1; + } + vboxes.sort((a, b) => vboxSize(b, histo) - vboxSize(a, histo)); + return vboxes.map((x) => getAverageColor(x, histo)).slice(0, extractCount); +} + +/** Get number of colors in vbox */ +function vboxSize(vbox: ColorRange, histo: ColorHistogram): number { + let count = 0; + let ri = vbox.r.min; + while (ri <= vbox.r.max) { + let gi = vbox.g.min; + while (gi <= vbox.g.max) { + let bi = vbox.b.min; + while (bi <= vbox.b.max) { + count += histo.get([ri, gi, bi, 255]) || 0; + bi += 1; + } + gi += 1; + } + ri += 1; + } + return count; +} + +/** Get volume by dimensions of vbox */ +function vboxVolume(vbox: ColorRange): number { + return ~~(vbox.r.max - vbox.r.min) * ~~(vbox.g.max - vbox.g.min) * + ~~(vbox.b.max - vbox.b.min); +} + +/** Cut vbox into two */ +function medianCutApply( + vbox: ColorRange, + histo: 
ColorHistogram, +): [ColorRange, ColorRange] | false { + const count = vboxSize(vbox, histo); + + if (!count || count === 1) return false; + const rw = vbox.r.max - vbox.r.min + 1; + const gw = vbox.g.max - vbox.g.min + 1; + const bw = vbox.b.max - vbox.b.min + 1; + + const axis = Math.max(rw, gw, bw); + + // Find partial sums along each axis + const sumAlongAxis = []; + // avoid running another loop to compute sum + let totalSum = 0; + switch (axis) { + case rw: { + let i = vbox.r.min; + while (i <= vbox.r.max) { + let tempSum = 0; + let j = vbox.g.min; + while (j < vbox.g.max) { + let k = vbox.b.min; + while (k < vbox.b.max) { + tempSum += histo.getQuantized([i, j, k]) || 0; + k += 1; + } + j += 1; + } + totalSum += tempSum; + sumAlongAxis[i] = totalSum; + i += 1; + } + break; + } + case gw: { + let i = vbox.g.min; + while (i <= vbox.g.max) { + let tempSum = 0; + let j = vbox.r.min; + while (j < vbox.r.max) { + let k = vbox.b.min; + while (k < vbox.b.max) { + tempSum += histo.getQuantized([j, i, k]) || 0; + k += 1; + } + j += 1; + } + totalSum += tempSum; + sumAlongAxis[i] = totalSum; + i += 1; + } + break; + } + default: { + let i = vbox.b.min; + while (i <= vbox.b.max) { + let tempSum = 0; + let j = vbox.r.min; + while (j < vbox.r.max) { + let k = vbox.g.min; + while (k < vbox.g.max) { + tempSum += histo.getQuantized([j, k, i]) || 0; + k += 1; + } + j += 1; + } + totalSum += tempSum; + sumAlongAxis[i] = totalSum; + i += 1; + } + break; + } + } + // Apply median cut + switch (axis) { + case rw: { + let i = vbox.r.min; + while (i <= vbox.r.max) { + // Find the mid point through linear search + if (sumAlongAxis[i] < totalSum / 2) { + let cutAt = 0; + const vbox1 = { + r: { min: vbox.r.min, max: vbox.r.max }, + g: { min: vbox.g.min, max: vbox.g.max }, + b: { min: vbox.b.min, max: vbox.b.max }, + }; + const vbox2 = { + r: { min: vbox.r.min, max: vbox.r.max }, + g: { min: vbox.g.min, max: vbox.g.max }, + b: { min: vbox.b.min, max: vbox.b.max }, + }; + const left = i 
- vbox.r.min; + const right = vbox.r.max - i; + if (left <= right) { + cutAt = Math.min(vbox.r.max - 1, Math.trunc(i + right / 2)); + } else cutAt = Math.max(vbox.r.min, Math.trunc(i - 1 - left / 2)); + + while (!sumAlongAxis[cutAt]) cutAt += 1; + + vbox1.r.max = cutAt; + vbox2.r.min = cutAt + 1; + return [vbox1, vbox2]; + } + i += 1; + } + break; + } + case gw: { + let i = vbox.g.min; + while (i <= vbox.g.max) { + // Find the mid point through linear search + if (sumAlongAxis[i] < totalSum / 2) { + let cutAt = 0; + const vbox1 = { + r: { min: vbox.r.min, max: vbox.r.max }, + g: { min: vbox.g.min, max: vbox.g.max }, + b: { min: vbox.b.min, max: vbox.b.max }, + }; + const vbox2 = { + r: { min: vbox.r.min, max: vbox.r.max }, + g: { min: vbox.g.min, max: vbox.g.max }, + b: { min: vbox.b.min, max: vbox.b.max }, + }; + const left = i - vbox.g.min; + const right = vbox.g.max - i; + if (left <= right) { + cutAt = Math.min(vbox.g.max - 1, Math.trunc(i + right / 2)); + } else cutAt = Math.max(vbox.g.min, Math.trunc(i - 1 - left / 2)); + while (!sumAlongAxis[cutAt]) cutAt += 1; + + vbox1.g.max = cutAt; + vbox2.g.min = cutAt + 1; + return [vbox1, vbox2]; + } + i += 1; + } + break; + } + default: { + let i = vbox.b.min; + while (i <= vbox.b.max) { + // Find the mid point through linear search + if (sumAlongAxis[i] < totalSum / 2) { + let cutAt = 0; + const vbox1 = { + r: { min: vbox.r.min, max: vbox.r.max }, + g: { min: vbox.g.min, max: vbox.g.max }, + b: { min: vbox.b.min, max: vbox.b.max }, + }; + const vbox2 = { + r: { min: vbox.r.min, max: vbox.r.max }, + g: { min: vbox.g.min, max: vbox.g.max }, + b: { min: vbox.b.min, max: vbox.b.max }, + }; + const left = i - vbox.b.min; + const right = vbox.b.max - i; + if (left <= right) { + cutAt = Math.min(vbox.b.max - 1, Math.trunc(i + right / 2)); + } else cutAt = Math.max(vbox.b.min, Math.trunc(i - 1 - left / 2)); + while (!sumAlongAxis[cutAt]) cutAt += 1; + + vbox1.b.max = cutAt; + vbox2.b.min = cutAt + 1; + return [vbox1, 
vbox2]; + } + i += 1; + } + break; + } + } + + return false; +} diff --git a/packages/utilities/src/image/colors/mod.ts b/packages/utilities/src/image/colors/mod.ts new file mode 100644 index 0000000..91acca6 --- /dev/null +++ b/packages/utilities/src/image/colors/mod.ts @@ -0,0 +1,16 @@ +/** + * Image-related utilities for machine learning. + * @module + */ + +import { Image } from "../../utils/mod.ts"; +import type { Pixel } from "../../utils/common_types.ts"; +import { quantizeByMedianCut } from "./median_cut.ts"; + +/** Extract colors from an image. */ +export function extractColors(image: Image, nColors: number): Pixel[] { + return quantizeByMedianCut(image, nColors, 5); +} + +export { getHistogram } from "./histogram.ts"; +export { Image }; diff --git a/packages/utilities/src/image/mod.ts b/packages/utilities/src/image/mod.ts new file mode 100644 index 0000000..5c71f8b --- /dev/null +++ b/packages/utilities/src/image/mod.ts @@ -0,0 +1,7 @@ +/** + * Image-related utilities for Machine Learning + * and Data Science. + * @module + */ +export * from "./colors/mod.ts"; +export * from "./patches/patch_2d.ts"; diff --git a/packages/utilities/src/image/patches/patch_2d.ts b/packages/utilities/src/image/patches/patch_2d.ts new file mode 100644 index 0000000..d48bd2f --- /dev/null +++ b/packages/utilities/src/image/patches/patch_2d.ts @@ -0,0 +1,81 @@ +import type { Image } from "../../utils/mod.ts"; +import type { Patch2d, PatchCollection } from "../../utils/common_types.ts"; + +/** + * Extract patches from a 2d image. + */ + +/** + * Get the row and column of the nth element of a + * 2d array. 
+ * @param n Current Position + * @param width Width of a row + * @returns [row, column] + */ +function clamp(n: number, width: number): [number, number] { + if (n >= width) return [~~(n / width), n % width]; + return [0, n]; +} + +/** Private function to extract patches */ +function extract(image: Image, options: Patch2d): [Uint8ClampedArray, number] { + /** Get number of possible patches in each dimension */ + const nX = image.width - options.width + 1; + const nY = image.height - options.height + 1; + /** Total number of patches is nX * nY */ + const nPatches = nX * nY; + /** Area of each patch, used to calculate offset of resulting array */ + const patchArea = options.width * options.height; + + const res = new Uint8ClampedArray( + options.width * options.height * image.channels * nPatches, + ); + + let i = 0; + while (i < nPatches) { + const [row, col] = clamp(i, nX); + /** Starting index of the current patch */ + const offset = row * image.width + col; + let j = 0; + while (j < options.height) { + /** Starting index of the current subrow */ + const patchRow = j * image.width; + /** Copy the entire patchArea-size patch into the resulting array */ + res.set( + image.data.slice( + (offset + patchRow) * image.channels, + (offset + options.width + patchRow) * image.channels, + ), + (i * patchArea + j * options.width) * image.channels, + ); + j += 1; + } + i += 1; + } + return [res, nPatches]; +} + +/** + * Extract patches from a 2d image + * @param image Source image to extract patches from + * @param options Dimensions of a single patch + * @returns A collection of patches as a single Uint8ClampedArray + */ +export function patch2d(image: Image, options: Patch2d): PatchCollection { + if (image.width < options.width) { + throw new Error("Patch width cannot be greater than image width."); + } + if (image.height < options.height) { + throw new Error("Patch height cannot be greater than image height."); + } + + const [patches, n] = extract(image, options); + + return 
{ + width: options.width, + height: options.height, + channels: image.channels, + size: n, + data: patches, + }; +} diff --git a/packages/utilities/src/metrics/classification.ts b/packages/utilities/src/metrics/classification.ts new file mode 100644 index 0000000..07f3786 --- /dev/null +++ b/packages/utilities/src/metrics/classification.ts @@ -0,0 +1,223 @@ +import { useUnique } from "../utils/mod.ts"; + +interface Report { + c: ConfusionMatrix; + precision: number; + recall: number; + f1: number; + support: number; +} + +/** A report with metrics for classification results */ +export class ClassificationReport { + /** Number of elements classified correctly */ + true: number; + /** Number of elements classified incorrectly */ + false: number; + /** Total number of elements */ + size: number; + labels: string[]; + reports: Map; + constructor(y: ArrayLike, y1: ArrayLike) { + const unique = useUnique(y); + if (unique.length <= 1) { + throw new Error( + `Cannot create a classification report for less than 2 classes.`, + ); + } + this.true = 0; + this.false = 0; + this.size = y.length; + this.labels = unique.map((x) => `${x}`); + this.reports = new Map(); + for (const label of unique) { + let [tp, fn, fp, tn] = [0, 0, 0, 0]; + for (let i = 0; i < y.length; i += 1) { + if (y1[i] !== label && y[i] !== label) tn += 1; + else if (y1[i] !== label && y[i] === label) fn += 1; + else if (y1[i] === label && y[i] !== label) fp += 1; + else tp += 1; + } + this.true += tp + tn; + this.false += fp + fn; + const cMatrix = new ConfusionMatrix([tp, fn, fp, tn]); + this.reports.set(`${label}`, { + c: cMatrix, + precision: precisionScore(cMatrix), + f1: f1Score(cMatrix), + recall: recallScore(cMatrix), + support: cMatrix.truePositive + cMatrix.falseNegative, + }); + } + } + toString(): string { + let res = `Classification Report`; + res += `\nNumber of classes:\t${this.labels.length}\n`; + res += `Class\tPreci\tF1\tRec\tSup`; + for (const [label, report] of this.reports.entries()) { + 
res += `\n${label}`; + res += + `\t${report.precision}\t${report.f1}\t${report.recall}\t${report.support}`; + } + res += `\nAccuracy\t\t${ + this.true / (this.true + this.false) + }\t${this.size}`; + return res; + } + toHtml(): string { + let res = + ``; + for (const [label, report] of this.reports.entries()) { + res += + ``; + } + res += ``; + res += `
ClassPrecisionF1ScoreRecallSupport
${label}${report.precision}${report.f1}${report.recall}${report.support}
Accuracy${ + this.true / (this.true + this.false) + }${this.size}
`; + return res; + } + [Symbol.for("Deno.customInspect")](): string { + return this.toString(); + } + [Symbol.for("Jupyter.display")](): Record { + return { + // Plain text content + "text/plain": this.toString(), + + // HTML output + "text/html": this.toHtml(), + }; + } + toJson(): { + class: string[]; + precision: number[]; + f1: number[]; + recall: number[]; + support: number[]; + } { + const reports = Array.from(this.reports.entries()); + return { + class: reports.map((x) => x[0]), + precision: reports.map((x) => x[1].precision), + f1: reports.map((x) => x[1].f1), + recall: reports.map((x) => x[1].recall), + support: reports.map((x) => x[1].support), + }; + } + /* + [Symbol.for("Deno.customInspect")]() { + let res = `\t${this.labels.join("\t")}` + for (const label of this.labels) { + res += `\n${label}` + for (const label1 of this.labels) { + res += `\t${}` + } + } + return `\n\t${this.labelP}\t${this.labelN}\n${this.labelP}\t${this.truePositive}\t${this.falseNegative}\n${this.labelN}\t${this.falsePositive}\t${this.trueNegative}`; + } + */ +} + +/** Confusion matrix for the result. */ +export class ConfusionMatrix { + /** Number of positive elements classified correctly */ + truePositive: number; + /** Number of negative elements classified incorrectly */ + falsePositive: number; + /** Number of negative elements classified correctly */ + trueNegative: number; + /** Number of positive elements classified incorrectly */ + falseNegative: number; + /** Number of elements classified correctly */ + true: number; + /** Number of elements classified incorrectly */ + false: number; + /** Total number of elements */ + size: number; + /** Label for positive elements */ + labelP: string; + /** Label for negative elements */ + labelN: string; + constructor( + [tp, fn, fp, tn]: [number, number, number, number], + [label1, label2]: [string?, string?] 
= [], + ) { + this.truePositive = tp; + this.falseNegative = fn; + this.falsePositive = fp; + this.trueNegative = tn; + this.true = tn + tp; + this.false = fn + fp; + this.size = this.true + this.false; + this.labelP = label1 || "P"; + this.labelN = label2 || "N"; + } + valueOf(): [number, number, number, number] { + return [ + this.truePositive, + this.falseNegative, + this.falsePositive, + this.trueNegative, + ]; + } + [Symbol.for("Deno.customInspect")](): string { + return `\n\t${this.labelP}\t${this.labelN}\n${this.labelP}\t${this.truePositive}\t${this.falseNegative}\n${this.labelN}\t${this.falsePositive}\t${this.trueNegative}`; + } + static fromResults( + y: ArrayLike, + y1: ArrayLike, + ): ConfusionMatrix { + const unique = useUnique(y); + if (unique.length !== 2) { + throw new Error( + `Cannot create confusion matrix for ${unique.length} classes. Try ClassificationReport instead.`, + ); + } + let [tp, fn, fp, tn] = [0, 0, 0, 0]; + for (let i = 0; i < y.length; i += 1) { + if (y1[i] === unique[1] && y[i] === unique[1]) tn += 1; + else if (y1[i] === unique[1] && y[i] === unique[0]) fn += 1; + else if (y1[i] === unique[0] && y[i] === unique[1]) fp += 1; + else tp += 1; + } + return new this([tp, fn, fp, tn], [`${unique[0]}`, `${unique[1]}`]); + } +} + +/** The fraction of predictions that were correct */ +export function accuracyScore(cMatrix: ConfusionMatrix): number { + return cMatrix.true / cMatrix.size; +} +/** The fraction of "positive" predictions that were actually positive */ +export function precisionScore(cMatrix: ConfusionMatrix): number { + return cMatrix.truePositive / (cMatrix.truePositive + cMatrix.falsePositive); +} +/** The fraction of positives that were predicted correctly */ +export function sensitivityScore(cMatrix: ConfusionMatrix): number { + return cMatrix.truePositive / (cMatrix.truePositive + cMatrix.falseNegative); +} +/** The fraction of positives that were predicted correctly */ +export const recallScore = sensitivityScore; +/** The 
fraction of negatives that were predicted correctly */ +export function specificityScore(cMatrix: ConfusionMatrix): number { + return cMatrix.trueNegative / (cMatrix.trueNegative + cMatrix.falsePositive); +} +/** Compute F1 Score */ +export function f1Score(cMatrix: ConfusionMatrix): number { + return ( + (2 * cMatrix.truePositive) / + (2 * cMatrix.truePositive + cMatrix.falsePositive + cMatrix.falseNegative) + ); +} + +/** Compute Cohen's Kappa to find Agreement */ +export function cohensKappa(cMatrix: ConfusionMatrix): number { + const actualAgreement = accuracyScore(cMatrix); + const expectedAgreement = + ((cMatrix.truePositive + cMatrix.falsePositive) / cMatrix.size) * + ((cMatrix.truePositive + cMatrix.falseNegative) / cMatrix.size) + + ((cMatrix.falsePositive + cMatrix.trueNegative) / cMatrix.size) * + ((cMatrix.falseNegative + cMatrix.trueNegative) / cMatrix.size); + return (actualAgreement - expectedAgreement) / (1 - expectedAgreement); +} diff --git a/packages/utilities/src/metrics/mod.ts b/packages/utilities/src/metrics/mod.ts new file mode 100644 index 0000000..15b7232 --- /dev/null +++ b/packages/utilities/src/metrics/mod.ts @@ -0,0 +1,7 @@ +/** + * Metrics for Machine Learning outcomes. 
+ * @module + */ + +export * from "./classification.ts"; +export * from "./regression.ts"; diff --git a/packages/utilities/src/metrics/regression.ts b/packages/utilities/src/metrics/regression.ts new file mode 100644 index 0000000..a2cce5a --- /dev/null +++ b/packages/utilities/src/metrics/regression.ts @@ -0,0 +1,37 @@ +/** Mean Absolute Error */ +export function mae(y: ArrayLike, y1: ArrayLike): number { + let err = 0; + for (let i = 0; i < y.length; i += 1) { + err += Math.abs(y[i] - y1[i]); + } + return err / y.length; +} + +/** Mean Square Error */ +export function mse(y: ArrayLike, y1: ArrayLike): number { + let err = 0; + for (let i = 0; i < y.length; i += 1) { + err += (y[i] - y1[i]) ** 2; + } + return err / y.length; +} + +/** Root Mean Square Error */ +export function rmse(y: ArrayLike, y1: ArrayLike): number { + return Math.sqrt(mse(y, y1)); +} + +/** R2 Score for regression */ +export function r2(y: ArrayLike, y1: ArrayLike): number { + let mean = 0; + for (let i = 0; i < y.length; i += 1) { + mean += y[i]; + } + mean /= y.length; + let ssr = 0, sst = 0; + for (let i = 0; i < y.length; i += 1) { + ssr += Math.pow(y1[i] - mean, 2); + sst += Math.pow(y[i] - mean, 2); + } + return ssr / sst; +} diff --git a/packages/utilities/src/mod.ts b/packages/utilities/src/mod.ts new file mode 100644 index 0000000..8a2ce59 --- /dev/null +++ b/packages/utilities/src/mod.ts @@ -0,0 +1,5 @@ +export * from "./image/mod.ts"; +export * from "./utils/mod.ts"; +export * from "./metrics/mod.ts"; +export * from "./encoding/mod.ts"; +export * from "./text/mod.ts"; diff --git a/packages/utilities/src/text/mod.ts b/packages/utilities/src/text/mod.ts new file mode 100644 index 0000000..84e8bda --- /dev/null +++ b/packages/utilities/src/text/mod.ts @@ -0,0 +1,7 @@ +/** + * Text-related utilities for Machine Learning + * and Data Science. 
+ * @module + */ + +export * from "./preprocess/mod.ts"; diff --git a/packages/utilities/src/text/preprocess/cleaner.ts b/packages/utilities/src/text/preprocess/cleaner.ts new file mode 100644 index 0000000..575bd28 --- /dev/null +++ b/packages/utilities/src/text/preprocess/cleaner.ts @@ -0,0 +1,53 @@ +import type { StandardizeConfig } from "../../utils/common_types.ts"; + +/** Simple text cleaner */ +export class TextCleaner implements StandardizeConfig { + stripHtml: boolean; + lowercase: boolean; + normalizeWhiteSpaces: boolean; + stripNewlines: boolean; + constructor({ + stripHtml = false, + lowercase = false, + normalizeWhiteSpaces = true, + stripNewlines = true, + }: StandardizeConfig = {}) { + this.stripHtml = stripHtml; + this.lowercase = lowercase; + this.normalizeWhiteSpaces = normalizeWhiteSpaces; + this.stripNewlines = stripNewlines; + } + clean(text: string): string; + clean(text: string[]): string[]; + clean(text: string | string[]) { + if (Array.isArray(text)) { + return text.map((line) => preprocess(line, this)); + } + return preprocess(text, this); + } +} + +/** Function for quick cleaning of text */ +export function preprocess( + text: string, + { + stripHtml = false, + lowercase = false, + normalizeWhiteSpaces = true, + stripNewlines = true, + }: StandardizeConfig = {}, +): string { + if (lowercase) { + text = text.toLowerCase(); + } + if (stripHtml) { + text = text.replace(/<([^>]+)>/g, " "); + } + if (stripNewlines) { + text = text.replace(/\n/g, " "); + } + if (normalizeWhiteSpaces) { + text = text.replace(/\s\s+/g, " "); + } + return text; +} diff --git a/packages/utilities/src/text/preprocess/mod.ts b/packages/utilities/src/text/preprocess/mod.ts new file mode 100644 index 0000000..246a7c3 --- /dev/null +++ b/packages/utilities/src/text/preprocess/mod.ts @@ -0,0 +1,29 @@ +import type { + Cleaner, + Tokenizer, + Transformer, + Vectorizer, +} from "../../utils/common_types.ts"; + +// import { TextCleaner } from "./cleaner.ts"; +// import { 
SplitTokenizer } from "./tokenize/mod.ts"; +// import { CountVectorizer } from "./vectorize/mod.ts"; +// import { TfIdfTransformer } from "./transformer/mod.ts"; + +/** TODO */ +interface PreprocessorConfig { + vectorizer: Vectorizer; + tokenizer: Tokenizer; + cleaner: Cleaner; + transformer: Transformer; +}; + +/** TODO */ +export class TextPreprocessor implements Partial { + // todo +} + +export * from "./cleaner.ts"; +export * from "./tokenize/mod.ts"; +export * from "./transformer/mod.ts"; +export * from "./vectorize/mod.ts"; diff --git a/packages/utilities/src/text/preprocess/tokenize/mod.ts b/packages/utilities/src/text/preprocess/tokenize/mod.ts new file mode 100644 index 0000000..80a9941 --- /dev/null +++ b/packages/utilities/src/text/preprocess/tokenize/mod.ts @@ -0,0 +1 @@ +export { SplitTokenizer } from "./split.ts"; diff --git a/packages/utilities/src/text/preprocess/tokenize/split.ts b/packages/utilities/src/text/preprocess/tokenize/split.ts new file mode 100644 index 0000000..046d370 --- /dev/null +++ b/packages/utilities/src/text/preprocess/tokenize/split.ts @@ -0,0 +1,112 @@ +import { DefaultIgnoreList } from "../../../constants/stop_words.ts"; +import type { BaseTokenizerOptions } from "../../../utils/common_types.ts"; + +/** Tokenize text based on separator (whitespace) */ +export class SplitTokenizer { + /** Words to ignore from vocabulary */ + skipWords: "english" | false | string[]; + /** Configuration / Function for preprocessing */ + vocabulary: Map; + /** An internal counter for remembering the last index in vocabulary. */ + #lastToken: Uint32Array; + constructor( + options: Partial = {}, + ) { + this.skipWords = options.skipWords ?? false; + this.vocabulary = options.vocabulary ?? 
new Map(); + this.#lastToken = new Uint32Array(1); + if (options.indices && !this.vocabulary.size) { + this.#lastToken[0] = 2; + this.vocabulary.set("__pad__", 0); + this.vocabulary.set("__unk__", 1); + } + if (this.vocabulary.size) { + this.#lastToken[0] = this.vocabulary.size; + } + } + get lastToken(): number { + return Atomics.load(this.#lastToken, 0); + } + /** Construct a vocabulary from a given set of text. */ + fit(text: string | string[]): this { + if (Array.isArray(text)) { + let i = 0; + while (i < text.length) { + this.fit(text[i]); + i += 1; + } + } else { + const words = this.split(text); + let i = 0; + while (i < words.length) { + if (!this.vocabulary.has(words[i])) { + if (this.skipWords === "english") { + if (DefaultIgnoreList.includes(words[i])) { + i += 1; + continue; + } + } else if (Array.isArray(this.skipWords)) { + if (this.skipWords.includes(words[i])) { + i += 1; + continue; + } + } + const token = this.#incrementToken(); + this.vocabulary.set(words[i], token); + } + i += 1; + } + } + return this; + } + #incrementToken(): number { + return Atomics.add(this.#lastToken, 0, 1); + } + /** + * Convert a document (string | array of strings) into vectors. + */ + transform(text: string | string[]): number[][] { + if (!this.vocabulary.size) { + throw new Error( + "Tokenizer vocabulary not initialized yet. 
Call `Tokenizer()` with a custom vocabulary or use `.fit()` on text.", + ); + } + if (Array.isArray(text)) { + const size = Math.max(...text.map((x) => this.split(x).length)); + const res = Array(text.length); + let i = 0; + while (i < text.length) { + res[i] = this.#transform(text[i], size); + i += 1; + } + return res; + } else { + return [this.#transform(text, 0)]; + } + } + #transform(text: string, size: number): number[] { + const words = this.split(text); + if (!size) size = words.length; + const res = new Array(size); + res.fill(this.vocabulary.get("__pad__") || 0); + let i = 0; + while (i < words.length && i < size) { + if (this.vocabulary.has(words[i])) { + const index = this.vocabulary.get(words[i]); + if (typeof index === "number") { + res[i] = index; + } else { + res[i] = this.vocabulary.get("__unk__") || 0; + } + } else { + res[i] = this.vocabulary.get("__unk__") || 0; + } + i += 1; + } + return res; + } + // TODO: Support custom split modes + split(text: string): string[] { + return text.split(" "); + } +} diff --git a/packages/utilities/src/text/preprocess/transformer/mod.ts b/packages/utilities/src/text/preprocess/transformer/mod.ts new file mode 100644 index 0000000..c10dbfe --- /dev/null +++ b/packages/utilities/src/text/preprocess/transformer/mod.ts @@ -0,0 +1 @@ +export { TfIdfTransformer } from "./tfidf.ts"; diff --git a/packages/utilities/src/text/preprocess/transformer/tfidf.ts b/packages/utilities/src/text/preprocess/transformer/tfidf.ts new file mode 100644 index 0000000..cef8ea4 --- /dev/null +++ b/packages/utilities/src/text/preprocess/transformer/tfidf.ts @@ -0,0 +1,42 @@ +import type { DataType } from "../../../utils/common_types.ts"; +import type { Matrix, MatrixLike } from "../../../mod.ts"; +import { multiplyDiags } from "../../../utils/math.ts"; + +/** Convert tf features (CountVectorizer) into tf-idf features. 
*/ +export class TfIdfTransformer { + idf: null | Float64Array; + constructor({ idf }: { idf?: Float64Array } = {}) { + this.idf = idf ?? null; + } + /** + * Get idf matrix from tf features. + * @param data tf features from CountVectorizer + * @returns Tf-Idf transformer + */ + fit(data: Matrix): TfIdfTransformer { + const shape = { + features: data.nCols, + samples: data.nRows, + }; + const freq = data.rowSum(); + + const idf = new Float64Array(freq.length); + + let i = 0; + while (i < idf.length) { + idf[i] = Math.log(shape.samples / Number(freq[i])) + 1; + i += 1; + } + this.idf = idf; + return this; + } + /** + * Transform an tf features into tf-idf features. + * @param data tf features from CountVectorizer + * @returns Sparse matrix of Tf-Idf features + */ + transform(data: MatrixLike): Matrix { + if (this.idf === null) throw new Error("IDF not initialized yet."); + return multiplyDiags(data, this.idf); + } +} diff --git a/packages/utilities/src/text/preprocess/vectorize/count_vectorizer.ts b/packages/utilities/src/text/preprocess/vectorize/count_vectorizer.ts new file mode 100644 index 0000000..189fe0f --- /dev/null +++ b/packages/utilities/src/text/preprocess/vectorize/count_vectorizer.ts @@ -0,0 +1,40 @@ +import type { DataType, DType } from "../../../utils/common_types.ts"; +import { getConstructor } from "../../../utils/mod.ts"; +import { Matrix } from "../../../mod.ts"; + +/** + * Convert tokens into vectors based on term frequency + */ +export class CountVectorizer { + #vocabSize: number; + constructor(vocabSize: number) { + this.#vocabSize = vocabSize; + } + /** + * Convert a document (string | array of strings) into vectors. 
+ */ + transform(tokens: number[][], dType: T): Matrix { + if (!this.#vocabSize) { + throw new Error("Vocab not initialized."); + } + const res = new Matrix(dType, [tokens.length, this.#vocabSize]); + let i = 0; + while (i < tokens.length) { + res.setRow(i, this.#transform(tokens[i], dType)); + i += 1; + } + return res as Matrix; + } + #transform(tokens: number[], dType: T): DType { + const res = new (getConstructor(dType))(this.#vocabSize); + let i = 0; + while (i < tokens.length) { + if (tokens[i] < this.#vocabSize) { + // @ts-ignore No error here + res[tokens[i]] += typeof res[tokens[i]] === "bigint" ? 1n : 1; + } + i += 1; + } + return res as DType; + } +} diff --git a/packages/utilities/src/text/preprocess/vectorize/mod.ts b/packages/utilities/src/text/preprocess/vectorize/mod.ts new file mode 100644 index 0000000..2f9fa65 --- /dev/null +++ b/packages/utilities/src/text/preprocess/vectorize/mod.ts @@ -0,0 +1,2 @@ +export { CountVectorizer } from "./count_vectorizer.ts"; +export { MultiHotVectorizer } from "./multi_hot.ts"; diff --git a/packages/utilities/src/text/preprocess/vectorize/multi_hot.ts b/packages/utilities/src/text/preprocess/vectorize/multi_hot.ts new file mode 100644 index 0000000..ba7800a --- /dev/null +++ b/packages/utilities/src/text/preprocess/vectorize/multi_hot.ts @@ -0,0 +1,39 @@ +import type { DataType, DType } from "../../../utils/common_types.ts"; +import { getConstructor } from "../../../utils/mod.ts"; +import { Matrix } from "../../../mod.ts"; + +/** + * Convert tokens into vectors based on term frequency + */ +export class MultiHotVectorizer { + #vocabSize: number; + constructor(vocabSize: number) { + this.#vocabSize = vocabSize; + } + /** + * Convert a document (string | array of strings) into vectors. 
+ */ + transform(tokens: number[][], dType: T): Matrix { + if (!this.#vocabSize) { + throw new Error("Vocab not initialized."); + } + const res = new Matrix(dType, [tokens.length, this.#vocabSize]); + let i = 0; + while (i < tokens.length) { + res.setRow(i, this.#transform(tokens[i], dType)); + i += 1; + } + return res as Matrix; + } + #transform(tokens: number[], dType: T): DType { + const res = new (getConstructor(dType))(this.#vocabSize); + let i = 0; + while (i < tokens.length) { + if (tokens[i] < this.#vocabSize) { + res[tokens[i]] = typeof res[tokens[i]] === "bigint" ? 1n : 1; + } + i += 1; + } + return res as DType; + } +} diff --git a/packages/utilities/src/utils/array/mod.ts b/packages/utilities/src/utils/array/mod.ts new file mode 100644 index 0000000..38ed82f --- /dev/null +++ b/packages/utilities/src/utils/array/mod.ts @@ -0,0 +1,7 @@ +/** + * Utilities for working with arrays in appraisal. + * @module + */ +export { useRange, useSeries } from "./range.ts"; +export { useUnique } from "./unique.ts"; +export { useSplit } from "./split.ts"; diff --git a/packages/utilities/src/utils/array/range.ts b/packages/utilities/src/utils/array/range.ts new file mode 100644 index 0000000..f92243c --- /dev/null +++ b/packages/utilities/src/utils/array/range.ts @@ -0,0 +1,38 @@ +/** + * Get n evenly distributed numbers in a range. + * @param n Number of numbers to generate. + * @param min Lower limit of range (inclusive). + * @param max Upper limit of range (exclusive). + * @returns Array of n evenly distributed numbers. + */ +export function useRange(n: number, min = 0, max = 1): number[] { + const res = new Array(n); + let i = 0; + while (i < n) { + res[i] = min + ((i * (max - min)) / n); + i += 1; + } + return res; +} + +/** + * Get an array of numbers between a given range, + * incremented by a step. + * @param min Lower limit of range (inclusive). + * @param max Upper limit of range (exclusive). 
+ * @param step step to increment by + * @returns Array of numbers + */ +export function useSeries(max: number): number[]; +export function useSeries(min: number, max: number, step?: number): number[]; +export function useSeries(min: number, max?: number, step = 1): number[] { + if (typeof max === "undefined") [min, max] = [0, min]; + const res = new Array(~~((max - min) / step)); + res[0] = min; + let i = 1; + while (i < res.length) { + res[i] = res[i - 1] + step; + i += 1; + } + return res; +} diff --git a/packages/utilities/src/utils/array/split.ts b/packages/utilities/src/utils/array/split.ts new file mode 100644 index 0000000..790d0ca --- /dev/null +++ b/packages/utilities/src/utils/array/split.ts @@ -0,0 +1,49 @@ +import type { Sliceable } from "../common_types.ts"; +import { useShuffle } from "../random/shuffle.ts"; + +interface SplitOptions { + ratio: [number, number]; + shuffle?: boolean; +} + +/** Split arrays by their first axis */ +export function useSplit( + options: SplitOptions = { ratio: [7, 3], shuffle: false }, + ...arr: T +): [typeof arr, typeof arr] { + if (!arr.every((x) => x.length === arr[0].length)) { + throw new Error("All arrays must have equal length!"); + } + const { ratio, shuffle } = options; + const idx = Math.floor(arr[0].length * (ratio[0] / (ratio[0] + ratio[1]))); + if (!shuffle) { + return [arr.map((x) => x.slice(0, idx)), arr.map((x) => x.slice(idx))] as [ + T, + T, + ]; + } else { + const shuffled = useShuffle(0, arr[0].length); + const x1 = shuffled.slice(0, idx); + const x2 = shuffled.slice(idx); + return [ + arr.map((x) => + x.filter( + ((_, i, __) => x1.includes(i)) as ( + value: unknown, + index: number, + array: unknown[], + ) => value is typeof x, + ) + ) as typeof arr, + arr.map((x) => + x.filter( + ((_, i, __) => x2.includes(i)) as ( + value: unknown, + index: number, + array: unknown[], + ) => value is typeof x, + ) + ) as typeof arr, + ]; + } +} diff --git a/packages/utilities/src/utils/array/unique.ts 
b/packages/utilities/src/utils/array/unique.ts new file mode 100644 index 0000000..6a4dcea --- /dev/null +++ b/packages/utilities/src/utils/array/unique.ts @@ -0,0 +1,10 @@ +/** + * Remove duplicate values in an array. + * Uses a strict = for identifying duplicates. + * @param {T[]} arr Array to remove duplicates from. + * @returns {T[]} Array with only unique elements. + */ +export function useUnique(arr: ArrayLike): T[] { + const array = Array.from(arr); + return array.filter((x, i) => array.indexOf(x) === i); +} diff --git a/packages/utilities/src/utils/common_types.ts b/packages/utilities/src/utils/common_types.ts new file mode 100644 index 0000000..d373484 --- /dev/null +++ b/packages/utilities/src/utils/common_types.ts @@ -0,0 +1,231 @@ +import type { Matrix } from "./mod.ts"; + +export type DataType = + | "u8" + | "u16" + | "u32" + | "u64" + | "i8" + | "i16" + | "i32" + | "i64" + | "f32" + | "f64"; + +export interface TypedArrayMapping { + u8: Uint8Array; + u16: Uint16Array; + u32: Uint32Array; + u64: BigUint64Array; + i8: Int8Array; + i16: Int16Array; + i32: Int32Array; + i64: BigInt64Array; + f32: Float32Array; + f64: Float64Array; +} + +export interface TypedArrayConstructorMapping { + u8: Uint8ArrayConstructor; + u16: Uint16ArrayConstructor; + u32: Uint32ArrayConstructor; + u64: BigUint64ArrayConstructor; + i8: Int8ArrayConstructor; + i16: Int16ArrayConstructor; + i32: Int32ArrayConstructor; + i64: BigInt64ArrayConstructor; + f32: Float32ArrayConstructor; + f64: Float64ArrayConstructor; +} + +interface TypedArrayValueMapping { + u8: number; + u16: number; + u32: number; + u64: bigint; + i8: number; + i16: number; + i32: number; + i64: bigint; + f32: number; + f64: number; +} + +export type DTypeValue = T extends + keyof TypedArrayValueMapping ? TypedArrayValueMapping[T] : never; + +type AddableTypes = number | bigint; + +export type AddDTypeValues< + T1 extends AddableTypes, + T2 extends AddableTypes, +> = T1 extends number ? T2 extends number ? 
number + : T2 extends bigint ? bigint + : never + : T1 extends bigint ? T2 extends number ? bigint + : T2 extends bigint ? bigint + : never + : never; + +export type DType = T extends + keyof TypedArrayMapping ? TypedArrayMapping[T] : never; + +export type DTypeConstructor = + T extends keyof TypedArrayConstructorMapping ? TypedArrayConstructorMapping[T] + : never; + +export type TypedArray = + | Uint8Array + | Uint16Array + | Uint32Array + | BigUint64Array + | Int8Array + | Int16Array + | Int32Array + | BigInt64Array + | Float32Array + | Float64Array; + +export type Constructor = new (length: number) => T; + +export interface Sliceable { + filter( + predicate: ( + value: unknown, + index: number, + array: unknown[], + ) => value is unknown, + ): Sliceable; + slice(start?: number, end?: number): Sliceable; + length: number; +} + +export function getDataType
(data: DType
): DT { + return ( + data instanceof Uint8Array + ? "u8" + : data instanceof Uint16Array + ? "u16" + : data instanceof Uint32Array + ? "u32" + : data instanceof Int8Array + ? "i8" + : data instanceof Int16Array + ? "i16" + : data instanceof Int32Array + ? "i32" + : data instanceof Float32Array + ? "f32" + : data instanceof Float64Array + ? "f64" + : "u8" + ) as DT; // shouldn't reach "u8" +} + +export interface Image2d { + /** Width of the image */ + width: number; + /** Height of the image */ + height: number; + /** + * Number of channels in the image + * For a regular RGBA image, the value + * will be 4. + */ + channels: number; + /** Array of length width * height * channels */ + data: Uint8ClampedArray; +} + +export interface Patch2d { + /** Width of the patch */ + width: number; + /** Height of the patch */ + height: number; +} +export interface PatchCollection extends Patch2d { + /** + * Number of channels in the image + * For a regular RGBA image, the value + * will be 4. + */ + channels: number; + /** Number of patches in the collection */ + size: number; + data: Uint8ClampedArray; +} + +export type Pixel = [number, number, number, number?]; + +export interface StandardizeConfig { + /** Whether to convert everything to lowercase before fitting / transforming */ + lowercase?: boolean; + /** Whether to strip HTML tags */ + stripHtml?: boolean; + /** Whether to replace multiple whitespaces. 
*/ + normalizeWhiteSpaces?: boolean; + /** Strip Newlines */ + stripNewlines?: boolean; +} + +export type VectorizerMode = "count" | "indices" | "multihot" | "tfidf"; + +export type VectorizerModeConfig = + | { + mode: "count"; + config?: Partial; + } + | { + mode: "indices"; + config?: Partial; + } + | { + mode: "multihot"; + config?: Partial; + } + | { + mode: "tfidf"; + config?: Partial; + }; + +export interface TokenizerModeConfig { + mode: "whitespace"; + config?: Partial; +} + +export interface BaseVectorizerOptions { + /** Map words to indices */ + vocabulary: Map; + /** Options for standardizing text */ + standardize: StandardizeConfig | ((s: string) => string); + /** Words to ignore from vocabulary */ + skipWords: "english" | false | string[]; +} + +export interface BaseTokenizerOptions { + /** Map words to indices */ + vocabulary: Map; + /** Options for standardizing text */ + standardize: StandardizeConfig | ((s: string) => string); + /** Words to ignore from vocabulary */ + skipWords: "english" | false | string[]; +} + +export interface Tokenizer { + fit(text: string | string[]): unknown; + transform(text: string | string[]): number[]; +} + +export interface Cleaner { + clean(text: string): string; + clean(text: string[]): string[]; +} + +export interface Vectorizer { + transform(tokens: number[][], dType: T): Matrix; +} + +export interface Transformer { + fit(data: Matrix): Transformer; + transform(data: Matrix): Matrix; +} diff --git a/packages/utilities/src/utils/math.ts b/packages/utilities/src/utils/math.ts new file mode 100644 index 0000000..cf52d1b --- /dev/null +++ b/packages/utilities/src/utils/math.ts @@ -0,0 +1,31 @@ +import type { DataType } from "./common_types.ts"; +import { Matrix, type MatrixLike } from "./misc/matrix.ts"; + +/** A very basic, low-effort multiplication. 
*/ +export function multiplyDiags( + x: MatrixLike, + y: ArrayLike, +): Matrix { + const res = new Matrix(x); + if (y.length !== res.nCols) { + throw new Error( + `Expected diagonal vector of shape (${res.nCols}, 1). Found (${y.length}, 1).`, + ); + } + let i = 0; + while (i < res.nRows) { + const offset = i * res.nCols; + let j = 0; + while (j < y.length) { + res.setCell( + i, + j, + // @ts-ignore types will always match + x.data[offset + j] * (typeof res[0] === "bigint" ? BigInt(y[j]) : y[j]), + ); + j += 1; + } + i += 1; + } + return res as Matrix; +} diff --git a/packages/utilities/src/utils/misc/get_constructor.ts b/packages/utilities/src/utils/misc/get_constructor.ts new file mode 100644 index 0000000..42b30c9 --- /dev/null +++ b/packages/utilities/src/utils/misc/get_constructor.ts @@ -0,0 +1,30 @@ +import type { DataType, DTypeConstructor } from "../common_types.ts"; + +export function getConstructor
( + dType: DT, +): DTypeConstructor
{ + switch (dType) { + case "u8": + return Uint8Array as DTypeConstructor
; + case "u16": + return Uint16Array as DTypeConstructor
; + case "u32": + return Uint32Array as DTypeConstructor
; + case "u64": + return BigUint64Array as DTypeConstructor
; + case "i8": + return Int8Array as DTypeConstructor
; + case "i16": + return Int16Array as DTypeConstructor
; + case "i32": + return Int32Array as DTypeConstructor
; + case "i64": + return BigInt64Array as DTypeConstructor
; + case "f32": + return Float32Array as DTypeConstructor
; + case "f64": + return Float64Array as DTypeConstructor
; + default: + throw new Error(`Unknown data type ${dType}.`); + } +} diff --git a/packages/utilities/src/utils/misc/image.ts b/packages/utilities/src/utils/misc/image.ts new file mode 100644 index 0000000..7e6d2c8 --- /dev/null +++ b/packages/utilities/src/utils/misc/image.ts @@ -0,0 +1,101 @@ +import type { Pixel } from "../common_types.ts"; + +export type ImageData = { + data: Uint8ClampedArray; + width: number; + height: number; + channels: number; // always 4, for compat with retraigo/vectorizer + colorSpace: "srgb" | "display-p3"; +}; + +type ImageOptions = { + data: Uint8ClampedArray; + width: number; + height?: number; + channels?: number; +}; + +export class Image implements ImageData { + data: Uint8ClampedArray; + width: number; + height: number; + channels: number; // always 4, for compat with retraigo/vectorizer + colorSpace: "srgb" | "display-p3"; + constructor(data: ImageOptions) { + this.data = Uint8ClampedArray.from(data.data); + // N-channels is always 4 + this.channels = 4; + this.width = data.width; + this.height = data.height ?? + this.data.length / (this.width * this.channels); + // If height is not an integer or width is incorrect + if (this.height !== ~~this.height) { + throw new TypeError( + `Height must be an integer. 
Received ${this.height}.`, + ); + } + // Only srgb is supported + this.colorSpace = "srgb"; + } + get pixels(): number { + return this.width * this.height; + } + getNthPixel(n: number): [number, number, number, number] { + const offset = n << 2; + return [ + this.data[offset], + this.data[offset + 1], + this.data[offset + 2], + this.data[offset + 3], + ]; + } + getPixel(row: number, col: number): Pixel { + if (row >= this.height) { + throw new RangeError( + `Requested row ${row} is outside of bounds 0..${this.height}.`, + ); + } + if (col >= this.width) { + throw new RangeError( + `Requested column ${col} is outside of bounds 0..${this.width}.`, + ); + } + const offset = row * this.width + col; + const [r, g, b, a] = this.data.slice(offset, offset + 4); + return [r, g, b, a]; + } + setPixel(row: number, col: number, [r, g, b, a]: Pixel) { + if (row >= this.height) { + throw new RangeError( + `Requested row ${row} is outside of bounds 0..${this.height}.`, + ); + } + if (col >= this.width) { + throw new RangeError( + `Requested column ${col} is outside of bounds 0..${this.width}.`, + ); + } + const offset = row * this.width + col; + this.data.set(typeof a !== "undefined" ? [r, g, b, a] : [r, g, b], offset); + } + updatePixel( + row: number, + col: number, + color: Pixel, + ) { + if (row >= this.height) { + throw new RangeError( + `Requested row ${row} is outside of bounds 0..${this.height}.`, + ); + } + if (col >= this.width) { + throw new RangeError( + `Requested column ${col} is outside of bounds 0..${this.width}.`, + ); + } + const offset = row * this.width + col; + for (let i = 0; i < color.length; i += 1) { + this.data[offset + i] += color[i] ?? 0; + } + } +} diff --git a/packages/utilities/src/utils/misc/matrix.ts b/packages/utilities/src/utils/misc/matrix.ts new file mode 100644 index 0000000..677353a --- /dev/null +++ b/packages/utilities/src/utils/misc/matrix.ts @@ -0,0 +1,304 @@ +/** + * A 2D Tensor with more methods. 
+ * @module + */ + +import type { + AddDTypeValues, + DataType, + DType, + DTypeConstructor, + DTypeValue, + Sliceable, +} from "../common_types.ts"; +import { type NDArray, type Shape, Tensor, type TensorLike } from "./tensor.ts"; + +/** The base type implemented by Matrix */ +export type MatrixLike
= { + /** Raw 1D TypedArray supplied */ + data: DType
; + /** Number of rows, columns */ + shape: Shape<2>; +}; + +/** + * Class for 2D Arrays. + * This is not akin to a mathematical Matrix (a collection of column vectors). + * This is a collection of row vectors. + * A special case of Tensor for 2D data. + */ +export class Matrix
extends Tensor + implements Sliceable, MatrixLike
{ + /** + * Create a matrix from a typed array + * @param data Data to move into the matrix. + * @param shape [rows, columns] of the matrix. + */ + constructor(matrix: TensorLike); + constructor(array: NDArray
[2], dType: DT); + constructor(data: DType
, shape: Shape<2>); + constructor(dType: DT, shape: Shape<2>); + constructor( + data: NDArray
[2] | DType
| DT | TensorLike, + shape?: Shape<2> | DT, + ) { + // @ts-ignore This call will work + super(data, shape); + } + /** Convert the Matrix into a HTML table */ + get html(): string { + let res = "\n"; + res += "idx"; + for (let i = 0; i < this.nCols; i += 1) { + res += `${i}`; + } + res += ""; + let j = 0; + for (const row of this.rows()) { + res += ``; + j += 1; + for (const x of row) { + res += ``; + } + res += ""; + } + res += "
${j}${x}
"; + return res; + } + get length(): number { + return this.nRows; + } + /** Returns number of cols */ + get nCols(): number { + return this.shape[1]; + } + /** Returns number of rows */ + get nRows(): number { + return this.shape[0]; + } + /** Get the transpose of the matrix. This method clones the matrix. */ + get T(): Matrix
{ + const resArr = new (this.data.constructor as DTypeConstructor
)( + this.nRows * this.nCols, + ) as DType
; + let i = 0; + for (const col of this.cols()) { + // @ts-ignore This line will work + resArr.set(col, i * this.nRows); + i += 1; + } + return new Matrix(resArr, this.shape); + } + /** Get a pretty version for printing. DO NOT USE FOR MATRICES WITH MANY COLUMNS. */ + get pretty(): string { + let res = ""; + for (const row of this.rows()) { + res += row.join("\t"); + res += "\n"; + } + return res; + } + /** Alias for row */ + at(pos: number): DType
{ + return this.row(pos); + } + /** Get the nth column in the matrix */ + col(n: number): DType
{ + let i = 0; + const col = new (this.data.constructor as DTypeConstructor
)( + this.nRows, + ) as DType
; + let offset = 0; + while (i < this.nRows) { + col[i] = this.data[offset + n]; + i += 1; + offset += this.nCols; + } + return col; + } + colMean(): DType
{ + const sum = this.colSum(); + let i = 0; + const divisor = ( + typeof this.data[0] === "bigint" ? BigInt(this.nCols) : this.nCols + ) as DTypeValue
; + while (i < sum.length) { + sum[i] = (sum[i] as DTypeValue
) / divisor; + i += 1; + } + return sum; + } + /** Get a column array of all column sums in the matrix */ + colSum(): DType
{ + const sum = new (this.data.constructor as DTypeConstructor
)( + this.nRows, + ) as DType
; + let i = 0; + while (i < this.nCols) { + let j = 0; + while (j < this.nRows) { + // @ts-ignore I'll fix this later + sum[j] = (sum[j] + this.item(j, i)) as AddDTypeValues< + DTypeValue
, + DTypeValue
+ >; + j += 1; + } + i += 1; + } + return sum; + } + /** Get the dot product of two matrices */ + dot(rhs: Matrix
): number | bigint { + if (rhs.nRows !== this.nRows) { + throw new Error("Matrices must have equal rows."); + } + if (rhs.nCols !== this.nCols) { + throw new Error("Matrices must have equal cols."); + } + let res = (typeof this.data[0] === "bigint" ? 0n : 0) as DTypeValue
; + let j = 0; + while (j < this.nCols) { + let i = 0; + while (i < this.nRows) { + const adder = (this.item(i, j) as DTypeValue
) * + (rhs.item(i, j) as DTypeValue
); + // @ts-ignore I'll fix this later + res += adder as DTypeValue
; + i += 1; + } + j += 1; + } + return res; + } + /** Filter the matrix by rows */ + filter( + fn: (value: DType
, row: number, _: DType
[]) => boolean, + ): Matrix
{ + const satisfying: number[] = []; + let i = 0; + while (i < this.nRows) { + if (fn(this.row(i), i, [])) { + satisfying.push(i); + } + i += 1; + } + const matrix = new Matrix(this.dType, [satisfying.length, this.nCols]); + i = 0; + while (i < satisfying.length) { + // @ts-ignore This line will work + matrix.setRow(i, this.row(satisfying[i])); + i += 1; + } + return matrix; + } + /** Get an item using a row and column index */ + item(row: number, col: number): DTypeValue
{ + return this.data[row * this.nCols + col] as DTypeValue
; + } + /** Get the nth row in the matrix */ + row(n: number): DType
{ + return this.data.slice(n * this.nCols, (n + 1) * this.nCols) as DType
; + } + rowMean(): DType
{ + const sum = this.rowSum(); + let i = 0; + const divisor = ( + typeof this.data[0] === "bigint" ? BigInt(this.nRows) : this.nRows + ) as DTypeValue
; + while (i < sum.length) { + sum[i] = (sum[i] as DTypeValue
) / divisor; + i += 1; + } + return sum; + } + /** Compute the sum of all rows */ + rowSum(): DType
{ + const sum = new (this.data.constructor as DTypeConstructor
)( + this.nCols, + ) as DType
; + let i = 0; + let offset = 0; + while (i < this.nRows) { + let j = 0; + while (j < this.nCols) { + // @ts-ignore This line will work + sum[j] += this.data[offset + j]; + j += 1; + } + i += 1; + offset += this.nCols; + } + return sum; + } + /** + * Add a value to an existing element + * Will throw an error if the types mismatch + */ + setAdd(row: number, col: number, val: number | bigint) { + // @ts-expect-error Must provide appropriate number/bigint argument + this.data[row * this.nCols + col] += val; + } + /** Replace a column */ + setCol(col: number, val: ArrayLike): number { + let i = 0; + while (i < this.nRows) { + this.data[i * this.nCols + col] = val[i]; + i += 1; + } + return col; + } + /** Set a value in the matrix */ + setCell(row: number, col: number, val: number) { + this.data[row * this.nCols + col] = val; + } + /** Replace a row */ + setRow(row: number, val: ArrayLike | ArrayLike) { + // @ts-expect-error Must provide appropriate number/bigint argument + this.data.set(val, row * this.nCols); + } + /** Slice matrix by rows */ + slice(start = 0, end?: number): Matrix
{ + return new Matrix
( + this.data.slice( + start ? start * this.nCols : 0, + end ? end * this.nCols : undefined, + ) as DType
, + [end ? end - start : this.nRows - start, this.nCols], + ); + } + /** Iterate through rows */ + *rows(): Generator> { + let i = 0; + while (i < this.nRows) { + yield this.data.slice(i * this.nCols, (i + 1) * this.nCols) as DType
; + i += 1; + } + } + /** Iterate through columns */ + *cols(): Generator> { + let i = 0; + while (i < this.nCols) { + let j = 0; + const col = new (this.data.constructor as DTypeConstructor
)( + this.nRows, + ) as DType
; + while (j < this.nRows) { + col[j] = this.data[j * this.nCols + i]; + j += 1; + } + yield col; + i += 1; + } + } + + [Symbol.for("Jupyter.display")](): Record { + return { + // Plain text content + "text/plain": this.pretty, + + // HTML output + "text/html": this.html, + }; + } +} diff --git a/packages/utilities/src/utils/misc/mod.ts b/packages/utilities/src/utils/misc/mod.ts new file mode 100644 index 0000000..4e1e713 --- /dev/null +++ b/packages/utilities/src/utils/misc/mod.ts @@ -0,0 +1,4 @@ +export * from "./get_constructor.ts"; +export * from "./image.ts"; +export * from "./matrix.ts"; +export * from "./tensor.ts"; diff --git a/packages/utilities/src/utils/misc/tensor.ts b/packages/utilities/src/utils/misc/tensor.ts new file mode 100644 index 0000000..98b738f --- /dev/null +++ b/packages/utilities/src/utils/misc/tensor.ts @@ -0,0 +1,201 @@ +/** + * Multi-dimensional representation of data. + * @module + */ + +import { + type DataType, + type DType, + type DTypeValue, + getDataType, + type Sliceable, +} from "../common_types.ts"; +import { getConstructor } from "./get_constructor.ts"; + +/** Order of the tensor */ +export type Order = 1 | 2 | 3 | 4 | 5 | 6; + +/** The base type implemented by Tensor */ +export interface TensorLike
{ + data: DType
; + shape: Shape; +}; + +/** An array with n items */ +export type Shape = N extends 0 ? [] + : [number, ...number[]] & { length: N }; + +/** nDArray type */ +export interface NDArray
{ + 1: DTypeValue
[]; + 2: DTypeValue
[][]; + 3: DTypeValue
[][][]; + 4: DTypeValue
[][][][]; + 5: DTypeValue
[][][][][]; + 6: DTypeValue
[][][][][][]; +}; + +function getShape( + arr: NDArray
[O], +): Shape { + const shape: number[] = []; + let curr: NDArray
[O] | DTypeValue
= arr; + while (Array.isArray(curr)) { + shape.push(curr.length); + curr = curr[0] as NDArray
[O] | DTypeValue
; + } + return shape as Shape; +} + +/** + * A Tensor of order O. + */ +export class Tensor
+ implements Sliceable, TensorLike { + order: O; + shape: Shape; + data: DType
; + strides: Shape; + dType: DT; + constructor(tensor: TensorLike); + constructor(array: NDArray
[O], dType: DT); + constructor(data: DType
, shape: Shape); + constructor(dType: DT, shape: Shape); + constructor( + data: NDArray
[O] | DType
| DT | TensorLike, + shape?: Shape | DType
, + ) { + if (typeof data === "string") { + if (!shape || !Array.isArray(shape)) { + throw new Error( + `Expected shape to be defined as an Array. Got ${shape}.`, + ); + } else { + this.data = new (getConstructor(data))( + shape.reduce((acc, val) => acc * val, 1), + ) as DType
; + this.shape = shape; + this.order = shape.length as O; + this.dType = data; + this.strides = Tensor.getStrides(this.shape); + } + } else if (Array.isArray(data)) { + if (!shape || typeof shape !== "string") { + throw new Error( + `Expected dType to be defined when using a normal array. Got ${shape}.`, + ); + } else { + this.shape = getShape(data); + this.order = this.shape.length as O; + // @ts-ignore They're mapped correctly + this.data = getConstructor(shape).from( + data.flat(this.shape.length) as DTypeValue
[], + ) as DType
; + this.dType = shape; + this.strides = Tensor.getStrides(this.shape); + } + } else if (ArrayBuffer.isView(data)) { + if (!shape || !Array.isArray(shape)) { + throw new Error( + `Shape must be defined when Tensor is constructed from a TypedArray.`, + ); + } + this.shape = shape; + this.order = this.shape.length as O; + this.data = data; + this.dType = getDataType(data); + this.strides = Tensor.getStrides(this.shape); + } else if (data.shape) { + this.data = data.data; + this.shape = data.shape; + this.dType = getDataType(data.data); + this.order = this.shape.length as O; + this.strides = Tensor.getStrides(this.shape); + } else { + throw new Error("Tensor initialization does not follow any overload."); + } + } + /** For compat with useSplit() */ + get length(): number { + return this.shape[0]; + } + /** Filter the tensor by 0th axis */ + filter( + fn: (value: DType
, row: number, _: DType
[]) => boolean, + ): Tensor { + const satisfying: number[] = []; + let i = 0; + const stride = this.strides[0]; + while (i < this.shape[0]) { + if ( + fn(this.data.slice(stride * i, stride * (i + 1)) as DType
, i, []) + ) { + satisfying.push(i); + } + i += 1; + } + const res = new Tensor(this.dType, [ + satisfying.length, + ...this.shape.slice(1), + ] as Shape); + i = 0; + while (i < satisfying.length) { + res.data.set( + // @ts-ignore This line will work + this.data.slice(stride * satisfying[i], stride * (satisfying[i] + 1)), + i, + ); + i += 1; + } + return res; + } + /** Get an item using indices */ + item(...indices: number[]): DTypeValue
{ + return this.data[ + indices.reduce((acc, val, i) => acc + val * this.strides[i]) + ] as DTypeValue
; + } + /** Slice matrix by axis */ + slice(start = 0, end?: number, axis = 0): Tensor { + if (axis > this.strides.length - 1) { + throw new Error( + `Axis given is ${axis} while highest axis is ${ + this.strides.length - 1 + }.`, + ); + } + if (!end) end = this.shape[axis] - start; + const stride = this.strides[axis - 1] || this.length; + const newStride = (stride / this.shape[axis]) * (end - start); + const res = new Tensor(this.dType, [ + ...this.shape.slice(0, axis), + end - start, + ...this.shape.slice(axis + 1), + ] as Shape); + for (let i = 0; i < this.length / stride; i += 1) { + // @ts-ignore The values will always match + res.data.set( + // @ts-ignore The values will always match + this.data + .slice(stride * i, stride * (i + 1)) + .slice(start * this.strides[axis], end * this.strides[axis]), + newStride * i, + ); + } + return res; + } + toJSON(): { data: DTypeValue
[]; shape: Shape } { + return { + // @ts-ignore I have no idea why TS is doing this + data: Array.from(this.data) as DTypeValue
[], + shape: this.shape, + }; + } + static getStrides(shape: Shape): Shape { + const strides = new Array(shape.length).fill(1); + for (let i = 0; i < shape.length; i += 1) { + strides[i] = shape.slice(i + 1).reduce((acc, val) => acc * val, 1); + } + return strides as Shape; + } +} diff --git a/packages/utilities/src/utils/mod.ts b/packages/utilities/src/utils/mod.ts new file mode 100644 index 0000000..606c346 --- /dev/null +++ b/packages/utilities/src/utils/mod.ts @@ -0,0 +1,3 @@ +export * from "./random/mod.ts"; +export * from "./array/mod.ts"; +export * from "./misc/mod.ts"; diff --git a/packages/utilities/src/utils/random/mod.ts b/packages/utilities/src/utils/random/mod.ts new file mode 100644 index 0000000..31d700f --- /dev/null +++ b/packages/utilities/src/utils/random/mod.ts @@ -0,0 +1,5 @@ +export { useNormal, useNormalArray } from "./normal.ts"; +export { useRearrange } from "./rearrange.ts"; +export { useRNG } from "./rng.ts"; +export { useShuffle } from "./shuffle.ts"; +export { useWeighted } from "./weighted.ts"; diff --git a/packages/utilities/src/utils/random/normal.ts b/packages/utilities/src/utils/random/normal.ts new file mode 100644 index 0000000..4729cea --- /dev/null +++ b/packages/utilities/src/utils/random/normal.ts @@ -0,0 +1,53 @@ +const TWO_PI = Math.PI * 2; + +/** + * A **Normal** or **Gaussian** distribution is a type of + * continuous probability distribution dependent on two + * parameters: + * + * **μ** - The **mean** + * **σ** - The **standard deviation** + * + * This implementation makes use of the popular Box-Muller transform. + */ + +/** + * Generate a normal random variate. + * @param mean Mean of the distribution μ. + * @param stddev Standard Deviation of the distribution σ. + * @returns A normal random variate. 
+ */ +export function useNormal( + mean: number, + stddev: number, +): [number, number] { + const u = [Math.random(), Math.random()]; + + const m = Math.sqrt(-2.0 * Math.log(u[0])); + return [ + (stddev * m * Math.cos(TWO_PI * u[1])) + mean, + (stddev * m * Math.sin(TWO_PI * u[1])) + mean, + ]; +} + +/** + * Generate a normally distributed array. + * @param mean Mean of the distribution μ. + * @param variance Variance of the distribution σ^2. + * @returns A normally distributed array. + */ + +export function useNormalArray( + num: number, + mean: number, + variance: number, +): Float32Array { + const result = new Float32Array(num); + let i = 0; + const stddev = Math.sqrt(variance); + while (i < num) { + result[i] = useNormal(mean, stddev)[0]; + ++i; + } + return result; +} diff --git a/packages/utilities/src/utils/random/rearrange.ts b/packages/utilities/src/utils/random/rearrange.ts new file mode 100644 index 0000000..7e980fc --- /dev/null +++ b/packages/utilities/src/utils/random/rearrange.ts @@ -0,0 +1,13 @@ +import { useShuffle } from "./shuffle.ts"; + +/** + * Rearrange characters in a string randomly. + * @param {number|string} n Number / String to rearrange. + * @returns {number|string} Number / String rearranged randomly. + */ +export function useRearrange(n: number | string): number | string { + const res = (typeof n === "number" ? n.toString() : n).split(""); + const shuffled = useShuffle(res).join(""); + return typeof n === "number" ? Number(shuffled) : shuffled; +} +export default useRearrange; diff --git a/packages/utilities/src/utils/random/rng.ts b/packages/utilities/src/utils/random/rng.ts new file mode 100644 index 0000000..2ac97a3 --- /dev/null +++ b/packages/utilities/src/utils/random/rng.ts @@ -0,0 +1,15 @@ +/** + * Get random number from range + * @param min Min value of range. + * @param max Max value of range. + * @param allowDecimals Whether to allow decimal point in result. 
+ * @returns Random number + */ +export function useRNG( + min: number, + max: number, + allowDecimals = false, +): number { + const rng = min + (Math.random() * (max - min)); + return allowDecimals ? rng : Math.round(rng); +} diff --git a/packages/utilities/src/utils/random/shuffle.ts b/packages/utilities/src/utils/random/shuffle.ts new file mode 100644 index 0000000..0c3daec --- /dev/null +++ b/packages/utilities/src/utils/random/shuffle.ts @@ -0,0 +1,39 @@ +import { useSeries } from "../array/range.ts"; + +/** + * Shuffle a given array in-place. + * @param arr Array to shuffle + */ +export function useShuffle(arr: T[]): T[]; +/** Get a shuffled array of numbers from 0 to the given number */ +export function useShuffle(max: number): number[]; +/** Get a shuffled array of numbers between the given range */ +export function useShuffle(min: number, max: number): number[]; +export function useShuffle( + maybeArr: number | T[], + max?: number, +): T[] | number[] { + if (Array.isArray(maybeArr)) { + const idx = useShuffle(0, maybeArr.length); + const res = new Array(maybeArr.length); + let i = 0; + while (i < maybeArr.length) { + res[i] = maybeArr[idx[i]]; + i += 1; + } + return res; + } else { + const min = typeof max !== "undefined" ? maybeArr : 0; + const max1 = typeof max !== "undefined" ? max : maybeArr; + + const arr = useSeries(min, max1); + + let i = arr.length - 1; + while (i >= 1) { + const j = Math.floor(Math.random() * i); + [arr[i], arr[j]] = [arr[j], arr[i]]; + i -= 1; + } + return arr; + } +} diff --git a/packages/utilities/src/utils/random/weighted.ts b/packages/utilities/src/utils/random/weighted.ts new file mode 100644 index 0000000..ff70989 --- /dev/null +++ b/packages/utilities/src/utils/random/weighted.ts @@ -0,0 +1,29 @@ +// Check out https://github.com/retraigo/fortuna + +export interface WeightedChoice { + result: ItemType; + chance: number; +} + +/** + * Roll one from an array of weighted choices. 
+ * @param {WeightedChoice[]} choices - Choices to roll from. + * @param {number} totalChance - Sum of all chance properties. + * @returns {WeightedChoice} Item rolled. + */ + +export function useWeighted( + choices: WeightedChoice[], +): WeightedChoice { + const total = choices.reduce( + (acc: number, val: WeightedChoice) => acc + val.chance, + 0, + ); + const result = Math.random() * total; + let going = 0.0; + for (let i = 0; i < choices.length; ++i) { + going += choices[i].chance; + if (result < going) return choices[i]; + } + return choices[Math.floor(Math.random() * choices.length)]; +} diff --git a/src/backends/wasm/lib/netsaur_bg.wasm b/src/backends/wasm/lib/netsaur_bg.wasm deleted file mode 100644 index 99ea6d9..0000000 Binary files a/src/backends/wasm/lib/netsaur_bg.wasm and /dev/null differ diff --git a/tokenizers/lib/netsaur_tokenizers_bg.wasm b/tokenizers/lib/netsaur_tokenizers_bg.wasm deleted file mode 100644 index 2782196..0000000 Binary files a/tokenizers/lib/netsaur_tokenizers_bg.wasm and /dev/null differ diff --git a/web.ts b/web.ts index d588f70..8760c53 100644 --- a/web.ts +++ b/web.ts @@ -1,10 +1,10 @@ -export { setupBackend } from "./src/core/engine.ts"; -export * from "./src/core/mod.ts"; -export * from "./src/core/types.ts"; -export * from "./src/core/tensor/tensor.ts"; -export * from "./src/core/api/layers.ts"; -export * from "./src/core/api/shape.ts"; -export * from "./src/core/api/network.ts"; -export * from "./src/core/api/optimizer.ts"; +export { setupBackend } from "./packages/core/core/engine.ts"; +export * from "./packages/core/core/mod.ts"; +export * from "./packages/core/core/types.ts"; +export * from "./packages/core/core/tensor/tensor.ts"; +export * from "./packages/core/core/api/layers.ts"; +export * from "./packages/core/core/api/shape.ts"; +export * from "./packages/core/core/api/network.ts"; +export * from "./packages/core/core/api/optimizer.ts"; -export { WASM } from "./src/backends/wasm/mod.ts"; +export { WASM } from 
"./packages/core/backends/wasm/mod.ts";