Skip to content

Commit

Permalink
feat: async batched validation
Browse files Browse the repository at this point in the history
  • Loading branch information
sidwebworks committed Sep 8, 2022
1 parent 1c76393 commit 45d2758
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 40 deletions.
45 changes: 26 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@


# ZOD-XLSX

[![npm version](https://badgen.net/npm/v/zod-xlsx)](https://www.npmjs.com/package/zod-xlsx)
![npm downloads](https://badgen.net/npm/dt/zod-xlsx)
[![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)


> An xlsx-based resource validator using Zod schemas
**Supports both ESM and CJS**

## Installation
> Note:

> Note:
> This package requires [Zod](https://www.npmjs.com/package/zod) and [xlsx](https://www.npmjs.com/package/xlsx) as peer dependencies
```bash
Expand All @@ -28,7 +26,7 @@ pnpm add zod-xlsx xlsx zod

## Usage

The library exports a single function called `createValidator` which takes in a xlsx workbook and creates a validator object.
The library exports a single function called `createValidator`, which takes in an xlsx workbook and creates a validator object.

Please make sure the top row of your sheet (xlsx or xls file) contains only the column headers, as the library requires this to function properly.

Expand All @@ -38,43 +36,43 @@ import xlsx from "xlsx"

const workbook = xlsx.readFile(/*path to your file*/)

const validator = createValidator(workbook);
const validator = createValidator(workbook)

const schema = z.object({
'First Name': z.string(),
'Last Name': z.string(),
Gender: z.enum(['Male', 'Female']),
"First Name": z.string(),
"Last Name": z.string(),
Gender: z.enum(["Male", "Female"]),
Country: z.string(),
Age: z.number(),
Date: z.string(),
Id: z.number(),
});
})

const result = await validator.validate(schema);
const result = validator.validate(schema)
```

**OUTPUT**

```js
{
valid: [
{
valid: [
{ issues: [], isValid: true, data: [Object] },
{ issues: [], isValid: true, data: [Object] },
{ issues: [], isValid: true, data: [Object] },
{ issues: [], isValid: true, data: [Object] },
]
invalid: [
]
invalid: [
{ issues: [Object], isValid: false, data: [Object] },
{ issues: [Object], isValid: false, data: [Object] },
{ issues: [Object], isValid: false, data: [Object] },
]
}
]
}
```



## API Reference

### **createValidator**

Function to create a new validator object with the given workbook.
It takes an options object as the second argument.

Expand All @@ -100,7 +98,16 @@ export interface ValidatorOptions {

> Details on what each of the xlsx options does can be found [here](https://docs.sheetjs.com/docs/api/utilities#json).
### **validator.validate()**

Synchronously parses all the rows against the given schema and returns the result.

### **validator.validateAsync()**

Asynchronously parses all the rows against the given schema _without blocking the event loop_ by processing them in batches.
(500 is the default batch size)

Depending on your use case, it's possible to configure the `batchSize` like so: `validateAsync(schema, { batchSize: 500 })`.

## License

Expand Down
4 changes: 1 addition & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@
},
"types": "./dist/index.d.ts",
"scripts": {
"postinstall": "npx husky-init",
"build": "NODE_ENV=production tsup --dts-resolve",
"dev": "NODE_ENV=development tsup --watch",
"test": "vitest run",
"test:watch": "vitest",
"coverage": "vitest --coverage",
"prepublishOnly": "pnpm run build",
"prepare": "husky install"
"prepublishOnly": "pnpm run build"
},
"devDependencies": {
"@commitlint/cli": "17.0.2",
Expand Down
37 changes: 30 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { utils, WorkBook } from "xlsx"
import { ZodError, ZodSchema } from "zod"
import type { Result, ValidatorOptions } from "./types"
import type { Resource, Result, ValidatorOptions } from "./types"
import { defaultsOptions, toObject } from "./utils"

function createValidator(workbook: WorkBook, opts?: ValidatorOptions) {
Expand All @@ -26,7 +26,7 @@ function createValidator(workbook: WorkBook, opts?: ValidatorOptions) {
})

const parse = (row: any, schema: ZodSchema) => {
const data = toObject(row, header)
const data = toObject(row, header as string[])
try {
schema.parse(data)
options.onValid && options.onValid(data)
Expand All @@ -40,12 +40,35 @@ function createValidator(workbook: WorkBook, opts?: ValidatorOptions) {
}
}

const validateAsync = (schema: ZodSchema): Promise<Result> => {
const result = rows.map((row) => parse(row, schema))
const validateAsync = (
schema: ZodSchema,
options?: { batchSize: number },
): Promise<Result> => {
const batchSize = options?.batchSize ?? 500

return Promise.resolve({
valid: result.filter((r) => r.isValid),
invalid: result.filter((r) => !r.isValid),
return new Promise<Result>((resolve) => {
let i = 1
const result: Resource[] = []

async function parseBatch() {
const batch = rows.slice(i, i + batchSize)
i += batchSize

if (!batch.length) {
return resolve({
valid: result.filter((r) => r.isValid),
invalid: result.filter((r) => !r.isValid),
})
}

const processed = batch.map((row) => parse(row, schema))

result.push(...processed)

setTimeout(parseBatch, 0)
}

parseBatch()
})
}

Expand Down
2 changes: 1 addition & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import { Sheet2JSONOpts } from "xlsx"
import type { ZodIssue } from "zod"

type Resource = {
export type Resource = {
issues: ZodIssue[]
isValid: boolean
data: Record<string, any>
Expand Down
39 changes: 29 additions & 10 deletions tests/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ it("returns an array of invalid items", async () => {

const validator = createValidator(workbook)

const result = await validator.validate(badSchema)
const result = validator.validate(badSchema)

assert.isNotEmpty(result.invalid)
assert.isEmpty(result.valid)
Expand All @@ -55,7 +55,7 @@ it("returns an array of valid items", async () => {

const validator = createValidator(workbook)

const result = await validator.validate(schema)
const result = validator.validate(schema)

assert.isNotEmpty(result.valid)
assert.isEmpty(result.invalid)
Expand All @@ -74,17 +74,36 @@ it("calls the onValid hook when an item is valid", async () => {
Id: z.number(),
})

const spy = vitest.spyOn(console, "log")
const fn = vitest.fn(() => {})

const validator = createValidator(workbook, {
onValid: () => {
console.log("VALID")
},
onValid: fn,
})

const result = await validator.validate(schema)
const result = validator.validate(schema)

assert.strictEqual(spy.mock.calls.length, result.valid.length)
assert.strictEqual(fn.mock.calls.length, result.valid.length)

spy.mockClear()
})
fn.mockClear()
})

// Exercises validator.validateAsync() against the demo.xls mock workbook with an
// explicit batchSize of 250 and expects every row to pass the schema.
// NOTE(review): whether 250 actually forces more than one batch depends on the
// number of rows in demo.xls, which is not visible here — confirm.
it("processes batches asynchronously", async () => {
const workbook = readFile(path.join(__dirname, "./mocks/demo.xls"))

// Keys are the column header names each row is expected to carry.
const schema = z.object({
"First Name": z.string(),
"Last Name": z.string(),
Gender: z.enum(["Male", "Female"]),
Country: z.string(),
Age: z.number(),
Date: z.string(),
Id: z.number(),
})

const validator = createValidator(workbook)

// validateAsync returns a promise, so the result must be awaited.
const result = await validator.validateAsync(schema, { batchSize: 250 })

// All rows should land in `valid`; none should be reported as `invalid`.
assert.isNotEmpty(result.valid)
assert.isEmpty(result.invalid)
})

0 comments on commit 45d2758

Please sign in to comment.