Skip to content

Commit

Permalink
fix: crawler instances with different StorageClients do not affect each other (#2056)
Browse files Browse the repository at this point in the history
  • Loading branch information
barjin committed Aug 29, 2023
1 parent 202566e commit 3f4c863
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 10 deletions.
8 changes: 5 additions & 3 deletions packages/core/src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import type { MemoryStorageOptions } from '@crawlee/memory-storage';
import type { Dictionary, StorageClient } from '@crawlee/types';
import { pathExistsSync, readFileSync } from 'fs-extra';

import type { EventManager } from './events';
import { LocalEventManager } from './events';
import { entries } from './typedefs';
import { LocalEventManager, type EventManager } from './events';
import type { StorageManager } from './storages';
import { entries, type Constructor } from './typedefs';

export interface ConfigurationOptions {
/**
Expand Down Expand Up @@ -280,6 +280,8 @@ export class Configuration {
/** @internal */
static globalConfig?: Configuration;

/**
 * Storage managers belonging to this configuration, keyed by the storage class
 * constructor each manager was created for. `StorageManager` looks up and registers
 * its per-class manager instances here, and `StorageManager.clearCache(config)` empties it.
 */
public readonly storageManagers = new Map<Constructor, StorageManager>();

/**
* Creates new `Configuration` instance with provided options. Env vars will have precedence over those.
*/
Expand Down
13 changes: 6 additions & 7 deletions packages/core/src/storages/storage_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ export interface IStorage {
* @ignore
*/
export class StorageManager<T extends IStorage = IStorage> {
private static readonly storageManagers = new Map<Constructor, StorageManager>();
private readonly name: 'Dataset' | 'KeyValueStore' | 'RequestQueue';
private readonly StorageConstructor: Constructor<T> & { name: string };
private readonly cache = new Map<string, T>();
Expand All @@ -48,24 +47,24 @@ export class StorageManager<T extends IStorage = IStorage> {
storageClass: Constructor<T>,
config = Configuration.getGlobalConfig(),
): StorageManager<T> {
if (!this.storageManagers.has(storageClass)) {
if (!config.storageManagers.has(storageClass)) {
const manager = new StorageManager(storageClass, config);
this.storageManagers.set(storageClass, manager);
config.storageManagers.set(storageClass, manager);
}

return this.storageManagers.get(storageClass) as StorageManager<T>;
return config.storageManagers.get(storageClass) as StorageManager<T>;
}

/**
 * Drops every storage manager registered on the given configuration.
 *
 * For each manager whose name is `'KeyValueStore'`, every cached storage instance first
 * has its `clearCache()` method invoked if it exposes one; afterwards the configuration's
 * manager map is emptied.
 *
 * @param config Configuration whose storage managers are cleared; defaults to
 *   `Configuration.getGlobalConfig()`.
 * @internal
 */
static clearCache(): void {
this.storageManagers.forEach((manager) => {
static clearCache(config = Configuration.getGlobalConfig()): void {
config.storageManagers.forEach((manager) => {
if (manager.name === 'KeyValueStore') {
manager.cache.forEach((item) => {
// Optional call: only cached items that actually expose `clearCache` are affected.
(item as Dictionary).clearCache?.();
});
}
});
this.storageManagers.clear();
config.storageManagers.clear();
}

async openStorage(idOrName?: string | null, client?: StorageClient): Promise<T> {
Expand Down
31 changes: 31 additions & 0 deletions test/core/multiple_crawlers.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { MemoryStorage } from '@crawlee/memory-storage';
import { CheerioCrawler, Configuration } from 'crawlee';

/**
 * Runs two crawlers one after another, each constructed with its own `Configuration`
 * wrapping a separate non-persistent `MemoryStorage`, and checks that each one reports
 * exactly one finished request for the same URL.
 */
describe('multiple crawlers', () => {
    test('Crawler instances with different StorageClients do not affect each other', async () => {
        // Every call yields a crawler backed by a brand-new Configuration and storage client.
        function getCrawler() {
            const storageClient = new MemoryStorage({ persistStorage: false });
            const config = new Configuration({ storageClient });

            return new CheerioCrawler({ requestHandler: async () => {} }, config);
        }

        const first = getCrawler();
        await first.run([{ url: 'https://example.org/' }]);

        // The second crawler is only created after the first run has completed.
        const second = getCrawler();
        await second.run([{ url: 'https://example.org/' }]);

        expect(first.stats.state.requestsFinished).toBe(1);
        expect(second.stats.state.requestsFinished).toBe(1);
    });
});

0 comments on commit 3f4c863

Please sign in to comment.