From f00c1742b9368726f7d30b0e976ad627b7e0c84e Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Wed, 24 Jul 2024 20:48:54 -0400 Subject: [PATCH 1/7] Altered BulkSearch to handle abstractors in their own class, rather than as a part of the vue component. From d3fff1f33f8422bedc9e7d59e14f0ca91d307ee6 Mon Sep 17 00:00:00 2001 From: benbdeitch Date: Wed, 7 Aug 2024 17:47:30 -0400 Subject: [PATCH 2/7] Added support for tabular input in Bulk Search --- .../components/BulkSearch/utils/classes.js | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/openlibrary/components/BulkSearch/utils/classes.js b/openlibrary/components/BulkSearch/utils/classes.js index 4cacc2e0944..75c2a662615 100644 --- a/openlibrary/components/BulkSearch/utils/classes.js +++ b/openlibrary/components/BulkSearch/utils/classes.js @@ -122,10 +122,52 @@ export class AiExtractor extends AbstractExtractor{ } } +export class TableExtractor extends AbstractExtractor{ + /** + * + * @param {string} name + */ + constructor(name) { + super(name) + } + + /** + * @param {ExtractionOptions} extractionOptions + * @param {string} text + * @return {Promise} + */ + async run(extractionOptions, text){ + + /** @type {string[]} */ + const lines = text.split('\n') + /** @type {RegExp} */ + const matcher = /([\w ]*)\t/g + /** @type {string[][]} */ + const cells = lines.map(textLine => [...textLine.matchAll(matcher)].map(entry => entry[1])) + try { + /** @type {number} */ + const authorIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === extractionOptions.authorColumn) + /** @type {number} */ + const titleIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === extractionOptions.titleColumn) + if (titleIndex < 0){ + throw new Error(`Please have one column named ${extractionOptions.titleColumn} and (optionally) one column named ${extractionOptions.authorColumn}`) + } + + return cells.slice(1).filter(row=> row[titleIndex]!== '').map(row => new BookMatch(new ExtractedBook(row[titleIndex], row[authorIndex]), {})) + } + catch (error){ + return [] + } + } +} class ExtractionOptions { constructor() { /** @type {string} */ this.openaiApiKey = '' + /** @type {string} */ + this.authorColumn = 'author' + /** @type {string} */ + this.titleColumn = 'title' } } class MatchOptions { @@ -169,7 +211,8 @@ export class BulkSearchState{ new RegexExtractor('e.g. "The Wizard of Oz - L. Frank Baum"', '(^|>)(?[A-Za-z][\\p{L}0-9\\- ,]{1,250})\\s+[,-\u2013\u2014\\t]\\s+(?<author>[\\p{L}][\\p{L}\\.\\- ]{3,70})( \\(.*)?($|<\\/)'), new RegexExtractor('e.g. "The Wizard of Oz (L. Frank Baum)"', '^(?<title>[\\p{L}].{1,250})\\s\\(?<author>(.{3,70})\\)$$'), new RegexExtractor('Wikipedia Citation (e.g. Baum, Frank L. (1994). The Wizard of Oz)', '^(?<author>[^.()]+).*?\\)\\. (?<title>[^.]+)'), - new AiExtractor('✨ AI Extraction', 'gpt-4o-mini') + new AiExtractor('✨ AI Extraction', 'gpt-4o-mini'), + new TableExtractor('Extract from a Table/Spreadsheet') ] /** @type {Number} */ this._activeExtractorIndex = 0 From 1f4de747aeb54498f43ffcdd6afe0eddd5ea07fb Mon Sep 17 00:00:00 2001 From: benbdeitch <benbdeitch@gmail.com> Date: Wed, 7 Aug 2024 17:57:37 -0400 Subject: [PATCH 3/7] Added text to inform users what to title their columns. --- .../BulkSearch/components/BulkSearchControls.vue | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openlibrary/components/BulkSearch/components/BulkSearchControls.vue b/openlibrary/components/BulkSearch/components/BulkSearchControls.vue index d35b86b9c7b..f8d1896e126 100644 --- a/openlibrary/components/BulkSearch/components/BulkSearchControls.vue +++ b/openlibrary/components/BulkSearch/components/BulkSearchControls.vue @@ -5,6 +5,7 @@ <details open class="bulk-search-controls"> <summary>Input</summary> <div> + <p v-if="showColumnHint"> Please name your relevant columns "Title", "Author".</p> <textarea v-model="bulkSearchState.inputText"></textarea> <br /> <label>Format: <select v-model="bulkSearchState._activeExtractorIndex"> @@ -74,7 +75,11 @@ export default { matchBooksText(){ if (this.loadingMatchedBooks) return 'Loading...' return 'Match Books' - } + }, + showColumnHint(){ + if (this.bulkSearchState.activeExtractor) return this.bulkSearchState.activeExtractor.isTable + return false + }, }, methods: { togglePasswordVisibility(){ From 000d8a300d1f340da1e0f348a6c1ce1d37c53603 Mon Sep 17 00:00:00 2001 From: benbdeitch <benbdeitch@gmail.com> Date: Wed, 7 Aug 2024 18:05:52 -0400 Subject: [PATCH 4/7] Added an 'isTable' attribute to the tableExtractor, to more easily identify when it's the active extractor of BulkSearchState. --- openlibrary/components/BulkSearch/utils/classes.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openlibrary/components/BulkSearch/utils/classes.js b/openlibrary/components/BulkSearch/utils/classes.js index 75c2a662615..1f854f1537a 100644 --- a/openlibrary/components/BulkSearch/utils/classes.js +++ b/openlibrary/components/BulkSearch/utils/classes.js @@ -129,6 +129,8 @@ export class TableExtractor extends AbstractExtractor{ */ constructor(name) { super(name) + /** @type {boolean} */ + this.isTable = true } /** From e6a015a27f84e91528a0be31f92710451c26351f Mon Sep 17 00:00:00 2001 From: benbdeitch <benbdeitch@gmail.com> Date: Wed, 28 Aug 2024 12:58:02 -0400 Subject: [PATCH 5/7] Misc changes --- .../components/BulkSearchControls.vue | 6 +-- .../components/BulkSearch/utils/classes.js | 47 +++++++++---------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/openlibrary/components/BulkSearch/components/BulkSearchControls.vue b/openlibrary/components/BulkSearch/components/BulkSearchControls.vue index f8d1896e126..5f40425aff1 100644 --- a/openlibrary/components/BulkSearch/components/BulkSearchControls.vue +++ b/openlibrary/components/BulkSearch/components/BulkSearchControls.vue @@ -5,12 +5,12 @@ <details open class="bulk-search-controls"> <summary>Input</summary> <div> - <p v-if="showColumnHint"> Please name your relevant columns "Title", "Author".</p> + <p v-if="showColumnHint">Please include a header row. Supported columns include: "Title", "Author".</p> <textarea v-model="bulkSearchState.inputText"></textarea> <br /> <label>Format: <select v-model="bulkSearchState._activeExtractorIndex"> <option v-for="extractor, index in bulkSearchState.extractors" :value = "index" :key="index"> - {{ extractor["name"] }} + {{ extractor["label"] }} </option> </select></label> <label v-if="this.showApiKey">OpenAI API Key: @@ -77,7 +77,7 @@ export default { return 'Match Books' }, showColumnHint(){ - if (this.bulkSearchState.activeExtractor) return this.bulkSearchState.activeExtractor.isTable + if (this.bulkSearchState.activeExtractor) return this.bulkSearchState.activeExtractor.name === 'table_extractor' return false }, }, diff --git a/openlibrary/components/BulkSearch/utils/classes.js b/openlibrary/components/BulkSearch/utils/classes.js index 1f854f1537a..cc17411d4c2 100644 --- a/openlibrary/components/BulkSearch/utils/classes.js +++ b/openlibrary/components/BulkSearch/utils/classes.js @@ -11,12 +11,13 @@ export class ExtractedBook { } class AbstractExtractor { + /** - * @param {string} name + * @param {string} label */ - constructor(name) { + constructor(label) { /** @type {string} */ - this.name = name + this.label = label } /** * @param {ExtractionOptions} _extractOptions @@ -32,11 +33,11 @@ export class RegexExtractor extends AbstractExtractor { /** * - * @param {string} name + * @param {string} label * @param {string} pattern */ - constructor(name, pattern){ - super(name) + constructor(label, pattern){ + super(label) /** @type {RegExp} */ this.pattern = new RegExp(pattern, 'gmu'); } @@ -57,11 +58,11 @@ export class RegexExtractor extends AbstractExtractor { export class AiExtractor extends AbstractExtractor{ /** - * @param {string} name + * @param {string} label * @param {string} model */ - constructor(name, model) { - super(name) + constructor(label, model) { + super(label) /** @type {string} */ this.model = model } @@ -123,14 +124,18 @@ export class AiExtractor extends AbstractExtractor{ } export class TableExtractor extends AbstractExtractor{ + + name = 'table_extractor' /** * - * @param {string} name + * @param {string} label */ - constructor(name) { - super(name) - /** @type {boolean} */ - this.isTable = true + constructor(label) { + super(label) + /** @type {string} */ + this.authorColumn = 'author' + /** @type {string} */ + this.titleColumn = 'title' } /** @@ -142,17 +147,15 @@ export class TableExtractor extends AbstractExtractor{ /** @type {string[]} */ const lines = text.split('\n') - /** @type {RegExp} */ - const matcher = /([\w ]*)\t/g /** @type {string[][]} */ - const cells = lines.map(textLine => [...textLine.matchAll(matcher)].map(entry => entry[1])) + const cells = lines.map(line => line.split('\t')) try { /** @type {number} */ - const authorIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === extractionOptions.authorColumn) + const authorIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === this.authorColumn) /** @type {number} */ - const titleIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === extractionOptions.titleColumn) + const titleIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === this.titleColumn) if (titleIndex < 0){ - throw new Error(`Please have one column named ${extractionOptions.titleColumn} and (optionally) one column named ${extractionOptions.authorColumn}`) + throw new Error(`Please have one column named ${this.titleColumn} and (optionally) one column named ${this.authorColumn}`) } return cells.slice(1).filter(row=> row[titleIndex]!== '').map(row => new BookMatch(new ExtractedBook(row[titleIndex], row[authorIndex]), {})) @@ -166,10 +169,6 @@ class ExtractionOptions { constructor() { /** @type {string} */ this.openaiApiKey = '' - /** @type {string} */ - this.authorColumn = 'author' - /** @type {string} */ - this.titleColumn = 'title' } } class MatchOptions { From d1066ac7d60d6efc642720a36c17256c4757d65d Mon Sep 17 00:00:00 2001 From: benbdeitch <benbdeitch@gmail.com> Date: Wed, 28 Aug 2024 16:31:26 -0400 Subject: [PATCH 6/7] Adjusted table extractor for better clarity. --- .../components/BulkSearchControls.vue | 2 +- .../components/BulkSearch/utils/classes.js | 32 ++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/openlibrary/components/BulkSearch/components/BulkSearchControls.vue b/openlibrary/components/BulkSearch/components/BulkSearchControls.vue index 5f40425aff1..02f2d081e50 100644 --- a/openlibrary/components/BulkSearch/components/BulkSearchControls.vue +++ b/openlibrary/components/BulkSearch/components/BulkSearchControls.vue @@ -10,7 +10,7 @@ <br /> <label>Format: <select v-model="bulkSearchState._activeExtractorIndex"> <option v-for="extractor, index in bulkSearchState.extractors" :value = "index" :key="index"> - {{ extractor["label"] }} + {{ extractor.label }} </option> </select></label> <label v-if="this.showApiKey">OpenAI API Key: diff --git a/openlibrary/components/BulkSearch/utils/classes.js b/openlibrary/components/BulkSearch/utils/classes.js index cc17411d4c2..1523c838e61 100644 --- a/openlibrary/components/BulkSearch/utils/classes.js +++ b/openlibrary/components/BulkSearch/utils/classes.js @@ -31,6 +31,7 @@ class AbstractExtractor { export class RegexExtractor extends AbstractExtractor { + name = 'regex_extractor' /** * * @param {string} label @@ -57,6 +58,7 @@ export class RegexExtractor extends AbstractExtractor { export class AiExtractor extends AbstractExtractor{ + name = 'ai_extractor' /** * @param {string} label * @param {string} model @@ -149,22 +151,28 @@ export class TableExtractor extends AbstractExtractor{ const lines = text.split('\n') /** @type {string[][]} */ const cells = lines.map(line => line.split('\t')) - try { - /** @type {number} */ - const authorIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === this.authorColumn) - /** @type {number} */ - const titleIndex = cells[0].findIndex(columnName => columnName.trim().toLowerCase() === this.titleColumn) - if (titleIndex < 0){ - throw new Error(`Please have one column named ${this.titleColumn} and (optionally) one column named ${this.authorColumn}`) - } - - return cells.slice(1).filter(row=> row[titleIndex]!== '').map(row => new BookMatch(new ExtractedBook(row[titleIndex], row[authorIndex]), {})) + /** @type {{columns: String[], rows: {columnName: string}[]}} */ + const tableData = { + columns: cells[0], + rows: [] } - catch (error){ - return [] + for (let i=1; i< cells.length; i++){ + const row = {} + for (let j = 0; j < tableData.columns.length; j++){ + row[tableData.columns[j].trim().toLowerCase()] = cells[i][j] + } + // @ts-ignore + tableData.rows.push(row) } + return tableData.rows.map( + row => new BookMatch( + new ExtractedBook( + row[this.authorColumn] || '', row[this.titleColumn] || ''), + {}) + ) } } + class ExtractionOptions { constructor() { /** @type {string} */ From b7538d8c4e3f9439f633ae705a37a6805cc00d41 Mon Sep 17 00:00:00 2001 From: Drini Cami <cdrini@gmail.com> Date: Wed, 4 Sep 2024 16:34:00 +0200 Subject: [PATCH 7/7] Fix ExtractedBook called with arguments backwards --- openlibrary/components/BulkSearch/utils/classes.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/components/BulkSearch/utils/classes.js b/openlibrary/components/BulkSearch/utils/classes.js index 1523c838e61..774e84109db 100644 --- a/openlibrary/components/BulkSearch/utils/classes.js +++ b/openlibrary/components/BulkSearch/utils/classes.js @@ -167,7 +167,7 @@ export class TableExtractor extends AbstractExtractor{ return tableData.rows.map( row => new BookMatch( new ExtractedBook( - row[this.authorColumn] || '', row[this.titleColumn] || ''), + row[this.titleColumn] || '', row[this.authorColumn] || ''), {}) ) }