From 65474616379e1dacc1329b3bdc5eb14f34428bb1 Mon Sep 17 00:00:00 2001 From: Mihajlo Medjedovic Date: Fri, 9 Aug 2024 16:09:53 +0200 Subject: [PATCH] fix(multi load): xlsx read file ahead of time, while user choose datasets --- client/src/app/app.component.ts | 4 +- .../src/app/models/ParseParams.interface.ts | 6 + .../multi-dataset.component.html | 30 +- .../multi-dataset/multi-dataset.component.ts | 134 ++- .../src/app/services/spreadsheet.service.ts | 33 +- .../spreadsheet-util/spreadsheet-util.ts | 778 ++++++++++-------- 6 files changed, 579 insertions(+), 406 deletions(-) diff --git a/client/src/app/app.component.ts b/client/src/app/app.component.ts index 335e187..27b9ec0 100644 --- a/client/src/app/app.component.ts +++ b/client/src/app/app.component.ts @@ -18,6 +18,7 @@ import { ClarityIcons, exclamationTriangleIcon, moonIcon, + processOnVmIcon, sunIcon, tableIcon, trashIcon @@ -28,7 +29,8 @@ ClarityIcons.addIcons( sunIcon, exclamationTriangleIcon, tableIcon, - trashIcon + trashIcon, + processOnVmIcon ) @Component({ diff --git a/client/src/app/models/ParseParams.interface.ts b/client/src/app/models/ParseParams.interface.ts index f5d9e8e..442b2fd 100644 --- a/client/src/app/models/ParseParams.interface.ts +++ b/client/src/app/models/ParseParams.interface.ts @@ -2,11 +2,17 @@ import { DcValidator } from '../shared/dc-validator/dc-validator' import { FileUploadEncoding } from './FileUploadEncoding' import { FileUploader } from './FileUploader.class' import { ExcelRule } from './TableData' +import XLSX from 'xlsx' export interface ParseParams { file: File password?: string dcValidator: DcValidator + /** + * If workbook is provided, parse function will not run a XLSX.read() + * it will use this property instead. So the client must do a file read beforehand + */ + workbook?: XLSX.WorkBook, /** * Parse function will manipulate and return the uploader array which can be provided with files already in the queue * Otherwise new empty instance will be created. diff --git a/client/src/app/multi-dataset/multi-dataset.component.html b/client/src/app/multi-dataset/multi-dataset.component.html index 182c2c4..9cf47c3 100644 --- a/client/src/app/multi-dataset/multi-dataset.component.html +++ b/client/src/app/multi-dataset/multi-dataset.component.html @@ -198,14 +198,28 @@ *ngIf="!activeParsedDataset" class="no-table-selected pointer-events-none" > - -

- Please select a dataset on the left to review the data -

+ + + +

+ {{ fileLoadingState }}... +

+
+ + + +

+ Please select a dataset on the left to review the data +

+
diff --git a/client/src/app/multi-dataset/multi-dataset.component.ts b/client/src/app/multi-dataset/multi-dataset.component.ts index 793372c..dc4d93f 100644 --- a/client/src/app/multi-dataset/multi-dataset.component.ts +++ b/client/src/app/multi-dataset/multi-dataset.component.ts @@ -30,6 +30,17 @@ import { UploadFile } from '@sasjs/adapter' import { UploadFileResponse } from '../models/UploadFile' import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse' import { ParseResult } from '../models/ParseResult.interface' +import XLSX from 'xlsx' + +enum FileLoadingState { + reading = 'Reading the file', + parsing = 'Searching for the data in the file', + parsed = 'Searching for the data finished', + /** + * Defualt value + */ + notSelected = 'File not selected' +} @Component({ selector: 'app-multi-dataset', @@ -43,6 +54,11 @@ export class MultiDatasetComponent implements OnInit { public licenceState = this.licenceService.licenceState public Infinity = Infinity + public workbookInterval: any + public fileLoadingState: FileLoadingState = FileLoadingState.notSelected + + public FileLoadingState = FileLoadingState + public hotTableLicenseKey: string | undefined = undefined public hotTableMaxRows = this.licenceState.value.viewer_rows_allowed || Infinity @@ -163,7 +179,7 @@ export class MultiDatasetComponent implements OnInit { } } - onFileChange(event: any) { + async onFileChange(event: any) { const files = event?.target?.files || [] if (files.length < 1) { @@ -200,10 +216,17 @@ export class MultiDatasetComponent implements OnInit { // For EXCEL if multiple files, we only take one (the first one) this.selectedFile = event.target.files[0] - if (this.selectedFile) - this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB( - this.selectedFile.size - ) + if (this.selectedFile) { + this.fileLoadingState = FileLoadingState.reading + + this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(this.selectedFile.size) + + // Read the excel file to be ready + this.spreadsheetService.xlsxReadFile(this.selectedFile!).then(wb => { + this.fileLoadingState = FileLoadingState.parsing + this.selectedFile!.workbook = wb + }) + } this.initUserInputHot() this.onAutoDetectColumns() @@ -291,53 +314,56 @@ export class MultiDatasetComponent implements OnInit { }) }) - for (let parsedDataset of this.parsedDatasets) { - this.spreadsheetService - .parseExcelFile({ - file: this.selectedFile!, - password: this.selectedFile!.password || undefined, - dcValidator: parsedDataset.datasetInfo.dcValidator!, - headerPks: parsedDataset.datasetInfo.headerPks, - headerArray: parsedDataset.datasetInfo.headerArray, - headerShow: [], - timeHeaders: parsedDataset.datasetInfo.timeHeaders, - dateHeaders: parsedDataset.datasetInfo.dateHeaders, - dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders, - xlRules: parsedDataset.datasetInfo.xlRules - }) - .then((parseResult: ParseResult | undefined) => { - console.log('parseResult', parseResult) + this.workbookLoaded().then(workbook => { + for (let parsedDataset of this.parsedDatasets) { + this.spreadsheetService + .parseExcelFile({ + file: this.selectedFile!, + workbook: workbook, + password: this.selectedFile!.password || undefined, + dcValidator: parsedDataset.datasetInfo.dcValidator!, + headerPks: parsedDataset.datasetInfo.headerPks, + headerArray: parsedDataset.datasetInfo.headerArray, + headerShow: [], + timeHeaders: parsedDataset.datasetInfo.timeHeaders, + dateHeaders: parsedDataset.datasetInfo.dateHeaders, + dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders, + xlRules: parsedDataset.datasetInfo.xlRules + }) + .then((parseResult: ParseResult | undefined) => { + this.fileLoadingState = FileLoadingState.parsed - if (parseResult && parseResult.data) { - let datasource: any[] = [] + if (parseResult && parseResult.data) { + let datasource: any[] = [] - parseResult.data.map((item) => { - let itemObject: any = {} + parseResult.data.map((item) => { + let itemObject: any = {} - parseResult.headerShow!.map((header: any, index: number) => { - itemObject[header] = item[index] + parseResult.headerShow!.map((header: any, index: number) => { + itemObject[header] = item[index] + }) + + // If Delete? column is not set in the file, we set it to NO + if (!itemObject['_____DELETE__THIS__RECORD_____']) + itemObject['_____DELETE__THIS__RECORD_____'] = 'No' + + datasource.push(itemObject) }) - // If Delete? column is not set in the file, we set it to NO - if (!itemObject['_____DELETE__THIS__RECORD_____']) - itemObject['_____DELETE__THIS__RECORD_____'] = 'No' + parsedDataset.datasource = datasource + parsedDataset.parseResult = parseResult + parsedDataset.parsingTable = false + } + }) + .catch((error: string) => { + console.warn('Parsing excel file error.', error) - datasource.push(itemObject) - }) - - parsedDataset.datasource = datasource - parsedDataset.parseResult = parseResult + parsedDataset.datasource = [] + parsedDataset.includeInSubmission = false parsedDataset.parsingTable = false - } - }) - .catch((error: string) => { - console.warn('Parsing excel file error.', error) - - parsedDataset.datasource = [] - parsedDataset.includeInSubmission = false - parsedDataset.parsingTable = false - }) - } + }) + } + }) }) } @@ -826,6 +852,23 @@ export class MultiDatasetComponent implements OnInit { if (newSubmittedDataset) newSubmittedDataset.active = true } + /** + * + * @returns Promise once workbook is loaded because use XLSX.read in the background + */ + private workbookLoaded(): Promise { + return new Promise((resolve, reject) => { + if (!this.selectedFile) reject('No file selected') + + this.workbookInterval = setInterval(() => { + if (this.selectedFile!.workbook) { + clearInterval(this.workbookInterval) + resolve(this.selectedFile!.workbook) + } + }, 500) + }) + } + private parseDatasetFromCsvName(fileName: string) { const fileNameArr = fileName.split('.') fileNameArr.pop() @@ -1044,4 +1087,5 @@ export interface SubmittedCsvDatasetResult { export interface SelectedFile extends File { sizeMB?: number password?: string + workbook?: XLSX.WorkBook } diff --git a/client/src/app/services/spreadsheet.service.ts b/client/src/app/services/spreadsheet.service.ts index 64f6740..2edceee 100644 --- a/client/src/app/services/spreadsheet.service.ts +++ b/client/src/app/services/spreadsheet.service.ts @@ -30,7 +30,7 @@ export class SpreadsheetService { licenceState: this.licenceState }) - return spreadSheetUtil.parseExcelFile( + return spreadSheetUtil.parseSpreadsheetFile( parseParams, this.promptExcelPassword, onParseStateChange, @@ -38,6 +38,37 @@ export class SpreadsheetService { ) } + /** + * Reads the excel file using the XLSX.read() function + * If possible, function will use the web worker to read it in background thread + * otherwise fallback method will be used + * + * @param file selected in an + * @returns WorkBook + */ + public xlsxReadFile(file: any): Promise { + return new Promise((resolve, reject) => { + const spreadSheetUtil = new SpreadsheetUtil({ + licenceState: this.licenceState + }) + + let reader: FileReader = new FileReader() + + reader.onload = (fileReaderResponse: any) => { + spreadSheetUtil.xslxStartReading( + fileReaderResponse, + this.promptExcelPassword + ).then(response => { + resolve(response) + }).catch(err => { + reject(err) + }) + } + + reader.readAsArrayBuffer(file) + }) + } + /** * Read the file minimally just to get the sheet names, not reading full file * to help boost the performance diff --git a/client/src/app/shared/spreadsheet-util/spreadsheet-util.ts b/client/src/app/shared/spreadsheet-util/spreadsheet-util.ts index e3ae50d..d008eb3 100644 --- a/client/src/app/shared/spreadsheet-util/spreadsheet-util.ts +++ b/client/src/app/shared/spreadsheet-util/spreadsheet-util.ts @@ -47,8 +47,13 @@ export class SpreadsheetUtil { } /** - * Parses attached file and searches fo the matching data + * Parses attached file and searches for the matching data * + * If CSV is provided no searching of the data will be executed, but csv file + * returned back in an FileUploader array + * + * @param promptExcelPassword used to trigger the modal for password input + * when provided file is locked * @param parseParams params required for parsing the file * @param onParseStateChange callback used to inform about parsing state * so the user of the function can update the UI with latest info @@ -56,364 +61,345 @@ export class SpreadsheetUtil { * * @returns parsed list of files to upload and JSON data ready for HOT usage */ - public parseExcelFile( + public parseSpreadsheetFile( parseParams: ParseParams, promptExcelPassword: (options?: OpenOptions) => Promise, onParseStateChange?: (uploadState: string) => void, onTableFoundEvent?: (info: string) => void ): Promise { return new Promise((resolve, reject) => { - let data: any[] = [] const uploader: FileUploader = parseParams.uploader || new FileUploader() - const file: File = parseParams.file - const filename = file.name - - if (!parseParams.encoding) parseParams.encoding = 'UTF-8' - - if (onParseStateChange) - onParseStateChange(`Loading ${filename} into the browser`) - - let foundData = { - sheet: '' - } - - let fileType = filename.slice( - filename.lastIndexOf('.') + 1, - filename.lastIndexOf('.') + 4 - ) - - if (fileType.toLowerCase() === 'xls') { - let reader: FileReader = new FileReader() - - const self = this - reader.onload = async (theFile: any) => { - /* read workbook */ - const bstr = this.toBstr(theFile.target.result) - let wb: XLSX.WorkBook | undefined = undefined - let fileUnlocking: boolean = false - - const xlsxOptions: XLSX.ParsingOptions = { - type: 'binary', - cellDates: false, - cellFormula: true, - cellStyles: true, - cellNF: false, - cellText: false, - password: parseParams.password - } - - try { - wb = await this.xlsxRead(bstr, { - ...xlsxOptions - }) - } catch (err: any) { - if (err.message.toLowerCase().includes('password')) { - fileUnlocking = true - - let passwordError = false - - while (fileUnlocking) { - const password = await promptExcelPassword({ - error: passwordError - }) - - if (password) { - try { - wb = await this.xlsxRead(bstr, { - ...xlsxOptions, - password: password - }) - - fileUnlocking = false - passwordError = false - } catch (err: any) { - passwordError = true - - if (!err.message.toLowerCase().includes('password')) { - fileUnlocking = false - } - } - } else { - fileUnlocking = false - } - } - } else { - return reject('Error reading the file') - } - } - - if (!wb) { - return reject('No workbook found.') - } - - /* save data */ - let isComplete: boolean = false - let missingHeaders: MissingHeaders[] = [] - - const csvArrayHeaders: string[] = [ - '_____DELETE__THIS__RECORD_____', - ...parseParams.headerArray - ] - - let csvArrayHeadersLower = csvArrayHeaders.map((x) => x.toLowerCase()) - let csvArrayHeadersMap = csvArrayHeadersLower.reduce( - (map: any, obj: string) => { - map[obj] = -1 - return map - }, - {} - ) - - const searchResult = this.searchDataInExcel( - wb, - parseParams - ) - - let csvArrayData: any[] = [] - - if (searchResult.found) { - isComplete = true - csvArrayData = searchResult.found.arrayData - - if (!searchResult.found.headers.includes('_____delete__this__record_____')) { - csvArrayData = csvArrayData.map((row: any[]) => { - // Add empty val on start of the column to compensate for _____delete__this__record_____ - // when not found in the file - row.unshift({ v: '' }) - - return row - }) - } - - if (onTableFoundEvent) - onTableFoundEvent( - `Sheet: ${searchResult.found.sheetName}\nRange: ${searchResult.found.startAddress}:${searchResult.found.endAddress}` - ) - } else { - missingHeaders = searchResult.missing || [] - } - - if (missingHeaders.length > 0) { - let abortMsg = missingHeaders.map(mh => { - return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}` - }).join('\n\n') - - uploader.queue.pop() - return reject(abortMsg) - } - - // If first row is empty, that means no data has been found - if (csvArrayData.length === 0 || csvArrayData[0].length === 0) { - let abortMsg = 'No relevant data found in File !' - - uploader.queue.pop() - return reject(abortMsg) - } - - if ( - parseParams.dateTimeHeaders.length > 0 || - parseParams.dateHeaders.length > 0 || - parseParams.timeHeaders.length > 0 - ) { - csvArrayData = this.updateDateTimeCols( - csvArrayHeaders, - csvArrayData, - parseParams - ) - } - - if (parseParams.xlRules.length > 0) { - csvArrayData = this.updateXLRuleCols( - csvArrayHeaders, - csvArrayData, - parseParams - ) - } - - if (!isComplete) { - let abortMsg = '' - - if (missingHeaders.length === 0) { - abortMsg = 'No relevant data found in File !' - } else { - abortMsg = missingHeaders.map(mh => { - return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}` - }).join('\n\n') - } - - // abort message is fired, return undefined - uploader.queue.pop() - return reject(abortMsg) - } else { - parseParams.headerShow = csvArrayHeaders - - // Remove the metadata from the cells, leave only values - csvArrayData = csvArrayData.map((row: any) => - row.map((col: any) => { - if (col.t === 'n') { - return col.v - } else { - if (col.w) return col.v - - return typeof col.v === 'string' ? col.v.trim() : col.v - } - }) - ) - - csvArrayData = csvArrayData.map((row: any) => { - return row.map((col: any, index: number) => { - if (!col && col !== 0) col = '' - - /** - * Keeping this for the reference - * Code below used to convert JSON to CSV - * now the XLSX is converting to CSV - */ - // if (isNaN(col)) { - // // Match and replace the double quotes, ignore the first and last char - // // in case they are double quotes already - // col = col.replace(/(? -1 || - // col.search(/\r|\n/g) > -1 - // ) { - // // Missing quotes at the end - // if (col.search(/"$/g) < 0) { - // col = col + '"' // So we add them - // } - - // // Missing quotes at the start - // if (col.search(/^"/g) < 0) { - // col = '"' + col // So we add them - // } - // } - // } - - const colName = parseParams.headerShow[index] - const colRule = parseParams.dcValidator?.getRule(colName) - - if (colRule?.type === 'numeric') { - if (isSpecialMissing(col) && !col.includes('.')) - col = '.' + col - } - - return col - }) - }) - - data = csvArrayData - - // Apply licence rows limitation if exists, it is only affecting data - // which will be send to SAS - const strippedCsvArrayData = csvArrayData.slice( - 0, - this.licenceState.value.submit_rows_limit - ) - // To submit to sas service, we need clean version of CSV of file - // attached. XLSX will do the parsing and heavy lifting - // First we create worksheet of json (data we extracted) - let ws = XLSX.utils.json_to_sheet(strippedCsvArrayData, { - skipHeader: true - }) - - // create CSV to be uploaded from worksheet - let csvContentClean = XLSX.utils.sheet_to_csv(ws) - // Prepend headers - csvContentClean = csvArrayHeaders.join(',') + '\n' + csvContentClean - - // Blob from which CSV file will be created depending of the selected - // encoding - let blob: Blob - - if (parseParams.encoding === 'WLATIN1') { - // WLATIN1 - let encoded = iconv.decode( - Buffer.from(csvContentClean), - 'CP-1252' - ) - blob = new Blob([encoded], { type: 'application/csv' }) - } else { - // UTF-8 - blob = new Blob([csvContentClean], { type: 'application/csv' }) - } - - let newCSVFile: File = blobToFile(blob, filename + '.csv') - uploader.addToQueue([newCSVFile]) - } - - if (data.length === 0) { - return reject( - `Table in the file is empty. Data found on sheet: ${foundData.sheet}` - ) - } - - if (!searchResult.found) { - return reject( - `No relevant data found. 'found' object is empty, unexpected error occurred.` - ) - } - - const rangeStartAddress = searchResult.found.startAddress || '' - const rangeEndAddress = searchResult.found.endAddress || '' - - return resolve({ - uploader, - data: csvArrayData, - rangeSheetRes: { - found: !!searchResult.found, - sheetName: searchResult.found.sheetName, - rangeStartAddress: rangeStartAddress, - rangeEndAddress: rangeEndAddress, - rangeAddress: `${rangeStartAddress}:${rangeEndAddress}`, - missingHeaders: missingHeaders, - }, - headerShow: parseParams.headerShow + // If workbook is present it means file is already read and we don't need + // to read it again, otherwise we will do a XLSX.read() + if (parseParams.workbook) { + this.parseExcelFile( + parseParams, + parseParams.workbook, + uploader, + onTableFoundEvent + ) + .then((response) => { + resolve(response) }) - } + .catch((err) => { + reject(err) + }) + } else { + // File is not read so we must do a XLSX.read() + let data: any[] = [] - reader.readAsArrayBuffer(file) - } else if (fileType.toLowerCase() === 'csv') { - if (this.licenceState.value.submit_rows_limit !== Infinity) { - uploader.queue.pop() - return reject( - 'Excel files only. To unlock CSV uploads, please contact support@datacontroller.io' - ) - } + const file: File = parseParams.file + + if (!parseParams.encoding) parseParams.encoding = 'UTF-8' + + if (onParseStateChange) + onParseStateChange(`Loading ${file.name} into the browser`) + + let fileType = file.name.slice( + file.name.lastIndexOf('.') + 1, + file.name.lastIndexOf('.') + 4 + ) + + if (fileType.toLowerCase() === 'xls') { + let reader: FileReader = new FileReader() - if (parseParams.encoding === 'WLATIN1') { - let reader = new FileReader() const self = this - // Closure to capture the file information. - reader.onload = (theFile: any) => { - let encoded = iconv.decode( - Buffer.from(theFile.target.result), - 'CP-1252' - ) - let blob = new Blob([encoded], { type: fileType }) - let encodedFile: File = blobToFile(blob, filename) - uploader.queue.pop() - uploader.addToQueue([encodedFile]) - return resolve({ - uploader - }) + reader.onload = async (fileReaderResponse: any) => { + const wb = await this.xslxStartReading( + fileReaderResponse, + promptExcelPassword, + parseParams.password + ) + + if (!wb) { + return reject('No workbook found.') + } + + this.parseExcelFile(parseParams, wb, uploader, onTableFoundEvent) + .then((response) => { + resolve(response) + }) + .catch((err) => { + reject(err) + }) } reader.readAsArrayBuffer(file) + } else if (fileType.toLowerCase() === 'csv') { + return this.parseCsvFile(parseParams, uploader, fileType) } else { + let abortMsg = + 'Invalid file type "' + + parseParams.file.name + + '". Please upload csv or excel file.' + + uploader.queue.pop() + return reject(abortMsg) + } + } + }) + } + + private parseExcelFile( + parseParams: ParseParams, + workbook: XLSX.WorkBook, + uploader: FileUploader, + onTableFoundEvent?: (info: string) => void + ): Promise { + return new Promise((resolve, reject) => { + /* save data */ + let isComplete: boolean = false + let missingHeaders: MissingHeaders[] = [] + + const csvArrayHeaders: string[] = [ + '_____DELETE__THIS__RECORD_____', + ...parseParams.headerArray + ] + + const searchResult = this.searchDataInExcel(workbook, parseParams) + + let csvArrayData: any[] = [] + + if (searchResult.found) { + isComplete = true + csvArrayData = searchResult.found.arrayData + + if ( + !searchResult.found.headers.includes('_____delete__this__record_____') + ) { + csvArrayData = csvArrayData.map((row: any[]) => { + // Add empty val on start of the column to compensate for _____delete__this__record_____ + // when not found in the file + row.unshift({ v: '' }) + + return row + }) + } + + if (onTableFoundEvent) + onTableFoundEvent( + `Sheet: ${searchResult.found.sheetName}\nRange: ${searchResult.found.startAddress}:${searchResult.found.endAddress}` + ) + } else { + missingHeaders = searchResult.missing || [] + } + + if (missingHeaders.length > 0) { + let abortMsg = missingHeaders + .map((mh) => { + return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}` + }) + .join('\n\n') + + uploader.queue.pop() + return reject(abortMsg) + } + + // If first row is empty, that means no data has been found + if (csvArrayData.length === 0 || csvArrayData[0].length === 0) { + let abortMsg = 'No relevant data found in File !' + + uploader.queue.pop() + return reject(abortMsg) + } + + if ( + parseParams.dateTimeHeaders.length > 0 || + parseParams.dateHeaders.length > 0 || + parseParams.timeHeaders.length > 0 + ) { + csvArrayData = this.updateDateTimeCols( + csvArrayHeaders, + csvArrayData, + parseParams + ) + } + + if (parseParams.xlRules.length > 0) { + csvArrayData = this.updateXLRuleCols( + csvArrayHeaders, + csvArrayData, + parseParams + ) + } + + if (!isComplete) { + let abortMsg = '' + + if (missingHeaders.length === 0) { + abortMsg = 'No relevant data found in File !' + } else { + abortMsg = missingHeaders + .map((mh) => { + return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}` + }) + .join('\n\n') + } + + // abort message is fired, return undefined + uploader.queue.pop() + return reject(abortMsg) + } else { + parseParams.headerShow = csvArrayHeaders + + // Remove the metadata from the cells, leave only values + csvArrayData = csvArrayData.map((row: any) => + row.map((col: any) => { + if (col.t === 'n') { + return col.v + } else { + if (col.w) return col.v + + return typeof col.v === 'string' ? col.v.trim() : col.v + } + }) + ) + + csvArrayData = csvArrayData.map((row: any) => { + return row.map((col: any, index: number) => { + if (!col && col !== 0) col = '' + + /** + * Keeping this for the reference + * Code below used to convert JSON to CSV + * now the XLSX is converting to CSV + */ + // if (isNaN(col)) { + // // Match and replace the double quotes, ignore the first and last char + // // in case they are double quotes already + // col = col.replace(/(? -1 || + // col.search(/\r|\n/g) > -1 + // ) { + // // Missing quotes at the end + // if (col.search(/"$/g) < 0) { + // col = col + '"' // So we add them + // } + + // // Missing quotes at the start + // if (col.search(/^"/g) < 0) { + // col = '"' + col // So we add them + // } + // } + // } + + const colName = parseParams.headerShow[index] + const colRule = parseParams.dcValidator?.getRule(colName) + + if (colRule?.type === 'numeric') { + if (isSpecialMissing(col) && !col.includes('.')) col = '.' + col + } + + return col + }) + }) + + // Apply licence rows limitation if exists, it is only affecting data + // which will be send to SAS + const strippedCsvArrayData = csvArrayData.slice( + 0, + this.licenceState.value.submit_rows_limit + ) + // To submit to sas service, we need clean version of CSV of file + // attached. XLSX will do the parsing and heavy lifting + // First we create worksheet of json (data we extracted) + let ws = XLSX.utils.json_to_sheet(strippedCsvArrayData, { + skipHeader: true + }) + + // create CSV to be uploaded from worksheet + let csvContentClean = XLSX.utils.sheet_to_csv(ws) + // Prepend headers + csvContentClean = csvArrayHeaders.join(',') + '\n' + csvContentClean + + // Blob from which CSV file will be created depending of the selected + // encoding + let blob: Blob + + if (parseParams.encoding === 'WLATIN1') { + // WLATIN1 + let encoded = iconv.decode(Buffer.from(csvContentClean), 'CP-1252') + blob = new Blob([encoded], { type: 'application/csv' }) + } else { + // UTF-8 + blob = new Blob([csvContentClean], { type: 'application/csv' }) + } + + let newCSVFile: File = blobToFile(blob, parseParams.file.name + '.csv') + uploader.addToQueue([newCSVFile]) + } + + if (csvArrayData.length === 0) { + return reject( + `Table in the file is empty. Data found on sheet: ${searchResult.found?.sheetName || ''}` + ) + } + + if (!searchResult.found) { + return reject( + `No relevant data found. 'found' object is empty, unexpected error occurred.` + ) + } + + const rangeStartAddress = searchResult.found.startAddress || '' + const rangeEndAddress = searchResult.found.endAddress || '' + + return resolve({ + uploader, + data: csvArrayData, + rangeSheetRes: { + found: !!searchResult.found, + sheetName: searchResult.found.sheetName, + rangeStartAddress: rangeStartAddress, + rangeEndAddress: rangeEndAddress, + rangeAddress: `${rangeStartAddress}:${rangeEndAddress}`, + missingHeaders: missingHeaders + }, + headerShow: parseParams.headerShow + }) + }) + } + + private parseCsvFile( + parseParams: ParseParams, + uploader: FileUploader, + fileType: string + ) { + return new Promise((resolve, reject) => { + if (this.licenceState.value.submit_rows_limit !== Infinity) { + uploader.queue.pop() + return reject( + 'Excel files only. To unlock CSV uploads, please contact support@datacontroller.io' + ) + } + + if (parseParams.encoding === 'WLATIN1') { + let reader = new FileReader() + const self = this + // Closure to capture the file information. + reader.onload = (theFile: any) => { + let encoded = iconv.decode( + Buffer.from(theFile.target.result), + 'CP-1252' + ) + let blob = new Blob([encoded], { type: fileType }) + let encodedFile: File = blobToFile(blob, parseParams.file.name) + uploader.queue.pop() + uploader.addToQueue([encodedFile]) + return resolve({ uploader }) } - } else { - let abortMsg = - 'Invalid file type "' + - filename + - '". Please upload csv or excel file.' - uploader.queue.pop() - return reject(abortMsg) + reader.readAsArrayBuffer(parseParams.file) + } else { + return resolve({ + uploader + }) } }) } @@ -422,6 +408,83 @@ export class SpreadsheetUtil { return parseFloat((size / (1024 * 1024)).toFixed(2)) } + /** + * Wrapper function for XLSX.read() with integrated 'unlock' functionality + * Used by multi load component to load the file while user chooses the datasets + * to be updated + * + * @param fileReaderResponse response from the file reader + * @param promptExcelPassword password callback + * @param password password provided by the user + * @returns WorkBook + */ + public xslxStartReading( + fileReaderResponse: any, + promptExcelPassword: (options?: OpenOptions) => Promise, + password?: string + ): Promise { + return new Promise(async (resolve, reject) => { + /* read workbook */ + const bstr = this.toBstr(fileReaderResponse.target.result) + let wb: XLSX.WorkBook | undefined = undefined + let fileUnlocking: boolean = false + + const xlsxOptions: XLSX.ParsingOptions = { + type: 'binary', + cellDates: false, + cellFormula: true, + cellStyles: true, + cellNF: false, + cellText: false, + password: password + } + + try { + wb = await this.xlsxRead(bstr, { + ...xlsxOptions + }) + } catch (err: any) { + if (err.message.toLowerCase().includes('password')) { + fileUnlocking = true + + let passwordError = false + + while (fileUnlocking) { + const password = await promptExcelPassword({ + error: passwordError + }) + + if (password) { + try { + wb = await this.xlsxRead(bstr, { + ...xlsxOptions, + password: password + }) + + fileUnlocking = false + passwordError = false + } catch (err: any) { + passwordError = true + + if (!err.message.toLowerCase().includes('password')) { + fileUnlocking = false + } + } + } else { + fileUnlocking = false + } + } + } else { + return reject('Error reading the file') + } + } + + if (!wb) return reject('Failed to parse a workbook') + + return resolve(wb) + }) + } + /** * XLSX Read wrapper which uses Web Worker to read the file and not block * the UI while reading. It will allow reading bigger files. @@ -526,7 +589,7 @@ export class SpreadsheetUtil { const ws: XLSX.WorkSheet = wb.Sheets[sheetName] // Find the first header - Object.keys(ws).forEach(wsKey => { + Object.keys(ws).forEach((wsKey) => { const cellValue = ws[wsKey].v // If the cell does not have `v` property we ignore it, those are metadata properties @@ -547,15 +610,18 @@ export class SpreadsheetUtil { }) // If _____delete__this__record_____ is not found in the file, remove it from the array - if (csvArrayHeadersMap['_____delete__this__record_____'] === -1) delete csvArrayHeadersMap['_____delete__this__record_____'] + if (csvArrayHeadersMap['_____delete__this__record_____'] === -1) + delete csvArrayHeadersMap['_____delete__this__record_____'] // Parse missing headers, if any, abort the search and jump to next sheet - missingHeaders = Object.keys(csvArrayHeadersMap).filter(header => csvArrayHeadersMap[header] === -1) + missingHeaders = Object.keys(csvArrayHeadersMap).filter( + (header) => csvArrayHeadersMap[header] === -1 + ) if (missingHeaders.length > 0) { missing.push({ sheetName: sheetName, - missingHeaders: missingHeaders.map(header => header.toUpperCase()) + missingHeaders: missingHeaders.map((header) => header.toUpperCase()) }) continue @@ -576,13 +642,16 @@ export class SpreadsheetUtil { return bIsPk - aIsPk }) - foundHeadersSorted.forEach(header => { + foundHeadersSorted.forEach((header) => { const headerAddress = csvArrayHeadersMap[header] const headerAddressLetterRegex = headerAddress.match(/\D+/) const headerAddressNumberRegex = headerAddress.match(/\d+/) - const headerAddressLetter = (headerAddressLetterRegex ? headerAddressLetterRegex[0] : -1) || -1 - const headerAddressNumber = parseInt((headerAddressNumberRegex ? headerAddressNumberRegex[0] : -1) || -1) + const headerAddressLetter = + (headerAddressLetterRegex ? headerAddressLetterRegex[0] : -1) || -1 + const headerAddressNumber = parseInt( + (headerAddressNumberRegex ? headerAddressNumberRegex[0] : -1) || -1 + ) const firstDataRow = headerAddressNumber + 1 @@ -604,7 +673,9 @@ export class SpreadsheetUtil { // Push to array of objects if (!json[jsonRow]) json.push({}) - if (cell) json[jsonRow][header] = typeof cell.v === 'string' ? cell.v.trim() : cell.v + if (cell) + json[jsonRow][header] = + typeof cell.v === 'string' ? cell.v.trim() : cell.v // Push to array of arrays, but with all cell meta info if (!arrayData[jsonRow]) arrayData.push([]) @@ -636,7 +707,10 @@ export class SpreadsheetUtil { if (ws[address].w) { json[jsonRow][header] = ws[address].w } else { - json[jsonRow][header] = typeof ws[address].v === 'string' ? ws[address].v.trim() : ws[address].v + json[jsonRow][header] = + typeof ws[address].v === 'string' + ? ws[address].v.trim() + : ws[address].v } } @@ -664,7 +738,7 @@ export class SpreadsheetUtil { json.forEach((row: any, rowIndex: number) => { let missingPk = false - parseParams.headerPks.forEach(pkHeader => { + parseParams.headerPks.forEach((pkHeader) => { if (row[pkHeader.toLowerCase()] === undefined) missingPk = true }) @@ -682,10 +756,12 @@ export class SpreadsheetUtil { arrayData.splice(firstRowIndexMissingPk, Infinity) } else { // Fallback: Remove only rows with missing PK - rowsWithMissingPk.sort((a,b) => b - a).forEach(index => { - json.splice(index, 1) - arrayData.splice(index, 1) - }) + rowsWithMissingPk + .sort((a, b) => b - a) + .forEach((index) => { + json.splice(index, 1) + arrayData.splice(index, 1) + }) } if (!arrayData.length) {