fix(multi load): read the xlsx file ahead of time, while the user chooses datasets
Some checks failed
Build / Build-and-ng-test (pull_request) Failing after 44s

This commit is contained in:
Mihajlo Medjedovic 2024-08-09 16:09:53 +02:00
parent bbb725c64c
commit 6547461637
6 changed files with 579 additions and 406 deletions

View File

@ -18,6 +18,7 @@ import {
ClarityIcons,
exclamationTriangleIcon,
moonIcon,
processOnVmIcon,
sunIcon,
tableIcon,
trashIcon
@ -28,7 +29,8 @@ ClarityIcons.addIcons(
sunIcon,
exclamationTriangleIcon,
tableIcon,
trashIcon
trashIcon,
processOnVmIcon
)
@Component({

View File

@ -2,11 +2,17 @@ import { DcValidator } from '../shared/dc-validator/dc-validator'
import { FileUploadEncoding } from './FileUploadEncoding'
import { FileUploader } from './FileUploader.class'
import { ExcelRule } from './TableData'
import XLSX from 'xlsx'
export interface ParseParams {
file: File
password?: string
dcValidator: DcValidator
/**
* If a workbook is provided, the parse function will not run XLSX.read();
* it will use this property instead, so the client must read the file beforehand.
*/
workbook?: XLSX.WorkBook,
/**
* Parse function will manipulate and return the uploader array which can be provided with files already in the queue
* Otherwise new empty instance will be created.

View File

@ -198,14 +198,28 @@
*ngIf="!activeParsedDataset"
class="no-table-selected pointer-events-none"
>
<clr-icon
shape="warning-standard"
size="40"
class="is-info icon-dc-fill"
></clr-icon>
<p class="text-center color-gray mt-10" cds-text="section">
Please select a dataset on the left to review the data
</p>
<ng-container *ngIf="fileLoadingState !== FileLoadingState.parsed">
<clr-icon
shape="process-on-vm"
size="40"
class="is-info icon-dc-fill"
></clr-icon>
<p class="text-center color-gray mt-10" cds-text="section">
{{ fileLoadingState }}...
</p>
</ng-container>
<ng-container *ngIf="fileLoadingState === FileLoadingState.parsed">
<clr-icon
shape="warning-standard"
size="40"
class="is-info icon-dc-fill"
></clr-icon>
<p class="text-center color-gray mt-10" cds-text="section">
Please select a dataset on the left to review the data
</p>
</ng-container>
</div>
<ng-container *ngIf="activeParsedDataset">

View File

@ -30,6 +30,17 @@ import { UploadFile } from '@sasjs/adapter'
import { UploadFileResponse } from '../models/UploadFile'
import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse'
import { ParseResult } from '../models/ParseResult.interface'
import XLSX from 'xlsx'
/**
 * Stages of loading the selected spreadsheet file.
 * The string values are rendered as-is by the template while no dataset
 * is selected, so they double as user-facing messages.
 */
enum FileLoadingState {
// Set as soon as a file is selected, before the background XLSX read starts
reading = 'Reading the file',
// Set once the background XLSX read has resolved with a workbook
parsing = 'Searching for the data in the file',
// Set when a dataset parse of the workbook has resolved
parsed = 'Searching for the data finished',
/**
* Default value, used as the initial state of the component
*/
notSelected = 'File not selected'
}
@Component({
selector: 'app-multi-dataset',
@ -43,6 +54,11 @@ export class MultiDatasetComponent implements OnInit {
public licenceState = this.licenceService.licenceState
public Infinity = Infinity
public workbookInterval: any
public fileLoadingState: FileLoadingState = FileLoadingState.notSelected
public FileLoadingState = FileLoadingState
public hotTableLicenseKey: string | undefined = undefined
public hotTableMaxRows =
this.licenceState.value.viewer_rows_allowed || Infinity
@ -163,7 +179,7 @@ export class MultiDatasetComponent implements OnInit {
}
}
onFileChange(event: any) {
async onFileChange(event: any) {
const files = event?.target?.files || []
if (files.length < 1) {
@ -200,10 +216,17 @@ export class MultiDatasetComponent implements OnInit {
// For EXCEL if multiple files, we only take one (the first one)
this.selectedFile = event.target.files[0]
if (this.selectedFile)
this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(
this.selectedFile.size
)
if (this.selectedFile) {
this.fileLoadingState = FileLoadingState.reading
this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(this.selectedFile.size)
// Read the excel file to be ready
this.spreadsheetService.xlsxReadFile(this.selectedFile!).then(wb => {
this.fileLoadingState = FileLoadingState.parsing
this.selectedFile!.workbook = wb
})
}
this.initUserInputHot()
this.onAutoDetectColumns()
@ -291,53 +314,56 @@ export class MultiDatasetComponent implements OnInit {
})
})
for (let parsedDataset of this.parsedDatasets) {
this.spreadsheetService
.parseExcelFile({
file: this.selectedFile!,
password: this.selectedFile!.password || undefined,
dcValidator: parsedDataset.datasetInfo.dcValidator!,
headerPks: parsedDataset.datasetInfo.headerPks,
headerArray: parsedDataset.datasetInfo.headerArray,
headerShow: [],
timeHeaders: parsedDataset.datasetInfo.timeHeaders,
dateHeaders: parsedDataset.datasetInfo.dateHeaders,
dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
xlRules: parsedDataset.datasetInfo.xlRules
})
.then((parseResult: ParseResult | undefined) => {
console.log('parseResult', parseResult)
this.workbookLoaded().then(workbook => {
for (let parsedDataset of this.parsedDatasets) {
this.spreadsheetService
.parseExcelFile({
file: this.selectedFile!,
workbook: workbook,
password: this.selectedFile!.password || undefined,
dcValidator: parsedDataset.datasetInfo.dcValidator!,
headerPks: parsedDataset.datasetInfo.headerPks,
headerArray: parsedDataset.datasetInfo.headerArray,
headerShow: [],
timeHeaders: parsedDataset.datasetInfo.timeHeaders,
dateHeaders: parsedDataset.datasetInfo.dateHeaders,
dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
xlRules: parsedDataset.datasetInfo.xlRules
})
.then((parseResult: ParseResult | undefined) => {
this.fileLoadingState = FileLoadingState.parsed
if (parseResult && parseResult.data) {
let datasource: any[] = []
if (parseResult && parseResult.data) {
let datasource: any[] = []
parseResult.data.map((item) => {
let itemObject: any = {}
parseResult.data.map((item) => {
let itemObject: any = {}
parseResult.headerShow!.map((header: any, index: number) => {
itemObject[header] = item[index]
parseResult.headerShow!.map((header: any, index: number) => {
itemObject[header] = item[index]
})
// If Delete? column is not set in the file, we set it to NO
if (!itemObject['_____DELETE__THIS__RECORD_____'])
itemObject['_____DELETE__THIS__RECORD_____'] = 'No'
datasource.push(itemObject)
})
// If Delete? column is not set in the file, we set it to NO
if (!itemObject['_____DELETE__THIS__RECORD_____'])
itemObject['_____DELETE__THIS__RECORD_____'] = 'No'
parsedDataset.datasource = datasource
parsedDataset.parseResult = parseResult
parsedDataset.parsingTable = false
}
})
.catch((error: string) => {
console.warn('Parsing excel file error.', error)
datasource.push(itemObject)
})
parsedDataset.datasource = datasource
parsedDataset.parseResult = parseResult
parsedDataset.datasource = []
parsedDataset.includeInSubmission = false
parsedDataset.parsingTable = false
}
})
.catch((error: string) => {
console.warn('Parsing excel file error.', error)
parsedDataset.datasource = []
parsedDataset.includeInSubmission = false
parsedDataset.parsingTable = false
})
}
})
}
})
})
}
@ -826,6 +852,23 @@ export class MultiDatasetComponent implements OnInit {
if (newSubmittedDataset) newSubmittedDataset.active = true
}
/**
 * Waits for the workbook of the selected file, which is attached to
 * `selectedFile.workbook` asynchronously after the file has been chosen.
 *
 * Resolves immediately when the workbook is already present, otherwise polls
 * every 500ms until it appears.
 *
 * @returns Promise resolving with the workbook once it is attached to the
 * selected file. Rejects with 'No file selected' when there is no selected
 * file, either at call time or while waiting.
 */
private workbookLoaded(): Promise<XLSX.WorkBook> {
  return new Promise((resolve, reject) => {
    if (!this.selectedFile) {
      // Return here, otherwise the polling below would start and dereference
      // an undefined file on every tick
      reject('No file selected')
      return
    }

    // Fast path: the read has already finished, no need to wait for a tick
    if (this.selectedFile.workbook) {
      resolve(this.selectedFile.workbook)
      return
    }

    // The handle is kept locally so that overlapping calls always clear their
    // own timer; the shared property only mirrors the most recent one
    const timer = setInterval(() => {
      const currentFile = this.selectedFile

      if (!currentFile) {
        // File was unselected while waiting, stop polling instead of throwing
        clearInterval(timer)
        reject('No file selected')
        return
      }

      if (currentFile.workbook) {
        clearInterval(timer)
        resolve(currentFile.workbook)
      }
    }, 500)

    this.workbookInterval = timer
  })
}
private parseDatasetFromCsvName(fileName: string) {
const fileNameArr = fileName.split('.')
fileNameArr.pop()
@ -1044,4 +1087,5 @@ export interface SubmittedCsvDatasetResult {
/**
 * File selected by the user, extended with values the component attaches
 * to it after selection.
 */
export interface SelectedFile extends File {
// Size in megabytes, computed from `size` with spreadsheetService.bytesToMB()
sizeMB?: number
// Password forwarded to the excel parser when set
password?: string
// Workbook attached once the background XLSX read of the file has resolved
workbook?: XLSX.WorkBook
}

View File

@ -30,7 +30,7 @@ export class SpreadsheetService {
licenceState: this.licenceState
})
return spreadSheetUtil.parseExcelFile(
return spreadSheetUtil.parseSpreadsheetFile(
parseParams,
this.promptExcelPassword,
onParseStateChange,
@ -38,6 +38,37 @@ export class SpreadsheetService {
)
}
/**
 * Reads the excel file into a workbook.
 *
 * The file is loaded as an ArrayBuffer with a FileReader and the result is
 * handed to `SpreadsheetUtil.xslxStartReading()`, which performs the actual
 * XLSX read and prompts for a password when the file is locked.
 *
 * @param file selected in an <input>
 * @returns Promise resolving with the WorkBook. Rejects when the file cannot
 * be loaded by the FileReader or when reading the workbook fails.
 */
public xlsxReadFile(file: any): Promise<XLSX.WorkBook> {
  return new Promise((resolve, reject) => {
    const spreadSheetUtil = new SpreadsheetUtil({
      licenceState: this.licenceState
    })

    const reader: FileReader = new FileReader()

    reader.onload = (fileReaderResponse: any) => {
      spreadSheetUtil
        .xslxStartReading(fileReaderResponse, this.promptExcelPassword)
        .then(resolve, reject)
    }

    // Without these handlers a failed or aborted read would leave the
    // promise pending forever
    reader.onerror = () => reject('Error reading the file')
    reader.onabort = () => reject('Reading the file was aborted')

    reader.readAsArrayBuffer(file)
  })
}
/**
* Read the file minimally just to get the sheet names, not reading full file
* to help boost the performance

View File

@ -47,8 +47,13 @@ export class SpreadsheetUtil {
}
/**
* Parses attached file and searches fo the matching data
* Parses attached file and searches for the matching data
*
* If CSV is provided no searching of the data will be executed, but csv file
* returned back in an FileUploader array
*
* @param promptExcelPassword used to trigger the modal for password input
* when provided file is locked
* @param parseParams params required for parsing the file
* @param onParseStateChange callback used to inform about parsing state
* so the user of the function can update the UI with latest info
@ -56,364 +61,345 @@ export class SpreadsheetUtil {
*
* @returns parsed list of files to upload and JSON data ready for HOT usage
*/
public parseExcelFile(
public parseSpreadsheetFile(
parseParams: ParseParams,
promptExcelPassword: (options?: OpenOptions) => Promise<string | undefined>,
onParseStateChange?: (uploadState: string) => void,
onTableFoundEvent?: (info: string) => void
): Promise<ParseResult | undefined> {
return new Promise((resolve, reject) => {
let data: any[] = []
const uploader: FileUploader = parseParams.uploader || new FileUploader()
const file: File = parseParams.file
const filename = file.name
if (!parseParams.encoding) parseParams.encoding = 'UTF-8'
if (onParseStateChange)
onParseStateChange(`Loading ${filename} into the browser`)
let foundData = {
sheet: ''
}
let fileType = filename.slice(
filename.lastIndexOf('.') + 1,
filename.lastIndexOf('.') + 4
)
if (fileType.toLowerCase() === 'xls') {
let reader: FileReader = new FileReader()
const self = this
reader.onload = async (theFile: any) => {
/* read workbook */
const bstr = this.toBstr(theFile.target.result)
let wb: XLSX.WorkBook | undefined = undefined
let fileUnlocking: boolean = false
const xlsxOptions: XLSX.ParsingOptions = {
type: 'binary',
cellDates: false,
cellFormula: true,
cellStyles: true,
cellNF: false,
cellText: false,
password: parseParams.password
}
try {
wb = await this.xlsxRead(bstr, {
...xlsxOptions
})
} catch (err: any) {
if (err.message.toLowerCase().includes('password')) {
fileUnlocking = true
let passwordError = false
while (fileUnlocking) {
const password = await promptExcelPassword({
error: passwordError
})
if (password) {
try {
wb = await this.xlsxRead(bstr, {
...xlsxOptions,
password: password
})
fileUnlocking = false
passwordError = false
} catch (err: any) {
passwordError = true
if (!err.message.toLowerCase().includes('password')) {
fileUnlocking = false
}
}
} else {
fileUnlocking = false
}
}
} else {
return reject('Error reading the file')
}
}
if (!wb) {
return reject('No workbook found.')
}
/* save data */
let isComplete: boolean = false
let missingHeaders: MissingHeaders[] = []
const csvArrayHeaders: string[] = [
'_____DELETE__THIS__RECORD_____',
...parseParams.headerArray
]
let csvArrayHeadersLower = csvArrayHeaders.map((x) => x.toLowerCase())
let csvArrayHeadersMap = csvArrayHeadersLower.reduce(
(map: any, obj: string) => {
map[obj] = -1
return map
},
{}
)
const searchResult = this.searchDataInExcel(
wb,
parseParams
)
let csvArrayData: any[] = []
if (searchResult.found) {
isComplete = true
csvArrayData = searchResult.found.arrayData
if (!searchResult.found.headers.includes('_____delete__this__record_____')) {
csvArrayData = csvArrayData.map((row: any[]) => {
// Add empty val on start of the column to compensate for _____delete__this__record_____
// when not found in the file
row.unshift({ v: '' })
return row
})
}
if (onTableFoundEvent)
onTableFoundEvent(
`Sheet: ${searchResult.found.sheetName}\nRange: ${searchResult.found.startAddress}:${searchResult.found.endAddress}`
)
} else {
missingHeaders = searchResult.missing || []
}
if (missingHeaders.length > 0) {
let abortMsg = missingHeaders.map(mh => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
}).join('\n\n')
uploader.queue.pop()
return reject(abortMsg)
}
// If first row is empty, that means no data has been found
if (csvArrayData.length === 0 || csvArrayData[0].length === 0) {
let abortMsg = 'No relevant data found in File !'
uploader.queue.pop()
return reject(abortMsg)
}
if (
parseParams.dateTimeHeaders.length > 0 ||
parseParams.dateHeaders.length > 0 ||
parseParams.timeHeaders.length > 0
) {
csvArrayData = this.updateDateTimeCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}
if (parseParams.xlRules.length > 0) {
csvArrayData = this.updateXLRuleCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}
if (!isComplete) {
let abortMsg = ''
if (missingHeaders.length === 0) {
abortMsg = 'No relevant data found in File !'
} else {
abortMsg = missingHeaders.map(mh => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
}).join('\n\n')
}
// abort message is fired, return undefined
uploader.queue.pop()
return reject(abortMsg)
} else {
parseParams.headerShow = csvArrayHeaders
// Remove the metadata from the cells, leave only values
csvArrayData = csvArrayData.map((row: any) =>
row.map((col: any) => {
if (col.t === 'n') {
return col.v
} else {
if (col.w) return col.v
return typeof col.v === 'string' ? col.v.trim() : col.v
}
})
)
csvArrayData = csvArrayData.map((row: any) => {
return row.map((col: any, index: number) => {
if (!col && col !== 0) col = ''
/**
* Keeping this for the reference
* Code below used to convert JSON to CSV
* now the XLSX is converting to CSV
*/
// if (isNaN(col)) {
// // Match and replace the double quotes, ignore the first and last char
// // in case they are double quotes already
// col = col.replace(/(?<!^)"(?!$)/g, '""')
// if (col.search(/,/g) > -1 ||
// col.search(/\r|\n/g) > -1
// ) {
// // Missing quotes at the end
// if (col.search(/"$/g) < 0) {
// col = col + '"' // So we add them
// }
// // Missing quotes at the start
// if (col.search(/^"/g) < 0) {
// col = '"' + col // So we add them
// }
// }
// }
const colName = parseParams.headerShow[index]
const colRule = parseParams.dcValidator?.getRule(colName)
if (colRule?.type === 'numeric') {
if (isSpecialMissing(col) && !col.includes('.'))
col = '.' + col
}
return col
})
})
data = csvArrayData
// Apply licence rows limitation if exists, it is only affecting data
// which will be send to SAS
const strippedCsvArrayData = csvArrayData.slice(
0,
this.licenceState.value.submit_rows_limit
)
// To submit to sas service, we need clean version of CSV of file
// attached. XLSX will do the parsing and heavy lifting
// First we create worksheet of json (data we extracted)
let ws = XLSX.utils.json_to_sheet(strippedCsvArrayData, {
skipHeader: true
})
// create CSV to be uploaded from worksheet
let csvContentClean = XLSX.utils.sheet_to_csv(ws)
// Prepend headers
csvContentClean = csvArrayHeaders.join(',') + '\n' + csvContentClean
// Blob from which CSV file will be created depending of the selected
// encoding
let blob: Blob
if (parseParams.encoding === 'WLATIN1') {
// WLATIN1
let encoded = iconv.decode(
Buffer.from(csvContentClean),
'CP-1252'
)
blob = new Blob([encoded], { type: 'application/csv' })
} else {
// UTF-8
blob = new Blob([csvContentClean], { type: 'application/csv' })
}
let newCSVFile: File = blobToFile(blob, filename + '.csv')
uploader.addToQueue([newCSVFile])
}
if (data.length === 0) {
return reject(
`Table in the file is empty. Data found on sheet: ${foundData.sheet}`
)
}
if (!searchResult.found) {
return reject(
`No relevant data found. 'found' object is empty, unexpected error occurred.`
)
}
const rangeStartAddress = searchResult.found.startAddress || ''
const rangeEndAddress = searchResult.found.endAddress || ''
return resolve({
uploader,
data: csvArrayData,
rangeSheetRes: {
found: !!searchResult.found,
sheetName: searchResult.found.sheetName,
rangeStartAddress: rangeStartAddress,
rangeEndAddress: rangeEndAddress,
rangeAddress: `${rangeStartAddress}:${rangeEndAddress}`,
missingHeaders: missingHeaders,
},
headerShow: parseParams.headerShow
// If workbook is present it means file is already read and we don't need
// to read it again, otherwise we will do a XLSX.read()
if (parseParams.workbook) {
this.parseExcelFile(
parseParams,
parseParams.workbook,
uploader,
onTableFoundEvent
)
.then((response) => {
resolve(response)
})
}
.catch((err) => {
reject(err)
})
} else {
// File is not read so we must do a XLSX.read()
let data: any[] = []
reader.readAsArrayBuffer(file)
} else if (fileType.toLowerCase() === 'csv') {
if (this.licenceState.value.submit_rows_limit !== Infinity) {
uploader.queue.pop()
return reject(
'Excel files only. To unlock CSV uploads, please contact support@datacontroller.io'
)
}
const file: File = parseParams.file
if (!parseParams.encoding) parseParams.encoding = 'UTF-8'
if (onParseStateChange)
onParseStateChange(`Loading ${file.name} into the browser`)
let fileType = file.name.slice(
file.name.lastIndexOf('.') + 1,
file.name.lastIndexOf('.') + 4
)
if (fileType.toLowerCase() === 'xls') {
let reader: FileReader = new FileReader()
if (parseParams.encoding === 'WLATIN1') {
let reader = new FileReader()
const self = this
// Closure to capture the file information.
reader.onload = (theFile: any) => {
let encoded = iconv.decode(
Buffer.from(theFile.target.result),
'CP-1252'
)
let blob = new Blob([encoded], { type: fileType })
let encodedFile: File = blobToFile(blob, filename)
uploader.queue.pop()
uploader.addToQueue([encodedFile])
return resolve({
uploader
})
reader.onload = async (fileReaderResponse: any) => {
const wb = await this.xslxStartReading(
fileReaderResponse,
promptExcelPassword,
parseParams.password
)
if (!wb) {
return reject('No workbook found.')
}
this.parseExcelFile(parseParams, wb, uploader, onTableFoundEvent)
.then((response) => {
resolve(response)
})
.catch((err) => {
reject(err)
})
}
reader.readAsArrayBuffer(file)
} else if (fileType.toLowerCase() === 'csv') {
return this.parseCsvFile(parseParams, uploader, fileType)
} else {
let abortMsg =
'Invalid file type "<b>' +
parseParams.file.name +
'</b>". Please upload csv or excel file.'
uploader.queue.pop()
return reject(abortMsg)
}
}
})
}
/**
 * Searches an already-read workbook for the table described by
 * `parseParams`, converts the found cells to plain values and adds a CSV
 * version of the data to the uploader queue.
 *
 * @param parseParams params required for parsing; `headerShow` is overwritten
 * with the delete-marker column followed by `headerArray`
 * @param workbook workbook in which the data is searched
 * @param uploader uploader which receives the generated CSV file; its last
 * queue entry is popped when the parse is aborted
 * @param onTableFoundEvent optional callback informed about the sheet and
 * range in which the data has been found
 *
 * @returns promise resolving with the uploader, the cell values, the range
 * info and the headers. Rejects with a string message when columns are
 * missing or no data is found.
 */
private parseExcelFile(
parseParams: ParseParams,
workbook: XLSX.WorkBook,
uploader: FileUploader,
onTableFoundEvent?: (info: string) => void
): Promise<ParseResult | undefined> {
return new Promise((resolve, reject) => {
/* save data */
let isComplete: boolean = false
let missingHeaders: MissingHeaders[] = []
// Expected headers, always led by the delete-marker column
const csvArrayHeaders: string[] = [
'_____DELETE__THIS__RECORD_____',
...parseParams.headerArray
]
const searchResult = this.searchDataInExcel(workbook, parseParams)
let csvArrayData: any[] = []
if (searchResult.found) {
isComplete = true
csvArrayData = searchResult.found.arrayData
if (
!searchResult.found.headers.includes('_____delete__this__record_____')
) {
csvArrayData = csvArrayData.map((row: any[]) => {
// Add empty val on start of the column to compensate for _____delete__this__record_____
// when not found in the file
row.unshift({ v: '' })
return row
})
}
if (onTableFoundEvent)
onTableFoundEvent(
`Sheet: ${searchResult.found.sheetName}\nRange: ${searchResult.found.startAddress}:${searchResult.found.endAddress}`
)
} else {
missingHeaders = searchResult.missing || []
}
// Missing columns abort the parse, listing the missing columns per sheet
if (missingHeaders.length > 0) {
let abortMsg = missingHeaders
.map((mh) => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
})
.join('\n\n')
uploader.queue.pop()
return reject(abortMsg)
}
// If first row is empty, that means no data has been found
if (csvArrayData.length === 0 || csvArrayData[0].length === 0) {
let abortMsg = 'No relevant data found in File !'
uploader.queue.pop()
return reject(abortMsg)
}
// Date/time conversion is only run when such columns are declared
if (
parseParams.dateTimeHeaders.length > 0 ||
parseParams.dateHeaders.length > 0 ||
parseParams.timeHeaders.length > 0
) {
csvArrayData = this.updateDateTimeCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}
if (parseParams.xlRules.length > 0) {
csvArrayData = this.updateXLRuleCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}
// NOTE(review): the two rejections above appear to cover every case in which
// isComplete is still false, so this branch looks unreachable - confirm
if (!isComplete) {
let abortMsg = ''
if (missingHeaders.length === 0) {
abortMsg = 'No relevant data found in File !'
} else {
abortMsg = missingHeaders
.map((mh) => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
})
.join('\n\n')
}
// abort message is fired, return undefined
uploader.queue.pop()
return reject(abortMsg)
} else {
parseParams.headerShow = csvArrayHeaders
// Remove the metadata from the cells, leave only values
csvArrayData = csvArrayData.map((row: any) =>
row.map((col: any) => {
if (col.t === 'n') {
return col.v
} else {
if (col.w) return col.v
return typeof col.v === 'string' ? col.v.trim() : col.v
}
})
)
csvArrayData = csvArrayData.map((row: any) => {
return row.map((col: any, index: number) => {
// Empty cells become empty strings, a numeric 0 is kept
if (!col && col !== 0) col = ''
/**
* Keeping this for the reference
* Code below used to convert JSON to CSV
* now the XLSX is converting to CSV
*/
// if (isNaN(col)) {
// // Match and replace the double quotes, ignore the first and last char
// // in case they are double quotes already
// col = col.replace(/(?<!^)"(?!$)/g, '""')
// if (col.search(/,/g) > -1 ||
// col.search(/\r|\n/g) > -1
// ) {
// // Missing quotes at the end
// if (col.search(/"$/g) < 0) {
// col = col + '"' // So we add them
// }
// // Missing quotes at the start
// if (col.search(/^"/g) < 0) {
// col = '"' + col // So we add them
// }
// }
// }
const colName = parseParams.headerShow[index]
const colRule = parseParams.dcValidator?.getRule(colName)
// Special missing values in numeric columns get a leading dot when they lack one
if (colRule?.type === 'numeric') {
if (isSpecialMissing(col) && !col.includes('.')) col = '.' + col
}
return col
})
})
// Apply licence rows limitation if exists, it is only affecting data
// which will be send to SAS
const strippedCsvArrayData = csvArrayData.slice(
0,
this.licenceState.value.submit_rows_limit
)
// To submit to sas service, we need clean version of CSV of file
// attached. XLSX will do the parsing and heavy lifting
// First we create worksheet of json (data we extracted)
let ws = XLSX.utils.json_to_sheet(strippedCsvArrayData, {
skipHeader: true
})
// create CSV to be uploaded from worksheet
let csvContentClean = XLSX.utils.sheet_to_csv(ws)
// Prepend headers
csvContentClean = csvArrayHeaders.join(',') + '\n' + csvContentClean
// Blob from which CSV file will be created depending of the selected
// encoding
let blob: Blob
if (parseParams.encoding === 'WLATIN1') {
// WLATIN1
// NOTE(review): this decodes the bytes of the CSV string as CP-1252 rather
// than encoding the string to CP-1252 - confirm this is intended
let encoded = iconv.decode(Buffer.from(csvContentClean), 'CP-1252')
blob = new Blob([encoded], { type: 'application/csv' })
} else {
// UTF-8
blob = new Blob([csvContentClean], { type: 'application/csv' })
}
let newCSVFile: File = blobToFile(blob, parseParams.file.name + '.csv')
uploader.addToQueue([newCSVFile])
}
if (csvArrayData.length === 0) {
return reject(
`Table in the file is empty. Data found on sheet: ${searchResult.found?.sheetName || ''}`
)
}
// Guard which also narrows `found` for the property reads below
if (!searchResult.found) {
return reject(
`No relevant data found. 'found' object is empty, unexpected error occurred.`
)
}
const rangeStartAddress = searchResult.found.startAddress || ''
const rangeEndAddress = searchResult.found.endAddress || ''
return resolve({
uploader,
data: csvArrayData,
rangeSheetRes: {
found: !!searchResult.found,
sheetName: searchResult.found.sheetName,
rangeStartAddress: rangeStartAddress,
rangeEndAddress: rangeEndAddress,
rangeAddress: `${rangeStartAddress}:${rangeEndAddress}`,
missingHeaders: missingHeaders
},
headerShow: parseParams.headerShow
})
})
}
private parseCsvFile(
parseParams: ParseParams,
uploader: FileUploader,
fileType: string
) {
return new Promise((resolve, reject) => {
if (this.licenceState.value.submit_rows_limit !== Infinity) {
uploader.queue.pop()
return reject(
'Excel files only. To unlock CSV uploads, please contact support@datacontroller.io'
)
}
if (parseParams.encoding === 'WLATIN1') {
let reader = new FileReader()
const self = this
// Closure to capture the file information.
reader.onload = (theFile: any) => {
let encoded = iconv.decode(
Buffer.from(theFile.target.result),
'CP-1252'
)
let blob = new Blob([encoded], { type: fileType })
let encodedFile: File = blobToFile(blob, parseParams.file.name)
uploader.queue.pop()
uploader.addToQueue([encodedFile])
return resolve({
uploader
})
}
} else {
let abortMsg =
'Invalid file type "<b>' +
filename +
'</b>". Please upload csv or excel file.'
uploader.queue.pop()
return reject(abortMsg)
reader.readAsArrayBuffer(parseParams.file)
} else {
return resolve({
uploader
})
}
})
}
@ -422,6 +408,83 @@ export class SpreadsheetUtil {
return parseFloat((size / (1024 * 1024)).toFixed(2))
}
/**
 * Wrapper function for XLSX.read() with integrated 'unlock' functionality
 * Used by multi load component to load the file while user chooses the datasets
 * to be updated
 *
 * When the first read fails with a password related error, the user is
 * prompted for a password until the read succeeds, the prompt returns no
 * password, or the read fails with an error unrelated to the password.
 *
 * @param fileReaderResponse response from the file reader
 * @param promptExcelPassword password callback
 * @param password password provided by the user
 * @returns WorkBook. Rejects with 'Error reading the file' when the first
 * read fails for a reason unrelated to the password, and with
 * 'Failed to parse a workbook' when unlocking ends without a workbook.
 */
public async xslxStartReading(
  fileReaderResponse: any,
  promptExcelPassword: (options?: OpenOptions) => Promise<string | undefined>,
  password?: string
): Promise<XLSX.WorkBook> {
  /* read workbook */
  const bstr = this.toBstr(fileReaderResponse.target.result)

  const xlsxOptions: XLSX.ParsingOptions = {
    type: 'binary',
    cellDates: false,
    cellFormula: true,
    cellStyles: true,
    cellNF: false,
    cellText: false,
    password: password
  }

  // The read may fail with an Error, a plain string or something without a
  // message at all, so the message is extracted defensively
  const isPasswordError = (err: unknown): boolean => {
    const message =
      typeof err === 'string'
        ? err
        : (err as { message?: unknown } | null | undefined)?.message

    return (
      typeof message === 'string' &&
      message.toLowerCase().includes('password')
    )
  }

  let wb: XLSX.WorkBook | undefined = undefined

  try {
    wb = await this.xlsxRead(bstr, {
      ...xlsxOptions
    })
  } catch (err: unknown) {
    if (!isPasswordError(err)) {
      return Promise.reject('Error reading the file')
    }

    // File is locked, keep asking until it is unlocked or the user gives up
    let passwordError = false

    while (true) {
      const enteredPassword = await promptExcelPassword({
        error: passwordError
      })

      // No password returned from the prompt, stop unlocking
      if (!enteredPassword) break

      try {
        wb = await this.xlsxRead(bstr, {
          ...xlsxOptions,
          password: enteredPassword
        })

        break
      } catch (retryErr: unknown) {
        // Any failure other than a wrong password ends the unlocking
        if (!isPasswordError(retryErr)) break

        passwordError = true
      }
    }
  }

  if (!wb) return Promise.reject('Failed to parse a workbook')

  return wb
}
/**
* XLSX Read wrapper which uses Web Worker to read the file and not block
* the UI while reading. It will allow reading bigger files.
@ -526,7 +589,7 @@ export class SpreadsheetUtil {
const ws: XLSX.WorkSheet = wb.Sheets[sheetName]
// Find the first header
Object.keys(ws).forEach(wsKey => {
Object.keys(ws).forEach((wsKey) => {
const cellValue = ws[wsKey].v
// If the cell does not have `v` property we ignore it, those are metadata properties
@ -547,15 +610,18 @@ export class SpreadsheetUtil {
})
// If _____delete__this__record_____ is not found in the file, remove it from the array
if (csvArrayHeadersMap['_____delete__this__record_____'] === -1) delete csvArrayHeadersMap['_____delete__this__record_____']
if (csvArrayHeadersMap['_____delete__this__record_____'] === -1)
delete csvArrayHeadersMap['_____delete__this__record_____']
// Parse missing headers, if any, abort the search and jump to next sheet
missingHeaders = Object.keys(csvArrayHeadersMap).filter(header => csvArrayHeadersMap[header] === -1)
missingHeaders = Object.keys(csvArrayHeadersMap).filter(
(header) => csvArrayHeadersMap[header] === -1
)
if (missingHeaders.length > 0) {
missing.push({
sheetName: sheetName,
missingHeaders: missingHeaders.map(header => header.toUpperCase())
missingHeaders: missingHeaders.map((header) => header.toUpperCase())
})
continue
@ -576,13 +642,16 @@ export class SpreadsheetUtil {
return bIsPk - aIsPk
})
foundHeadersSorted.forEach(header => {
foundHeadersSorted.forEach((header) => {
const headerAddress = csvArrayHeadersMap[header]
const headerAddressLetterRegex = headerAddress.match(/\D+/)
const headerAddressNumberRegex = headerAddress.match(/\d+/)
const headerAddressLetter = (headerAddressLetterRegex ? headerAddressLetterRegex[0] : -1) || -1
const headerAddressNumber = parseInt((headerAddressNumberRegex ? headerAddressNumberRegex[0] : -1) || -1)
const headerAddressLetter =
(headerAddressLetterRegex ? headerAddressLetterRegex[0] : -1) || -1
const headerAddressNumber = parseInt(
(headerAddressNumberRegex ? headerAddressNumberRegex[0] : -1) || -1
)
const firstDataRow = headerAddressNumber + 1
@ -604,7 +673,9 @@ export class SpreadsheetUtil {
// Push to array of objects
if (!json[jsonRow]) json.push({})
if (cell) json[jsonRow][header] = typeof cell.v === 'string' ? cell.v.trim() : cell.v
if (cell)
json[jsonRow][header] =
typeof cell.v === 'string' ? cell.v.trim() : cell.v
// Push to array of arrays, but with all cell meta info
if (!arrayData[jsonRow]) arrayData.push([])
@ -636,7 +707,10 @@ export class SpreadsheetUtil {
if (ws[address].w) {
json[jsonRow][header] = ws[address].w
} else {
json[jsonRow][header] = typeof ws[address].v === 'string' ? ws[address].v.trim() : ws[address].v
json[jsonRow][header] =
typeof ws[address].v === 'string'
? ws[address].v.trim()
: ws[address].v
}
}
@ -664,7 +738,7 @@ export class SpreadsheetUtil {
json.forEach((row: any, rowIndex: number) => {
let missingPk = false
parseParams.headerPks.forEach(pkHeader => {
parseParams.headerPks.forEach((pkHeader) => {
if (row[pkHeader.toLowerCase()] === undefined) missingPk = true
})
@ -682,10 +756,12 @@ export class SpreadsheetUtil {
arrayData.splice(firstRowIndexMissingPk, Infinity)
} else {
// Fallback: Remove only rows with missing PK
rowsWithMissingPk.sort((a,b) => b - a).forEach(index => {
json.splice(index, 1)
arrayData.splice(index, 1)
})
rowsWithMissingPk
.sort((a, b) => b - a)
.forEach((index) => {
json.splice(index, 1)
arrayData.splice(index, 1)
})
}
if (!arrayData.length) {