fix(multi load): read xlsx file ahead of time, while the user chooses datasets
Some checks failed
Build / Build-and-ng-test (pull_request) Failing after 44s
This commit is contained in:
parent
bbb725c64c
commit
6547461637
@ -18,6 +18,7 @@ import {
ClarityIcons,
exclamationTriangleIcon,
moonIcon,
processOnVmIcon,
sunIcon,
tableIcon,
trashIcon
@ -28,7 +29,8 @@ ClarityIcons.addIcons(
sunIcon,
exclamationTriangleIcon,
tableIcon,
trashIcon
trashIcon,
processOnVmIcon
)

@Component({
@ -2,11 +2,17 @@ import { DcValidator } from '../shared/dc-validator/dc-validator'
import { FileUploadEncoding } from './FileUploadEncoding'
import { FileUploader } from './FileUploader.class'
import { ExcelRule } from './TableData'
import XLSX from 'xlsx'

export interface ParseParams {
file: File
password?: string
dcValidator: DcValidator
/**
* If a workbook is provided, the parse function will not run XLSX.read();
* it will use this property instead, so the client must read the file beforehand
*/
workbook?: XLSX.WorkBook,
/**
* The parse function will manipulate and return the uploader array, which can be provided with files already in the queue
* Otherwise a new, empty instance will be created.
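
For illustration, a minimal sketch of how a caller can use the new optional workbook property (not part of this commit; preloadWorkbook is a hypothetical helper, whereas the real app routes the read through SpreadsheetService.xlsxReadFile and a FileReader):

import XLSX from 'xlsx'

// Pre-read the spreadsheet once, while the user is still choosing datasets.
async function preloadWorkbook(file: File): Promise<XLSX.WorkBook> {
  const buffer = await file.arrayBuffer()
  return XLSX.read(buffer, { type: 'array' })
}

// Each per-dataset parse can then reuse the same workbook instead of re-reading the file:
// spreadSheetUtil.parseSpreadsheetFile({ ...params, file, workbook }, promptExcelPassword)
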
@ -198,14 +198,28 @@
*ngIf="!activeParsedDataset"
class="no-table-selected pointer-events-none"
>
<clr-icon
shape="warning-standard"
size="40"
class="is-info icon-dc-fill"
></clr-icon>
<p class="text-center color-gray mt-10" cds-text="section">
Please select a dataset on the left to review the data
</p>
<ng-container *ngIf="fileLoadingState !== FileLoadingState.parsed">
<clr-icon
shape="process-on-vm"
size="40"
class="is-info icon-dc-fill"
></clr-icon>

<p class="text-center color-gray mt-10" cds-text="section">
{{ fileLoadingState }}...
</p>
</ng-container>

<ng-container *ngIf="fileLoadingState === FileLoadingState.parsed">
<clr-icon
shape="warning-standard"
size="40"
class="is-info icon-dc-fill"
></clr-icon>
<p class="text-center color-gray mt-10" cds-text="section">
Please select a dataset on the left to review the data
</p>
</ng-container>
</div>

<ng-container *ngIf="activeParsedDataset">
@ -30,6 +30,17 @@ import { UploadFile } from '@sasjs/adapter'
import { UploadFileResponse } from '../models/UploadFile'
import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse'
import { ParseResult } from '../models/ParseResult.interface'
import XLSX from 'xlsx'

enum FileLoadingState {
reading = 'Reading the file',
parsing = 'Searching for the data in the file',
parsed = 'Searching for the data finished',
/**
* Default value
*/
notSelected = 'File not selected'
}

@Component({
selector: 'app-multi-dataset',
@ -43,6 +54,11 @@ export class MultiDatasetComponent implements OnInit {
public licenceState = this.licenceService.licenceState
public Infinity = Infinity

public workbookInterval: any
public fileLoadingState: FileLoadingState = FileLoadingState.notSelected

public FileLoadingState = FileLoadingState

public hotTableLicenseKey: string | undefined = undefined
public hotTableMaxRows =
this.licenceState.value.viewer_rows_allowed || Infinity
@ -163,7 +179,7 @@ export class MultiDatasetComponent implements OnInit {
}
}

onFileChange(event: any) {
async onFileChange(event: any) {
const files = event?.target?.files || []

if (files.length < 1) {
@ -200,10 +216,17 @@
// For EXCEL if multiple files, we only take one (the first one)
this.selectedFile = event.target.files[0]

if (this.selectedFile)
this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(
this.selectedFile.size
)
if (this.selectedFile) {
this.fileLoadingState = FileLoadingState.reading

this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(this.selectedFile.size)

// Read the excel file to be ready
this.spreadsheetService.xlsxReadFile(this.selectedFile!).then(wb => {
this.fileLoadingState = FileLoadingState.parsing
this.selectedFile!.workbook = wb
})
}

this.initUserInputHot()
this.onAutoDetectColumns()
@ -291,53 +314,56 @@
})
})

for (let parsedDataset of this.parsedDatasets) {
this.spreadsheetService
.parseExcelFile({
file: this.selectedFile!,
password: this.selectedFile!.password || undefined,
dcValidator: parsedDataset.datasetInfo.dcValidator!,
headerPks: parsedDataset.datasetInfo.headerPks,
headerArray: parsedDataset.datasetInfo.headerArray,
headerShow: [],
timeHeaders: parsedDataset.datasetInfo.timeHeaders,
dateHeaders: parsedDataset.datasetInfo.dateHeaders,
dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
xlRules: parsedDataset.datasetInfo.xlRules
})
.then((parseResult: ParseResult | undefined) => {
console.log('parseResult', parseResult)
this.workbookLoaded().then(workbook => {
for (let parsedDataset of this.parsedDatasets) {
this.spreadsheetService
.parseExcelFile({
file: this.selectedFile!,
workbook: workbook,
password: this.selectedFile!.password || undefined,
dcValidator: parsedDataset.datasetInfo.dcValidator!,
headerPks: parsedDataset.datasetInfo.headerPks,
headerArray: parsedDataset.datasetInfo.headerArray,
headerShow: [],
timeHeaders: parsedDataset.datasetInfo.timeHeaders,
dateHeaders: parsedDataset.datasetInfo.dateHeaders,
dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
xlRules: parsedDataset.datasetInfo.xlRules
})
.then((parseResult: ParseResult | undefined) => {
this.fileLoadingState = FileLoadingState.parsed

if (parseResult && parseResult.data) {
let datasource: any[] = []
if (parseResult && parseResult.data) {
let datasource: any[] = []

parseResult.data.map((item) => {
let itemObject: any = {}
parseResult.data.map((item) => {
let itemObject: any = {}

parseResult.headerShow!.map((header: any, index: number) => {
itemObject[header] = item[index]
parseResult.headerShow!.map((header: any, index: number) => {
itemObject[header] = item[index]
})

// If Delete? column is not set in the file, we set it to NO
if (!itemObject['_____DELETE__THIS__RECORD_____'])
itemObject['_____DELETE__THIS__RECORD_____'] = 'No'

datasource.push(itemObject)
})

// If Delete? column is not set in the file, we set it to NO
if (!itemObject['_____DELETE__THIS__RECORD_____'])
itemObject['_____DELETE__THIS__RECORD_____'] = 'No'
parsedDataset.datasource = datasource
parsedDataset.parseResult = parseResult
parsedDataset.parsingTable = false
}
})
.catch((error: string) => {
console.warn('Parsing excel file error.', error)

datasource.push(itemObject)
})

parsedDataset.datasource = datasource
parsedDataset.parseResult = parseResult
parsedDataset.datasource = []
parsedDataset.includeInSubmission = false
parsedDataset.parsingTable = false
}
})
.catch((error: string) => {
console.warn('Parsing excel file error.', error)

parsedDataset.datasource = []
parsedDataset.includeInSubmission = false
parsedDataset.parsingTable = false
})
}
})
}
})
})
}

@ -826,6 +852,23 @@ export class MultiDatasetComponent implements OnInit {
if (newSubmittedDataset) newSubmittedDataset.active = true
}

/**
*
* @returns Promise that resolves once the workbook is loaded, since XLSX.read() runs in the background
*/
private workbookLoaded(): Promise<XLSX.WorkBook> {
return new Promise((resolve, reject) => {
if (!this.selectedFile) reject('No file selected')

this.workbookInterval = setInterval(() => {
if (this.selectedFile!.workbook) {
clearInterval(this.workbookInterval)
resolve(this.selectedFile!.workbook)
}
}, 500)
})
}

private parseDatasetFromCsvName(fileName: string) {
const fileNameArr = fileName.split('.')
fileNameArr.pop()
@ -1044,4 +1087,5 @@ export interface SubmittedCsvDatasetResult {
export interface SelectedFile extends File {
sizeMB?: number
password?: string
workbook?: XLSX.WorkBook
}
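
Taken together, the hunks above implement a simple hand-off: onFileChange starts the read as soon as a file is chosen and caches the result on the selected file, and the parse path waits on workbookLoaded() before calling parseExcelFile with the cached workbook. A condensed sketch of that hand-off (illustrative only; waitForWorkbook mirrors workbookLoaded and handOff stands in for the component methods):

function waitForWorkbook(file: SelectedFile): Promise<XLSX.WorkBook> {
  // Same idea as workbookLoaded(): poll until the background read has populated file.workbook.
  return new Promise((resolve) => {
    const interval = setInterval(() => {
      if (file.workbook) {
        clearInterval(interval)
        resolve(file.workbook)
      }
    }, 500)
  })
}

async function handOff(file: SelectedFile, svc: SpreadsheetService): Promise<XLSX.WorkBook> {
  // onFileChange: start reading straight away, while the user picks datasets.
  svc.xlsxReadFile(file).then((wb) => (file.workbook = wb))

  // Parse path: wait only until the pre-read completes, then reuse the workbook
  // (it is passed on via ParseParams.workbook).
  return waitForWorkbook(file)
}
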
@ -30,7 +30,7 @@ export class SpreadsheetService {
licenceState: this.licenceState
})

return spreadSheetUtil.parseExcelFile(
return spreadSheetUtil.parseSpreadsheetFile(
parseParams,
this.promptExcelPassword,
onParseStateChange,
@ -38,6 +38,37 @@
)
}

/**
* Reads the excel file using the XLSX.read() function
* If possible, the function will use a web worker to read it in a background thread;
* otherwise a fallback method is used
*
* @param file selected in an <input>
* @returns WorkBook
*/
public xlsxReadFile(file: any): Promise<XLSX.WorkBook> {
return new Promise((resolve, reject) => {
const spreadSheetUtil = new SpreadsheetUtil({
licenceState: this.licenceState
})

let reader: FileReader = new FileReader()

reader.onload = (fileReaderResponse: any) => {
spreadSheetUtil.xslxStartReading(
fileReaderResponse,
this.promptExcelPassword
).then(response => {
resolve(response)
}).catch(err => {
reject(err)
})
}

reader.readAsArrayBuffer(file)
})
}

/**
* Read the file minimally just to get the sheet names, not reading full file
* to help boost the performance
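
A short usage sketch for the new service method (illustrative; assumes an injected SpreadsheetService instance, as used by onFileChange in the component above):

async function onSpreadsheetChosen(
  file: File,
  spreadsheetService: SpreadsheetService
): Promise<void> {
  // Resolves with the parsed workbook; if the file is password protected,
  // the password prompt is handled internally via promptExcelPassword.
  const workbook: XLSX.WorkBook = await spreadsheetService.xlsxReadFile(file)
  console.log('Sheets found:', workbook.SheetNames)
}
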
@ -47,8 +47,13 @@ export class SpreadsheetUtil {
}

/**
* Parses attached file and searches fo the matching data
* Parses attached file and searches for the matching data
*
* If CSV is provided no searching of the data will be executed, but csv file
* returned back in an FileUploader array
*
* @param promptExcelPassword used to trigger the modal for password input
* when provided file is locked
* @param parseParams params required for parsing the file
* @param onParseStateChange callback used to inform about parsing state
* so the user of the function can update the UI with latest info
@ -56,364 +61,345 @@ export class SpreadsheetUtil {
*
* @returns parsed list of files to upload and JSON data ready for HOT usage
*/
public parseExcelFile(
public parseSpreadsheetFile(
parseParams: ParseParams,
promptExcelPassword: (options?: OpenOptions) => Promise<string | undefined>,
onParseStateChange?: (uploadState: string) => void,
onTableFoundEvent?: (info: string) => void
): Promise<ParseResult | undefined> {
return new Promise((resolve, reject) => {
let data: any[] = []
const uploader: FileUploader = parseParams.uploader || new FileUploader()

const file: File = parseParams.file
const filename = file.name

if (!parseParams.encoding) parseParams.encoding = 'UTF-8'

if (onParseStateChange)
onParseStateChange(`Loading ${filename} into the browser`)

let foundData = {
sheet: ''
}

let fileType = filename.slice(
filename.lastIndexOf('.') + 1,
filename.lastIndexOf('.') + 4
)

if (fileType.toLowerCase() === 'xls') {
let reader: FileReader = new FileReader()

const self = this
reader.onload = async (theFile: any) => {
/* read workbook */
const bstr = this.toBstr(theFile.target.result)
let wb: XLSX.WorkBook | undefined = undefined
let fileUnlocking: boolean = false

const xlsxOptions: XLSX.ParsingOptions = {
type: 'binary',
cellDates: false,
cellFormula: true,
cellStyles: true,
cellNF: false,
cellText: false,
password: parseParams.password
}

try {
wb = await this.xlsxRead(bstr, {
...xlsxOptions
})
} catch (err: any) {
if (err.message.toLowerCase().includes('password')) {
fileUnlocking = true

let passwordError = false

while (fileUnlocking) {
const password = await promptExcelPassword({
error: passwordError
})

if (password) {
try {
wb = await this.xlsxRead(bstr, {
...xlsxOptions,
password: password
})

fileUnlocking = false
passwordError = false
} catch (err: any) {
passwordError = true

if (!err.message.toLowerCase().includes('password')) {
fileUnlocking = false
}
}
} else {
fileUnlocking = false
}
}
} else {
return reject('Error reading the file')
}
}

if (!wb) {
return reject('No workbook found.')
}

/* save data */
let isComplete: boolean = false
let missingHeaders: MissingHeaders[] = []

const csvArrayHeaders: string[] = [
'_____DELETE__THIS__RECORD_____',
...parseParams.headerArray
]

let csvArrayHeadersLower = csvArrayHeaders.map((x) => x.toLowerCase())
let csvArrayHeadersMap = csvArrayHeadersLower.reduce(
(map: any, obj: string) => {
map[obj] = -1
return map
},
{}
)

const searchResult = this.searchDataInExcel(
wb,
parseParams
)

let csvArrayData: any[] = []

if (searchResult.found) {
isComplete = true
csvArrayData = searchResult.found.arrayData

if (!searchResult.found.headers.includes('_____delete__this__record_____')) {
csvArrayData = csvArrayData.map((row: any[]) => {
// Add empty val on start of the column to compensate for _____delete__this__record_____
// when not found in the file
row.unshift({ v: '' })

return row
})
}

if (onTableFoundEvent)
onTableFoundEvent(
`Sheet: ${searchResult.found.sheetName}\nRange: ${searchResult.found.startAddress}:${searchResult.found.endAddress}`
)
} else {
missingHeaders = searchResult.missing || []
}

if (missingHeaders.length > 0) {
let abortMsg = missingHeaders.map(mh => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
}).join('\n\n')

uploader.queue.pop()
return reject(abortMsg)
}

// If first row is empty, that means no data has been found
if (csvArrayData.length === 0 || csvArrayData[0].length === 0) {
let abortMsg = 'No relevant data found in File !'

uploader.queue.pop()
return reject(abortMsg)
}

if (
parseParams.dateTimeHeaders.length > 0 ||
parseParams.dateHeaders.length > 0 ||
parseParams.timeHeaders.length > 0
) {
csvArrayData = this.updateDateTimeCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}

if (parseParams.xlRules.length > 0) {
csvArrayData = this.updateXLRuleCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}

if (!isComplete) {
let abortMsg = ''

if (missingHeaders.length === 0) {
abortMsg = 'No relevant data found in File !'
} else {
abortMsg = missingHeaders.map(mh => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
}).join('\n\n')
}

// abort message is fired, return undefined
uploader.queue.pop()
return reject(abortMsg)
} else {
parseParams.headerShow = csvArrayHeaders

// Remove the metadata from the cells, leave only values
csvArrayData = csvArrayData.map((row: any) =>
row.map((col: any) => {
if (col.t === 'n') {
return col.v
} else {
if (col.w) return col.v

return typeof col.v === 'string' ? col.v.trim() : col.v
}
})
)

csvArrayData = csvArrayData.map((row: any) => {
return row.map((col: any, index: number) => {
if (!col && col !== 0) col = ''

/**
* Keeping this for the reference
* Code below used to convert JSON to CSV
* now the XLSX is converting to CSV
*/
// if (isNaN(col)) {
// // Match and replace the double quotes, ignore the first and last char
// // in case they are double quotes already
// col = col.replace(/(?<!^)"(?!$)/g, '""')

// if (col.search(/,/g) > -1 ||
// col.search(/\r|\n/g) > -1
// ) {
// // Missing quotes at the end
// if (col.search(/"$/g) < 0) {
// col = col + '"' // So we add them
// }

// // Missing quotes at the start
// if (col.search(/^"/g) < 0) {
// col = '"' + col // So we add them
// }
// }
// }

const colName = parseParams.headerShow[index]
const colRule = parseParams.dcValidator?.getRule(colName)

if (colRule?.type === 'numeric') {
if (isSpecialMissing(col) && !col.includes('.'))
col = '.' + col
}

return col
})
})

data = csvArrayData

// Apply licence rows limitation if exists, it is only affecting data
// which will be send to SAS
const strippedCsvArrayData = csvArrayData.slice(
0,
this.licenceState.value.submit_rows_limit
)
// To submit to sas service, we need clean version of CSV of file
// attached. XLSX will do the parsing and heavy lifting
// First we create worksheet of json (data we extracted)
let ws = XLSX.utils.json_to_sheet(strippedCsvArrayData, {
skipHeader: true
})

// create CSV to be uploaded from worksheet
let csvContentClean = XLSX.utils.sheet_to_csv(ws)
// Prepend headers
csvContentClean = csvArrayHeaders.join(',') + '\n' + csvContentClean

// Blob from which CSV file will be created depending of the selected
// encoding
let blob: Blob

if (parseParams.encoding === 'WLATIN1') {
// WLATIN1
let encoded = iconv.decode(
Buffer.from(csvContentClean),
'CP-1252'
)
blob = new Blob([encoded], { type: 'application/csv' })
} else {
// UTF-8
blob = new Blob([csvContentClean], { type: 'application/csv' })
}

let newCSVFile: File = blobToFile(blob, filename + '.csv')
uploader.addToQueue([newCSVFile])
}

if (data.length === 0) {
return reject(
`Table in the file is empty. Data found on sheet: ${foundData.sheet}`
)
}

if (!searchResult.found) {
return reject(
`No relevant data found. 'found' object is empty, unexpected error occurred.`
)
}

const rangeStartAddress = searchResult.found.startAddress || ''
const rangeEndAddress = searchResult.found.endAddress || ''

return resolve({
uploader,
data: csvArrayData,
rangeSheetRes: {
found: !!searchResult.found,
sheetName: searchResult.found.sheetName,
rangeStartAddress: rangeStartAddress,
rangeEndAddress: rangeEndAddress,
rangeAddress: `${rangeStartAddress}:${rangeEndAddress}`,
missingHeaders: missingHeaders,
},
headerShow: parseParams.headerShow
// If workbook is present it means file is already read and we don't need
// to read it again, otherwise we will do a XLSX.read()
if (parseParams.workbook) {
this.parseExcelFile(
parseParams,
parseParams.workbook,
uploader,
onTableFoundEvent
)
.then((response) => {
resolve(response)
})
}
.catch((err) => {
reject(err)
})
} else {
// File is not read so we must do a XLSX.read()
let data: any[] = []

reader.readAsArrayBuffer(file)
} else if (fileType.toLowerCase() === 'csv') {
if (this.licenceState.value.submit_rows_limit !== Infinity) {
uploader.queue.pop()
return reject(
'Excel files only. To unlock CSV uploads, please contact support@datacontroller.io'
)
}
const file: File = parseParams.file

if (!parseParams.encoding) parseParams.encoding = 'UTF-8'

if (onParseStateChange)
onParseStateChange(`Loading ${file.name} into the browser`)

let fileType = file.name.slice(
file.name.lastIndexOf('.') + 1,
file.name.lastIndexOf('.') + 4
)

if (fileType.toLowerCase() === 'xls') {
let reader: FileReader = new FileReader()

if (parseParams.encoding === 'WLATIN1') {
let reader = new FileReader()
const self = this
// Closure to capture the file information.
reader.onload = (theFile: any) => {
let encoded = iconv.decode(
Buffer.from(theFile.target.result),
'CP-1252'
)
let blob = new Blob([encoded], { type: fileType })
let encodedFile: File = blobToFile(blob, filename)
uploader.queue.pop()
uploader.addToQueue([encodedFile])

return resolve({
uploader
})
reader.onload = async (fileReaderResponse: any) => {
const wb = await this.xslxStartReading(
fileReaderResponse,
promptExcelPassword,
parseParams.password
)

if (!wb) {
return reject('No workbook found.')
}

this.parseExcelFile(parseParams, wb, uploader, onTableFoundEvent)
.then((response) => {
resolve(response)
})
.catch((err) => {
reject(err)
})
}

reader.readAsArrayBuffer(file)
} else if (fileType.toLowerCase() === 'csv') {
return this.parseCsvFile(parseParams, uploader, fileType)
} else {
let abortMsg =
'Invalid file type "<b>' +
parseParams.file.name +
'</b>". Please upload csv or excel file.'

uploader.queue.pop()
return reject(abortMsg)
}
}
})
}

private parseExcelFile(
parseParams: ParseParams,
workbook: XLSX.WorkBook,
uploader: FileUploader,
onTableFoundEvent?: (info: string) => void
): Promise<ParseResult | undefined> {
return new Promise((resolve, reject) => {
/* save data */
let isComplete: boolean = false
let missingHeaders: MissingHeaders[] = []

const csvArrayHeaders: string[] = [
'_____DELETE__THIS__RECORD_____',
...parseParams.headerArray
]

const searchResult = this.searchDataInExcel(workbook, parseParams)

let csvArrayData: any[] = []

if (searchResult.found) {
isComplete = true
csvArrayData = searchResult.found.arrayData

if (
!searchResult.found.headers.includes('_____delete__this__record_____')
) {
csvArrayData = csvArrayData.map((row: any[]) => {
// Add empty val on start of the column to compensate for _____delete__this__record_____
// when not found in the file
row.unshift({ v: '' })

return row
})
}

if (onTableFoundEvent)
onTableFoundEvent(
`Sheet: ${searchResult.found.sheetName}\nRange: ${searchResult.found.startAddress}:${searchResult.found.endAddress}`
)
} else {
missingHeaders = searchResult.missing || []
}

if (missingHeaders.length > 0) {
let abortMsg = missingHeaders
.map((mh) => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
})
.join('\n\n')

uploader.queue.pop()
return reject(abortMsg)
}

// If first row is empty, that means no data has been found
if (csvArrayData.length === 0 || csvArrayData[0].length === 0) {
let abortMsg = 'No relevant data found in File !'

uploader.queue.pop()
return reject(abortMsg)
}

if (
parseParams.dateTimeHeaders.length > 0 ||
parseParams.dateHeaders.length > 0 ||
parseParams.timeHeaders.length > 0
) {
csvArrayData = this.updateDateTimeCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}

if (parseParams.xlRules.length > 0) {
csvArrayData = this.updateXLRuleCols(
csvArrayHeaders,
csvArrayData,
parseParams
)
}

if (!isComplete) {
let abortMsg = ''

if (missingHeaders.length === 0) {
abortMsg = 'No relevant data found in File !'
} else {
abortMsg = missingHeaders
.map((mh) => {
return `Sheet: ${mh.sheetName}.\nMissing columns: ${mh.missingHeaders.join(',')}`
})
.join('\n\n')
}

// abort message is fired, return undefined
uploader.queue.pop()
return reject(abortMsg)
} else {
parseParams.headerShow = csvArrayHeaders

// Remove the metadata from the cells, leave only values
csvArrayData = csvArrayData.map((row: any) =>
row.map((col: any) => {
if (col.t === 'n') {
return col.v
} else {
if (col.w) return col.v

return typeof col.v === 'string' ? col.v.trim() : col.v
}
})
)

csvArrayData = csvArrayData.map((row: any) => {
return row.map((col: any, index: number) => {
if (!col && col !== 0) col = ''

/**
* Keeping this for the reference
* Code below used to convert JSON to CSV
* now the XLSX is converting to CSV
*/
// if (isNaN(col)) {
// // Match and replace the double quotes, ignore the first and last char
// // in case they are double quotes already
// col = col.replace(/(?<!^)"(?!$)/g, '""')

// if (col.search(/,/g) > -1 ||
// col.search(/\r|\n/g) > -1
// ) {
// // Missing quotes at the end
// if (col.search(/"$/g) < 0) {
// col = col + '"' // So we add them
// }

// // Missing quotes at the start
// if (col.search(/^"/g) < 0) {
// col = '"' + col // So we add them
// }
// }
// }

const colName = parseParams.headerShow[index]
const colRule = parseParams.dcValidator?.getRule(colName)

if (colRule?.type === 'numeric') {
if (isSpecialMissing(col) && !col.includes('.')) col = '.' + col
}

return col
})
})

// Apply licence rows limitation if exists, it is only affecting data
// which will be send to SAS
const strippedCsvArrayData = csvArrayData.slice(
0,
this.licenceState.value.submit_rows_limit
)
// To submit to sas service, we need clean version of CSV of file
// attached. XLSX will do the parsing and heavy lifting
// First we create worksheet of json (data we extracted)
let ws = XLSX.utils.json_to_sheet(strippedCsvArrayData, {
skipHeader: true
})

// create CSV to be uploaded from worksheet
let csvContentClean = XLSX.utils.sheet_to_csv(ws)
// Prepend headers
csvContentClean = csvArrayHeaders.join(',') + '\n' + csvContentClean

// Blob from which CSV file will be created depending of the selected
// encoding
let blob: Blob

if (parseParams.encoding === 'WLATIN1') {
// WLATIN1
let encoded = iconv.decode(Buffer.from(csvContentClean), 'CP-1252')
blob = new Blob([encoded], { type: 'application/csv' })
} else {
// UTF-8
blob = new Blob([csvContentClean], { type: 'application/csv' })
}

let newCSVFile: File = blobToFile(blob, parseParams.file.name + '.csv')
uploader.addToQueue([newCSVFile])
}

if (csvArrayData.length === 0) {
return reject(
`Table in the file is empty. Data found on sheet: ${searchResult.found?.sheetName || ''}`
)
}

if (!searchResult.found) {
return reject(
`No relevant data found. 'found' object is empty, unexpected error occurred.`
)
}

const rangeStartAddress = searchResult.found.startAddress || ''
const rangeEndAddress = searchResult.found.endAddress || ''

return resolve({
uploader,
data: csvArrayData,
rangeSheetRes: {
found: !!searchResult.found,
sheetName: searchResult.found.sheetName,
rangeStartAddress: rangeStartAddress,
rangeEndAddress: rangeEndAddress,
rangeAddress: `${rangeStartAddress}:${rangeEndAddress}`,
missingHeaders: missingHeaders
},
headerShow: parseParams.headerShow
})
})
}

private parseCsvFile(
parseParams: ParseParams,
uploader: FileUploader,
fileType: string
) {
return new Promise((resolve, reject) => {
if (this.licenceState.value.submit_rows_limit !== Infinity) {
uploader.queue.pop()
return reject(
'Excel files only. To unlock CSV uploads, please contact support@datacontroller.io'
)
}

if (parseParams.encoding === 'WLATIN1') {
let reader = new FileReader()
const self = this
// Closure to capture the file information.
reader.onload = (theFile: any) => {
let encoded = iconv.decode(
Buffer.from(theFile.target.result),
'CP-1252'
)
let blob = new Blob([encoded], { type: fileType })
let encodedFile: File = blobToFile(blob, parseParams.file.name)
uploader.queue.pop()
uploader.addToQueue([encodedFile])

return resolve({
uploader
})
}
} else {
let abortMsg =
'Invalid file type "<b>' +
filename +
'</b>". Please upload csv or excel file.'

uploader.queue.pop()
return reject(abortMsg)
reader.readAsArrayBuffer(parseParams.file)
} else {
return resolve({
uploader
})
}
})
}
@ -422,6 +408,83 @@ export class SpreadsheetUtil {
return parseFloat((size / (1024 * 1024)).toFixed(2))
}

/**
* Wrapper function for XLSX.read() with integrated 'unlock' functionality
* Used by multi load component to load the file while user chooses the datasets
* to be updated
*
* @param fileReaderResponse response from the file reader
* @param promptExcelPassword password callback
* @param password password provided by the user
* @returns WorkBook
*/
public xslxStartReading(
fileReaderResponse: any,
promptExcelPassword: (options?: OpenOptions) => Promise<string | undefined>,
password?: string
): Promise<XLSX.WorkBook> {
return new Promise(async (resolve, reject) => {
/* read workbook */
const bstr = this.toBstr(fileReaderResponse.target.result)
let wb: XLSX.WorkBook | undefined = undefined
let fileUnlocking: boolean = false

const xlsxOptions: XLSX.ParsingOptions = {
type: 'binary',
cellDates: false,
cellFormula: true,
cellStyles: true,
cellNF: false,
cellText: false,
password: password
}

try {
wb = await this.xlsxRead(bstr, {
...xlsxOptions
})
} catch (err: any) {
if (err.message.toLowerCase().includes('password')) {
fileUnlocking = true

let passwordError = false

while (fileUnlocking) {
const password = await promptExcelPassword({
error: passwordError
})

if (password) {
try {
wb = await this.xlsxRead(bstr, {
...xlsxOptions,
password: password
})

fileUnlocking = false
passwordError = false
} catch (err: any) {
passwordError = true

if (!err.message.toLowerCase().includes('password')) {
fileUnlocking = false
}
}
} else {
fileUnlocking = false
}
}
} else {
return reject('Error reading the file')
}
}

if (!wb) return reject('Failed to parse a workbook')

return resolve(wb)
})
}

/**
* XLSX Read wrapper which uses Web Worker to read the file and not block
* the UI while reading. It will allow reading bigger files.
@ -526,7 +589,7 @@ export class SpreadsheetUtil {
const ws: XLSX.WorkSheet = wb.Sheets[sheetName]

// Find the first header
Object.keys(ws).forEach(wsKey => {
Object.keys(ws).forEach((wsKey) => {
const cellValue = ws[wsKey].v

// If the cell does not have `v` property we ignore it, those are metadata properties
@ -547,15 +610,18 @@ export class SpreadsheetUtil {
})

// If _____delete__this__record_____ is not found in the file, remove it from the array
if (csvArrayHeadersMap['_____delete__this__record_____'] === -1) delete csvArrayHeadersMap['_____delete__this__record_____']
if (csvArrayHeadersMap['_____delete__this__record_____'] === -1)
delete csvArrayHeadersMap['_____delete__this__record_____']

// Parse missing headers, if any, abort the search and jump to next sheet
missingHeaders = Object.keys(csvArrayHeadersMap).filter(header => csvArrayHeadersMap[header] === -1)
missingHeaders = Object.keys(csvArrayHeadersMap).filter(
(header) => csvArrayHeadersMap[header] === -1
)

if (missingHeaders.length > 0) {
missing.push({
sheetName: sheetName,
missingHeaders: missingHeaders.map(header => header.toUpperCase())
missingHeaders: missingHeaders.map((header) => header.toUpperCase())
})

continue
@ -576,13 +642,16 @@ export class SpreadsheetUtil {
return bIsPk - aIsPk
})

foundHeadersSorted.forEach(header => {
foundHeadersSorted.forEach((header) => {
const headerAddress = csvArrayHeadersMap[header]
const headerAddressLetterRegex = headerAddress.match(/\D+/)
const headerAddressNumberRegex = headerAddress.match(/\d+/)

const headerAddressLetter = (headerAddressLetterRegex ? headerAddressLetterRegex[0] : -1) || -1
const headerAddressNumber = parseInt((headerAddressNumberRegex ? headerAddressNumberRegex[0] : -1) || -1)
const headerAddressLetter =
(headerAddressLetterRegex ? headerAddressLetterRegex[0] : -1) || -1
const headerAddressNumber = parseInt(
(headerAddressNumberRegex ? headerAddressNumberRegex[0] : -1) || -1
)

const firstDataRow = headerAddressNumber + 1

@ -604,7 +673,9 @@ export class SpreadsheetUtil {

// Push to array of objects
if (!json[jsonRow]) json.push({})
if (cell) json[jsonRow][header] = typeof cell.v === 'string' ? cell.v.trim() : cell.v
if (cell)
json[jsonRow][header] =
typeof cell.v === 'string' ? cell.v.trim() : cell.v

// Push to array of arrays, but with all cell meta info
if (!arrayData[jsonRow]) arrayData.push([])
@ -636,7 +707,10 @@ export class SpreadsheetUtil {
if (ws[address].w) {
json[jsonRow][header] = ws[address].w
} else {
json[jsonRow][header] = typeof ws[address].v === 'string' ? ws[address].v.trim() : ws[address].v
json[jsonRow][header] =
typeof ws[address].v === 'string'
? ws[address].v.trim()
: ws[address].v
}
}

@ -664,7 +738,7 @@ export class SpreadsheetUtil {
json.forEach((row: any, rowIndex: number) => {
let missingPk = false

parseParams.headerPks.forEach(pkHeader => {
parseParams.headerPks.forEach((pkHeader) => {
if (row[pkHeader.toLowerCase()] === undefined) missingPk = true
})

@ -682,10 +756,12 @@ export class SpreadsheetUtil {
arrayData.splice(firstRowIndexMissingPk, Infinity)
} else {
// Fallback: Remove only rows with missing PK
rowsWithMissingPk.sort((a,b) => b - a).forEach(index => {
json.splice(index, 1)
arrayData.splice(index, 1)
})
rowsWithMissingPk
.sort((a, b) => b - a)
.forEach((index) => {
json.splice(index, 1)
arrayData.splice(index, 1)
})
}

if (!arrayData.length) {
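
For reference, a minimal promptExcelPassword stand-in that is compatible with the unlock loop in xslxStartReading above (a sketch only; the real application opens a modal, and only the { error } option is taken from the diff):

const promptExcelPassword = async (
  options?: { error?: boolean }
): Promise<string | undefined> => {
  const message = options?.error
    ? 'Wrong password, please try again (Cancel to abort):'
    : 'This file is password protected. Please enter the password:'
  // Returning undefined ends the unlock loop in xslxStartReading.
  return window.prompt(message) ?? undefined
}
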