feat: searching data in excel files using new algorithm (massive performance improvement) #123
@ -109,13 +109,8 @@ context('excel tests: ', function () {
|
||||
openTableFromTree(libraryToOpenIncludes, 'mpe_x_test')
|
||||
|
||||
attachExcelFile('duplicate_column_excel.xlsx', () => {
|
||||
cy.get('.abortMsg', { timeout: longerCommandTimeout })
|
||||
.should('exist')
|
||||
.then((elements: any) => {
|
||||
if (elements[0]) {
|
||||
if (elements[0].innerText.toLowerCase().includes('missing')) done()
|
||||
}
|
||||
})
|
||||
submitExcel()
|
||||
rejectExcel(done)
|
||||
})
|
||||
})
|
||||
|
||||
|
@ -18,6 +18,7 @@ import {
|
||||
ClarityIcons,
|
||||
exclamationTriangleIcon,
|
||||
moonIcon,
|
||||
processOnVmIcon,
|
||||
sunIcon,
|
||||
tableIcon,
|
||||
trashIcon
|
||||
@ -28,7 +29,8 @@ ClarityIcons.addIcons(
|
||||
sunIcon,
|
||||
exclamationTriangleIcon,
|
||||
tableIcon,
|
||||
trashIcon
|
||||
trashIcon,
|
||||
processOnVmIcon
|
||||
)
|
||||
|
||||
@Component({
|
||||
|
@ -2,11 +2,17 @@ import { DcValidator } from '../shared/dc-validator/dc-validator'
|
||||
import { FileUploadEncoding } from './FileUploadEncoding'
|
||||
import { FileUploader } from './FileUploader.class'
|
||||
import { ExcelRule } from './TableData'
|
||||
import XLSX from 'xlsx'
|
||||
|
||||
export interface ParseParams {
|
||||
file: File
|
||||
password?: string
|
||||
dcValidator: DcValidator
|
||||
/**
|
||||
* If workbook is provided, parse function will not run a XLSX.read()
|
||||
* it will use this property instead. So the client must do a file read beforehand
|
||||
*/
|
||||
workbook?: XLSX.WorkBook
|
||||
/**
|
||||
* Parse function will manipulate and return the uploader array which can be provided with files already in the queue
|
||||
* Otherwise new empty instance will be created.
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { FileUploader } from './FileUploader.class'
|
||||
import SheetInfo from './SheetInfo'
|
||||
import FoundRangeInfo from './RangeInfo'
|
||||
|
||||
export interface ParseResult {
|
||||
/**
|
||||
@ -10,6 +10,6 @@ export interface ParseResult {
|
||||
* In case of CSV file, won't be returned
|
||||
*/
|
||||
headerShow?: string[]
|
||||
rangeSheetRes?: SheetInfo
|
||||
rangeSheetRes?: FoundRangeInfo
|
||||
uploader: FileUploader
|
||||
}
|
||||
|
13
client/src/app/models/RangeInfo.ts
Normal file
13
client/src/app/models/RangeInfo.ts
Normal file
@ -0,0 +1,13 @@
|
||||
/**
 * Description of a cell range looked up in a workbook sheet.
 * Used as the type of `ParseResult.rangeSheetRes`.
 *
 * NOTE(review): the code that fills these fields is not visible here; the
 * per-field meaning (e.g. whether `rangeAddress` is the combined
 * start:end address) is inferred from the names only — confirm against the
 * search implementation.
 */
export default interface FoundRangeInfo {
|
||||
found: boolean
|
||||
sheetName: string
|
||||
rangeStartAddress: string
|
||||
rangeEndAddress: string
|
||||
rangeAddress: string
|
||||
missingHeaders: MissingHeaders[]
|
||||
}
|
||||
|
||||
/**
 * A sheet name paired with a list of header names.
 * Referenced by `FoundRangeInfo.missingHeaders` and
 * `SearchDataExcelResult.missing`.
 *
 * NOTE(review): presumably the headers that could not be located in the
 * named sheet — confirm against the search implementation.
 */
export interface MissingHeaders {
|
||||
sheetName: string
|
||||
missingHeaders: string[]
|
||||
}
|
13
client/src/app/models/SearchDataExcelResult.interface.ts
Normal file
13
client/src/app/models/SearchDataExcelResult.interface.ts
Normal file
@ -0,0 +1,13 @@
|
||||
import { MissingHeaders } from './RangeInfo'
|
||||
|
||||
/**
 * Result object of the excel data search.
 * Both members are optional.
 *
 * NOTE(review): presumably exactly one of `missing` / `found` is populated
 * (failure vs. success) — confirm against the producer, which is not visible
 * here. `data` and `arrayData` are untyped (`any`); their shape is not
 * established by this declaration.
 */
export interface SearchDataExcelResult {
|
||||
missing?: MissingHeaders[]
|
||||
found?: {
|
||||
data: any
|
||||
arrayData: any[]
|
||||
sheetName: string
|
||||
headers: string[]
|
||||
startAddress?: string
|
||||
endAddress?: string
|
||||
}
|
||||
}
|
@ -198,14 +198,28 @@
|
||||
*ngIf="!activeParsedDataset"
|
||||
class="no-table-selected pointer-events-none"
|
||||
>
|
||||
<clr-icon
|
||||
shape="warning-standard"
|
||||
size="40"
|
||||
class="is-info icon-dc-fill"
|
||||
></clr-icon>
|
||||
<p class="text-center color-gray mt-10" cds-text="section">
|
||||
Please select a dataset on the left to review the data
|
||||
</p>
|
||||
<ng-container *ngIf="fileLoadingState !== FileLoadingState.parsed">
|
||||
<clr-icon
|
||||
shape="process-on-vm"
|
||||
size="40"
|
||||
class="is-info icon-dc-fill"
|
||||
></clr-icon>
|
||||
|
||||
<p class="text-center color-gray mt-10" cds-text="section">
|
||||
{{ fileLoadingState }}...
|
||||
</p>
|
||||
</ng-container>
|
||||
|
||||
<ng-container *ngIf="fileLoadingState === FileLoadingState.parsed">
|
||||
<clr-icon
|
||||
shape="warning-standard"
|
||||
size="40"
|
||||
class="is-info icon-dc-fill"
|
||||
></clr-icon>
|
||||
<p class="text-center color-gray mt-10" cds-text="section">
|
||||
Please select a dataset on the left to review the data
|
||||
</p>
|
||||
</ng-container>
|
||||
</div>
|
||||
|
||||
<ng-container *ngIf="activeParsedDataset">
|
||||
|
@ -30,6 +30,17 @@ import { UploadFile } from '@sasjs/adapter'
|
||||
import { UploadFileResponse } from '../models/UploadFile'
|
||||
import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse'
|
||||
import { ParseResult } from '../models/ParseResult.interface'
|
||||
import XLSX from 'xlsx'
|
||||
|
||||
/**
 * Progress of loading the selected excel file.
 * The string values are user-facing: the template prints the current value
 * followed by `...` while the state is not `parsed`.
 */
enum FileLoadingState {
|
||||
// Set in onFileChange as soon as an excel file has been picked
reading = 'Reading the file',
|
||||
// Set once xlsxReadFile() has resolved with the workbook
parsing = 'Searching for the data in the file',
|
||||
// Set when a parseExcelFile() call has resolved
parsed = 'Searching for the data finished',
|
||||
/**
|
||||
* Default value
|
||||
*/
|
||||
notSelected = 'File not selected'
|
||||
}
|
||||
|
||||
@Component({
|
||||
selector: 'app-multi-dataset',
|
||||
@ -43,6 +54,11 @@ export class MultiDatasetComponent implements OnInit {
|
||||
public licenceState = this.licenceService.licenceState
|
||||
public Infinity = Infinity
|
||||
|
||||
public workbookInterval: any
|
||||
public fileLoadingState: FileLoadingState = FileLoadingState.notSelected
|
||||
|
||||
public FileLoadingState = FileLoadingState
|
||||
|
||||
public hotTableLicenseKey: string | undefined = undefined
|
||||
public hotTableMaxRows =
|
||||
this.licenceState.value.viewer_rows_allowed || Infinity
|
||||
@ -163,7 +179,7 @@ export class MultiDatasetComponent implements OnInit {
|
||||
}
|
||||
}
|
||||
|
||||
onFileChange(event: any) {
|
||||
async onFileChange(event: any) {
|
||||
const files = event?.target?.files || []
|
||||
|
||||
if (files.length < 1) {
|
||||
@ -200,11 +216,20 @@ export class MultiDatasetComponent implements OnInit {
|
||||
// For EXCEL if multiple files, we only take one (the first one)
|
||||
this.selectedFile = event.target.files[0]
|
||||
|
||||
if (this.selectedFile)
|
||||
if (this.selectedFile) {
|
||||
this.fileLoadingState = FileLoadingState.reading
|
||||
|
||||
this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(
|
||||
this.selectedFile.size
|
||||
)
|
||||
|
||||
// Read the excel file to be ready
|
||||
this.spreadsheetService.xlsxReadFile(this.selectedFile!).then((wb) => {
|
||||
this.fileLoadingState = FileLoadingState.parsing
|
||||
this.selectedFile!.workbook = wb
|
||||
})
|
||||
}
|
||||
|
||||
this.initUserInputHot()
|
||||
this.onAutoDetectColumns()
|
||||
} else if (matchedExtension === 'csv') {
|
||||
@ -291,53 +316,56 @@ export class MultiDatasetComponent implements OnInit {
|
||||
})
|
||||
})
|
||||
|
||||
for (let parsedDataset of this.parsedDatasets) {
|
||||
this.spreadsheetService
|
||||
.parseExcelFile({
|
||||
file: this.selectedFile!,
|
||||
password: this.selectedFile!.password || undefined,
|
||||
dcValidator: parsedDataset.datasetInfo.dcValidator!,
|
||||
headerPks: parsedDataset.datasetInfo.headerPks,
|
||||
headerArray: parsedDataset.datasetInfo.headerArray,
|
||||
headerShow: [],
|
||||
timeHeaders: parsedDataset.datasetInfo.timeHeaders,
|
||||
dateHeaders: parsedDataset.datasetInfo.dateHeaders,
|
||||
dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
|
||||
xlRules: parsedDataset.datasetInfo.xlRules
|
||||
})
|
||||
.then((parseResult: ParseResult | undefined) => {
|
||||
console.log('parseResult', parseResult)
|
||||
this.workbookLoaded().then((workbook) => {
|
||||
for (let parsedDataset of this.parsedDatasets) {
|
||||
this.spreadsheetService
|
||||
.parseExcelFile({
|
||||
file: this.selectedFile!,
|
||||
workbook: workbook,
|
||||
password: this.selectedFile!.password || undefined,
|
||||
dcValidator: parsedDataset.datasetInfo.dcValidator!,
|
||||
headerPks: parsedDataset.datasetInfo.headerPks,
|
||||
headerArray: parsedDataset.datasetInfo.headerArray,
|
||||
headerShow: [],
|
||||
timeHeaders: parsedDataset.datasetInfo.timeHeaders,
|
||||
dateHeaders: parsedDataset.datasetInfo.dateHeaders,
|
||||
dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
|
||||
xlRules: parsedDataset.datasetInfo.xlRules
|
||||
})
|
||||
.then((parseResult: ParseResult | undefined) => {
|
||||
this.fileLoadingState = FileLoadingState.parsed
|
||||
|
||||
if (parseResult && parseResult.data) {
|
||||
let datasource: any[] = []
|
||||
if (parseResult && parseResult.data) {
|
||||
let datasource: any[] = []
|
||||
|
||||
parseResult.data.map((item) => {
|
||||
let itemObject: any = {}
|
||||
parseResult.data.map((item) => {
|
||||
let itemObject: any = {}
|
||||
|
||||
parseResult.headerShow!.map((header: any, index: number) => {
|
||||
itemObject[header] = item[index]
|
||||
parseResult.headerShow!.map((header: any, index: number) => {
|
||||
itemObject[header] = item[index]
|
||||
})
|
||||
|
||||
// If Delete? column is not set in the file, we set it to NO
|
||||
if (!itemObject['_____DELETE__THIS__RECORD_____'])
|
||||
itemObject['_____DELETE__THIS__RECORD_____'] = 'No'
|
||||
|
||||
datasource.push(itemObject)
|
||||
})
|
||||
|
||||
// If Delete? column is not set in the file, we set it to NO
|
||||
if (!itemObject['_____DELETE__THIS__RECORD_____'])
|
||||
itemObject['_____DELETE__THIS__RECORD_____'] = 'No'
|
||||
parsedDataset.datasource = datasource
|
||||
parsedDataset.parseResult = parseResult
|
||||
parsedDataset.parsingTable = false
|
||||
}
|
||||
})
|
||||
.catch((error: string) => {
|
||||
console.warn('Parsing excel file error.', error)
|
||||
|
||||
datasource.push(itemObject)
|
||||
})
|
||||
|
||||
parsedDataset.datasource = datasource
|
||||
parsedDataset.parseResult = parseResult
|
||||
parsedDataset.datasource = []
|
||||
parsedDataset.includeInSubmission = false
|
||||
parsedDataset.parsingTable = false
|
||||
}
|
||||
})
|
||||
.catch((error: string) => {
|
||||
console.warn('Parsing excel file error.', error)
|
||||
|
||||
parsedDataset.datasource = []
|
||||
parsedDataset.includeInSubmission = false
|
||||
parsedDataset.parsingTable = false
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@ -826,6 +854,23 @@ export class MultiDatasetComponent implements OnInit {
|
||||
if (newSubmittedDataset) newSubmittedDataset.active = true
|
||||
}
|
||||
|
||||
/**
 * Resolves with the workbook of the currently selected file once the
 * background read started in `onFileChange` has stored it on
 * `selectedFile.workbook`. The property is polled every 500 ms.
 *
 * Rejects with `'No file selected'` when there is no selected file, either
 * at call time or while polling.
 *
 * NOTE(review): if the background read fails, `workbook` is never set and
 * this promise stays pending — consider surfacing the read error here.
 *
 * @returns Promise of the loaded workbook
 */
private workbookLoaded(): Promise<XLSX.WorkBook> {
  return new Promise((resolve, reject) => {
    if (!this.selectedFile) {
      // Stop here: otherwise the poller below would start anyway and
      // dereference the missing file on every tick
      reject('No file selected')
      return
    }

    // Keep a local handle so this poller always clears itself, even when a
    // later call has overwritten `this.workbookInterval`
    const poller = setInterval(() => {
      if (!this.selectedFile) {
        // File was deselected while waiting
        clearInterval(poller)
        reject('No file selected')
        return
      }

      if (this.selectedFile.workbook) {
        clearInterval(poller)
        resolve(this.selectedFile.workbook)
      }
    }, 500)

    // Still exposed on the component, as before
    this.workbookInterval = poller
  })
}
|
||||
|
||||
private parseDatasetFromCsvName(fileName: string) {
|
||||
const fileNameArr = fileName.split('.')
|
||||
fileNameArr.pop()
|
||||
@ -1044,4 +1089,5 @@ export interface SubmittedCsvDatasetResult {
|
||||
/**
 * Browser `File` extended with the extra state the component keeps for the
 * selected excel file.
 */
export interface SelectedFile extends File {
|
||||
// File size in megabytes, computed from `size` via spreadsheetService.bytesToMB()
sizeMB?: number
|
||||
// Passed on to parseExcelFile() as the workbook password when set
password?: string
|
||||
// Workbook produced by xlsxReadFile(); undefined until that read has finished
workbook?: XLSX.WorkBook
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ export class SpreadsheetService {
|
||||
licenceState: this.licenceState
|
||||
})
|
||||
|
||||
return spreadSheetUtil.parseExcelFile(
|
||||
return spreadSheetUtil.parseSpreadsheetFile(
|
||||
parseParams,
|
||||
this.promptExcelPassword,
|
||||
onParseStateChange,
|
||||
@ -38,6 +38,37 @@ export class SpreadsheetService {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Reads the excel file into a workbook.
 *
 * The file is loaded as an ArrayBuffer with a FileReader and the load event
 * is handed to `SpreadsheetUtil.xslxStartReading` together with the password
 * prompt callback. Per the original author's note, that helper uses a web
 * worker to run XLSX.read() in a background thread when possible and falls
 * back to another method otherwise.
 *
 * The promise rejects when the helper rejects, and also when the FileReader
 * itself fails or is aborted.
 *
 * @param file selected in an <input>
 * @returns WorkBook
 */
public xlsxReadFile(file: any): Promise<XLSX.WorkBook> {
  return new Promise((resolve, reject) => {
    const spreadSheetUtil = new SpreadsheetUtil({
      licenceState: this.licenceState
    })

    const reader = new FileReader()

    reader.onload = (fileReaderResponse: any) => {
      spreadSheetUtil
        .xslxStartReading(fileReaderResponse, this.promptExcelPassword)
        .then(resolve)
        .catch(reject)
    }

    // Without these handlers a failed or aborted read would leave the
    // promise pending forever
    reader.onerror = () =>
      reject(reader.error ?? new Error('File could not be read'))
    reader.onabort = () =>
      reject(reader.error ?? new Error('File read was aborted'))

    reader.readAsArrayBuffer(file)
  })
}
|
||||
|
||||
/**
|
||||
* Read the file minimally just to get the sheet names, not reading full file
|
||||
* to help boost the performance
|
||||
|
File diff suppressed because it is too large
Load Diff
8
sas/package-lock.json
generated
8
sas/package-lock.json
generated
@ -7,7 +7,7 @@
|
||||
"name": "dc-sas",
|
||||
"dependencies": {
|
||||
"@sasjs/cli": "^4.11.1",
|
||||
"@sasjs/core": "^4.52.4"
|
||||
"@sasjs/core": "^4.52.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@coolaj86/urequest": {
|
||||
@ -116,9 +116,9 @@
|
||||
"integrity": "sha512-Grwydm5GxBsYk238PZw41XPjXVVQ9vWcvfZ06L2P0bQbvK0sGn7l69JA7H5MGr3QcaLpiD4Kg70cAh7PgE+JOw=="
|
||||
},
|
||||
"node_modules/@sasjs/core": {
|
||||
"version": "4.52.4",
|
||||
"resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.4.tgz",
|
||||
"integrity": "sha512-8lf5ixlA312EgA2DorwbpNXXPfLPzUHO67exIV7SjKiU23Tn1au5GD6hT0Ysr2kophOs10Mp1TCXJjhEq7Qk4A=="
|
||||
"version": "4.52.5",
|
||||
"resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.5.tgz",
|
||||
"integrity": "sha512-fGuLC+DcH2AoIDDU/Eyn7d4ZIrIeQAN3PB9FgjBikiGuXZYooRELKz1WgaQvhI4qSKgczUANaT80ZJOpfH+sQQ=="
|
||||
},
|
||||
"node_modules/@sasjs/lint": {
|
||||
"version": "2.3.1",
|
||||
|
@ -29,6 +29,6 @@
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@sasjs/cli": "^4.11.1",
|
||||
"@sasjs/core": "^4.52.4"
|
||||
"@sasjs/core": "^4.52.5"
|
||||
}
|
||||
}
|
||||
|
@ -100,6 +100,7 @@ Areas for optimisation
|
||||
@li mf_getattrn.sas
|
||||
@li mf_getengine.sas
|
||||
@li mf_getschema.sas
|
||||
@li mf_getuniquefileref.sas
|
||||
@li mf_getuniquename.sas
|
||||
@li mf_getuser.sas
|
||||
@li mf_getvarlist.sas
|
||||
@ -621,7 +622,7 @@ data work.bitemp0_append &keepvars &outds_del(drop=&md5_col )
|
||||
%put DCNOTE: Extracting matching observations from &base_lib..&base_dsn;
|
||||
|
||||
%if &engine_type=OLEDB %then %do;
|
||||
%let temp_table=##BITEMP_&base_dsn;
|
||||
%let temp_table=##%mf_getuniquefileref(prefix=BTMP)_&base_dsn;
|
||||
%if &loadtype=BITEMPORAL or &loadtype=TXTEMPORAL %then
|
||||
%let base_table=(select * from [dbo].&base_dsn
|
||||
where convert(datetime,&SQLNOW) < &tech_to );
|
||||
@ -1049,7 +1050,7 @@ run;
|
||||
%let cat_string=catx('|' ,&bus_from,&bus_to);
|
||||
|
||||
data work.bitemp5a_lkp (keep=&md5_col)
|
||||
%if "%substr(&sysver,1,1)" ne "4" and "%substr(&sysver,1,1)" ne "5" %then %do;
|
||||
%if "%substr(&sysver,1,1)" ne "4" & "%substr(&sysver,1,1)" ne "5" %then %do;
|
||||
/nonote2err
|
||||
%end;
|
||||
;
|
||||
@ -1191,10 +1192,10 @@ run;
|
||||
/* if OLEDB then create a temp table for efficiency */
|
||||
%local innertable;
|
||||
%if &engine_type=OLEDB %then %do;
|
||||
%let innertable=[##BITEMP_&base_dsn];
|
||||
%let innertable=[&temp_table];
|
||||
%let top_table=[dbo].&base_dsn;
|
||||
%let flexinow=&SQLNOW;
|
||||
create table &base_lib.."##BITEMP_&base_dsn"n as
|
||||
create table &base_lib.."&temp_table"n as
|
||||
select * from work.bitemp5d_subquery;
|
||||
/* open up a connection for pass through SQL */
|
||||
%dc_assignlib(WRITE,&base_lib,passthru=myAlias)
|
||||
|
Loading…
x
Reference in New Issue
Block a user