feat: search data in Excel files using a new algorithm (massive performance improvement) #123
@@ -109,13 +109,8 @@ context('excel tests: ', function () {
     openTableFromTree(libraryToOpenIncludes, 'mpe_x_test')

     attachExcelFile('duplicate_column_excel.xlsx', () => {
-      cy.get('.abortMsg', { timeout: longerCommandTimeout })
-        .should('exist')
-        .then((elements: any) => {
-          if (elements[0]) {
-            if (elements[0].innerText.toLowerCase().includes('missing')) done()
-          }
-        })
+      submitExcel()
+      rejectExcel(done)
     })
   })

@@ -18,6 +18,7 @@ import {
   ClarityIcons,
   exclamationTriangleIcon,
   moonIcon,
+  processOnVmIcon,
   sunIcon,
   tableIcon,
   trashIcon
@@ -28,7 +29,8 @@ ClarityIcons.addIcons(
   sunIcon,
   exclamationTriangleIcon,
   tableIcon,
-  trashIcon
+  trashIcon,
+  processOnVmIcon
 )

 @Component({
@@ -2,11 +2,17 @@ import { DcValidator } from '../shared/dc-validator/dc-validator'
 import { FileUploadEncoding } from './FileUploadEncoding'
 import { FileUploader } from './FileUploader.class'
 import { ExcelRule } from './TableData'
+import XLSX from 'xlsx'

 export interface ParseParams {
   file: File
   password?: string
   dcValidator: DcValidator
+  /**
+   * If a workbook is provided, the parse function will not run XLSX.read();
+   * it will use this property instead, so the client must read the file beforehand.
+   */
+  workbook?: XLSX.WorkBook
   /**
    * The parse function will manipulate and return the uploader array, which can be provided with files already in the queue.
    * Otherwise a new empty instance will be created.
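The new optional workbook property is the heart of the performance change: XLSX.read only has to run once per selected file instead of once per dataset. A minimal sketch of the intended calling pattern, assuming the interface above; the helper name, the ParseParams import path, and the use of Partial (the hunk does not show the full interface) are illustrative, not part of this PR:

import XLSX from 'xlsx'
import { DcValidator } from '../shared/dc-validator/dc-validator'
// import path assumed for illustration; the interface is the ParseParams defined above
import { ParseParams } from './ParseParams'

// One workbook, many datasets: every ParseParams shares the same pre-read workbook,
// so the parse function can skip its own XLSX.read per dataset.
// Partial<ParseParams> because fields such as uploader/headerPks are not shown in this hunk.
async function buildSharedWorkbookParams(
  file: File,
  dcValidators: DcValidator[]
): Promise<Partial<ParseParams>[]> {
  const buffer = await file.arrayBuffer() // read the file once
  const workbook = XLSX.read(new Uint8Array(buffer), { type: 'array' }) // single XLSX.read
  return dcValidators.map((dcValidator) => ({ file, dcValidator, workbook }))
}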
@@ -1,5 +1,5 @@
 import { FileUploader } from './FileUploader.class'
-import SheetInfo from './SheetInfo'
+import FoundRangeInfo from './RangeInfo'

 export interface ParseResult {
   /**
@@ -10,6 +10,6 @@ export interface ParseResult {
    * In case of CSV file, won't be returned
    */
   headerShow?: string[]
-  rangeSheetRes?: SheetInfo
+  rangeSheetRes?: FoundRangeInfo
   uploader: FileUploader
 }
client/src/app/models/RangeInfo.ts (new file, 13 lines)
@@ -0,0 +1,13 @@
+export default interface FoundRangeInfo {
+  found: boolean
+  sheetName: string
+  rangeStartAddress: string
+  rangeEndAddress: string
+  rangeAddress: string
+  missingHeaders: MissingHeaders[]
+}
+
+export interface MissingHeaders {
+  sheetName: string
+  missingHeaders: string[]
+}
client/src/app/models/SearchDataExcelResult.interface.ts (new file, 13 lines)
@@ -0,0 +1,13 @@
+import { MissingHeaders } from './RangeInfo'
+
+export interface SearchDataExcelResult {
+  missing?: MissingHeaders[]
+  found?: {
+    data: any
+    arrayData: any[]
+    sheetName: string
+    headers: string[]
+    startAddress?: string
+    endAddress?: string
+  }
+}
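For readers of the new interface, a hypothetical value for each branch of the result; the sheet and header names are invented for illustration and the import path assumes a sibling file in the models folder:

import { SearchDataExcelResult } from './SearchDataExcelResult.interface'

// Case 1: the searched range was located on a sheet.
const hit: SearchDataExcelResult = {
  found: {
    data: { A1: 'ID', B1: 'NAME' }, // raw cell data; the interface leaves this as `any`
    arrayData: [['ID', 'NAME'], [1, 'Alice']],
    sheetName: 'Sheet1',
    headers: ['ID', 'NAME'],
    startAddress: 'A1',
    endAddress: 'B2'
  }
}

// Case 2: the expected headers were not found; each entry lists what is missing per sheet.
const miss: SearchDataExcelResult = {
  missing: [{ sheetName: 'Sheet1', missingHeaders: ['ID'] }]
}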
@@ -198,6 +198,19 @@
         *ngIf="!activeParsedDataset"
         class="no-table-selected pointer-events-none"
       >
+        <ng-container *ngIf="fileLoadingState !== FileLoadingState.parsed">
+          <clr-icon
+            shape="process-on-vm"
+            size="40"
+            class="is-info icon-dc-fill"
+          ></clr-icon>
+
+          <p class="text-center color-gray mt-10" cds-text="section">
+            {{ fileLoadingState }}...
+          </p>
+        </ng-container>
+
+        <ng-container *ngIf="fileLoadingState === FileLoadingState.parsed">
         <clr-icon
           shape="warning-standard"
           size="40"
@@ -206,6 +219,7 @@
         <p class="text-center color-gray mt-10" cds-text="section">
           Please select a dataset on the left to review the data
         </p>
+        </ng-container>
       </div>

       <ng-container *ngIf="activeParsedDataset">
@@ -30,6 +30,17 @@ import { UploadFile } from '@sasjs/adapter'
 import { UploadFileResponse } from '../models/UploadFile'
 import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse'
 import { ParseResult } from '../models/ParseResult.interface'
+import XLSX from 'xlsx'
+
+enum FileLoadingState {
+  reading = 'Reading the file',
+  parsing = 'Searching for the data in the file',
+  parsed = 'Searching for the data finished',
+  /**
+   * Default value
+   */
+  notSelected = 'File not selected'
+}

 @Component({
   selector: 'app-multi-dataset',
@@ -43,6 +54,11 @@ export class MultiDatasetComponent implements OnInit {
   public licenceState = this.licenceService.licenceState
   public Infinity = Infinity

+  public workbookInterval: any
+  public fileLoadingState: FileLoadingState = FileLoadingState.notSelected
+
+  public FileLoadingState = FileLoadingState
+
   public hotTableLicenseKey: string | undefined = undefined
   public hotTableMaxRows =
     this.licenceState.value.viewer_rows_allowed || Infinity
@@ -163,7 +179,7 @@ export class MultiDatasetComponent implements OnInit {
     }
   }

-  onFileChange(event: any) {
+  async onFileChange(event: any) {
     const files = event?.target?.files || []

     if (files.length < 1) {
@@ -200,11 +216,20 @@ export class MultiDatasetComponent implements OnInit {
       // For EXCEL if multiple files, we only take one (the first one)
       this.selectedFile = event.target.files[0]

-      if (this.selectedFile)
+      if (this.selectedFile) {
+        this.fileLoadingState = FileLoadingState.reading
+
         this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(
           this.selectedFile.size
         )
+
+        // Read the excel file to be ready
+        this.spreadsheetService.xlsxReadFile(this.selectedFile!).then((wb) => {
+          this.fileLoadingState = FileLoadingState.parsing
+          this.selectedFile!.workbook = wb
+        })
+      }

       this.initUserInputHot()
       this.onAutoDetectColumns()
     } else if (matchedExtension === 'csv') {
@@ -291,10 +316,12 @@ export class MultiDatasetComponent implements OnInit {
       })
     })

+    this.workbookLoaded().then((workbook) => {
     for (let parsedDataset of this.parsedDatasets) {
       this.spreadsheetService
         .parseExcelFile({
           file: this.selectedFile!,
+          workbook: workbook,
           password: this.selectedFile!.password || undefined,
           dcValidator: parsedDataset.datasetInfo.dcValidator!,
           headerPks: parsedDataset.datasetInfo.headerPks,
@@ -306,7 +333,7 @@ export class MultiDatasetComponent implements OnInit {
           xlRules: parsedDataset.datasetInfo.xlRules
         })
         .then((parseResult: ParseResult | undefined) => {
-          console.log('parseResult', parseResult)
+          this.fileLoadingState = FileLoadingState.parsed

           if (parseResult && parseResult.data) {
             let datasource: any[] = []
@@ -339,6 +366,7 @@ export class MultiDatasetComponent implements OnInit {
           })
         }
       })
+    })
     }

   onSubmitAll() {
||||||
@ -826,6 +854,23 @@ export class MultiDatasetComponent implements OnInit {
|
|||||||
if (newSubmittedDataset) newSubmittedDataset.active = true
|
if (newSubmittedDataset) newSubmittedDataset.active = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @returns Promise once workbook is loaded because use XLSX.read in the background
|
||||||
|
*/
|
||||||
|
private workbookLoaded(): Promise<XLSX.WorkBook> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
if (!this.selectedFile) reject('No file selected')
|
||||||
|
|
||||||
|
this.workbookInterval = setInterval(() => {
|
||||||
|
if (this.selectedFile!.workbook) {
|
||||||
|
clearInterval(this.workbookInterval)
|
||||||
|
resolve(this.selectedFile!.workbook)
|
||||||
|
}
|
||||||
|
}, 500)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
private parseDatasetFromCsvName(fileName: string) {
|
private parseDatasetFromCsvName(fileName: string) {
|
||||||
const fileNameArr = fileName.split('.')
|
const fileNameArr = fileName.split('.')
|
||||||
fileNameArr.pop()
|
fileNameArr.pop()
|
||||||
@@ -1044,4 +1089,5 @@ export interface SubmittedCsvDatasetResult {
 export interface SelectedFile extends File {
   sizeMB?: number
   password?: string
+  workbook?: XLSX.WorkBook
 }
@@ -30,7 +30,7 @@ export class SpreadsheetService {
       licenceState: this.licenceState
     })

-    return spreadSheetUtil.parseExcelFile(
+    return spreadSheetUtil.parseSpreadsheetFile(
       parseParams,
       this.promptExcelPassword,
       onParseStateChange,
@@ -38,6 +38,37 @@ export class SpreadsheetService {
     )
   }

+  /**
+   * Reads the Excel file using the XLSX.read() function.
+   * If possible, the function will use a web worker to read it in a background thread;
+   * otherwise a fallback method will be used.
+   *
+   * @param file selected in an <input>
+   * @returns WorkBook
+   */
+  public xlsxReadFile(file: any): Promise<XLSX.WorkBook> {
+    return new Promise((resolve, reject) => {
+      const spreadSheetUtil = new SpreadsheetUtil({
+        licenceState: this.licenceState
+      })
+
+      let reader: FileReader = new FileReader()
+
+      reader.onload = (fileReaderResponse: any) => {
+        spreadSheetUtil
+          .xslxStartReading(fileReaderResponse, this.promptExcelPassword)
+          .then((response) => {
+            resolve(response)
+          })
+          .catch((err) => {
+            reject(err)
+          })
+      }
+
+      reader.readAsArrayBuffer(file)
+    })
+  }
+
 /**
  * Read the file minimally just to get the sheet names, not reading full file
  * to help boost the performance
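The worker-versus-fallback decision lives inside SpreadsheetUtil.xslxStartReading, which this diff does not show. Conceptually, the fallback path amounts to a plain XLSX.read over the FileReader result; a minimal sketch of that idea, assuming no password prompt is needed (the function below is illustrative only, not the library's actual implementation):

import XLSX from 'xlsx'

// Illustrative fallback: read a File into memory and parse it on the main thread.
// The real xslxStartReading additionally handles password prompts and a web-worker path.
function readWorkbookOnMainThread(file: File): Promise<XLSX.WorkBook> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader()
    reader.onerror = () => reject(reader.error)
    reader.onload = () => {
      try {
        const data = new Uint8Array(reader.result as ArrayBuffer)
        resolve(XLSX.read(data, { type: 'array' })) // the same XLSX.read the service relies on
      } catch (err) {
        reject(err)
      }
    }
    reader.readAsArrayBuffer(file)
  })
}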
(File diff suppressed because it is too large)
sas/package-lock.json (generated, 8 lines changed)
@@ -7,7 +7,7 @@
     "name": "dc-sas",
     "dependencies": {
       "@sasjs/cli": "^4.11.1",
-      "@sasjs/core": "^4.52.4"
+      "@sasjs/core": "^4.52.5"
     }
   },
   "node_modules/@coolaj86/urequest": {
@@ -116,9 +116,9 @@
       "integrity": "sha512-Grwydm5GxBsYk238PZw41XPjXVVQ9vWcvfZ06L2P0bQbvK0sGn7l69JA7H5MGr3QcaLpiD4Kg70cAh7PgE+JOw=="
     },
     "node_modules/@sasjs/core": {
-      "version": "4.52.4",
-      "resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.4.tgz",
-      "integrity": "sha512-8lf5ixlA312EgA2DorwbpNXXPfLPzUHO67exIV7SjKiU23Tn1au5GD6hT0Ysr2kophOs10Mp1TCXJjhEq7Qk4A=="
+      "version": "4.52.5",
+      "resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.5.tgz",
+      "integrity": "sha512-fGuLC+DcH2AoIDDU/Eyn7d4ZIrIeQAN3PB9FgjBikiGuXZYooRELKz1WgaQvhI4qSKgczUANaT80ZJOpfH+sQQ=="
     },
     "node_modules/@sasjs/lint": {
       "version": "2.3.1",
@@ -29,6 +29,6 @@
   "private": true,
   "dependencies": {
     "@sasjs/cli": "^4.11.1",
-    "@sasjs/core": "^4.52.4"
+    "@sasjs/core": "^4.52.5"
   }
 }
@@ -100,6 +100,7 @@ Areas for optimisation
   @li mf_getattrn.sas
   @li mf_getengine.sas
   @li mf_getschema.sas
+  @li mf_getuniquefileref.sas
   @li mf_getuniquename.sas
   @li mf_getuser.sas
   @li mf_getvarlist.sas
@@ -621,7 +622,7 @@ data work.bitemp0_append &keepvars &outds_del(drop=&md5_col )
 %put DCNOTE: Extracting matching observations from &base_lib..&base_dsn;

 %if &engine_type=OLEDB %then %do;
-  %let temp_table=##BITEMP_&base_dsn;
+  %let temp_table=##%mf_getuniquefileref(prefix=BTMP)_&base_dsn;
   %if &loadtype=BITEMPORAL or &loadtype=TXTEMPORAL %then
     %let base_table=(select * from [dbo].&base_dsn
       where convert(datetime,&SQLNOW) < &tech_to );
@@ -1049,7 +1050,7 @@ run;
 %let cat_string=catx('|' ,&bus_from,&bus_to);

 data work.bitemp5a_lkp (keep=&md5_col)
-  %if "%substr(&sysver,1,1)" ne "4" and "%substr(&sysver,1,1)" ne "5" %then %do;
+  %if "%substr(&sysver,1,1)" ne "4" & "%substr(&sysver,1,1)" ne "5" %then %do;
     /nonote2err
   %end;
   ;
@@ -1191,10 +1192,10 @@ run;
 /* if OLEDB then create a temp table for efficiency */
 %local innertable;
 %if &engine_type=OLEDB %then %do;
-  %let innertable=[##BITEMP_&base_dsn];
+  %let innertable=[&temp_table];
   %let top_table=[dbo].&base_dsn;
   %let flexinow=&SQLNOW;
-  create table &base_lib.."##BITEMP_&base_dsn"n as
+  create table &base_lib.."&temp_table"n as
     select * from work.bitemp5d_subquery;
   /* open up a connection for pass through SQL */
   %dc_assignlib(WRITE,&base_lib,passthru=myAlias)