feat: searching data in excel files using new algorithm (massive performance improvement) #123
@@ -109,13 +109,8 @@ context('excel tests: ', function () {
    openTableFromTree(libraryToOpenIncludes, 'mpe_x_test')

    attachExcelFile('duplicate_column_excel.xlsx', () => {
      cy.get('.abortMsg', { timeout: longerCommandTimeout })
        .should('exist')
        .then((elements: any) => {
          if (elements[0]) {
            if (elements[0].innerText.toLowerCase().includes('missing')) done()
          }
        })
      submitExcel()
      rejectExcel(done)
    })
  })

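The inline `.abortMsg` assertion above is replaced by a `rejectExcel` helper that is defined outside this hunk. Based on the code it replaces, a minimal sketch of what such a helper might look like (selector and timeout reused from the removed assertion; the real helper may differ):

```ts
// Hypothetical sketch of rejectExcel, inferred from the removed assertion.
// '.abortMsg' and longerCommandTimeout come from the existing test code.
const rejectExcel = (done: Mocha.Done) => {
  cy.get('.abortMsg', { timeout: longerCommandTimeout })
    .should('exist')
    .then((elements: JQuery<HTMLElement>) => {
      if (elements[0]?.innerText.toLowerCase().includes('missing')) done()
    })
}
```
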
@@ -18,6 +18,7 @@ import {
  ClarityIcons,
  exclamationTriangleIcon,
  moonIcon,
  processOnVmIcon,
  sunIcon,
  tableIcon,
  trashIcon
@@ -28,7 +29,8 @@ ClarityIcons.addIcons(
  sunIcon,
  exclamationTriangleIcon,
  tableIcon,
  trashIcon
  trashIcon,
  processOnVmIcon
)

@Component({

@@ -2,11 +2,17 @@ import { DcValidator } from '../shared/dc-validator/dc-validator'
import { FileUploadEncoding } from './FileUploadEncoding'
import { FileUploader } from './FileUploader.class'
import { ExcelRule } from './TableData'
import XLSX from 'xlsx'

export interface ParseParams {
  file: File
  password?: string
  dcValidator: DcValidator
  /**
   * If a workbook is provided, the parse function will not run XLSX.read();
   * it will use this property instead, so the client must read the file beforehand.
   */
  workbook?: XLSX.WorkBook
  /**
   * The parse function will manipulate and return the uploader array, which can be provided with files already in the queue.
   * Otherwise a new empty instance will be created.

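The new `workbook` option exists so that `XLSX.read` runs once per file rather than once per dataset. A rough usage sketch (the `parseExcelFile` call shape is taken from the component change further down; exact parameters may differ):

```ts
import XLSX from 'xlsx'

// Sketch: read the spreadsheet once, then hand the same WorkBook to every
// parse call via ParseParams.workbook so XLSX.read is not repeated per dataset.
async function readWorkbookOnce(file: File): Promise<XLSX.WorkBook> {
  const buffer = await file.arrayBuffer()
  return XLSX.read(buffer, { type: 'array' })
}

// Usage (assumed shape):
// const workbook = await readWorkbookOnce(selectedFile)
// await spreadsheetService.parseExcelFile({ file: selectedFile, workbook, dcValidator, ... })
```
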
@@ -1,5 +1,5 @@
import { FileUploader } from './FileUploader.class'
import SheetInfo from './SheetInfo'
import FoundRangeInfo from './RangeInfo'

export interface ParseResult {
  /**
@@ -10,6 +10,6 @@ export interface ParseResult {
   * In case of CSV file, won't be returned
   */
  headerShow?: string[]
  rangeSheetRes?: SheetInfo
  rangeSheetRes?: FoundRangeInfo
  uploader: FileUploader
}

client/src/app/models/RangeInfo.ts (new file, 13 lines)

@@ -0,0 +1,13 @@
export default interface FoundRangeInfo {
  found: boolean
  sheetName: string
  rangeStartAddress: string
  rangeEndAddress: string
  rangeAddress: string
  missingHeaders: MissingHeaders[]
}

export interface MissingHeaders {
  sheetName: string
  missingHeaders: string[]
}
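For illustration only, one way the `missingHeaders` array on a `FoundRangeInfo` could be turned into a user-facing message (the interface is exactly as above; the helper name is made up):

```ts
import FoundRangeInfo from './RangeInfo'

// Hypothetical helper: summarise the search outcome per sheet.
function describeRangeResult(info: FoundRangeInfo): string {
  if (info.found) {
    return `Data found on sheet "${info.sheetName}" at ${info.rangeAddress}`
  }
  return info.missingHeaders
    .map((m) => `Sheet "${m.sheetName}" is missing: ${m.missingHeaders.join(', ')}`)
    .join('\n')
}
```
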

client/src/app/models/SearchDataExcelResult.interface.ts (new file, 13 lines)

@@ -0,0 +1,13 @@
import { MissingHeaders } from './RangeInfo'

export interface SearchDataExcelResult {
  missing?: MissingHeaders[]
  found?: {
    data: any
    arrayData: any[]
    sheetName: string
    headers: string[]
    startAddress?: string
    endAddress?: string
  }
}
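And a minimal sketch of how a caller might branch on a `SearchDataExcelResult` (the function that produces it lives in the spreadsheet util whose diff is suppressed below; this consumer is illustrative only):

```ts
import { SearchDataExcelResult } from './SearchDataExcelResult.interface'

// Hypothetical consumer: either use the located range, or report which
// headers were missing on which sheets.
function toTableRows(result: SearchDataExcelResult): any[] {
  if (result.found) {
    return [result.found.headers, ...result.found.arrayData]
  }
  const details = (result.missing ?? [])
    .map((m) => `${m.sheetName}: ${m.missingHeaders.join(', ')}`)
    .join('; ')
  throw new Error(`Required headers not found. ${details}`)
}
```
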
@@ -198,14 +198,28 @@
          *ngIf="!activeParsedDataset"
          class="no-table-selected pointer-events-none"
        >
          <clr-icon
            shape="warning-standard"
            size="40"
            class="is-info icon-dc-fill"
          ></clr-icon>
          <p class="text-center color-gray mt-10" cds-text="section">
            Please select a dataset on the left to review the data
          </p>
          <ng-container *ngIf="fileLoadingState !== FileLoadingState.parsed">
            <clr-icon
              shape="process-on-vm"
              size="40"
              class="is-info icon-dc-fill"
            ></clr-icon>

            <p class="text-center color-gray mt-10" cds-text="section">
              {{ fileLoadingState }}...
            </p>
          </ng-container>

          <ng-container *ngIf="fileLoadingState === FileLoadingState.parsed">
            <clr-icon
              shape="warning-standard"
              size="40"
              class="is-info icon-dc-fill"
            ></clr-icon>
            <p class="text-center color-gray mt-10" cds-text="section">
              Please select a dataset on the left to review the data
            </p>
          </ng-container>
        </div>

        <ng-container *ngIf="activeParsedDataset">

@@ -30,6 +30,17 @@ import { UploadFile } from '@sasjs/adapter'
import { UploadFileResponse } from '../models/UploadFile'
import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse'
import { ParseResult } from '../models/ParseResult.interface'
import XLSX from 'xlsx'

enum FileLoadingState {
  reading = 'Reading the file',
  parsing = 'Searching for the data in the file',
  parsed = 'Searching for the data finished',
  /**
   * Default value
   */
  notSelected = 'File not selected'
}

@Component({
  selector: 'app-multi-dataset',
@@ -43,6 +54,11 @@ export class MultiDatasetComponent implements OnInit {
  public licenceState = this.licenceService.licenceState
  public Infinity = Infinity

  public workbookInterval: any
  public fileLoadingState: FileLoadingState = FileLoadingState.notSelected

  public FileLoadingState = FileLoadingState

  public hotTableLicenseKey: string | undefined = undefined
  public hotTableMaxRows =
    this.licenceState.value.viewer_rows_allowed || Infinity
@@ -163,7 +179,7 @@ export class MultiDatasetComponent implements OnInit {
    }
  }

  onFileChange(event: any) {
  async onFileChange(event: any) {
    const files = event?.target?.files || []

    if (files.length < 1) {
@@ -200,11 +216,20 @@ export class MultiDatasetComponent implements OnInit {
      // For EXCEL if multiple files, we only take one (the first one)
      this.selectedFile = event.target.files[0]

      if (this.selectedFile)
      if (this.selectedFile) {
        this.fileLoadingState = FileLoadingState.reading

        this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB(
          this.selectedFile.size
        )

        // Read the excel file to be ready
        this.spreadsheetService.xlsxReadFile(this.selectedFile!).then((wb) => {
          this.fileLoadingState = FileLoadingState.parsing
          this.selectedFile!.workbook = wb
        })
      }

      this.initUserInputHot()
      this.onAutoDetectColumns()
    } else if (matchedExtension === 'csv') {
@@ -291,53 +316,56 @@
        })
      })

      for (let parsedDataset of this.parsedDatasets) {
        this.spreadsheetService
          .parseExcelFile({
            file: this.selectedFile!,
            password: this.selectedFile!.password || undefined,
            dcValidator: parsedDataset.datasetInfo.dcValidator!,
            headerPks: parsedDataset.datasetInfo.headerPks,
            headerArray: parsedDataset.datasetInfo.headerArray,
            headerShow: [],
            timeHeaders: parsedDataset.datasetInfo.timeHeaders,
            dateHeaders: parsedDataset.datasetInfo.dateHeaders,
            dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
            xlRules: parsedDataset.datasetInfo.xlRules
          })
          .then((parseResult: ParseResult | undefined) => {
            console.log('parseResult', parseResult)
      this.workbookLoaded().then((workbook) => {
        for (let parsedDataset of this.parsedDatasets) {
          this.spreadsheetService
            .parseExcelFile({
              file: this.selectedFile!,
              workbook: workbook,
              password: this.selectedFile!.password || undefined,
              dcValidator: parsedDataset.datasetInfo.dcValidator!,
              headerPks: parsedDataset.datasetInfo.headerPks,
              headerArray: parsedDataset.datasetInfo.headerArray,
              headerShow: [],
              timeHeaders: parsedDataset.datasetInfo.timeHeaders,
              dateHeaders: parsedDataset.datasetInfo.dateHeaders,
              dateTimeHeaders: parsedDataset.datasetInfo.dateTimeHeaders,
              xlRules: parsedDataset.datasetInfo.xlRules
            })
            .then((parseResult: ParseResult | undefined) => {
              this.fileLoadingState = FileLoadingState.parsed

            if (parseResult && parseResult.data) {
              let datasource: any[] = []
              if (parseResult && parseResult.data) {
                let datasource: any[] = []

              parseResult.data.map((item) => {
                let itemObject: any = {}
                parseResult.data.map((item) => {
                  let itemObject: any = {}

                parseResult.headerShow!.map((header: any, index: number) => {
                  itemObject[header] = item[index]
                  parseResult.headerShow!.map((header: any, index: number) => {
                    itemObject[header] = item[index]
                  })

                  // If Delete? column is not set in the file, we set it to NO
                  if (!itemObject['_____DELETE__THIS__RECORD_____'])
                    itemObject['_____DELETE__THIS__RECORD_____'] = 'No'

                  datasource.push(itemObject)
                })

                // If Delete? column is not set in the file, we set it to NO
                if (!itemObject['_____DELETE__THIS__RECORD_____'])
                  itemObject['_____DELETE__THIS__RECORD_____'] = 'No'
                parsedDataset.datasource = datasource
                parsedDataset.parseResult = parseResult
                parsedDataset.parsingTable = false
              }
            })
            .catch((error: string) => {
              console.warn('Parsing excel file error.', error)

                datasource.push(itemObject)
              })

              parsedDataset.datasource = datasource
              parsedDataset.parseResult = parseResult
              parsedDataset.datasource = []
              parsedDataset.includeInSubmission = false
              parsedDataset.parsingTable = false
            }
          })
          .catch((error: string) => {
            console.warn('Parsing excel file error.', error)

            parsedDataset.datasource = []
            parsedDataset.includeInSubmission = false
            parsedDataset.parsingTable = false
          })
      }
            })
        }
      })
    })
  }

@@ -826,6 +854,23 @@ export class MultiDatasetComponent implements OnInit {
    if (newSubmittedDataset) newSubmittedDataset.active = true
  }

  /**
   * @returns Promise that resolves once the workbook is loaded; XLSX.read runs in the background
   */
  private workbookLoaded(): Promise<XLSX.WorkBook> {
    return new Promise((resolve, reject) => {
      if (!this.selectedFile) reject('No file selected')

      this.workbookInterval = setInterval(() => {
        if (this.selectedFile!.workbook) {
          clearInterval(this.workbookInterval)
          resolve(this.selectedFile!.workbook)
        }
      }, 500)
    })
  }

  private parseDatasetFromCsvName(fileName: string) {
    const fileNameArr = fileName.split('.')
    fileNameArr.pop()
@@ -1044,4 +1089,5 @@ export interface SubmittedCsvDatasetResult {
export interface SelectedFile extends File {
  sizeMB?: number
  password?: string
  workbook?: XLSX.WorkBook
}

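`workbookLoaded()` above resolves by polling `selectedFile.workbook` until the background `XLSX.read` has finished. The same idea in isolation, as a generic helper (a sketch only; the interval and timeout values are illustrative, and storing the promise returned by `xlsxReadFile` would be an alternative that avoids polling altogether):

```ts
// Generic "wait until a value appears" helper using the same polling approach
// as workbookLoaded().
function waitFor<T>(get: () => T | undefined, intervalMs = 500, timeoutMs = 60_000): Promise<T> {
  return new Promise((resolve, reject) => {
    const started = Date.now()
    const timer = setInterval(() => {
      const value = get()
      if (value !== undefined) {
        clearInterval(timer)
        resolve(value)
      } else if (Date.now() - started > timeoutMs) {
        clearInterval(timer)
        reject(new Error('Timed out waiting for value'))
      }
    }, intervalMs)
  })
}

// e.g. const workbook = await waitFor(() => this.selectedFile?.workbook)
```
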
@@ -30,7 +30,7 @@ export class SpreadsheetService {
      licenceState: this.licenceState
    })

    return spreadSheetUtil.parseExcelFile(
    return spreadSheetUtil.parseSpreadsheetFile(
      parseParams,
      this.promptExcelPassword,
      onParseStateChange,
@@ -38,6 +38,37 @@ export class SpreadsheetService {
    )
  }

  /**
   * Reads the excel file using the XLSX.read() function.
   * If possible, the function will use a web worker to read it in a background thread;
   * otherwise a fallback method is used.
   *
   * @param file selected in an <input>
   * @returns WorkBook
   */
  public xlsxReadFile(file: any): Promise<XLSX.WorkBook> {
    return new Promise((resolve, reject) => {
      const spreadSheetUtil = new SpreadsheetUtil({
        licenceState: this.licenceState
      })

      let reader: FileReader = new FileReader()

      reader.onload = (fileReaderResponse: any) => {
        spreadSheetUtil
          .xslxStartReading(fileReaderResponse, this.promptExcelPassword)
          .then((response) => {
            resolve(response)
          })
          .catch((err) => {
            reject(err)
          })
      }

      reader.readAsArrayBuffer(file)
    })
  }

  /**
   * Read the file minimally just to get the sheet names, not reading the full file,
   * to help boost the performance

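For reference, the main-thread core of turning a `File` into a `WorkBook` looks roughly like this with the public SheetJS API (a sketch only; the PR's actual `xslxStartReading`, including the web-worker path and password handling, is in the spreadsheet util whose diff is suppressed below):

```ts
import XLSX from 'xlsx'

// Sketch: FileReader + XLSX.read on the main thread.
function readWorkbook(file: File): Promise<XLSX.WorkBook> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader()
    reader.onload = () => {
      try {
        resolve(XLSX.read(reader.result, { type: 'array' }))
      } catch (err) {
        reject(err)
      }
    }
    reader.onerror = () => reject(reader.error)
    reader.readAsArrayBuffer(file)
  })
}

// For the "sheet names only" read mentioned below, SheetJS can skip cell parsing:
// XLSX.read(data, { type: 'array', bookSheets: true }).SheetNames
```
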

(File diff suppressed because it is too large.)

sas/package-lock.json (generated, 8 lines changed)

@@ -7,7 +7,7 @@
      "name": "dc-sas",
      "dependencies": {
        "@sasjs/cli": "^4.11.1",
        "@sasjs/core": "^4.52.4"
        "@sasjs/core": "^4.52.5"
      }
    },
    "node_modules/@coolaj86/urequest": {
@@ -116,9 +116,9 @@
      "integrity": "sha512-Grwydm5GxBsYk238PZw41XPjXVVQ9vWcvfZ06L2P0bQbvK0sGn7l69JA7H5MGr3QcaLpiD4Kg70cAh7PgE+JOw=="
    },
    "node_modules/@sasjs/core": {
      "version": "4.52.4",
      "resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.4.tgz",
      "integrity": "sha512-8lf5ixlA312EgA2DorwbpNXXPfLPzUHO67exIV7SjKiU23Tn1au5GD6hT0Ysr2kophOs10Mp1TCXJjhEq7Qk4A=="
      "version": "4.52.5",
      "resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.5.tgz",
      "integrity": "sha512-fGuLC+DcH2AoIDDU/Eyn7d4ZIrIeQAN3PB9FgjBikiGuXZYooRELKz1WgaQvhI4qSKgczUANaT80ZJOpfH+sQQ=="
    },
    "node_modules/@sasjs/lint": {
      "version": "2.3.1",

@@ -29,6 +29,6 @@
  "private": true,
  "dependencies": {
    "@sasjs/cli": "^4.11.1",
    "@sasjs/core": "^4.52.5"
  }
}

@@ -100,6 +100,7 @@ Areas for optimisation
  @li mf_getattrn.sas
  @li mf_getengine.sas
  @li mf_getschema.sas
  @li mf_getuniquefileref.sas
  @li mf_getuniquename.sas
  @li mf_getuser.sas
  @li mf_getvarlist.sas
@@ -621,7 +622,7 @@ data work.bitemp0_append &keepvars &outds_del(drop=&md5_col )
%put DCNOTE: Extracting matching observations from &base_lib..&base_dsn;

%if &engine_type=OLEDB %then %do;
  %let temp_table=##BITEMP_&base_dsn;
  %let temp_table=##%mf_getuniquefileref(prefix=BTMP)_&base_dsn;
  %if &loadtype=BITEMPORAL or &loadtype=TXTEMPORAL %then
        %let base_table=(select * from [dbo].&base_dsn
                        where convert(datetime,&SQLNOW) < &tech_to );
@@ -1049,7 +1050,7 @@ run;
  %let cat_string=catx('|' ,&bus_from,&bus_to);

  data work.bitemp5a_lkp (keep=&md5_col)
    %if "%substr(&sysver,1,1)" ne "4" and "%substr(&sysver,1,1)" ne "5" %then %do;
    %if "%substr(&sysver,1,1)" ne "4" & "%substr(&sysver,1,1)" ne "5" %then %do;
      /nonote2err
    %end;
    ;
@@ -1191,10 +1192,10 @@ run;
  /* if OLEDB then create a temp table for efficiency */
  %local innertable;
  %if &engine_type=OLEDB %then %do;
    %let innertable=[##BITEMP_&base_dsn];
    %let innertable=[&temp_table];
    %let top_table=[dbo].&base_dsn;
    %let flexinow=&SQLNOW;
    create table &base_lib.."##BITEMP_&base_dsn"n as
    create table &base_lib.."&temp_table"n as
      select * from work.bitemp5d_subquery;
    /* open up a connection for pass through SQL */
    %dc_assignlib(WRITE,&base_lib,passthru=myAlias)