feat: searching data in excel files using new algorithm (massive performance improvement) #123
| @@ -109,13 +109,8 @@ context('excel tests: ', function () { | |||||||
|     openTableFromTree(libraryToOpenIncludes, 'mpe_x_test') |     openTableFromTree(libraryToOpenIncludes, 'mpe_x_test') | ||||||
|  |  | ||||||
|     attachExcelFile('duplicate_column_excel.xlsx', () => { |     attachExcelFile('duplicate_column_excel.xlsx', () => { | ||||||
|       cy.get('.abortMsg', { timeout: longerCommandTimeout }) |       submitExcel() | ||||||
|         .should('exist') |       rejectExcel(done) | ||||||
|         .then((elements: any) => { |  | ||||||
|           if (elements[0]) { |  | ||||||
|             if (elements[0].innerText.toLowerCase().includes('missing')) done() |  | ||||||
|           } |  | ||||||
|         }) |  | ||||||
|     }) |     }) | ||||||
|   }) |   }) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -18,6 +18,7 @@ import { | |||||||
|   ClarityIcons, |   ClarityIcons, | ||||||
|   exclamationTriangleIcon, |   exclamationTriangleIcon, | ||||||
|   moonIcon, |   moonIcon, | ||||||
|  |   processOnVmIcon, | ||||||
|   sunIcon, |   sunIcon, | ||||||
|   tableIcon, |   tableIcon, | ||||||
|   trashIcon |   trashIcon | ||||||
| @@ -28,7 +29,8 @@ ClarityIcons.addIcons( | |||||||
|   sunIcon, |   sunIcon, | ||||||
|   exclamationTriangleIcon, |   exclamationTriangleIcon, | ||||||
|   tableIcon, |   tableIcon, | ||||||
|   trashIcon |   trashIcon, | ||||||
|  |   processOnVmIcon | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @Component({ | @Component({ | ||||||
|   | |||||||
| @@ -2,11 +2,17 @@ import { DcValidator } from '../shared/dc-validator/dc-validator' | |||||||
| import { FileUploadEncoding } from './FileUploadEncoding' | import { FileUploadEncoding } from './FileUploadEncoding' | ||||||
| import { FileUploader } from './FileUploader.class' | import { FileUploader } from './FileUploader.class' | ||||||
| import { ExcelRule } from './TableData' | import { ExcelRule } from './TableData' | ||||||
|  | import XLSX from 'xlsx' | ||||||
|  |  | ||||||
| export interface ParseParams { | export interface ParseParams { | ||||||
|   file: File |   file: File | ||||||
|   password?: string |   password?: string | ||||||
|   dcValidator: DcValidator |   dcValidator: DcValidator | ||||||
|  |   /** | ||||||
|  |    * If workbook is provided, the parse function will not run XLSX.read(); | ||||||
|  |    * it will use this property instead, so the client must read the file beforehand. | ||||||
|  |    */ | ||||||
|  |   workbook?: XLSX.WorkBook | ||||||
|   /** |   /** | ||||||
|    * Parse function will manipulate and return the uploader array which can be provided with files already in the queue |    * Parse function will manipulate and return the uploader array which can be provided with files already in the queue | ||||||
|    * Otherwise new empty instance will be created. |    * Otherwise new empty instance will be created. | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| import { FileUploader } from './FileUploader.class' | import { FileUploader } from './FileUploader.class' | ||||||
| import SheetInfo from './SheetInfo' | import FoundRangeInfo from './RangeInfo' | ||||||
|  |  | ||||||
| export interface ParseResult { | export interface ParseResult { | ||||||
|   /** |   /** | ||||||
| @@ -10,6 +10,6 @@ export interface ParseResult { | |||||||
|    * In case of CSV file, won't be returned |    * In case of CSV file, won't be returned | ||||||
|    */ |    */ | ||||||
|   headerShow?: string[] |   headerShow?: string[] | ||||||
|   rangeSheetRes?: SheetInfo |   rangeSheetRes?: FoundRangeInfo | ||||||
|   uploader: FileUploader |   uploader: FileUploader | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										13
									
								
								client/src/app/models/RangeInfo.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								client/src/app/models/RangeInfo.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | export default interface FoundRangeInfo { | ||||||
|  |   found: boolean | ||||||
|  |   sheetName: string | ||||||
|  |   rangeStartAddress: string | ||||||
|  |   rangeEndAddress: string | ||||||
|  |   rangeAddress: string | ||||||
|  |   missingHeaders: MissingHeaders[] | ||||||
|  | } | ||||||
|  |  | ||||||
|  | export interface MissingHeaders { | ||||||
|  |   sheetName: string | ||||||
|  |   missingHeaders: string[] | ||||||
|  | } | ||||||
							
								
								
									
										13
									
								
								client/src/app/models/SearchDataExcelResult.interface.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								client/src/app/models/SearchDataExcelResult.interface.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | import { MissingHeaders } from './RangeInfo' | ||||||
|  |  | ||||||
|  | export interface SearchDataExcelResult { | ||||||
|  |   missing?: MissingHeaders[] | ||||||
|  |   found?: { | ||||||
|  |     data: any | ||||||
|  |     arrayData: any[] | ||||||
|  |     sheetName: string | ||||||
|  |     headers: string[] | ||||||
|  |     startAddress?: string | ||||||
|  |     endAddress?: string | ||||||
|  |   } | ||||||
|  | } | ||||||
| @@ -198,6 +198,19 @@ | |||||||
|           *ngIf="!activeParsedDataset" |           *ngIf="!activeParsedDataset" | ||||||
|           class="no-table-selected pointer-events-none" |           class="no-table-selected pointer-events-none" | ||||||
|         > |         > | ||||||
|  |           <ng-container *ngIf="fileLoadingState !== FileLoadingState.parsed"> | ||||||
|  |             <clr-icon | ||||||
|  |               shape="process-on-vm" | ||||||
|  |               size="40" | ||||||
|  |               class="is-info icon-dc-fill" | ||||||
|  |             ></clr-icon> | ||||||
|  |  | ||||||
|  |             <p class="text-center color-gray mt-10" cds-text="section"> | ||||||
|  |               {{ fileLoadingState }}... | ||||||
|  |             </p> | ||||||
|  |           </ng-container> | ||||||
|  |  | ||||||
|  |           <ng-container *ngIf="fileLoadingState === FileLoadingState.parsed"> | ||||||
|             <clr-icon |             <clr-icon | ||||||
|               shape="warning-standard" |               shape="warning-standard" | ||||||
|               size="40" |               size="40" | ||||||
| @@ -206,6 +219,7 @@ | |||||||
|             <p class="text-center color-gray mt-10" cds-text="section"> |             <p class="text-center color-gray mt-10" cds-text="section"> | ||||||
|               Please select a dataset on the left to review the data |               Please select a dataset on the left to review the data | ||||||
|             </p> |             </p> | ||||||
|  |           </ng-container> | ||||||
|         </div> |         </div> | ||||||
|  |  | ||||||
|         <ng-container *ngIf="activeParsedDataset"> |         <ng-container *ngIf="activeParsedDataset"> | ||||||
|   | |||||||
| @@ -30,6 +30,17 @@ import { UploadFile } from '@sasjs/adapter' | |||||||
| import { UploadFileResponse } from '../models/UploadFile' | import { UploadFileResponse } from '../models/UploadFile' | ||||||
| import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse' | import { RequestWrapperResponse } from '../models/request-wrapper/RequestWrapperResponse' | ||||||
| import { ParseResult } from '../models/ParseResult.interface' | import { ParseResult } from '../models/ParseResult.interface' | ||||||
|  | import XLSX from 'xlsx' | ||||||
|  |  | ||||||
|  | enum FileLoadingState { | ||||||
|  |   reading = 'Reading the file', | ||||||
|  |   parsing = 'Searching for the data in the file', | ||||||
|  |   parsed = 'Searching for the data finished', | ||||||
|  |   /** | ||||||
|  |    * Default value | ||||||
|  |    */ | ||||||
|  |   notSelected = 'File not selected' | ||||||
|  | } | ||||||
|  |  | ||||||
| @Component({ | @Component({ | ||||||
|   selector: 'app-multi-dataset', |   selector: 'app-multi-dataset', | ||||||
| @@ -43,6 +54,11 @@ export class MultiDatasetComponent implements OnInit { | |||||||
|   public licenceState = this.licenceService.licenceState |   public licenceState = this.licenceService.licenceState | ||||||
|   public Infinity = Infinity |   public Infinity = Infinity | ||||||
|  |  | ||||||
|  |   public workbookInterval: any | ||||||
|  |   public fileLoadingState: FileLoadingState = FileLoadingState.notSelected | ||||||
|  |  | ||||||
|  |   public FileLoadingState = FileLoadingState | ||||||
|  |  | ||||||
|   public hotTableLicenseKey: string | undefined = undefined |   public hotTableLicenseKey: string | undefined = undefined | ||||||
|   public hotTableMaxRows = |   public hotTableMaxRows = | ||||||
|     this.licenceState.value.viewer_rows_allowed || Infinity |     this.licenceState.value.viewer_rows_allowed || Infinity | ||||||
| @@ -163,7 +179,7 @@ export class MultiDatasetComponent implements OnInit { | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   onFileChange(event: any) { |   async onFileChange(event: any) { | ||||||
|     const files = event?.target?.files || [] |     const files = event?.target?.files || [] | ||||||
|  |  | ||||||
|     if (files.length < 1) { |     if (files.length < 1) { | ||||||
| @@ -200,11 +216,20 @@ export class MultiDatasetComponent implements OnInit { | |||||||
|       // For EXCEL if multiple files, we only take one (the first one) |       // For EXCEL if multiple files, we only take one (the first one) | ||||||
|       this.selectedFile = event.target.files[0] |       this.selectedFile = event.target.files[0] | ||||||
|  |  | ||||||
|       if (this.selectedFile) |       if (this.selectedFile) { | ||||||
|  |         this.fileLoadingState = FileLoadingState.reading | ||||||
|  |  | ||||||
|         this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB( |         this.selectedFile.sizeMB = this.spreadsheetService.bytesToMB( | ||||||
|           this.selectedFile.size |           this.selectedFile.size | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|  |         // Read the excel file to be ready | ||||||
|  |         this.spreadsheetService.xlsxReadFile(this.selectedFile!).then((wb) => { | ||||||
|  |           this.fileLoadingState = FileLoadingState.parsing | ||||||
|  |           this.selectedFile!.workbook = wb | ||||||
|  |         }) | ||||||
|  |       } | ||||||
|  |  | ||||||
|       this.initUserInputHot() |       this.initUserInputHot() | ||||||
|       this.onAutoDetectColumns() |       this.onAutoDetectColumns() | ||||||
|     } else if (matchedExtension === 'csv') { |     } else if (matchedExtension === 'csv') { | ||||||
| @@ -291,10 +316,12 @@ export class MultiDatasetComponent implements OnInit { | |||||||
|         }) |         }) | ||||||
|       }) |       }) | ||||||
|  |  | ||||||
|  |       this.workbookLoaded().then((workbook) => { | ||||||
|         for (let parsedDataset of this.parsedDatasets) { |         for (let parsedDataset of this.parsedDatasets) { | ||||||
|           this.spreadsheetService |           this.spreadsheetService | ||||||
|             .parseExcelFile({ |             .parseExcelFile({ | ||||||
|               file: this.selectedFile!, |               file: this.selectedFile!, | ||||||
|  |               workbook: workbook, | ||||||
|               password: this.selectedFile!.password || undefined, |               password: this.selectedFile!.password || undefined, | ||||||
|               dcValidator: parsedDataset.datasetInfo.dcValidator!, |               dcValidator: parsedDataset.datasetInfo.dcValidator!, | ||||||
|               headerPks: parsedDataset.datasetInfo.headerPks, |               headerPks: parsedDataset.datasetInfo.headerPks, | ||||||
| @@ -306,7 +333,7 @@ export class MultiDatasetComponent implements OnInit { | |||||||
|               xlRules: parsedDataset.datasetInfo.xlRules |               xlRules: parsedDataset.datasetInfo.xlRules | ||||||
|             }) |             }) | ||||||
|             .then((parseResult: ParseResult | undefined) => { |             .then((parseResult: ParseResult | undefined) => { | ||||||
|             console.log('parseResult', parseResult) |               this.fileLoadingState = FileLoadingState.parsed | ||||||
|  |  | ||||||
|               if (parseResult && parseResult.data) { |               if (parseResult && parseResult.data) { | ||||||
|                 let datasource: any[] = [] |                 let datasource: any[] = [] | ||||||
| @@ -339,6 +366,7 @@ export class MultiDatasetComponent implements OnInit { | |||||||
|             }) |             }) | ||||||
|         } |         } | ||||||
|       }) |       }) | ||||||
|  |     }) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   onSubmitAll() { |   onSubmitAll() { | ||||||
| @@ -826,6 +854,23 @@ export class MultiDatasetComponent implements OnInit { | |||||||
|     if (newSubmittedDataset) newSubmittedDataset.active = true |     if (newSubmittedDataset) newSubmittedDataset.active = true | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * | ||||||
|  |    * @returns Promise that resolves once the workbook is loaded, because XLSX.read runs in the background | ||||||
|  |    */ | ||||||
|  |   private workbookLoaded(): Promise<XLSX.WorkBook> { | ||||||
|  |     return new Promise((resolve, reject) => { | ||||||
|  |       if (!this.selectedFile) reject('No file selected') | ||||||
|  |  | ||||||
|  |       this.workbookInterval = setInterval(() => { | ||||||
|  |         if (this.selectedFile!.workbook) { | ||||||
|  |           clearInterval(this.workbookInterval) | ||||||
|  |           resolve(this.selectedFile!.workbook) | ||||||
|  |         } | ||||||
|  |       }, 500) | ||||||
|  |     }) | ||||||
|  |   } | ||||||
|  |  | ||||||
|   private parseDatasetFromCsvName(fileName: string) { |   private parseDatasetFromCsvName(fileName: string) { | ||||||
|     const fileNameArr = fileName.split('.') |     const fileNameArr = fileName.split('.') | ||||||
|     fileNameArr.pop() |     fileNameArr.pop() | ||||||
| @@ -1044,4 +1089,5 @@ export interface SubmittedCsvDatasetResult { | |||||||
| export interface SelectedFile extends File { | export interface SelectedFile extends File { | ||||||
|   sizeMB?: number |   sizeMB?: number | ||||||
|   password?: string |   password?: string | ||||||
|  |   workbook?: XLSX.WorkBook | ||||||
| } | } | ||||||
|   | |||||||
| @@ -30,7 +30,7 @@ export class SpreadsheetService { | |||||||
|       licenceState: this.licenceState |       licenceState: this.licenceState | ||||||
|     }) |     }) | ||||||
|  |  | ||||||
|     return spreadSheetUtil.parseExcelFile( |     return spreadSheetUtil.parseSpreadsheetFile( | ||||||
|       parseParams, |       parseParams, | ||||||
|       this.promptExcelPassword, |       this.promptExcelPassword, | ||||||
|       onParseStateChange, |       onParseStateChange, | ||||||
| @@ -38,6 +38,37 @@ export class SpreadsheetService { | |||||||
|     ) |     ) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Reads the Excel file using the XLSX.read() function. | ||||||
|  |    * If possible, the function will use a web worker to read it in a background thread; | ||||||
|  |    * otherwise a fallback method will be used. | ||||||
|  |    * | ||||||
|  |    * @param file selected in an <input> | ||||||
|  |    * @returns WorkBook | ||||||
|  |    */ | ||||||
|  |   public xlsxReadFile(file: any): Promise<XLSX.WorkBook> { | ||||||
|  |     return new Promise((resolve, reject) => { | ||||||
|  |       const spreadSheetUtil = new SpreadsheetUtil({ | ||||||
|  |         licenceState: this.licenceState | ||||||
|  |       }) | ||||||
|  |  | ||||||
|  |       let reader: FileReader = new FileReader() | ||||||
|  |  | ||||||
|  |       reader.onload = (fileReaderResponse: any) => { | ||||||
|  |         spreadSheetUtil | ||||||
|  |           .xslxStartReading(fileReaderResponse, this.promptExcelPassword) | ||||||
|  |           .then((response) => { | ||||||
|  |             resolve(response) | ||||||
|  |           }) | ||||||
|  |           .catch((err) => { | ||||||
|  |             reject(err) | ||||||
|  |           }) | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       reader.readAsArrayBuffer(file) | ||||||
|  |     }) | ||||||
|  |   } | ||||||
|  |  | ||||||
|   /** |   /** | ||||||
|    * Read the file minimally just to get the sheet names, not reading full file |    * Read the file minimally just to get the sheet names, not reading full file | ||||||
|    * to help boost the performance |    * to help boost the performance | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										8
									
								
								sas/package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										8
									
								
								sas/package-lock.json
									
									
									
										generated
									
									
									
								
							| @@ -7,7 +7,7 @@ | |||||||
|       "name": "dc-sas", |       "name": "dc-sas", | ||||||
|       "dependencies": { |       "dependencies": { | ||||||
|         "@sasjs/cli": "^4.11.1", |         "@sasjs/cli": "^4.11.1", | ||||||
|         "@sasjs/core": "^4.52.4" |         "@sasjs/core": "^4.52.5" | ||||||
|       } |       } | ||||||
|     }, |     }, | ||||||
|     "node_modules/@coolaj86/urequest": { |     "node_modules/@coolaj86/urequest": { | ||||||
| @@ -116,9 +116,9 @@ | |||||||
|       "integrity": "sha512-Grwydm5GxBsYk238PZw41XPjXVVQ9vWcvfZ06L2P0bQbvK0sGn7l69JA7H5MGr3QcaLpiD4Kg70cAh7PgE+JOw==" |       "integrity": "sha512-Grwydm5GxBsYk238PZw41XPjXVVQ9vWcvfZ06L2P0bQbvK0sGn7l69JA7H5MGr3QcaLpiD4Kg70cAh7PgE+JOw==" | ||||||
|     }, |     }, | ||||||
|     "node_modules/@sasjs/core": { |     "node_modules/@sasjs/core": { | ||||||
|       "version": "4.52.4", |       "version": "4.52.5", | ||||||
|       "resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.4.tgz", |       "resolved": "https://registry.npmjs.org/@sasjs/core/-/core-4.52.5.tgz", | ||||||
|       "integrity": "sha512-8lf5ixlA312EgA2DorwbpNXXPfLPzUHO67exIV7SjKiU23Tn1au5GD6hT0Ysr2kophOs10Mp1TCXJjhEq7Qk4A==" |       "integrity": "sha512-fGuLC+DcH2AoIDDU/Eyn7d4ZIrIeQAN3PB9FgjBikiGuXZYooRELKz1WgaQvhI4qSKgczUANaT80ZJOpfH+sQQ==" | ||||||
|     }, |     }, | ||||||
|     "node_modules/@sasjs/lint": { |     "node_modules/@sasjs/lint": { | ||||||
|       "version": "2.3.1", |       "version": "2.3.1", | ||||||
|   | |||||||
| @@ -29,6 +29,6 @@ | |||||||
|   "private": true, |   "private": true, | ||||||
|   "dependencies": { |   "dependencies": { | ||||||
|     "@sasjs/cli": "^4.11.1", |     "@sasjs/cli": "^4.11.1", | ||||||
|     "@sasjs/core": "^4.52.4" |     "@sasjs/core": "^4.52.5" | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -100,6 +100,7 @@ Areas for optimisation | |||||||
|   @li mf_getattrn.sas |   @li mf_getattrn.sas | ||||||
|   @li mf_getengine.sas |   @li mf_getengine.sas | ||||||
|   @li mf_getschema.sas |   @li mf_getschema.sas | ||||||
|  |   @li mf_getuniquefileref.sas | ||||||
|   @li mf_getuniquename.sas |   @li mf_getuniquename.sas | ||||||
|   @li mf_getuser.sas |   @li mf_getuser.sas | ||||||
|   @li mf_getvarlist.sas |   @li mf_getvarlist.sas | ||||||
| @@ -621,7 +622,7 @@ data work.bitemp0_append &keepvars &outds_del(drop=&md5_col ) | |||||||
| %put DCNOTE: Extracting matching observations from &base_lib..&base_dsn; | %put DCNOTE: Extracting matching observations from &base_lib..&base_dsn; | ||||||
|  |  | ||||||
| %if &engine_type=OLEDB %then %do; | %if &engine_type=OLEDB %then %do; | ||||||
|   %let temp_table=##BITEMP_&base_dsn; |   %let temp_table=##%mf_getuniquefileref(prefix=BTMP)_&base_dsn; | ||||||
|   %if &loadtype=BITEMPORAL or &loadtype=TXTEMPORAL %then |   %if &loadtype=BITEMPORAL or &loadtype=TXTEMPORAL %then | ||||||
|         %let base_table=(select * from [dbo].&base_dsn |         %let base_table=(select * from [dbo].&base_dsn | ||||||
|                         where convert(datetime,&SQLNOW) < &tech_to ); |                         where convert(datetime,&SQLNOW) < &tech_to ); | ||||||
| @@ -1049,7 +1050,7 @@ run; | |||||||
|   %let cat_string=catx('|' ,&bus_from,&bus_to); |   %let cat_string=catx('|' ,&bus_from,&bus_to); | ||||||
|  |  | ||||||
|   data work.bitemp5a_lkp (keep=&md5_col) |   data work.bitemp5a_lkp (keep=&md5_col) | ||||||
|     %if "%substr(&sysver,1,1)" ne "4" and "%substr(&sysver,1,1)" ne "5" %then %do; |     %if "%substr(&sysver,1,1)" ne "4" & "%substr(&sysver,1,1)" ne "5" %then %do; | ||||||
|       /nonote2err |       /nonote2err | ||||||
|     %end; |     %end; | ||||||
|     ; |     ; | ||||||
| @@ -1191,10 +1192,10 @@ run; | |||||||
|   /* if OLEDB then create a temp table for efficiency */ |   /* if OLEDB then create a temp table for efficiency */ | ||||||
|   %local innertable; |   %local innertable; | ||||||
|   %if &engine_type=OLEDB %then %do; |   %if &engine_type=OLEDB %then %do; | ||||||
|     %let innertable=[##BITEMP_&base_dsn]; |     %let innertable=[&temp_table]; | ||||||
|     %let top_table=[dbo].&base_dsn; |     %let top_table=[dbo].&base_dsn; | ||||||
|     %let flexinow=&SQLNOW; |     %let flexinow=&SQLNOW; | ||||||
|     create table &base_lib.."##BITEMP_&base_dsn"n as |     create table &base_lib.."&temp_table"n as | ||||||
|       select * from work.bitemp5d_subquery; |       select * from work.bitemp5d_subquery; | ||||||
|     /* open up a connection for pass through SQL */ |     /* open up a connection for pass through SQL */ | ||||||
|     %dc_assignlib(WRITE,&base_lib,passthru=myAlias) |     %dc_assignlib(WRITE,&base_lib,passthru=myAlias) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user