diff --git a/src/components/pnp/findStepColumns.ts b/src/components/pnp/findStepColumns.ts index 1e6bd24f5a..c53094e9ac 100644 --- a/src/components/pnp/findStepColumns.ts +++ b/src/components/pnp/findStepColumns.ts @@ -1,12 +1,29 @@ -import { sortBy } from '@seedcompany/common'; +import { mapOf, sortBy } from '@seedcompany/common'; import levenshtein from 'fastest-levenshtein'; -import { startCase, without } from 'lodash'; +import { startCase } from 'lodash'; import { type Column } from '~/common/xlsx.util'; import { ProductStep as Step } from '../product/dto'; import { type PnpExtractionResult, PnpProblemType } from './extraction-result'; import { type PlanningSheet } from './planning-sheet'; import { type ProgressSheet } from './progress-sheet'; +const ApprovedAliases = mapOf([ + ['draft & keyboard', Step.ExegesisAndFirstDraft], + ['first draft', Step.ExegesisAndFirstDraft], + ['exegesis, 1st draft, keyboard', Step.ExegesisAndFirstDraft], + ['internalization & first draft', Step.ExegesisAndFirstDraft], + ['exegesis 1st draft & keybrd', Step.ExegesisAndFirstDraft], + ['first draft & keyboard', Step.ExegesisAndFirstDraft], + ['exegesis, 1st draft. keyboard', Step.ExegesisAndFirstDraft], + ['team check & 1st testing', Step.TeamCheck], + ['team check & revision', Step.TeamCheck], + ['team check & 1st test', Step.TeamCheck], + ['field test', Step.CommunityTesting], + ['community check', Step.CommunityTesting], + ['community review', Step.CommunityTesting], + ['community testing & revision', Step.CommunityTesting], +]); + /** * Fuzzy match available steps to their column address. */ @@ -15,43 +32,60 @@ export function findStepColumns( result?: PnpExtractionResult, availableSteps: readonly Step[] = [...Step], ) { - const matchedColumns: Partial> = {}; - let remainingSteps = availableSteps; + const matchedColumns = new Map(); + const remainingSteps = new Set(availableSteps); const possibleSteps = sheet.stepLabels .walkRight() .filter((cell) => !!cell.asString) - .map((cell) => ({ label: cell.asString!, column: cell.column, cell })) + .map((cell) => ({ + label: cell.asString!.trim(), + column: cell.column, + cell, + })) .toArray(); possibleSteps.forEach(({ label, column, cell }, index) => { if (index === possibleSteps.length - 1) { // The last step should always be called Completed in CORD per Seth. - // Written PnP already match, but OBS calls it Record. This is mislabeled - // depending on the methodology. - matchedColumns[Step.Completed] = column; + // Written PnP already matches, but OBS calls it Record. + // This is mislabeled depending on the methodology. + matchedColumns.set(Step.Completed, column); return; } - const distances = remainingSteps.map((step) => { - const humanLabel = startCase(step).replace(' And ', ' & '); - const distance = levenshtein.distance(label, humanLabel); - return [step, distance] as const; - }); - // Pick the step that is the closest fuzzy match - const chosen = sortBy( - // 5 is too far ignore those - distances.filter(([_, distance]) => distance < 5), - ([_, distance]) => distance, - )[0]?.[0]; + + const chosen = chooseStep(label, remainingSteps); if (!chosen) { result?.addProblem(NonStandardStep, cell, { label }); return; } - matchedColumns[chosen] = column; - - remainingSteps = without(remainingSteps, chosen); + matchedColumns.set(chosen, column); + remainingSteps.delete(chosen); }); - return matchedColumns as Record; + return matchedColumns as ReadonlyMap; } +const chooseStep = ( + label: string, + available: ReadonlySet, +): Step | undefined => { + const alias = ApprovedAliases.get(label.toLowerCase()); + if (alias) { + return available.has(alias) ? alias : undefined; + } + + const distances = available.values().map((step) => { + const humanLabel = startCase(step).replace(' And ', ' & '); + const distance = levenshtein.distance(label, humanLabel); + return { step, distance }; + }); + // Pick the step that is the closest fuzzy match + const chosen = sortBy( + // 5 is too far ignoring those + distances.filter(({ distance }) => distance < 5), + ({ distance }) => distance, + ).at(0); + return chosen?.step; +}; + const NonStandardStep = PnpProblemType.register({ name: 'NonStandardStep', severity: 'Error', diff --git a/src/components/product-progress/step-progress-extractor.service.ts b/src/components/product-progress/step-progress-extractor.service.ts index b07e1aed50..0bd5825a1e 100644 --- a/src/components/product-progress/step-progress-extractor.service.ts +++ b/src/components/product-progress/step-progress-extractor.service.ts @@ -66,8 +66,8 @@ export class StepProgressExtractor { const parseProgressRow = ( pnp: Pnp, - stepColumns: Record, - planningStepColumns: Record, + stepColumns: ReadonlyMap, + planningStepColumns: ReadonlyMap, result: PnpProgressExtractionResult, ) => (cell: Cell, index: number): ExtractedRow => { @@ -81,7 +81,7 @@ const parseProgressRow = const steps = entries(stepColumns).flatMap( ([step, column]) => { const fiscalYear = pnp.planning.cell( - planningStepColumns[step], + planningStepColumns.get(step)!, planningRow, ); diff --git a/src/components/product/product.extractor.ts b/src/components/product/product.extractor.ts index 06b2813bc8..84ee8855f1 100644 --- a/src/components/product/product.extractor.ts +++ b/src/components/product/product.extractor.ts @@ -81,8 +81,8 @@ export class ProductExtractor { const parseProductRow = ( pnp: Pnp, - stepColumns: Record, - progressStepColumns: Record, + stepColumns: ReadonlyMap, + progressStepColumns: ReadonlyMap, result: PnpPlanningExtractionResult, ) => (cell: Cell, index: number): ExtractedRow => { @@ -94,7 +94,7 @@ const parseProductRow = const steps = entries(stepColumns).flatMap(([step, column]) => { const plannedCell = sheet.cell(column, row); const progressCell = pnp.progress.cell( - progressStepColumns[step], + progressStepColumns.get(step)!, progressRow, );