Skip to content

Support specific aliases for PnP step labels #3461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 57 additions & 23 deletions src/components/pnp/findStepColumns.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,29 @@
import { sortBy } from '@seedcompany/common';
import { mapOf, sortBy } from '@seedcompany/common';
import levenshtein from 'fastest-levenshtein';
import { startCase, without } from 'lodash';
import { startCase } from 'lodash';
import { type Column } from '~/common/xlsx.util';
import { ProductStep as Step } from '../product/dto';
import { type PnpExtractionResult, PnpProblemType } from './extraction-result';
import { type PlanningSheet } from './planning-sheet';
import { type ProgressSheet } from './progress-sheet';

/**
 * Known, non-standard step labels mapped to the step they stand for.
 *
 * Keys are looked up with the label lower-cased (see chooseStep), so every key
 * here must be written in lower case or it can never match.
 * Lookups are exact: punctuation and spacing variants each need their own
 * entry, which is why several near-identical spellings are listed.
 */
const ApprovedAliases = mapOf<string, Step>([
// Labels treated as the exegesis & first draft step
['draft & keyboard', Step.ExegesisAndFirstDraft],
['first draft', Step.ExegesisAndFirstDraft],
['exegesis, 1st draft, keyboard', Step.ExegesisAndFirstDraft],
['internalization & first draft', Step.ExegesisAndFirstDraft],
['exegesis 1st draft & keybrd', Step.ExegesisAndFirstDraft],
['first draft & keyboard', Step.ExegesisAndFirstDraft],
// NOTE(review): differs from the entry above only by '.' instead of ',' —
// presumably a spelling seen in real sheets; confirm before "fixing" it.
['exegesis, 1st draft. keyboard', Step.ExegesisAndFirstDraft],
// Labels treated as the team check step
['team check & 1st testing', Step.TeamCheck],
['team check & revision', Step.TeamCheck],
['team check & 1st test', Step.TeamCheck],
// Labels treated as the community testing step
['field test', Step.CommunityTesting],
['community check', Step.CommunityTesting],
['community review', Step.CommunityTesting],
['community testing & revision', Step.CommunityTesting],
]);

/**
* Match available steps to their column address by approved alias, falling back to a fuzzy match.
*/
Expand All @@ -15,43 +32,60 @@ export function findStepColumns(
result?: PnpExtractionResult,
availableSteps: readonly Step[] = [...Step],
) {
const matchedColumns: Partial<Record<Step, Column>> = {};
let remainingSteps = availableSteps;
const matchedColumns = new Map<Step, Column>();
const remainingSteps = new Set(availableSteps);
const possibleSteps = sheet.stepLabels
.walkRight()
.filter((cell) => !!cell.asString)
.map((cell) => ({ label: cell.asString!, column: cell.column, cell }))
.map((cell) => ({
label: cell.asString!.trim(),
column: cell.column,
cell,
}))
.toArray();
possibleSteps.forEach(({ label, column, cell }, index) => {
if (index === possibleSteps.length - 1) {
// The last step should always be called Completed in CORD per Seth.
// Written PnP already match, but OBS calls it Record. This is mislabeled
// depending on the methodology.
matchedColumns[Step.Completed] = column;
// Written PnP already matches, but OBS calls it Record.
// This is mislabeled depending on the methodology.
matchedColumns.set(Step.Completed, column);
return;
}
const distances = remainingSteps.map((step) => {
const humanLabel = startCase(step).replace(' And ', ' & ');
const distance = levenshtein.distance(label, humanLabel);
return [step, distance] as const;
});
// Pick the step that is the closest fuzzy match
const chosen = sortBy(
// 5 is too far ignore those
distances.filter(([_, distance]) => distance < 5),
([_, distance]) => distance,
)[0]?.[0];

const chosen = chooseStep(label, remainingSteps);
if (!chosen) {
result?.addProblem(NonStandardStep, cell, { label });
return;
}
matchedColumns[chosen] = column;

remainingSteps = without(remainingSteps, chosen);
matchedColumns.set(chosen, column);
remainingSteps.delete(chosen);
});
return matchedColumns as Record<Step, Column>;
return matchedColumns as ReadonlyMap<Step, Column>;
}

/**
 * Resolve a label to one of the steps that can still be chosen.
 *
 * An approved alias (looked up by the lower-cased label) takes precedence.
 * When the label is an alias, no fuzzy matching is attempted: the aliased
 * step is returned if it is still available, otherwise nothing is.
 * Any other label is compared against each available step's human-readable
 * name, and the step with the smallest edit distance below 5 is returned.
 *
 * @param label the label text to resolve
 * @param available the steps that may still be chosen
 * @returns the chosen step, or undefined when nothing matches
 */
const chooseStep = (
  label: string,
  available: ReadonlySet<Step>,
): Step | undefined => {
  const aliased = ApprovedAliases.get(label.toLowerCase());
  if (aliased) {
    // A recognized alias is authoritative, even when its step is already taken.
    if (!available.has(aliased)) {
      return undefined;
    }
    return aliased;
  }

  // Collect only the candidates that are close enough to be considered.
  const candidates: Array<{ step: Step; distance: number }> = [];
  for (const step of available) {
    // Space out the step name and swap the first ' And ' for ' & '.
    const humanLabel = startCase(step).replace(' And ', ' & ');
    const distance = levenshtein.distance(label, humanLabel);
    // A distance of 5 or more is too far; ignore those.
    if (distance < 5) {
      candidates.push({ step, distance });
    }
  }

  // Pick the step that is the closest fuzzy match
  const closest = sortBy(candidates, (candidate) => candidate.distance).at(0);
  return closest?.step;
};

const NonStandardStep = PnpProblemType.register({
name: 'NonStandardStep',
severity: 'Error',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ export class StepProgressExtractor {
const parseProgressRow =
(
pnp: Pnp,
stepColumns: Record<Step, Column>,
planningStepColumns: Record<Step, Column>,
stepColumns: ReadonlyMap<Step, Column>,
planningStepColumns: ReadonlyMap<Step, Column>,
result: PnpProgressExtractionResult,
) =>
(cell: Cell<ProgressSheet>, index: number): ExtractedRow => {
Expand All @@ -81,7 +81,7 @@ const parseProgressRow =
const steps = entries(stepColumns).flatMap<StepProgressInput>(
([step, column]) => {
const fiscalYear = pnp.planning.cell(
planningStepColumns[step],
planningStepColumns.get(step)!,
planningRow,
);

Expand Down
6 changes: 3 additions & 3 deletions src/components/product/product.extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ export class ProductExtractor {
const parseProductRow =
(
pnp: Pnp,
stepColumns: Record<Step, Column>,
progressStepColumns: Record<Step, Column>,
stepColumns: ReadonlyMap<Step, Column>,
progressStepColumns: ReadonlyMap<Step, Column>,
result: PnpPlanningExtractionResult,
) =>
(cell: Cell<PlanningSheet>, index: number): ExtractedRow => {
Expand All @@ -94,7 +94,7 @@ const parseProductRow =
const steps = entries(stepColumns).flatMap(([step, column]) => {
const plannedCell = sheet.cell(column, row);
const progressCell = pnp.progress.cell(
progressStepColumns[step],
progressStepColumns.get(step)!,
progressRow,
);

Expand Down