Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BHBC-1530: Update Moose transformations to account for all fields. #680

Merged
merged 7 commits into from
Dec 21, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 48 additions & 17 deletions api/src/json-schema/transformation-schema.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,40 +208,71 @@ describe('example submission transformation schema', () => {
{
fileName: 'event',
columns: [
{ source: 'id', target: 'id' },
{ source: 'eventID', target: 'eventID' },
{ source: 'eventDate', target: 'eventDate' },
{ source: ['verbatimCoordinatesUTM', 'verbatimCoordinatesLatLong'], target: 'verbatimCoordinates' }
{ source: { columns: ['id'] }, target: 'id' },
{ source: { columns: ['eventID'] }, target: 'eventID' },
{ source: { columns: ['eventDate'] }, target: 'eventDate' },
{
source: { columns: ['verbatimCoordinatesUTM', 'verbatimCoordinatesLatLong'] },
target: 'verbatimCoordinates'
}
]
},
{
fileName: 'occurrence',
conditionalFields: ['individualCount'],
columns: [
{ source: 'id', target: 'id' },
{ source: 'occurrenceID', target: 'occurrenceID' },
{ source: 'individualCount', target: 'individualCount' },
{ source: 'vernacularName', target: 'associatedTaxa' },
{ source: 'lifeStage', target: 'lifeStage' },
{ source: 'sex', target: 'sex' }
{ source: { columns: ['id'] }, target: 'id' },
{ source: { columns: ['occurrenceID'] }, target: 'occurrenceID' },
{ source: { columns: ['individualCount'] }, target: 'individualCount' },
{ source: { columns: ['vernacularName'] }, target: 'associatedTaxa' },
{ source: { columns: ['lifeStage'] }, target: 'lifeStage' },
{ source: { columns: ['sex'] }, target: 'sex' },
{ source: { value: 'Approved' }, target: 'Status' }
]
},
{
fileName: 'taxon',
columns: [
{ source: 'id', target: 'id' },
{ source: 'occurrenceID', target: 'occurrenceID' },
{ source: 'vernacularName', target: 'vernacularName' }
{ source: { columns: ['id'] }, target: 'id' },
{ source: { columns: ['occurrenceID'] }, target: 'occurrenceID' },
{ source: { columns: ['vernacularName'] }, target: 'vernacularName' }
]
},
{
fileName: 'resourcerelationship',
conditionalFields: ['resourceID'],
columns: [
{ source: 'id', target: 'id' },
{ source: 'resourceID', target: 'resourceID' },
{ source: 'relatedResourceID', target: 'relatedResourceID' },
{ source: 'relationshipOfResource', target: 'relationshipOfResource' }
{ source: { columns: ['id'] }, target: 'id' },
{ source: { columns: ['resourceID'] }, target: 'resourceID' },
{ source: { columns: ['relatedResourceID'] }, target: 'relatedResourceID' },
{ source: { columns: ['relationshipOfResource'] }, target: 'relationshipOfResource' }
]
},
{
fileName: 'measurementorfact',
columns: [
{ source: { columns: ['id'] }, target: 'id' },
{ source: { columns: ['eventID'] }, target: 'measurementID' },
{ source: { value: 'Habitat Description' }, target: 'measurementType' },
{ source: { columns: ['effort_habitat_description'] }, target: 'measurementValue' }
]
},
{
fileName: 'measurementorfact',
columns: [
{ source: { columns: ['id'] }, target: 'id' },
{ source: { columns: ['occurrenceID'] }, target: 'measurementID' },
{ source: { value: 'Stratum' }, target: 'measurementType' },
{ source: { columns: ['summary_stratum'] }, target: 'measurementValue' }
]
},
{
fileName: 'measurementorfact',
columns: [
{ source: { columns: ['id'] }, target: 'id' },
{ source: { columns: ['occurrenceID'] }, target: 'measurementID' },
{ source: { value: 'Activity' }, target: 'measurementType' },
{ source: { columns: ['observation_activity'] }, target: 'measurementValue' }
]
}
]
Expand Down
37 changes: 28 additions & 9 deletions api/src/json-schema/transformation-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ export const submissionTransformationSchema = {
}
]
}
}
},
additionalProperties: false
}
},
additionalProperties: false
Expand Down Expand Up @@ -164,23 +165,41 @@ export const submissionTransformationSchema = {
source: {
oneOf: [
{
type: 'string',
description: 'The name of a source property'
type: 'object',
required: ['columns'],
properties: {
columns: {
type: 'array',
items: {
type: 'string'
}
},
separator: {
type: 'string'
},
unique: {
type: 'string'
}
},
additionalProperties: false
},
{
type: 'array',
items: {
type: 'string'
type: 'object',
required: ['value'],
properties: {
value: {
type: ['string', 'number']
}
},
description:
'An array of source properties. The first property with a defined value will be used.'
additionalProperties: false
}
]
},
target: {
type: 'string'
}
}
},
additionalProperties: false
}
},
conditionalFields: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ GET.apiDoc = {
type: 'string'
}
},

rows: {
type: 'array',
items: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export type TransformSchema = {
postTransformations?: PostTransformationRelatopnshipSchema[];
};

export type ParseColumnSchema = { source: string; target: string };
/**
 * Describes how one target column is populated by the parse step.
 *
 * - `source.columns`: names of source columns to read from the row. The parser
 *   iterates them in order and stops at the first one holding a truthy value.
 * - `source.value`: a fixed literal written to the target column. The
 *   transformation JSON schema restricts this to a string or a number, so the
 *   type is narrowed from `any` to match.
 * - `target`: name of the column to write in the output row.
 */
export type ParseColumnSchema = {
  source: { columns?: string[]; value?: string | number };
  target: string;
};

export type ParseSchema = {
fileName: string;
Expand All @@ -59,8 +59,12 @@ export class TransformationSchemaParser {
}
}

/**
 * Get every flatten schema defined in the transformation schema.
 *
 * Queries `this.transformationSchema` with the path from `getFlattenJsonPath()`
 * and returns the first match, or an empty array when that match is falsy.
 *
 * @return {*}  {(FlattenSchema[] | [])}
 */
getAllFlattenSchemas(): FlattenSchema[] | [] {
  const flattenMatches = jsonpath.query(this.transformationSchema, this.getFlattenJsonPath());

  return flattenMatches?.[0] || [];
}

getFlattenSchemas(fileName: string): FlattenSchema | null {
return jsonpath.query(this.transformationSchema, this.getFlattenJsonPath(fileName))?.[0] || null;
return jsonpath.query(this.transformationSchema, this.getFlattenJsonPathByFileName(fileName))?.[0] || null;
}

getTransformSchemas(): TransformSchema[] {
Expand All @@ -71,7 +75,11 @@ export class TransformationSchemaParser {
return jsonpath.query(this.transformationSchema, this.getParseJsonPath())?.[0] || [];
}

getFlattenJsonPath(fileName: string): string {
/**
 * Get the JSONPath expression that selects the whole `flatten` section of the
 * transformation schema.
 *
 * @return {*}  {string} the JSONPath expression `$.flatten`
 */
getFlattenJsonPath(): string {
return `$.flatten`;
}

/**
 * Get the JSONPath expression that selects the entries of the `flatten`
 * section whose `fileName` equals the given name.
 *
 * NOTE(review): `fileName` is inserted into the expression verbatim, so a name
 * containing a single quote would yield a malformed filter — confirm callers
 * only pass schema-defined names.
 *
 * @param {string} fileName the file name to filter on
 * @return {*}  {string} the JSONPath filter expression
 */
getFlattenJsonPathByFileName(fileName: string): string {
  const filterPrefix = "$.flatten[?(@.fileName == '";
  const filterSuffix = "')]";

  return filterPrefix + fileName + filterSuffix;
}

Expand Down
95 changes: 60 additions & 35 deletions api/src/utils/media/xlsx/transformation/xlsx-transformation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { CSVWorksheet } from '../../csv/csv-file';
import { XLSXCSV } from '../xlsx-file';
import {
Condition,
FlattenSchema,
PostTransformationRelatopnshipSchema,
TransformationFieldSchema,
TransformationSchemaParser,
Expand Down Expand Up @@ -62,49 +63,55 @@ export class XLSXTransformation {
* @memberof XLSXTransformation
*/
_flattenData(): FlattenedRowPartsBySourceFile[][] {
const rowsBySourceFileArray: FlattenedRowPartsBySourceFile[][] = [];
let rowsBySourceFileArray: FlattenedRowPartsBySourceFile[][] = [];

Object.entries(this.xlsxCsv.workbook.worksheets).forEach(([worksheetName, worksheet]) => {
// Get the file structure schema for the given fileName
const fileStructure = this.transformationSchemaParser.getFlattenSchemas(worksheetName);
// Get all flatten schemas
const flattenSchemas = this.transformationSchemaParser.getAllFlattenSchemas();

if (!fileStructure) {
// Build an array of [worksheetName, worksheet] based on the order of the flatten schemas. This is necessary
// because the flattening process requires parsing the worksheets in a specific order, as specified by the flatten
// section of the transformation schema.
const orderedWorksheetsByFlattenSchema: [string, CSVWorksheet][] = [];
flattenSchemas.forEach((flattenSchema) => {
const worksheet = this.xlsxCsv.workbook.worksheets[flattenSchema.fileName];

if (worksheet) {
orderedWorksheetsByFlattenSchema.push([flattenSchema.fileName, worksheet]);
}
});

// Iterate over each worksheet in the ordered array of worksheets
orderedWorksheetsByFlattenSchema.forEach(([worksheetName, worksheet]) => {
// Get the flatten file structure schema for the worksheet, based on the worksheet name
const flattenSchema = this.transformationSchemaParser.getFlattenSchemas(worksheetName);

if (!flattenSchema) {
// No schema for this worksheet, skip it
return;
}

// Get all rows, as objects
// Get all worksheet rows as an array of objects
const rowObjects = worksheet.getRowObjects();

if (!fileStructure.parent) {
if (!flattenSchema.parent) {
// Handle root records, that have no parent record

rowObjects.forEach((rowObject, rowIndex) => {
const uniqueId = this._buildMultiColumnID(worksheet, rowIndex, fileStructure.uniqueId);

const newRecord = {
sourceFile: fileStructure.fileName,
uniqueId: uniqueId,
row: rowObject
};

rowsBySourceFileArray.push([newRecord]);
});
const flattenedRootRecords = this._flattenRootRecords(flattenSchema, worksheet, rowObjects);
rowsBySourceFileArray = rowsBySourceFileArray.concat(flattenedRootRecords);
} else {
// Handle child records, that have a parent record
const parentFileName = flattenSchema.parent.fileName.toLowerCase();
const parentUniqueIdColumns = flattenSchema.parent.uniqueId;

const parentFileName = fileStructure.parent.fileName.toLowerCase();
const parentUniqueIdColumns = fileStructure.parent.uniqueId;

const fileName = fileStructure.fileName;
const uniqueIdColumns = fileStructure.uniqueId;
const childFileName = flattenSchema.fileName;
const childUniqueIdColumns = flattenSchema.uniqueId;

rowObjects.forEach((rowObject, rowIndex) => {
const parentUniqueId = this._buildMultiColumnID(worksheet, rowIndex, parentUniqueIdColumns).toLowerCase();

const uniqueId = this._buildMultiColumnID(worksheet, rowIndex, uniqueIdColumns);
const uniqueId = this._buildMultiColumnID(worksheet, rowIndex, childUniqueIdColumns);

const newRecord = {
sourceFile: fileName,
sourceFile: childFileName,
uniqueId: uniqueId,
row: rowObject
};
Expand Down Expand Up @@ -143,7 +150,7 @@ export class XLSXTransformation {
};

foundRecordToModify = true;
} else if (existingRowFileName === fileName.toLowerCase()) {
} else if (existingRowFileName === childFileName.toLowerCase()) {
// This array already contains a record from the same file as `newRecord` and will need to be duplicated
recordsToModify[recordsToModifyIndex] = {
...recordsToModify[recordsToModifyIndex],
Expand Down Expand Up @@ -248,6 +255,28 @@ export class XLSXTransformation {
return rowsBySourceFileArray;
}

/**
 * Flatten the rows of a root worksheet (one whose flatten schema has no parent).
 *
 * Each row becomes its own single-element array holding a record that carries
 * the schema's `fileName` as `sourceFile`, a unique id built from the schema's
 * `uniqueId` columns for that row index, and the original row object.
 *
 * @param {FlattenSchema} flattenSchema the flatten schema for the worksheet
 * @param {CSVWorksheet} worksheet the worksheet the rows came from
 * @param {object[]} rowObjects the worksheet rows, as objects
 * @return {*}  {FlattenedRowPartsBySourceFile[][]} one single-record array per row
 */
_flattenRootRecords(
  flattenSchema: FlattenSchema,
  worksheet: CSVWorksheet,
  rowObjects: object[]
): FlattenedRowPartsBySourceFile[][] {
  const { fileName: sourceFile, uniqueId: uniqueIdColumns } = flattenSchema;

  return rowObjects.map((row, index) => {
    const rootRecord = {
      sourceFile,
      uniqueId: this._buildMultiColumnID(worksheet, index, uniqueIdColumns),
      row
    };

    // Every root row starts its own group of flattened parts
    return [rootRecord];
  });
}

/**
 * Build an id for a worksheet row from one or more of its columns.
 *
 * Delegates to `_buildMultiColumnValue`, passing ':' as the final argument
 * (presumably the separator placed between column values — confirm against
 * `_buildMultiColumnValue`).
 *
 * @param {CSVWorksheet} worksheet the worksheet containing the row
 * @param {number} rowIndex index of the row within the worksheet
 * @param {string[]} columnNames names of the columns that make up the id
 */
_buildMultiColumnID(worksheet: CSVWorksheet, rowIndex: number, columnNames: string[]) {
  const idJoinToken = ':';

  return this._buildMultiColumnValue(worksheet, rowIndex, columnNames, idJoinToken);
}
Expand Down Expand Up @@ -520,9 +549,9 @@ export class XLSXTransformation {
const newRowObject = {};

for (const column of columns) {
if (Array.isArray(column.source)) {
if (column.source.columns && column.source.columns?.length) {
// iterate over source columns
for (const sourceColumn of column.source) {
for (const sourceColumn of column.source.columns) {
const sourceValue = rowObject[sourceColumn];

if (sourceValue) {
Expand All @@ -531,12 +560,8 @@ export class XLSXTransformation {
break;
}
}
} else {
const sourceValue = rowObject[column.source];

if (sourceValue) {
newRowObject[column.target] = sourceValue;
}
} else if (column.source.value) {
newRowObject[column.target] = column.source.value;
}
}

Expand Down
2 changes: 1 addition & 1 deletion app/src/features/surveys/view/SurveyObservations.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export enum ClassGrouping {
WARNING = 'Warning'
}

const finalStatus = ['Rejected', 'Darwin Core Validated', 'Template Validated', 'System Error'];
const finalStatus = ['Rejected', 'Darwin Core Validated', 'Template Validated', 'Template Transformed', 'System Error'];

const SurveyObservations: React.FC<ISurveyObservationsProps> = (props) => {
const biohubApi = useBiohubApi();
Expand Down
Loading