// mapping.js

/**
 * mapping module.
 * @module mapping
 */

import { RAWError } from "./utils";
import { getAggregator, getAggregatorArray } from "./expressionRegister";
import difference from "lodash/difference";
import get from "lodash/get";
import set from "lodash/set";
import groupBy from "lodash/groupBy";
import groupByAsMap from "./groupBy";
import mapValues from "lodash/mapValues";
import flatten from "lodash/flatten";
import keyBy from "lodash/keyBy";
import find from "lodash/find";
import range from "lodash/range";
import { group, groups, rollup, rollups } from "d3-array";
import isString from "lodash/isString";

/**
 * dimensions validator
 *
 * Checks that a mapper definition is a non-empty array.
 *
 * @param {array} dimensions mapper definition to check
 * @throws {RAWError} if `dimensions` is not an array or is empty
 */

export function validateMapperDefinition(dimensions) {
  if (!Array.isArray(dimensions)) {
    throw new RAWError("dimensions must be an array");
  }

  if (dimensions.length === 0) {
    throw new RAWError("empty dimensions");
  }
}

/**
 * declarative dimensions validator
 *
 * Checks that a declarative mapper definition is a non-empty array and that
 * its operations are compatible: either one or more "get" operations, or
 * exactly one operation among "rollup" / "rollups", but never both kinds.
 *
 * @param {array} dimensions mapper definition (dimensions with an `operation`)
 * @throws {RAWError} if the definition is not an array, is empty, or mixes
 * operations in an unsupported way
 */
export function validateDeclarativeMapperDefinition(dimensions) {
  // same guard used by validateMapperDefinition: fail with a RAWError instead
  // of a TypeError when something that is not an array is passed in
  if (!Array.isArray(dimensions)) {
    throw new RAWError("dimensions must be an array");
  }

  if (dimensions.length === 0) {
    throw new RAWError("empty dimensions");
  }

  const grouperTypes = ["rollup", "rollups"];
  const getters = dimensions.filter((d) => d.operation === "get");
  const grouperDimension = dimensions.filter((d) =>
    grouperTypes.includes(d.operation)
  );

  if (grouperDimension.length > 1) {
    throw new RAWError(
      `only one operation among ${grouperTypes.join(",")} is allowed`
    );
  }

  if (getters.length === 0 && !grouperDimension.length) {
    throw new RAWError(
      `at least one get operation must be present in a dimension set, or an operation among ${grouperTypes.join(
        ","
      )} must be specified`
    );
  }

  if (getters.length > 0 && grouperDimension.length) {
    throw new RAWError(
      `'${grouperDimension[0].operation}' operation was specified, you cannot define other get operations`
    );
  }
}

/**
 * mapping validator
 *
 * Collects every problem found in a mapping and reports them together:
 * - required dimensions that have no column mapped;
 * - columns whose type is not listed in the dimension "validTypes"
 *   (only checked when both "validTypes" and `types` are available);
 * - more than one column on a dimension that is not "multiple";
 * - fewer columns than "minValues" or more than "maxValues".
 *
 * Only dimensions with at least one mapped column go through the per-dimension
 * checks.
 *
 * @param {array} dimensions mapper definition
 * @param {object} _mapping mapping configuration, keyed by dimension id
 * @param {object} types column datatypes, keyed by column name
 * @throws {RAWError} with one line per problem found
 */

export function validateMapping(dimensions, _mapping, types) {
  // mapping values must be column names: normalize every value to an array
  const mapping = mapValues(_mapping, (v) => ({
    ...v,
    value: Array.isArray(v.value)
      ? v.value
      : isString(v.value)
      ? [v.value]
      : [],
  }));

  const dimensionsById = keyBy(dimensions, "id");

  // validating that all required dimensions are provided to mapping
  const requiredDimensions = dimensions
    .filter((d) => d.required)
    .map((d) => d.id)
    .sort();

  const providedDimensions = Object.keys(mapping)
    .filter((k) => mapping[k].value.length > 0)
    .sort();

  const missing = difference(requiredDimensions, providedDimensions);

  const errors = [];

  if (missing.length > 0) {
    errors.push(
      `Some required dimensions were not mapped. Missing ids are: ${missing.join(
        ", "
      )}`
    );
  }

  // validating that provided dimensions are mapped to correct types ("validTypes" attribute of dimension)
  // validating multiple, minValues and maxValues attributes
  providedDimensions.forEach((d) => {
    const values = mapping[d].value;
    const dim = dimensionsById[d];

    const declaredTypes = get(dim, "validTypes");
    if (declaredTypes && types) {
      const validTypes = (Array.isArray(declaredTypes)
        ? declaredTypes
        : [declaredTypes]
      ).map((item) => item.toLowerCase());

      values.forEach((v) => {
        const type = types[v];
        // a column that is missing from `types` (or whose type is not a string)
        // can never match: report it instead of crashing on toLowerCase
        const typeName = isString(type) ? type.toLowerCase() : undefined;
        if (!validTypes.includes(typeName)) {
          errors.push(
            `Invalid type: column ${v} of type ${type} cannot be used on dimension with id ${d}, accepting ${validTypes.join(
              ", "
            )}`
          );
        }
      });
    }

    const multiple = get(dim, "multiple", false);
    if (!multiple && values.length > 1) {
      errors.push(
        `dimension ${d} does not support multiple columns in mapping`
      );
    }

    const minValues = get(dim, "minValues");
    if (minValues !== undefined && values.length < minValues) {
      errors.push(
        `dimension ${d} requires at least ${minValues} columns in mapping`
      );
    }

    const maxValues = get(dim, "maxValues");
    if (maxValues !== undefined && values.length > maxValues) {
      errors.push(
        `dimension ${d} accepts at most ${maxValues} columns in mapping`
      );
    }
  });

  // #TODO: [future] if using registered functions check for existence
  // #TODO: [future] if using expressions check for existence

  if (errors.length) {
    throw new RAWError(errors.join("\n"));
  }
}

/**
 * mapping annotator
 *
 * Returns a copy of the mapping where, for every mapped dimension:
 * - an empty (falsy) value is set to undefined;
 * - a non-multiple dimension gets its value reduced to a single column name
 *   and `dataType` set to that column type;
 * - a multiple dimension keeps its value and gets `dataType` set to the list
 *   of its column types.
 *
 * The entries of `_mapping` are copied before being annotated, so the object
 * passed by the caller is left untouched.
 *
 * @param {array} dimensions mapper definition
 * @param {object} _mapping mapping configuration, keyed by dimension id
 * @param {object} types column datatypes, keyed by column name
 * @return {object} the annotated mapping
 */
export function annotateMapping(dimensions, _mapping, types) {
  const dimensionsById = keyBy(dimensions, "id");
  const mapping = {};

  Object.keys(_mapping).forEach((id) => {
    // copy the entry: annotating it in place would leak into the caller's object
    const entry = { ..._mapping[id] };
    mapping[id] = entry;

    //dimension not mapped: set value to undefined
    if (!entry.value) {
      entry.value = undefined;
      return;
    }

    // ids without a matching dimension are treated as non-multiple
    if (get(dimensionsById[id], "multiple")) {
      //setting data types for multiple dimensions
      const columns = Array.isArray(entry.value) ? entry.value : [entry.value];
      entry.dataType = columns.map((v) => get(types, v));
      return;
    }

    //not-multiple values back to scalar
    const column = Array.isArray(entry.value) ? entry.value[0] : entry.value;
    entry.value = column;
    //setting data type
    entry.dataType = get(types, column);
  });

  return mapping;
}

/**
 * Normalizes every mapping value to an array and fills the mapping of the
 * dimensions targeted by "proxy" dimensions.
 *
 * A proxy dimension declares `targets` as
 * `{ [targetDimensionId]: { [targetExpression]: sourceExpression } }`:
 * for each pair, the value found at `sourceExpression` in the proxy's own
 * (original) mapping entry is written at `targetExpression` in the target
 * dimension entry, which is created when missing.
 *
 * @param {array} dimensions mapper definition
 * @param {object} mapping mapping configuration, keyed by dimension id
 * @return {object} a new mapping object; `mapping` itself is not modified
 */
function hydrateProxies(dimensions, mapping) {
  // an unmapped dimension (undefined / null value) becomes an empty list, not
  // [undefined], which would later be counted as one mapped column
  const hydrated = mapValues(mapping, (v) => ({
    ...v,
    value: Array.isArray(v.value)
      ? v.value
      : v.value == null
      ? []
      : [v.value],
  }));

  const proxiesDimensions = dimensions.filter(
    (dim) => dim.operation === "proxy"
  );

  proxiesDimensions.forEach((dimension) => {
    const targets = get(dimension, "targets");
    if (!targets) {
      return;
    }

    Object.keys(targets).forEach((targetDimensionId) => {
      //should be an obj with keys as target expressions and values as source expressions
      const targetsMap = targets[targetDimensionId];

      Object.keys(targetsMap).forEach((targetExpression) => {
        const sourceExpression = targetsMap[targetExpression];
        // source values are read from the original, non-normalized mapping
        const value = get(mapping, `[${dimension.id}][${sourceExpression}]`);
        if (!hydrated[targetDimensionId]) {
          hydrated[targetDimensionId] = {};
        }
        set(hydrated[targetDimensionId], targetExpression, value);
      });
    });
  });

  return hydrated;
}

/**
 * Builds a function that reads one or more properties from an item.
 *
 * - a non-array `names` is used as a single property path;
 * - an array with exactly one name yields the bare value of that property;
 * - any other array yields an array with one value per name.
 *
 * @param {string|array} names property name, or list of property names
 * @return {function} getter taking an item and returning the value(s)
 */
export function arrayGetter(names) {
  if (!Array.isArray(names)) {
    return (item) => get(item, names);
  }

  if (names.length === 1) {
    // single-name lists are unwrapped: the getter returns a scalar
    return (item) => get(item, names[0]);
  }

  return (item) => {
    const picked = [];
    for (const name of names) {
      picked.push(get(item, name));
    }
    return picked;
  };
}

/**
 * mapper generator
 *
 * Validates the declarative mapper definition and the mapping, then returns a
 * function that converts a dataset (array of rows) into the mapped structure:
 * - "get" dimensions are read from each row, or aggregated per group when a
 *   "groupAggregate" dimension is mapped;
 * - at most one grouping operation ("groupBy", "group", "groups", "rollup",
 *   "rollups") shapes the final result; without one, a flat array of items is
 *   returned.
 *
 * @param {array} dimensionsWithOperations mapper definition (dimensions with an `operation`)
 * @param {object} _mapping mapping configuration, keyed by dimension id
 * @param {object} types column types, keyed by column name
 * @return {function} the mapper function; it returns undefined for falsy data
 * @throws {RAWError} when the definition or the mapping are not valid, or when
 * more than one grouping operation is mapped
 */

// #TODO: REFACTOR
function makeMapper(dimensionsWithOperations, _mapping, types) {
  validateDeclarativeMapperDefinition(dimensionsWithOperations);
  let mapping = hydrateProxies(dimensionsWithOperations, _mapping);
  validateMapping(dimensionsWithOperations, mapping, types);

  // NOTE(review): from here on the mapping is rebuilt from the raw `_mapping`,
  // so values hydrated from proxies are used for validation only — confirm
  // this is intended.
  mapping = mapValues(_mapping, (v) => ({
    ...v,
    value: Array.isArray(v.value)
      ? v.value
      : isString(v.value)
      ? [v.value]
      : [],
  }));

  const mappingValues = mapValues(mapping, (v) => v.value);
  const mappingConfigs = mapValues(mapping, (v) => get(v, "config"));

  // id of the first dimension with the given operation that has an entry in
  // the mapping, or undefined when there is none
  const findMappedDimension = (operation) =>
    get(
      find(
        dimensionsWithOperations,
        (d) => d.operation === operation && mappingValues[d.id] !== undefined
      ),
      "id"
    );

  const getDimensions = dimensionsWithOperations
    .filter((d) => d.operation === "get" && mappingValues[d.id] !== undefined)
    .map((g) => g.id);

  const groupAggregateDimension = findMappedDimension("groupAggregate");
  const groupByDimension = findMappedDimension("groupBy");
  const groupDimension = findMappedDimension("group");
  const groupsDimension = findMappedDimension("groups");
  const rollupDimension = findMappedDimension("rollup");
  const rollupsDimension = findMappedDimension("rollups");
  const rollupLeafDimension = findMappedDimension("rollupLeaf");

  //#TODO: "hierarchy" operation is not handled ... is this still needed?
  //#TODO: TAKE IN ACCOUNT GROUP AGGREGATE DUE TO FORMATS

  const candidateGroupers = [
    groupByDimension,
    groupDimension,
    groupsDimension,
    rollupDimension,
    rollupsDimension,
  ].filter((x) => !!x);
  if (candidateGroupers.length > 1) {
    throw new RAWError(
      "only one of these operations is allowed in a mapper definition: 'group', 'groups', 'groupBy', 'rollup', 'rollups'"
    );
  }

  // undefined when no grouping operation is mapped
  const grouperDimension = candidateGroupers[0];

  const rollupGrouperDimension = rollupDimension || rollupsDimension;

  // "group" / "groups" dimension to carry along when rows are aggregated
  const aggregatedGroupDimension = groupDimension || groupsDimension;

  return function (data) {
    if (!data) {
      return;
    }

    let tabularData;

    //apply grouping operations if any
    if (groupAggregateDimension) {
      // #todo: this is complex. allow only strings in this case
      const identifiers = flatten([mappingValues[groupAggregateDimension]]);

      // rows sharing the same values on the identifier columns fall in the
      // same group; the JSON-encoded values are used as group label
      const dataGroups = groupBy(data, (row) =>
        JSON.stringify(identifiers.map((x) => get(row, x)))
      );

      tabularData = Object.keys(dataGroups).map((label) => {
        const item = {};
        const groupRows = dataGroups[label];

        // single-column keys are unwrapped to a scalar
        const groupKey = JSON.parse(label);
        item[groupAggregateDimension] =
          groupKey.length === 1 ? groupKey[0] : groupKey;

        getDimensions.forEach((getter) => {
          const getterColumn = mappingValues[getter];
          //#GET HERE
          const getterFunction = arrayGetter(getterColumn);
          const allData = groupRows.map((d) => getterFunction(d));

          // getterColumn is a list of column names: the getter is "in the
          // aggregator" when all of its columns are grouping identifiers,
          // hence constant inside the group
          const getterColumns = Array.isArray(getterColumn)
            ? getterColumn
            : [getterColumn];
          const getterInAggregator =
            getterColumns.length > 0 &&
            getterColumns.every((column) => identifiers.includes(column));

          // default aggregation: the (constant) value for grouping columns,
          // the number of rows for any other column
          const aggregator = get(
            mappingConfigs[getter],
            "aggregation",
            getterInAggregator ? (data) => data[0] : (data) => data.length
          );
          const aggregatorFunction =
            Array.isArray(getterColumn) && getterColumn.length > 1
              ? getAggregatorArray(aggregator, getterColumn.length)
              : getAggregator(aggregator);
          item[getter] = aggregatorFunction(allData);
        });

        // values of the "group" / "groups" columns are taken from the first
        // row of the aggregated group
        // NOTE(review): 'groupBy' / 'rollup' / 'rollups' dimensions are not
        // filled in this branch — confirm whether they can be combined with
        // 'groupAggregate'.
        if (aggregatedGroupDimension) {
          const groupColumns = mappingValues[aggregatedGroupDimension];
          item[aggregatedGroupDimension] = Array.isArray(groupColumns)
            ? groupColumns.map((v) => get(groupRows[0], v))
            : get(groupRows[0], groupColumns);
        }

        return item;
      });
    } else {
      const getterFunctionsById = getDimensions.reduce((acc, id) => {
        acc[id] = arrayGetter(mappingValues[id]);
        return acc;
      }, {});

      const itemFiller = (row) =>
        mapValues(getterFunctionsById, (f) => f(row));

      tabularData = data.map((row) => {
        const item = itemFiller(row);
        if (grouperDimension && mappingValues[grouperDimension]) {
          if (Array.isArray(mappingValues[grouperDimension])) {
            item[grouperDimension] = mappingValues[grouperDimension].map((v) =>
              get(row, v)
            );
          } else {
            item[grouperDimension] = get(row, mappingValues[grouperDimension]);
          }
        }
        // getter for rollup aggregation
        // notice that the name __leaf is used only internally.
        if (
          (rollupGrouperDimension && mappingConfigs[rollupGrouperDimension]) ||
          rollupLeafDimension
        ) {
          let rollupConfigAggregationTarget;
          if (rollupLeafDimension) {
            rollupConfigAggregationTarget = get(
              mappingValues,
              rollupLeafDimension
            );
          } else {
            // second element of the [aggregation, target column] pair
            rollupConfigAggregationTarget = get(
              mappingConfigs,
              `[${rollupGrouperDimension}].leafAggregation[1]`
            );
          }
          const getterFunction = arrayGetter(rollupConfigAggregationTarget);
          item["__leaf"] = getterFunction(row);
        }

        return item;
      });
    }

    //#TODO: apply hierarchy operation if any

    if (grouperDimension) {
      if (groupByDimension) {
        return groupByAsMap(tabularData, groupByDimension);
      }

      const grouperDims = Array.isArray(mappingValues[grouperDimension])
        ? mappingValues[grouperDimension]
        : [mappingValues[grouperDimension]];

      // one key accessor per grouping column, i.e. one nesting level each
      const grouperGetters = range(grouperDims.length).map((idx) => (item) =>
        item[grouperDimension][idx]
      );

      if (groupDimension) {
        return group(tabularData, ...grouperGetters);
      }
      if (groupsDimension) {
        return groups(tabularData, ...grouperGetters);
      }

      if (rollupGrouperDimension) {
        // default leaf value: number of items in the leaf
        let rollupAggregation = (v) => v.length;
        let aggregatorFunction;

        if (rollupLeafDimension) {
          const aggName = get(
            mappingConfigs,
            `[${rollupLeafDimension}].aggregation`
          );
          const targetColumn = get(mappingValues, rollupLeafDimension);

          aggregatorFunction =
            Array.isArray(targetColumn) && targetColumn.length > 1
              ? getAggregatorArray(aggName, targetColumn.length)
              : getAggregator(aggName);
        } else {
          const rollupConfigAggregation = get(
            mappingConfigs,
            `[${rollupGrouperDimension}].leafAggregation`
          );

          if (rollupConfigAggregation) {
            if (
              !Array.isArray(rollupConfigAggregation) ||
              rollupConfigAggregation.length !== 2
            ) {
              throw new RAWError(
                "Rollup aggregation should be an array with aggregation function and target column"
              );
            }
            const [aggName, targetColumn] = rollupConfigAggregation;

            aggregatorFunction =
              Array.isArray(targetColumn) && targetColumn.length > 1
                ? getAggregatorArray(aggName, targetColumn.length)
                : getAggregator(aggName);
          }
        }

        if (aggregatorFunction) {
          // aggregate the values stored under the internal __leaf key
          const leafGetter = arrayGetter("__leaf");
          rollupAggregation = (items) =>
            aggregatorFunction(items.map(leafGetter));
        }

        const finalRollupFunction = rollupDimension ? rollup : rollups;
        return finalRollupFunction(
          tabularData,
          rollupAggregation,
          ...grouperGetters
        );
      }
    }

    return tabularData;
  };
}
//#TODO: SHOULD NOT BE DEFAULT
export default makeMapper;