dataset.js

/**
 * dataset module.
 * @module dataset
 */

import isNumber from "lodash/isNumber";
import isBoolean from "lodash/isBoolean";
import isDate from "lodash/isDate";
import isPlainObject from "lodash/isPlainObject";
import isString from "lodash/isString";
import get from "lodash/get";
import isFunction from "lodash/isFunction";
import maxBy from "lodash/maxBy";
import dayjs from "dayjs";
import customParseFormat from "dayjs/plugin/customParseFormat";
import utc from "dayjs/plugin/utc";
import { RAWError } from './utils'

dayjs.extend(customParseFormat);

dayjs.extend(utc);

function getType(dataType) {
  if (isPlainObject(dataType)) {
    return getType(dataType.type);
  }

  if (isString(dataType)) {
    switch (dataType.toLowerCase()) {
      case "string":
        return String;
      case "number":
        return Number;
      case "boolean":
        return Boolean;
      case "date":
        return Date;

      default:
        return String;
    }
  }

  return dataType;
}

function getFormatter(dataType) {
  if (!isPlainObject(dataType)) {
    return undefined;
  }

  if (isFunction(dataType.decode)) {
    return dataType.decode;
  }

  if (getType(dataType) === Date) {
    if (isString(dataType.dateFormat)) {
      return (value) => dayjs(value, dataType.dateFormat).utc().toDate();
    }
  }

  if (dataType.type === Boolean) {
  }

  return undefined;
}

function getValueType(value, strict) {
  let jsonValue = value;
  if (!strict) {
    try {
      jsonValue = JSON.parse(value);
    } catch (err) {}
  }

  if (isNumber(jsonValue)) {
    return Number;
  }

  if (isBoolean(jsonValue)) {
    return Boolean;
  }

  if (isDate(value)) {
    return Date;
  }

  return String;
}

function castTypeToString(type) {
  return type.name ? type.name.toLowerCase() : type;
}

function castTypesToString(types) {
  return Object.keys(types).reduce((acc, item) => {
    acc[item] = castTypeToString(types[item]);
    return acc;
  }, {});
}

/**
 * Types guessing
 *
 * @param {array} data data to be parsed (list of objects)
 * @param {boolean} strict if strict is false, a JSON parsing of the values is tried. (if strict=false: "true" -> true)
 * @return {object} the types guessed (object with column names as keys and value type as value)
 */
export function inferTypes(data, strict) {
  let candidateTypes = {};
  if (!Array.isArray(data)) {
    return candidateTypes;
  }

  data.forEach((datum) => {
    Object.keys(datum).forEach((key) => {
      if (candidateTypes[key] === undefined) {
        candidateTypes[key] = [];
      }
      const inferredType = getValueType(datum[key], strict);
      candidateTypes[key].push(castTypeToString(inferredType));
    });
  });

  let inferredTypes = {};
  Object.keys(candidateTypes).map((k) => {
    let counts = {};
    candidateTypes[k].forEach((type) => {
      if (!counts[type]) {
        counts[type] = { count: 0, value: type };
      }
      counts[type].count += 1;
    });

    const mostFrequentTypeKey = maxBy(
      Object.keys(counts),
      (t) => counts[t].count
    );
    inferredTypes[k] = counts[mostFrequentTypeKey].value;
  });
  return inferredTypes;
}

function basicGetter(rowValue, dataType) {
  if (rowValue === null || rowValue === undefined) {
    return null;
  }
  return dataType(rowValue);
}


function checkType(value, type){
 
  if(type === Number && isNaN(value )){
    throw new RAWError(`invalid type number for value ${value}`)
  }

  if(type === Date && (!(value instanceof Date) || !dayjs(value).isValid())){
    throw new RAWError(`invalid type date for value ${value}`)
  }

}

// builds a parser function
function rowParser(types, onError) {
  let propGetters = {};

  Object.keys(types).forEach((k) => {
    let dataType = types[k];
    const type = getType(dataType);
    const formatter = getFormatter(dataType);
    propGetters[k] = (row) => {
      const rowValue = get(row, k);
      const formattedValue = formatter ? formatter(rowValue) : rowValue;
      const out = basicGetter(formattedValue, formatter ? (x) => x : type);
      checkType(out, type)
      return out
    };
  });

  return function (row, i) {
    const error = {};
    let out = {};
    Object.keys(propGetters).forEach((k) => {
      const getter = propGetters[k];
      try {
        out[k] = getter(row);
      } catch (err) {
        out[k] = null;
        error[k] = err;
      }
    });
    
    if (Object.keys(error).length) {
      onError && onError(error, i);
    }
    return out;
  };
}


function parseRows(data, dataTypes) {
  let errors = [];
  const parser = rowParser(dataTypes, (error, i) => errors.push({row: i, error}));
  const dataset = data.map(parser);
  return [dataset, errors];
}

/**
 * @typedef ParserResult
 * @global
 * @type {object}
 * @property {Array} dataset parsed dataset (list of objects)
 * @property {Object} dataTypes dataTypes used for parsing dataset 
 * @property {Array} errors list of errors from parsing
 */


/**
 * Dataset parser
 *
 * @param {array} data data to be parsed (list of objects)
 * @param {object} types optional column types
 * @return {ParserResult} dataset, dataTypes, errors
 */
export function parseDataset(data, types) {
  const dataTypes = types || inferTypes(data);
  const [dataset, errors] = parseRows(data, dataTypes);

  return {dataset, dataTypes, errors};
}