| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369 |
- // Libraries
- import Papa, { ParseResult, ParseConfig, Parser } from 'papaparse';
- import defaults from 'lodash/defaults';
- import isNumber from 'lodash/isNumber';
- // Types
- import { DataFrame, Field, FieldType } from '../types/index';
- import { guessFieldTypeFromValue } from './processDataFrame';
/**
 * Selects which header lines `toCSV` writes ahead of the data rows.
 */
export enum CSVHeaderStyle {
  /** One `#key#...` metadata line for each of name/type/unit/dateFormat that some field defines. */
  full = 0,
  /** A single line containing the field names. */
  name = 1,
  /** No header lines at all. */
  none = 2,
}
/**
 * Subset of all parse options.
 *
 * When reading, these values are spread into the PapaParse configuration.
 * When writing, `toCSV` fills in the defaults noted on each member.
 */
export interface CSVConfig {
  /** Column separator. Default: "," */
  delimiter?: string;

  /** Line terminator. Default: "\r\n" */
  newline?: string;

  /** Character used to quote values. Default: '"' */
  quoteChar?: string;

  /** Text encoding. Default: "" */
  encoding?: string;

  /** Which header lines to emit when writing. */
  headerStyle?: CSVHeaderStyle;
}
/**
 * Hooks that let a caller observe parsing as it happens instead of waiting
 * for the fully populated frames.
 */
export interface CSVParseCallbacks {
  /**
   * Called with the current frame before any of its rows are processed
   * further, and again if a later row turns out to be wider than the known
   * columns. The frame is passed by reference, so an implementation can
   * adjust it in place to force column configuration; nothing is returned.
   */
  onHeader: (table: DataFrame) => void;

  /** Called with each row once its cells have been parsed. */
  onRow: (row: Array<any>) => void;
}
/**
 * Options accepted by `readCSV` and the `CSVReader` constructor.
 */
export interface CSVOptions {
  /** Parse settings; an empty configuration is used when omitted. */
  config?: CSVConfig;

  /** Optional hooks notified as headers and rows are read. */
  callback?: CSVParseCallbacks;
}
/**
 * Parses CSV text into data frames using a freshly constructed CSVReader.
 *
 * @param csv - The CSV text to parse.
 * @param options - Optional parse configuration and callbacks.
 * @returns The frames produced by the reader.
 */
export function readCSV(csv: string, options?: CSVOptions): DataFrame[] {
  const reader = new CSVReader(options);
  return reader.readCSV(csv);
}
/**
 * Phases the reader moves through while consuming lines.
 */
enum ParseState {
  /** Nothing has been recognized yet. */
  Starting = 0,
  /** Header information has been seen; the next data line starts the rows. */
  InHeader = 1,
  /** Data rows are being appended to the current frame. */
  ReadingRows = 2,
}
/** Converts the raw text of one CSV cell into the value stored in a row. */
type FieldParser = (raw: string) => any;
/**
 * Reads CSV text into one or more data frames.
 *
 * Lines are interpreted as follows:
 *  - `#key#v1,v2,...` where key is one of name/type/unit/dateFormat sets that
 *    property on every field. When such a line appears after data rows have
 *    been read, it starts a new frame.
 *  - Any other line starting with `#` is used as the column names if nothing
 *    has been recognized yet, and is otherwise ignored as a comment.
 *  - The first ordinary line is taken as column names when its first cell
 *    looks like a string; otherwise names are generated and the line is
 *    read as data.
 *  - Everything else is a data row; empty cells become `null`.
 */
export class CSVReader {
  /** Parse options that are spread into the PapaParse configuration. */
  config: CSVConfig;
  /** Optional hooks notified as headers and rows are read. */
  callback?: CSVParseCallbacks;

  /**
   * Value parsers for the columns of the frame currently being read. Each
   * entry is created lazily from the first non-empty cell seen in its column.
   */
  field: FieldParser[];
  /** The frame rows are currently being appended to. */
  series: DataFrame;
  state: ParseState;
  /** Every frame produced so far, in the order it was started. */
  data: DataFrame[];

  constructor(options?: CSVOptions) {
    const opts = options || {};
    this.config = opts.config || {};
    this.callback = opts.callback;

    this.field = [];
    this.state = ParseState.Starting;
    this.series = {
      fields: [],
      rows: [],
    };
    this.data = [];
  }

  // PapaParse callback on each line
  private step = (results: ParseResult, parser: Parser): void => {
    for (let i = 0; i < results.data.length; i++) {
      const line: string[] = results.data[i];
      if (line.length < 1) {
        continue;
      }

      // NOTE(review): while still in Starting, a line whose first cell is
      // empty matches none of the branches below and is dropped. Confirm
      // that this is intended.
      const first = line[0]; // null or value, papaparse does not return ''
      if (first) {
        // Comment or header queue
        if (first.startsWith('#')) {
          // Look for special header column
          // #{columnkey}#a,b,c
          const idx = first.indexOf('#', 2);
          if (idx > 0) {
            const k = first.slice(1, idx);

            // Simple object used to check if headers match
            const headerKeys: Field = {
              name: '#',
              type: FieldType.number,
              unit: '#',
              dateFormat: '#',
            };

            // Check if it is a known/supported column
            if (headerKeys.hasOwnProperty(k)) {
              // Starting a new table after reading rows
              if (this.state === ParseState.ReadingRows) {
                this.series = {
                  fields: [],
                  rows: [],
                };
                this.data.push(this.series);

                // The cached parsers belong to the previous table's columns.
                // Drop them so the new table's types are guessed from its own
                // values instead of reusing, say, parseFloat on text.
                this.field = [];
              }

              padColumnWidth(this.series.fields, line.length);
              const fields: any[] = this.series.fields; // cast to any so we can lookup by key
              const v = first.slice(idx + 1);
              fields[0][k] = v;
              for (let j = 1; j < fields.length; j++) {
                fields[j][k] = line[j];
              }
              this.state = ParseState.InHeader;
              continue;
            }
          } else if (this.state === ParseState.Starting) {
            this.series.fields = makeFieldsFor(line);
            this.state = ParseState.InHeader;
            continue;
          }
          // Ignore comment lines
          continue;
        }

        if (this.state === ParseState.Starting) {
          const type = guessFieldTypeFromValue(first);
          if (type === FieldType.string) {
            this.series.fields = makeFieldsFor(line);
            this.state = ParseState.InHeader;
            continue;
          }
          // The first cell is not a string, so this is already data:
          // generate column names and read the line as a row.
          this.series.fields = makeFieldsFor(new Array(line.length));
          this.series.fields[0].type = type;
          this.state = ParseState.InHeader; // fall through to read rows
        }
      }

      if (this.state === ParseState.InHeader) {
        padColumnWidth(this.series.fields, line.length);
        this.state = ParseState.ReadingRows;
      }

      if (this.state === ParseState.ReadingRows) {
        // Make sure column structure is valid
        if (line.length > this.series.fields.length) {
          padColumnWidth(this.series.fields, line.length);
          if (this.callback) {
            this.callback.onHeader(this.series);
          } else {
            // Expand all rows with nulls
            for (let x = 0; x < this.series.rows.length; x++) {
              const row = this.series.rows[x];
              while (row.length < line.length) {
                row.push(null);
              }
            }
          }
        }

        const row: any[] = [];
        for (let j = 0; j < line.length; j++) {
          const v = line[j];
          if (v) {
            if (!this.field[j]) {
              this.field[j] = makeFieldParser(v, this.series.fields[j]);
            }
            row.push(this.field[j](v));
          } else {
            row.push(null);
          }
        }

        if (this.callback) {
          // Send the header after we guess the type
          if (this.series.rows.length === 0) {
            this.callback.onHeader(this.series);
            this.series.rows.push(row); // Only add the first row
          }
          this.callback.onRow(row);
        } else {
          this.series.rows.push(row);
        }
      }
    }
  };

  /**
   * Parses `text` and returns the frames that were read.
   *
   * The returned list starts with the reader's current frame. Each `#key#`
   * header that follows data rows appends a further frame.
   */
  readCSV(text: string): DataFrame[] {
    this.data = [this.series];

    const papacfg = {
      ...this.config,
      dynamicTyping: false,
      skipEmptyLines: true,
      comments: false, // Keep comment lines
      step: this.step,
    } as ParseConfig;

    Papa.parse(text, papacfg);
    return this.data;
  }
}
/**
 * Builds the cell parser for one column.
 *
 * When the field has no type yet, one is assigned here as a side effect:
 * `time` for a field named "time" or "Time", otherwise whatever
 * `guessFieldTypeFromValue` reports for the sample value.
 *
 * @param value - A sample cell from the column, used only to guess the type.
 * @param field - The column's field; its `type` may be filled in.
 * @returns A parser for number or boolean columns, or a pass-through for
 * every other type.
 */
function makeFieldParser(value: string, field: Field): FieldParser {
  if (!field.type) {
    const namedLikeTime = field.name === 'time' || field.name === 'Time';
    field.type = namedLikeTime ? FieldType.time : guessFieldTypeFromValue(value);
  }

  switch (field.type) {
    case FieldType.number:
      return (text: string) => parseFloat(text);

    case FieldType.boolean:
      // Only a leading 'F', 'f' or '0' reads as false; anything else is true.
      return (text: string) => {
        const lead = text[0];
        return lead !== 'F' && lead !== 'f' && lead !== '0';
      };

    default:
      // Just pass the string back
      return (text: string) => text;
  }
}
/**
 * Builds one field per entry of `line`, named after the entry's text.
 * Entries that are empty or missing are named "Column N" (1-based).
 */
function makeFieldsFor(line: string[]): Field[] {
  // Array.from visits every index, including the holes of `new Array(n)`.
  return Array.from({ length: line.length }, (_unused, idx) => {
    const label = line[idx];
    return { name: label ? label : 'Column ' + (idx + 1) };
  });
}
/**
 * Makes sure `fields` has at least `width` entries by appending fields
 * named "Field N" (1-based). Existing entries are left alone and the list
 * is never shortened.
 */
function padColumnWidth(fields: Field[], width: number) {
  while (fields.length < width) {
    fields.push({ name: 'Field ' + (fields.length + 1) });
  }
}
/** Serializes one cell value to its CSV text. */
type FieldWriter = (cell: any) => string;
/**
 * Serializes a single value as one CSV cell, quoting it only when needed.
 *
 * The value is wrapped in the quote character when it contains that
 * character (each occurrence is doubled), a line break (`\n` or `\r`), or
 * the delimiter. `null` and `undefined` are written as an empty cell.
 *
 * @param value - The value to write; converted with `toString()`.
 * @param config - Supplies `quoteChar` and `delimiter`. Missing or empty
 * values fall back to '"' and ',' respectively.
 * @returns The cell text, without a trailing delimiter.
 */
function writeValue(value: any, config: CSVConfig): string {
  if (value === null || value === undefined) {
    return '';
  }

  const quote = config.quoteChar || '"';
  const delimiter = config.delimiter || ',';
  const str: string = value.toString();

  if (str.includes(quote)) {
    // Escape the configured quote character by doubling it. split/join is
    // used so the character never has to be escaped for a RegExp.
    return quote + str.split(quote).join(quote + quote) + quote;
  }
  if (str.includes('\n') || str.includes('\r') || str.includes(delimiter)) {
    return quote + str + quote;
  }
  return str;
}
/**
 * Picks the cell writer for a column based on the field's type.
 *
 *  - boolean columns write "true" or "false" according to truthiness;
 *  - number columns write real numbers with `toString()` and send anything
 *    else through `writeValue`;
 *  - all other columns, and untyped ones, use `writeValue`.
 */
function makeFieldWriter(field: Field, config: CSVConfig): FieldWriter {
  const generic: FieldWriter = cell => writeValue(cell, config);

  if (!field.type) {
    return generic;
  }

  switch (field.type) {
    case FieldType.boolean:
      return cell => (cell ? 'true' : 'false');

    case FieldType.number:
      return cell => (isNumber(cell) ? cell.toString() : writeValue(cell, config));

    default:
      return generic;
  }
}
/**
 * Builds the `#key#v1,v2,...` metadata line for one field property.
 *
 * @param key - The property to emit (for example "name" or "unit").
 * @param fields - The fields of the frame being written.
 * @param config - Supplies the delimiter and line terminator.
 * @returns The line including its terminator, or an empty string when no
 * field has `key` as an own property. Fields with a falsy value for `key`
 * contribute an empty cell.
 */
function getHeaderLine(key: string, fields: Field[], config: CSVConfig): string {
  const anyFieldHasKey = fields.some(candidate => candidate.hasOwnProperty(key));
  if (!anyFieldHasKey) {
    return '';
  }

  let text = '#' + key + '#';
  fields.forEach((field, column) => {
    if (column > 0) {
      text += config.delimiter;
    }
    const cell = (field as any)[key];
    if (cell) {
      text += writeValue(cell, config);
    }
  });
  return text + config.newline;
}
/**
 * Serializes data frames to CSV text.
 *
 * Each frame is written as its header (according to `headerStyle`), one
 * line per row, and a final blank line that separates it from the next
 * frame. `null` and `undefined` cells are written as empty cells.
 *
 * @param data - The frames to write; a falsy value yields an empty string.
 * @param config - Optional settings. Missing values default to delimiter
 * ",", newline "\r\n", quoteChar '"', encoding "" and the `name` header
 * style. The object passed in is not modified.
 * @returns The CSV text.
 */
export function toCSV(data: DataFrame[], config?: CSVConfig): string {
  if (!data) {
    return '';
  }

  // Merge into a fresh object: lodash `defaults` mutates its first argument,
  // and the caller's config must not be changed behind their back.
  const cfg: CSVConfig = defaults({}, config, {
    delimiter: ',',
    newline: '\r\n',
    quoteChar: '"',
    encoding: '',
    headerStyle: CSVHeaderStyle.name,
  });

  let csv = '';
  for (const series of data) {
    const { rows, fields } = series;

    if (cfg.headerStyle === CSVHeaderStyle.full) {
      csv +=
        getHeaderLine('name', fields, cfg) +
        getHeaderLine('type', fields, cfg) +
        getHeaderLine('unit', fields, cfg) +
        getHeaderLine('dateFormat', fields, cfg);
    } else if (cfg.headerStyle === CSVHeaderStyle.name) {
      // Names go through writeValue so that a delimiter, quote or line break
      // inside a name cannot corrupt the header line.
      csv += fields.map(field => writeValue(field.name || '', cfg)).join(cfg.delimiter) + cfg.newline;
    }

    const writers = fields.map(field => makeFieldWriter(field, cfg));
    for (const row of rows) {
      for (let j = 0; j < row.length; j++) {
        if (j > 0) {
          csv += cfg.delimiter;
        }
        const v = row[j];
        if (v !== null && v !== undefined) {
          // A row may be wider than the declared fields; write the extra
          // cells generically instead of failing on a missing writer.
          const write = writers[j];
          csv += write ? write(v) : writeValue(v, cfg);
        }
      }
      csv += cfg.newline;
    }

    // Blank line between frames
    csv += cfg.newline;
  }
  return csv;
}
|