csv.ts 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. // Libraries
  2. import Papa, { ParseResult, ParseConfig, Parser } from 'papaparse';
  3. import defaults from 'lodash/defaults';
  4. import isNumber from 'lodash/isNumber';
  5. // Types
  6. import { DataFrame, Field, FieldType } from '../types/index';
  7. import { guessFieldTypeFromValue } from './processDataFrame';
  /**
   * Selects which header lines `toCSV` writes ahead of the data rows.
   */
  export enum CSVHeaderStyle {
    /** One `#key#` line for each of name, type, unit and dateFormat that some field defines. */
    full = 0,
    /** A single line holding just the field names. */
    name = 1,
    /** No header lines. */
    none = 2,
  }
  /**
   * Formatting options for reading and writing CSV.
   * Only a subset of all available parse options is exposed here.
   */
  export interface CSVConfig {
    /** Column separator. Default: "," */
    delimiter?: string;
    /** Line terminator. Default: "\r\n" */
    newline?: string;
    /** Character wrapped around quoted values. Default: '"' */
    quoteChar?: string;
    /** Text encoding. Default: "" */
    encoding?: string;
    /** Which header lines to write when producing CSV. */
    headerStyle?: CSVHeaderStyle;
  }
  /**
   * Hooks a caller may supply to observe parsing while it happens.
   */
  export interface CSVParseCallbacks {
    /**
     * Called with the frame under construction before its first row is
     * handed to `onRow`, and again when a later row has more columns than
     * the frame currently knows about.
     *
     * NOTE(review): the return type is void, so column configuration can
     * presumably only be forced by modifying the passed frame in place.
     */
    onHeader: (table: DataFrame) => void;

    /** Called once for each parsed data row. */
    onRow: (row: any[]) => void;
  }
  /**
   * Optional settings accepted by `readCSV` and the `CSVReader` constructor.
   */
  export interface CSVOptions {
    /** Parse configuration; spread into the options handed to PapaParse. */
    config?: CSVConfig;
    /** Hooks notified about headers and rows while reading. */
    callback?: CSVParseCallbacks;
  }
  /**
   * Parses CSV text into data frames using a fresh `CSVReader`.
   *
   * @param csv - the CSV text to parse
   * @param options - optional parse configuration and callbacks
   * @returns the frames produced by the reader
   */
  export function readCSV(csv: string, options?: CSVOptions): DataFrame[] {
    const reader = new CSVReader(options);
    return reader.readCSV(csv);
  }
  /** Where the reader currently is within the table being read. */
  enum ParseState {
    /** Nothing has been consumed yet. */
    Starting = 0,
    /** Header information has been seen, but no data row yet. */
    InHeader = 1,
    /** At least one data row has been read. */
    ReadingRows = 2,
  }
  /** Converts the raw text of one CSV cell into the value stored in a row. */
  type FieldParser = (text: string) => any;
  /**
   * CSV reader that turns text into one or more data frames.
   *
   * Line handling, driven by the first cell of each line:
   *  - `#key#value,...` where key is name, type, unit or dateFormat sets that
   *    attribute on every column. If data rows were already read, such a line
   *    starts a new frame.
   *  - Any other line starting with `#` is skipped as a comment, except that a
   *    first line without a second `#` is used as the column names.
   *  - Otherwise, the first line is taken as column names when its first cell
   *    is guessed to be a string; if not, it is read as data.
   *  - Everything else is a data row; cells are converted by per-column
   *    parsers and empty cells become null.
   */
  export class CSVReader {
    /** Parse options; spread into the PapaParse configuration by `readCSV`. */
    config: CSVConfig;
    /** Optional hooks notified about headers and rows. */
    callback?: CSVParseCallbacks;
    /** Per-column parsers for the current frame, created from the first non-empty value seen in each column. */
    field: FieldParser[];
    /** The frame currently being filled. */
    series: DataFrame;
    /** Position within the current frame. */
    state: ParseState;
    /** All frames produced by this reader. */
    data: DataFrame[];

    /**
     * @param options - optional parse configuration and callbacks
     */
    constructor(options?: CSVOptions) {
      const opts = options || {};
      this.config = opts.config || {};
      this.callback = opts.callback;
      this.field = [];
      this.state = ParseState.Starting;
      this.series = {
        fields: [],
        rows: [],
      };
      this.data = [];
    }

    /**
     * PapaParse `step` callback: consumes the parsed line(s) in `results.data`.
     * The `parser` argument is part of the callback signature and is not used.
     */
    private step = (results: ParseResult, parser: Parser): void => {
      for (let i = 0; i < results.data.length; i++) {
        const line: string[] = results.data[i];
        if (line.length < 1) {
          continue;
        }

        const first = line[0]; // null or value, papaparse does not return ''
        if (first) {
          // Comment or header queue
          if (first.startsWith('#')) {
            // Look for special header column
            // #{columkey}#a,b,c
            const idx = first.indexOf('#', 2);
            if (idx > 0) {
              const k = first.substring(1, idx);

              // Simple object used to check if headers match
              const headerKeys: Field = {
                name: '#',
                type: FieldType.number,
                unit: '#',
                dateFormat: '#',
              };

              // Check if it is a known/supported column
              if (headerKeys.hasOwnProperty(k)) {
                // Starting a new table after reading rows
                if (this.state === ParseState.ReadingRows) {
                  this.series = {
                    fields: [],
                    rows: [],
                  };
                  this.data.push(this.series);
                  // The cached parsers were built for the previous table's
                  // columns; drop them so the new table's types are honoured.
                  this.field = [];
                }

                padColumnWidth(this.series.fields, line.length);
                const fields: any[] = this.series.fields; // cast to any so we can lookup by key
                // The first cell still carries the "#key#" prefix; strip it
                fields[0][k] = first.substring(idx + 1);
                for (let j = 1; j < fields.length; j++) {
                  fields[j][k] = line[j];
                }
                this.state = ParseState.InHeader;
                continue;
              }
            } else if (this.state === ParseState.Starting) {
              // First line starts with '#' but has no "#key#" marker: use it as column names
              this.series.fields = makeFieldsFor(line);
              this.state = ParseState.InHeader;
              continue;
            }
            // Ignore comment lines
            continue;
          }

          if (this.state === ParseState.Starting) {
            const type = guessFieldTypeFromValue(first);
            if (type === FieldType.string) {
              // A string in the first cell means this line holds column names
              this.series.fields = makeFieldsFor(line);
              this.state = ParseState.InHeader;
              continue;
            }
            // No header line: create placeholder columns and read this line as data
            this.series.fields = makeFieldsFor(new Array(line.length));
            this.series.fields[0].type = type;
            this.state = ParseState.InHeader; // fall through to read rows
          }
        }

        if (this.state === ParseState.InHeader) {
          padColumnWidth(this.series.fields, line.length);
          this.state = ParseState.ReadingRows;
        }

        if (this.state === ParseState.ReadingRows) {
          // Make sure column structure is valid
          if (line.length > this.series.fields.length) {
            padColumnWidth(this.series.fields, line.length);
            if (this.callback) {
              this.callback.onHeader(this.series);
            } else {
              // Expand all rows with nulls
              for (const existing of this.series.rows) {
                while (existing.length < line.length) {
                  existing.push(null);
                }
              }
            }
          }

          const row: any[] = [];
          for (let j = 0; j < line.length; j++) {
            const v = line[j];
            if (v) {
              // The parser for a column is chosen from its first non-empty value
              if (!this.field[j]) {
                this.field[j] = makeFieldParser(v, this.series.fields[j]);
              }
              row.push(this.field[j](v));
            } else {
              row.push(null);
            }
          }

          if (this.callback) {
            // Send the header after we guess the type
            if (this.series.rows.length === 0) {
              this.callback.onHeader(this.series);
              this.series.rows.push(row); // Only add the first row
            }
            this.callback.onRow(row);
          } else {
            this.series.rows.push(row);
          }
        }
      }
    };

    /**
     * Parses `text` with PapaParse, feeding every line through `step`.
     *
     * @param text - the CSV text to parse
     * @returns the frames built while parsing; the first entry is the
     *   reader's current frame at the time of the call
     */
    readCSV(text: string): DataFrame[] {
      this.data = [this.series];

      const papacfg = {
        ...this.config,
        dynamicTyping: false,
        skipEmptyLines: true,
        comments: false, // Keep comment lines
        step: this.step,
      } as ParseConfig;

      Papa.parse(text, papacfg);
      return this.data;
    }
  }
  /**
   * Picks the converter applied to every value of one column.
   *
   * If the field has no type yet, one is assigned on the field itself:
   * `time` for columns named "time" or "Time", otherwise whatever
   * `guessFieldTypeFromValue` reports for the sample value.
   *
   * @param value - a sample value from the column, used for type guessing
   * @param field - the column description; its `type` may be set here
   * @returns a function converting cell text into the stored value
   */
  function makeFieldParser(value: string, field: Field): FieldParser {
    if (!field.type) {
      const namedTime = field.name === 'time' || field.name === 'Time';
      field.type = namedTime ? FieldType.time : guessFieldTypeFromValue(value);
    }

    switch (field.type) {
      case FieldType.number:
        return (text: string) => parseFloat(text);

      case FieldType.boolean:
        // Everything is true unless it starts with "F", "f" or "0"
        return (text: string) => {
          const lead = text[0];
          return lead !== 'F' && lead !== 'f' && lead !== '0';
        };

      default:
        // Any other type keeps the text unchanged
        return (text: string) => text;
    }
  }
  /**
   * Builds one field per entry of `line`, named after that entry.
   * Empty or missing entries get the placeholder name "Column N" (1-based).
   */
  function makeFieldsFor(line: string[]): Field[] {
    // Array.from also visits holes, so arrays created with `new Array(n)`
    // still yield n placeholder fields.
    return Array.from(line, (label, position): Field => {
      const name = label ? label : 'Column ' + (position + 1);
      return { name };
    });
  }
  /**
   * Appends placeholder fields named "Field N" (1-based) until `fields`
   * holds at least `width` entries. The array is modified in place and
   * is left untouched when it is already wide enough.
   */
  function padColumnWidth(fields: Field[], width: number) {
    while (fields.length < width) {
      const ordinal = fields.length + 1;
      fields.push({ name: 'Field ' + ordinal });
    }
  }
  /** Renders one row value as the text of a CSV cell. */
  type FieldWriter = (cell: any) => string;
  /**
   * Formats one value as a CSV cell, quoting it when needed.
   *
   * A value containing the quote character is wrapped in quotes with every
   * embedded quote doubled. A value containing a line break (CR or LF) or the
   * delimiter is wrapped in quotes as-is. Anything else is written verbatim.
   *
   * @param value - the value to write; null and undefined give an empty cell
   * @param config - supplies `quoteChar` (falls back to '"') and `delimiter`
   *   (falls back to ',')
   * @returns the text to place in the CSV output
   */
  function writeValue(value: any, config: CSVConfig): string {
    // Missing values become empty cells instead of throwing on toString()
    if (value === null || value === undefined) {
      return '';
    }

    const quote = config.quoteChar === undefined ? '"' : config.quoteChar;
    const delimiter = config.delimiter === undefined ? ',' : config.delimiter;
    const str = String(value);

    // Escape the configured quote character (not a hard-coded '"') by doubling it
    if (quote !== '' && str.includes(quote)) {
      return quote + str.split(quote).join(quote + quote) + quote;
    }

    const hasLineBreak = str.includes('\n') || str.includes('\r');
    if (hasLineBreak || (delimiter !== '' && str.includes(delimiter))) {
      return quote + str + quote;
    }
    return str;
  }
  /**
   * Chooses how values of one column are rendered as CSV text.
   *
   *  - boolean fields write "true" or "false" based on truthiness
   *  - number fields write real numbers via toString() and pass anything
   *    else through `writeValue`
   *  - every other field, including those without a type, uses `writeValue`
   *
   * @param field - the column description whose `type` selects the writer
   * @param config - CSV settings passed on to `writeValue`
   */
  function makeFieldWriter(field: Field, config: CSVConfig): FieldWriter {
    const generic: FieldWriter = cell => writeValue(cell, config);

    if (!field.type) {
      return generic;
    }

    switch (field.type) {
      case FieldType.boolean:
        return cell => (cell ? 'true' : 'false');

      case FieldType.number:
        return cell => (isNumber(cell) ? cell.toString() : writeValue(cell, config));

      default:
        return generic;
    }
  }
  /**
   * Builds the `#key#...` header line for one field attribute.
   *
   * @param key - the attribute to emit, e.g. "name" or "unit"
   * @param fields - the columns of the frame being written
   * @param config - CSV settings; `delimiter` and `newline` are used here
   * @returns the line including its trailing newline, or '' when no field
   *   has `key` as an own property
   */
  function getHeaderLine(key: string, fields: Field[], config: CSVConfig): string {
    const keyInUse = fields.some(f => f.hasOwnProperty(key));
    if (!keyInUse) {
      return '';
    }

    let out = '#' + key + '#';
    fields.forEach((f, col) => {
      if (col > 0) {
        out += config.delimiter;
      }
      // Falsy attribute values leave the cell empty
      const attr = (f as any)[key];
      if (attr) {
        out += writeValue(attr, config);
      }
    });
    return out + config.newline;
  }
  /**
   * Serializes data frames to CSV text.
   *
   * Each frame is written as its header (per `headerStyle`), then one line
   * per row, then one extra newline that separates it from the next frame.
   * Null and undefined cells are written as empty cells.
   *
   * @param data - the frames to write; a falsy value yields ''
   * @param config - optional settings; missing entries default to
   *   delimiter ",", newline "\r\n", quoteChar '"', encoding "" and
   *   headerStyle `CSVHeaderStyle.name`. The passed object is not modified.
   * @returns the CSV text
   */
  export function toCSV(data: DataFrame[], config?: CSVConfig): string {
    if (!data) {
      return '';
    }

    // Merge into a fresh object: lodash `defaults` mutates its first argument,
    // which would otherwise write the defaults into the caller's config.
    const settings: CSVConfig = defaults({}, config, {
      delimiter: ',',
      newline: '\r\n',
      quoteChar: '"',
      encoding: '',
      headerStyle: CSVHeaderStyle.name,
    });

    let csv = '';
    for (const series of data) {
      const { rows, fields } = series;

      if (settings.headerStyle === CSVHeaderStyle.full) {
        csv +=
          getHeaderLine('name', fields, settings) +
          getHeaderLine('type', fields, settings) +
          getHeaderLine('unit', fields, settings) +
          getHeaderLine('dateFormat', fields, settings);
      } else if (settings.headerStyle === CSVHeaderStyle.name) {
        for (let i = 0; i < fields.length; i++) {
          if (i > 0) {
            csv += settings.delimiter;
          }
          // Names go through writeValue so delimiters and quotes inside a
          // name cannot break the header line
          const name = fields[i].name;
          if (name) {
            csv += writeValue(name, settings);
          }
        }
        csv += settings.newline;
      }

      const writers = fields.map(field => makeFieldWriter(field, settings));
      for (const row of rows) {
        for (let j = 0; j < row.length; j++) {
          if (j > 0) {
            csv += settings.delimiter;
          }
          const v = row[j];
          if (v !== null && v !== undefined) {
            // Rows wider than the field list fall back to the generic writer
            const writer = writers[j];
            csv += writer ? writer(v) : writeValue(v, settings);
          }
        }
        csv += settings.newline;
      }

      // Blank line between frames
      csv += settings.newline;
    }
    return csv;
  }
  326. }