papaparse.js 53 KB


  1. /* @license
  2. Papa Parse
  3. v5.5.2
  4. https://github.com/mholt/PapaParse
  5. License: MIT
  6. */
  7. (function(root, factory)
  8. {
  9. /* globals define */
  10. if (typeof define === 'function' && define.amd)
  11. {
  12. // AMD. Register as an anonymous module.
  13. define([], factory);
  14. }
  15. else if (typeof module === 'object' && typeof exports !== 'undefined')
  16. {
  17. // Node. Does not work with strict CommonJS, but
  18. // only CommonJS-like environments that support module.exports,
  19. // like Node.
  20. module.exports = factory();
  21. }
  22. else
  23. {
  24. // Browser globals (root is window)
  25. root.Papa = factory();
  26. }
  27. // in strict mode we cannot access arguments.callee, so we need a named reference to
  28. // stringify the factory method for the blob worker
  29. // eslint-disable-next-line func-name
  30. }(this, function moduleFactory()
  31. {
  32. 'use strict';
  // Locate the global object without `Function('return this')()`, which needs
  // `eval`-style evaluation that a Content Security Policy may forbid.
  // Candidates are probed in the order `self`, `window`, `global`.
  // Note: the `global` probed here is the `var global` being initialised by
  // this very statement (hoisted, still undefined while the function runs),
  // so that probe cannot succeed; an empty object is used instead, e.g. when
  // running tests where neither `self` nor `window` is defined.
  var global = (function() {
    if (typeof self === 'undefined') {
      if (typeof window === 'undefined') {
        return typeof global === 'undefined' ? {} : global;
      }
      return window;
    }
    return self;
  })();
  /**
   * Returns the object URL of the worker script, building it on first use and
   * caching it on `Papa.BLOB_URL`.
   *
   * The script is the stringified module factory wrapped in a call, preceded
   * by a small bootstrap that resolves the global object and sets
   * `IS_PAPA_WORKER` on it.
   *
   * @returns {string} the cached or newly created object URL
   */
  function getWorkerBlob() {
    var urlApi = global.URL || global.webkitURL || null;
    var factorySource = moduleFactory.toString();

    if (Papa.BLOB_URL) {
      return Papa.BLOB_URL;
    }

    var bootstrap = "var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ";
    var workerScript = new Blob([bootstrap, '(', factorySource, ')();'], {type: 'text/javascript'});
    return (Papa.BLOB_URL = urlApi.createObjectURL(workerScript));
  }
  // A worker-like context has no `document` but does have `postMessage`.
  var IS_WORKER = !global.document && !!global.postMessage;
  // Flag that the worker bootstrap script sets on the global object.
  var IS_PAPA_WORKER = global.IS_PAPA_WORKER || false;

  var workers = {};
  var workerIdCounter = 0;

  // Public API object
  var Papa = {
    parse: CsvToJson,
    unparse: JsonToCsv,
    RECORD_SEP: String.fromCharCode(30),
    UNIT_SEP: String.fromCharCode(31),
    BYTE_ORDER_MARK: '\ufeff'
  };
  Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
  Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker;
  Papa.NODE_STREAM_INPUT = 1;

  // Configurable chunk sizes for local and remote files, respectively
  Papa.LocalChunkSize = 10 * 1024 * 1024; // 10 MB
  Papa.RemoteChunkSize = 5 * 1024 * 1024; // 5 MB
  Papa.DefaultDelimiter = ','; // Used if not specified and detection fails

  // Exposed for testing and development only
  Papa.Parser = Parser;
  Papa.ParserHandle = ParserHandle;
  Papa.NetworkStreamer = NetworkStreamer;
  Papa.FileStreamer = FileStreamer;
  Papa.StringStreamer = StringStreamer;
  Papa.ReadableStreamStreamer = ReadableStreamStreamer;
  // The duplex streamer is only exposed when PAPA_BROWSER_CONTEXT is not defined
  if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
    Papa.DuplexStreamStreamer = DuplexStreamStreamer;
  }
  if (global.jQuery) {
    var $ = global.jQuery;

    /**
     * jQuery plugin: parses every file selected in the matched
     * <input type="file"> elements, one file after another.
     *
     * @param {Object} [options]
     * @param {Object} [options.config] Papa.parse config copied for every file.
     * @param {Function} [options.before] Called as before(file, inputElem) ahead
     *   of each file. It may return 'skip' or {action: 'skip'} to skip the file,
     *   {action: 'abort', reason} to stop processing the queue, or {config} to
     *   extend the config used for that file. Any other return value is ignored.
     * @param {Function} [options.error] Called as
     *   error({name}, file, inputElem, reason) when `before` aborts.
     * @param {Function} [options.complete] Called without arguments once the
     *   queue is empty.
     * @returns {jQuery} the selection the plugin was called on (chainable)
     */
    $.fn.parse = function(options) {
      // Calling .parse() without arguments must not throw on `options.config`
      options = options || {};
      var config = options.config || {};
      var queue = [];

      this.each(function() {
        // .attr('type') is undefined for an <input> without a type attribute,
        // so check it is a string before lower-casing it
        var typeAttr = $(this).attr('type');
        var supported = $(this).prop('tagName').toUpperCase() === 'INPUT'
          && typeof typeAttr === 'string'
          && typeAttr.toLowerCase() === 'file'
          && global.FileReader;

        if (!supported || !this.files || this.files.length === 0)
          return true; // continue to next input element

        for (var i = 0; i < this.files.length; i++) {
          queue.push({
            file: this.files[i],
            inputElem: this,
            instanceConfig: $.extend({}, config)
          });
        }
      });

      parseNextFile(); // begin parsing
      return this; // maintains chainability

      /** Parses the file at the head of the queue, or signals completion when the queue is empty. */
      function parseNextFile() {
        if (queue.length === 0) {
          if (isFunction(options.complete))
            options.complete();
          return;
        }

        var f = queue[0];

        if (isFunction(options.before)) {
          var returned = options.before(f.file, f.inputElem);

          // `typeof null` is 'object', so null must be excluded explicitly
          // before any property of the return value is read
          if (returned !== null && typeof returned === 'object') {
            if (returned.action === 'abort') {
              error('AbortError', f.file, f.inputElem, returned.reason);
              return; // Aborts all queued files immediately
            }
            else if (returned.action === 'skip') {
              fileComplete(); // parse the next file in the queue, if any
              return;
            }
            else if (typeof returned.config === 'object')
              f.instanceConfig = $.extend(f.instanceConfig, returned.config);
          }
          else if (returned === 'skip') {
            fileComplete(); // parse the next file in the queue, if any
            return;
          }
        }

        // Wrap up the user's complete callback, if any, so that ours also gets executed
        var userCompleteFunc = f.instanceConfig.complete;
        f.instanceConfig.complete = function(results) {
          if (isFunction(userCompleteFunc))
            userCompleteFunc(results, f.file, f.inputElem);
          fileComplete();
        };

        Papa.parse(f.file, f.instanceConfig);
      }

      /** Forwards an error to the user's error callback, if one was supplied. */
      function error(name, file, elem, reason) {
        if (isFunction(options.error))
          options.error({name: name}, file, elem, reason);
      }

      /** Removes the finished file from the queue and moves on to the next one. */
      function fileComplete() {
        queue.splice(0, 1);
        parseNextFile();
      }
    };
  }
  // When the IS_PAPA_WORKER flag is set, messages posted to this context are
  // handled by workerThreadReceivedMessage.
  if (IS_PAPA_WORKER) {
    global.onmessage = workerThreadReceivedMessage;
  }
  /**
   * Implementation of Papa.parse: normalises the config, then either hands the
   * job to a worker or picks a streamer matching the input and starts it.
   *
   * Note that `_config` is modified in place (dynamicTyping, transform and,
   * in worker mode, the callback slots and the `worker` flag).
   *
   * @param {*} _input a string (CSV text, or a URL when `_config.download` is
   *   set), an object with `readable === true` plus `read` and `on` functions,
   *   a File or other object, or Papa.NODE_STREAM_INPUT
   * @param {Object} [_config] parse configuration
   * @returns {*} undefined when the work was posted to a worker; the stream
   *   from DuplexStreamStreamer#getStream for Papa.NODE_STREAM_INPUT;
   *   otherwise whatever the chosen streamer's `stream()` returns
   * @throws {TypeError} when no streamer can handle `_input` (for example
   *   null, undefined or a number other than Papa.NODE_STREAM_INPUT)
   */
  function CsvToJson(_input, _config)
  {
    _config = _config || {};
    var dynamicTyping = _config.dynamicTyping || false;
    if (isFunction(dynamicTyping)) {
      _config.dynamicTypingFunction = dynamicTyping;
      // Will be filled on first row call
      dynamicTyping = {};
    }
    _config.dynamicTyping = dynamicTyping;

    _config.transform = isFunction(_config.transform) ? _config.transform : false;

    if (_config.worker && Papa.WORKERS_SUPPORTED)
    {
      var w = newWorker();

      // Keep the real callbacks on the worker object; the config posted to
      // the worker only carries booleans saying whether each one exists
      w.userStep = _config.step;
      w.userChunk = _config.chunk;
      w.userComplete = _config.complete;
      w.userError = _config.error;

      _config.step = isFunction(_config.step);
      _config.chunk = isFunction(_config.chunk);
      _config.complete = isFunction(_config.complete);
      _config.error = isFunction(_config.error);
      delete _config.worker; // prevent infinite loop

      w.postMessage({
        input: _input,
        config: _config,
        workerId: w.id
      });

      return;
    }

    var streamer = null;
    var hasInput = _input !== null && _input !== undefined;

    if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined')
    {
      // create a node Duplex stream for use
      // with .pipe
      streamer = new DuplexStreamStreamer(_config);
      return streamer.getStream();
    }
    else if (typeof _input === 'string')
    {
      _input = stripBom(_input);
      if (_config.download)
        streamer = new NetworkStreamer(_config);
      else
        streamer = new StringStreamer(_config);
    }
    else if (hasInput && _input.readable === true && isFunction(_input.read) && isFunction(_input.on))
    {
      streamer = new ReadableStreamStreamer(_config);
    }
    else if ((global.File && _input instanceof File) || _input instanceof Object) // ...Safari. (see issue #106)
      streamer = new FileStreamer(_config);

    // Fail with a clear message instead of an incidental null-dereference
    // when the input matched none of the supported kinds
    if (streamer === null)
      throw new TypeError('Unable to parse unrecognized input');

    return streamer.stream(_input);

    // Strip the leading byte order mark (U+FEFF), which causes issues when parsing the file
    function stripBom(string) {
      if (string.charCodeAt(0) === 0xfeff) {
        return string.slice(1);
      }
      return string;
    }
  }
  /**
   * Implementation of Papa.unparse: serialises data to a CSV string.
   *
   * Accepted input (a string is first run through JSON.parse):
   *  - an array of arrays (no header row is written),
   *  - an array of objects (header taken from `_config.columns` or from the
   *    keys of the first object),
   *  - an object with `data` and optionally `fields` (or `meta.fields`).
   * When an object is passed, its `data` and `fields` properties may be
   * filled in or replaced on the caller's object.
   *
   * @param {string|Array|Object} _input data to serialise
   * @param {Object} [_config] options: delimiter, quotes, skipEmptyLines,
   *   newline, quoteChar, header, columns, escapeChar, escapeFormulae
   * @returns {string} the CSV text
   * @throws {Error} 'Unable to serialize unrecognized input' for input that
   *   matches none of the shapes above, 'Option columns is empty' for an
   *   empty `columns` array
   */
  function JsonToCsv(_input, _config)
  {
    // Default configuration

    /** whether to surround every datum with quotes */
    var _quotes = false;

    /** whether to write headers */
    var _writeHeader = true;

    /** delimiting character(s) */
    var _delimiter = ',';

    /** newline character(s) */
    var _newline = '\r\n';

    /** quote character */
    var _quoteChar = '"';

    /** escaped quote character, either "" or <config.escapeChar>" */
    var _escapedQuote = _quoteChar + _quoteChar;

    /** whether to skip empty lines */
    var _skipEmptyLines = false;

    /** the columns (keys) we expect when we unparse objects */
    var _columns = null;

    /** whether to prevent outputting cells that can be parsed as formulae by spreadsheet software (Excel and LibreOffice) */
    var _escapeFormulae = false;

    unpackConfig();

    var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');

    if (typeof _input === 'string')
      _input = JSON.parse(_input);

    if (Array.isArray(_input))
    {
      if (!_input.length || Array.isArray(_input[0]))
        return serialize(null, _input, _skipEmptyLines);
      else if (typeof _input[0] === 'object')
        return serialize(_columns || Object.keys(_input[0]), _input, _skipEmptyLines);
    }
    // `typeof null` is 'object'; excluding null lets it reach the error below
    else if (_input !== null && typeof _input === 'object')
    {
      if (typeof _input.data === 'string')
        _input.data = JSON.parse(_input.data);

      if (Array.isArray(_input.data))
      {
        if (!_input.fields)
          _input.fields = _input.meta && _input.meta.fields || _columns;

        if (!_input.fields)
          _input.fields = Array.isArray(_input.data[0])
            ? _input.fields
            : typeof _input.data[0] === 'object'
              ? Object.keys(_input.data[0])
              : [];

        if (!(Array.isArray(_input.data[0])) && typeof _input.data[0] !== 'object')
          _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
      }

      return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
    }

    // Default (any valid paths should return before this)
    throw new Error('Unable to serialize unrecognized input');


    /** Copies every recognised, well-typed option from _config over the defaults above. */
    function unpackConfig()
    {
      // A null config has typeof 'object', so it needs its own check
      if (typeof _config !== 'object' || _config === null)
        return;

      if (typeof _config.delimiter === 'string'
        && !Papa.BAD_DELIMITERS.filter(function(value) { return _config.delimiter.indexOf(value) !== -1; }).length)
      {
        _delimiter = _config.delimiter;
      }

      if (typeof _config.quotes === 'boolean'
        || typeof _config.quotes === 'function'
        || Array.isArray(_config.quotes))
        _quotes = _config.quotes;

      if (typeof _config.skipEmptyLines === 'boolean'
        || typeof _config.skipEmptyLines === 'string')
        _skipEmptyLines = _config.skipEmptyLines;

      if (typeof _config.newline === 'string')
        _newline = _config.newline;

      if (typeof _config.quoteChar === 'string')
        _quoteChar = _config.quoteChar;

      if (typeof _config.header === 'boolean')
        _writeHeader = _config.header;

      if (Array.isArray(_config.columns)) {

        if (_config.columns.length === 0) throw new Error('Option columns is empty');

        _columns = _config.columns;
      }

      // Must come after quoteChar so a custom quote character is used here
      if (_config.escapeChar !== undefined) {
        _escapedQuote = _config.escapeChar + _quoteChar;
      }

      if (_config.escapeFormulae instanceof RegExp) {
        _escapeFormulae = _config.escapeFormulae;
      } else if (typeof _config.escapeFormulae === 'boolean' && _config.escapeFormulae) {
        _escapeFormulae = /^[=+\-@\t\r].*$/;
      }
    }

    /** True for a cell that serialises to nothing: null, undefined, or a value whose length is 0. */
    function isBlankCell(value)
    {
      return value === null || value === undefined || value.length === 0;
    }

    /** The double for loop that iterates the data and writes out a CSV string including header row */
    function serialize(fields, data, skipEmptyLines)
    {
      var csv = '';

      if (typeof fields === 'string')
        fields = JSON.parse(fields);
      if (typeof data === 'string')
        data = JSON.parse(data);

      var hasHeader = Array.isArray(fields) && fields.length > 0;
      var dataKeyedByField = !(Array.isArray(data[0]));

      // If there is a header row, write it first
      if (hasHeader && _writeHeader)
      {
        for (var i = 0; i < fields.length; i++)
        {
          if (i > 0)
            csv += _delimiter;
          csv += safe(fields[i], i);
        }
        if (data.length > 0)
          csv += _newline;
      }

      // Then write out the data
      for (var row = 0; row < data.length; row++)
      {
        var maxCol = hasHeader ? fields.length : data[row].length;

        var emptyLine = false;
        var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
        if (skipEmptyLines && !hasHeader)
        {
          // isBlankCell (rather than a bare `.length` read) keeps a row such
          // as [null] from throwing; it is written as an empty line anyway
          emptyLine = skipEmptyLines === 'greedy'
            ? data[row].join('').trim() === ''
            : data[row].length === 1 && isBlankCell(data[row][0]);
        }
        if (skipEmptyLines === 'greedy' && hasHeader) {
          var line = [];
          for (var c = 0; c < maxCol; c++) {
            var cx = dataKeyedByField ? fields[c] : c;
            line.push(data[row][cx]);
          }
          emptyLine = line.join('').trim() === '';
        }
        if (!emptyLine)
        {
          for (var col = 0; col < maxCol; col++)
          {
            if (col > 0 && !nullLine)
              csv += _delimiter;
            var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
            csv += safe(data[row][colIdx], col);
          }
          if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine)))
          {
            csv += _newline;
          }
        }
      }
      return csv;
    }

    /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
    function safe(str, col)
    {
      if (typeof str === 'undefined' || str === null)
        return '';

      if (str.constructor === Date)
        return JSON.stringify(str).slice(1, 25);

      var needsQuotes = false;

      if (_escapeFormulae && typeof str === "string") {
        // A caller-supplied regex with the g or y flag remembers lastIndex
        // between test() calls; reset it so every cell is matched from its
        // first character and no formula slips through unescaped
        _escapeFormulae.lastIndex = 0;
        if (_escapeFormulae.test(str)) {
          str = "'" + str;
          needsQuotes = true;
        }
      }

      var escapedQuoteStr = str.toString().replace(quoteCharRegex, _escapedQuote);

      needsQuotes = needsQuotes
        || _quotes === true
        || (typeof _quotes === 'function' && _quotes(str, col))
        || (Array.isArray(_quotes) && _quotes[col])
        || hasAny(escapedQuoteStr, Papa.BAD_DELIMITERS)
        || escapedQuoteStr.indexOf(_delimiter) > -1
        || escapedQuoteStr.charAt(0) === ' '
        || escapedQuoteStr.charAt(escapedQuoteStr.length - 1) === ' ';

      return needsQuotes ? _quoteChar + escapedQuoteStr + _quoteChar : escapedQuoteStr;
    }

    /** True when `str` contains at least one of `substrings`. */
    function hasAny(str, substrings)
    {
      for (var i = 0; i < substrings.length; i++)
        if (str.indexOf(substrings[i]) > -1)
          return true;
      return false;
    }
  }
  /**
   * ChunkStreamer is the base prototype for various streamer implementations.
   *
   * It owns the ParserHandle, the carried-over partial line and the running
   * totals. Concrete streamers supply `_nextChunk` and feed text to
   * `parseChunk`.
   *
   * @param {Object} config parse configuration; it is copied, not kept
   */
  function ChunkStreamer(config) {
    this._handle = null;
    this._finished = false;
    this._completed = false;
    this._halted = false;
    this._input = null;
    this._baseIndex = 0;
    this._partialLine = '';
    this._rowCount = 0;
    this._start = 0;
    this._nextChunk = null;
    this.isFirstChunk = true;
    this._completeResults = {
      data: [],
      errors: [],
      meta: {}
    };
    replaceConfig.call(this, config);

    /**
     * Parses one chunk of text and dispatches the results.
     *
     * @param {string} chunk text to parse
     * @param {boolean} [isFakeChunk] when truthy the user's chunk callback is not invoked
     * @returns {Object|undefined} the parse results; undefined when parsing
     *   was paused or aborted, or after the results were handed to the user's
     *   chunk callback
     */
    this.parseChunk = function(chunk, isFakeChunk) {
      // First chunk pre-processing: drop the requested number of leading lines
      const linesToSkip = parseInt(this._config.skipFirstNLines) || 0;
      if (this.isFirstChunk && linesToSkip > 0) {
        const lineBreak = this._config.newline
          || this._handle.guessLineEndings(chunk, this._config.quoteChar || '"');
        chunk = chunk.split(lineBreak).slice(linesToSkip).join(lineBreak);
      }

      if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
        var replacement = this._config.beforeFirstChunk(chunk);
        if (replacement !== undefined) {
          chunk = replacement;
        }
      }
      this.isFirstChunk = false;
      this._halted = false;

      // Rejoin the line we likely just split in two by chunking the file
      var text = this._partialLine + chunk;
      this._partialLine = '';

      var results = this._handle.parse(text, this._baseIndex, !this._finished);
      if (haltIfStopped(this)) {
        return;
      }

      var cursor = results.meta.cursor;
      if (!this._finished) {
        // Keep whatever lies beyond the parser's cursor for the next chunk
        this._partialLine = text.substring(cursor - this._baseIndex);
        this._baseIndex = cursor;
      }

      if (results && results.data) {
        this._rowCount += results.data.length;
      }

      var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);

      if (IS_PAPA_WORKER) {
        global.postMessage({
          results: results,
          workerId: Papa.WORKER_ID,
          finished: finishedIncludingPreview
        });
      } else if (isFunction(this._config.chunk) && !isFakeChunk) {
        this._config.chunk(results, this._handle);
        if (haltIfStopped(this)) {
          return;
        }
        // The callback has consumed this chunk, so nothing is accumulated
        results = undefined;
        this._completeResults = undefined;
      }

      // Without step/chunk callbacks, gather everything for `complete`
      if (!this._config.step && !this._config.chunk) {
        var totals = this._completeResults;
        totals.data = totals.data.concat(results.data);
        totals.errors = totals.errors.concat(results.errors);
        totals.meta = results.meta;
      }

      var wasAborted = results && results.meta.aborted;
      if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && !wasAborted) {
        this._config.complete(this._completeResults, this._input);
        this._completed = true;
      }

      var wasPaused = results && results.meta.paused;
      if (!finishedIncludingPreview && !wasPaused) {
        this._nextChunk();
      }

      return results;
    };

    /**
     * Reports an error through the user's error callback, or, inside a Papa
     * worker whose config has a truthy `error` entry, by posting a message.
     */
    this._sendError = function(error) {
      if (isFunction(this._config.error)) {
        this._config.error(error);
        return;
      }
      if (IS_PAPA_WORKER && this._config.error) {
        global.postMessage({
          workerId: Papa.WORKER_ID,
          error: error,
          finished: false
        });
      }
    };

    /** Marks the streamer as halted when its handle is paused or aborted; returns whether it was. */
    function haltIfStopped(streamer) {
      if (streamer._handle.paused() || streamer._handle.aborted()) {
        streamer._halted = true;
        return true;
      }
      return false;
    }

    /** Copies the config, normalises chunkSize and creates the ParserHandle (called with `this` = the streamer). */
    function replaceConfig(config) {
      // Deep-copy the config so we can edit it
      var ownConfig = copy(config);
      ownConfig.chunkSize = parseInt(ownConfig.chunkSize); // parseInt VERY important so we don't concatenate strings!
      if (!config.step && !config.chunk) {
        ownConfig.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
      }
      this._handle = new ParserHandle(ownConfig);
      this._handle.streamer = this;
      this._config = ownConfig; // persist the copy to the caller
    }
  }
  /**
   * Streamer that fetches its input with XMLHttpRequest, optionally in
   * byte-range chunks of `config.chunkSize` (default Papa.RemoteChunkSize).
   *
   * @param {Object} [config] parse configuration
   */
  function NetworkStreamer(config) {
    config = config || {};
    if (!config.chunkSize) {
      config.chunkSize = Papa.RemoteChunkSize;
    }
    ChunkStreamer.call(this, config);

    var xhr;

    // In a worker the request is opened synchronously (see xhr.open below),
    // so the response is processed right after the read rather than from onload.
    this._nextChunk = IS_WORKER
      ? function() {
        this._readChunk();
        this._chunkLoaded();
      }
      : function() {
        this._readChunk();
      };

    this.stream = function(url) {
      this._input = url;
      this._nextChunk(); // Starts streaming
    };

    this._readChunk = function() {
      if (this._finished) {
        this._chunkLoaded();
        return;
      }

      var settings = this._config;
      xhr = new XMLHttpRequest();

      if (settings.withCredentials) {
        xhr.withCredentials = settings.withCredentials;
      }

      if (!IS_WORKER) {
        xhr.onload = bindFunction(this._chunkLoaded, this);
        xhr.onerror = bindFunction(this._chunkError, this);
      }

      var method = settings.downloadRequestBody ? 'POST' : 'GET';
      xhr.open(method, this._input, !IS_WORKER);

      // Headers can only be set once the request state is OPENED
      var extraHeaders = settings.downloadRequestHeaders;
      if (extraHeaders) {
        for (var headerName in extraHeaders) {
          xhr.setRequestHeader(headerName, extraHeaders[headerName]);
        }
      }

      if (settings.chunkSize) {
        var lastByte = this._start + settings.chunkSize - 1; // minus one because byte range is inclusive
        xhr.setRequestHeader('Range', 'bytes=' + this._start + '-' + lastByte);
      }

      try {
        xhr.send(settings.downloadRequestBody);
      } catch (err) {
        this._chunkError(err.message);
      }

      if (IS_WORKER && xhr.status === 0) {
        this._chunkError();
      }
    };

    this._chunkLoaded = function() {
      if (xhr.readyState !== 4) {
        return;
      }

      var status = xhr.status;
      if (status < 200 || status >= 400) {
        this._chunkError();
        return;
      }

      // Advance by chunkSize rather than by the response length: the two can
      // differ when the response contains characters of more than one byte
      var step = this._config.chunkSize;
      this._start += step || xhr.responseText.length;
      this._finished = !step || this._start >= totalSizeOf(xhr);
      this.parseChunk(xhr.responseText);
    };

    this._chunkError = function(errorMessage) {
      var description = xhr.statusText || errorMessage;
      this._sendError(new Error(description));
    };

    /** Total size taken from the part after the last '/' of Content-Range, or -1 when the header is absent. */
    function totalSizeOf(request) {
      var contentRange = request.getResponseHeader('Content-Range');
      if (contentRange === null) { // no content range, then finish!
        return -1;
      }
      var sizeText = contentRange.substring(contentRange.lastIndexOf('/') + 1);
      return parseInt(sizeText);
    }
  }
  NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
  NetworkStreamer.prototype.constructor = NetworkStreamer;
  /**
   * Streamer that reads a File in slices of `config.chunkSize` (default
   * Papa.LocalChunkSize) using FileReader, or FileReaderSync when FileReader
   * is not available.
   *
   * @param {Object} [config] parse configuration
   */
  function FileStreamer(config) {
    config = config || {};
    if (!config.chunkSize) {
      config.chunkSize = Papa.LocalChunkSize;
    }
    ChunkStreamer.call(this, config);

    var reader;
    var sliceFn;

    // FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
    // But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
    var usingAsyncReader = typeof FileReader !== 'undefined'; // Safari doesn't consider it a function - see issue #105

    this.stream = function(file) {
      this._input = file;
      sliceFn = file.slice || file.webkitSlice || file.mozSlice;

      if (!usingAsyncReader) {
        reader = new FileReaderSync(); // Hack for running in a web worker in Firefox
      } else {
        reader = new FileReader(); // Preferred method of reading files, even in workers
        reader.onload = bindFunction(this._chunkLoaded, this);
        reader.onerror = bindFunction(this._chunkError, this);
      }

      this._nextChunk(); // Starts streaming
    };

    this._nextChunk = function() {
      if (this._finished) {
        return;
      }
      // Stop reading once the preview row limit has been reached
      var previewLimit = this._config.preview;
      if (previewLimit && !(this._rowCount < previewLimit)) {
        return;
      }
      this._readChunk();
    };

    this._readChunk = function() {
      var piece = this._input;
      var chunkSize = this._config.chunkSize;
      if (chunkSize) {
        var stop = Math.min(this._start + chunkSize, this._input.size);
        piece = sliceFn.call(piece, this._start, stop);
      }
      var text = reader.readAsText(piece, this._config.encoding);
      if (!usingAsyncReader) {
        // The sync reader returns the text directly; mimic the async signature
        this._chunkLoaded({ target: { result: text } });
      }
    };

    this._chunkLoaded = function(event) {
      // Very important to increment start each time before handling results
      var chunkSize = this._config.chunkSize;
      this._start += chunkSize;
      this._finished = !chunkSize || this._start >= this._input.size;
      this.parseChunk(event.target.result);
    };

    this._chunkError = function() {
      this._sendError(reader.error);
    };
  }
  FileStreamer.prototype = Object.create(ChunkStreamer.prototype);
  FileStreamer.prototype.constructor = FileStreamer;
  /**
   * Streamer for input that is already a string in memory. The string is
   * handed to the parser in pieces of `config.chunkSize` characters, or all
   * at once when no chunk size is set.
   *
   * @param {Object} [config] parse configuration
   */
  function StringStreamer(config)
  {
    config = config || {};
    ChunkStreamer.call(this, config);

    // The part of the input that has not been parsed yet
    var remaining;

    /**
     * Starts parsing the given string.
     * @param {string} s the text to parse
     * @returns {*} the result of parsing the first chunk
     */
    this.stream = function(s)
    {
      remaining = s;
      return this._nextChunk();
    };

    /**
     * Parses the next piece of the remaining text.
     * @returns {*} the result of parseChunk, or undefined once finished
     */
    this._nextChunk = function()
    {
      if (this._finished) return;
      var size = this._config.chunkSize;
      var chunk;
      if (size) {
        chunk = remaining.substring(0, size);
        remaining = remaining.substring(size);
      } else {
        chunk = remaining;
        remaining = '';
      }
      // Finished as soon as nothing is left after taking this chunk
      this._finished = !remaining;
      return this.parseChunk(chunk);
    };
  }
  // Inherit from ChunkStreamer.prototype, exactly like the other streamers
  // (the prototype was previously derived from StringStreamer.prototype itself)
  StringStreamer.prototype = Object.create(ChunkStreamer.prototype);
  StringStreamer.prototype.constructor = StringStreamer;
  /**
   * Streamer for an event-emitting readable stream: chunks arriving through
   * 'data' events are queued and parsed one at a time, and 'end' triggers a
   * final empty chunk so parsing can finish.
   *
   * @param {Object} [config] parse configuration
   */
  function ReadableStreamStreamer(config) {
    config = config || {};
    ChunkStreamer.call(this, config);

    var pendingChunks = [];
    // True while no chunk is being parsed, i.e. the next 'data' event should start parsing itself
    var parseOnData = true;
    var streamHasEnded = false;

    // NOTE(review): no `pause`/`resume` is defined on ChunkStreamer.prototype
    // in the part of the file reviewed here - confirm these exist before
    // relying on the two methods below.
    this.pause = function() {
      ChunkStreamer.prototype.pause.apply(this, arguments);
      this._input.pause();
    };

    this.resume = function() {
      ChunkStreamer.prototype.resume.apply(this, arguments);
      this._input.resume();
    };

    this.stream = function(stream) {
      this._input = stream;

      this._input.on('data', this._streamData);
      this._input.on('end', this._streamEnd);
      this._input.on('error', this._streamError);
    };

    // The chunk about to be parsed is the last one when the stream has ended
    // and it is the only one still queued
    this._checkIsFinished = function() {
      if (streamHasEnded && pendingChunks.length === 1) {
        this._finished = true;
      }
    };

    this._nextChunk = function() {
      this._checkIsFinished();
      if (!pendingChunks.length) {
        // Nothing queued: let the next 'data' event start parsing
        parseOnData = true;
        return;
      }
      this.parseChunk(pendingChunks.shift());
    };

    this._streamData = bindFunction(function(chunk) {
      try {
        var text = typeof chunk === 'string' ? chunk : chunk.toString(this._config.encoding);
        pendingChunks.push(text);

        if (parseOnData) {
          parseOnData = false;
          this._checkIsFinished();
          this.parseChunk(pendingChunks.shift());
        }
      } catch (error) {
        this._streamError(error);
      }
    }, this);

    this._streamError = bindFunction(function(error) {
      this._streamCleanUp();
      this._sendError(error);
    }, this);

    this._streamEnd = bindFunction(function() {
      this._streamCleanUp();
      streamHasEnded = true;
      // Push an empty chunk through so the parser is told the input is complete
      this._streamData('');
    }, this);

    this._streamCleanUp = bindFunction(function() {
      this._input.removeListener('data', this._streamData);
      this._input.removeListener('end', this._streamEnd);
      this._input.removeListener('error', this._streamError);
    }, this);
  }
  ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
  ReadableStreamStreamer.prototype.constructor = ReadableStreamStreamer;
  /**
   * Streamer backed by a Node Duplex stream: CSV text written to the stream
   * is parsed chunk by chunk and each step's `results.data` is pushed to the
   * readable side (object mode). Obtain the stream with getStream().
   *
   * @param {Object} _config parse configuration; a copy is used, with `step`
   *   and `complete` replaced by this streamer's own handlers
   */
  function DuplexStreamStreamer(_config) {
    var Duplex = require('stream').Duplex;
    var config = copy(_config);
    // True while no write is being parsed, i.e. the next write should start parsing itself
    var parseOnWrite = true;
    var writeStreamHasFinished = false;
    var parseCallbackQueue = [];
    var stream = null;

    this._onCsvData = function(results) {
      var accepted = stream.push(results.data);
      if (!accepted && !this._handle.paused()) {
        // the writeable consumer buffer has filled up
        // so we need to pause until more items
        // can be processed
        this._handle.pause();
      }
    };

    this._onCsvComplete = function() {
      // node will finish the read stream when
      // null is pushed
      stream.push(null);
    };

    config.step = bindFunction(this._onCsvData, this);
    config.complete = bindFunction(this._onCsvComplete, this);
    ChunkStreamer.call(this, config);

    this._nextChunk = function() {
      // The queued job is the final one when the writable side has finished
      // and it is the only job left
      if (writeStreamHasFinished && parseCallbackQueue.length === 1) {
        this._finished = true;
      }
      if (!parseCallbackQueue.length) {
        parseOnWrite = true;
        return;
      }
      var job = parseCallbackQueue.shift();
      job();
    };

    this._addToParseQueue = function(chunk, callback) {
      // add to queue so that we can indicate
      // completion via callback
      // node will automatically pause the incoming stream
      // when too many items have been added without their
      // callback being invoked
      var job = bindFunction(function() {
        var text = typeof chunk === 'string' ? chunk : chunk.toString(config.encoding);
        this.parseChunk(text);
        if (isFunction(callback)) {
          return callback();
        }
      }, this);
      parseCallbackQueue.push(job);

      if (parseOnWrite) {
        parseOnWrite = false;
        this._nextChunk();
      }
    };

    this._onRead = function() {
      if (this._handle.paused()) {
        // the writeable consumer can handle more data
        // so resume the chunk parsing
        this._handle.resume();
      }
    };

    this._onWrite = function(chunk, encoding, callback) {
      this._addToParseQueue(chunk, callback);
    };

    this._onWriteComplete = function() {
      writeStreamHasFinished = true;
      // have to write empty string
      // so parser knows its done
      this._addToParseQueue('');
    };

    this.getStream = function() {
      return stream;
    };

    var duplexOptions = {
      readableObjectMode: true,
      decodeStrings: false,
      read: bindFunction(this._onRead, this),
      write: bindFunction(this._onWrite, this)
    };
    stream = new Duplex(duplexOptions);
    stream.once('finish', bindFunction(this._onWriteComplete, this));
  }
  // The prototype chain is only set up when PAPA_BROWSER_CONTEXT is not defined
  if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
    DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
    DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer;
  }
  859. // Use one ParserHandle per entire CSV file or string
  860. function ParserHandle(_config)
  861. {
  862. // One goal is to minimize the use of regular expressions...
  863. var MAX_FLOAT = Math.pow(2, 53);
  864. var MIN_FLOAT = -MAX_FLOAT;
  865. var FLOAT = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/;
  866. var ISO_DATE = /^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/;
  867. var self = this;
  868. var _stepCounter = 0; // Number of times step was called (number of rows parsed)
  869. var _rowCounter = 0; // Number of rows that have been parsed so far
  870. var _input; // The input being parsed
  871. var _parser; // The core parser being used
  872. var _paused = false; // Whether we are paused or not
  873. var _aborted = false; // Whether the parser has aborted or not
  874. var _delimiterError; // Temporary state between delimiter detection and processing results
  875. var _fields = []; // Fields are from the header row of the input, if there is one
  876. var _results = { // The last results returned from the parser
  877. data: [],
  878. errors: [],
  879. meta: {}
  880. };
  881. if (isFunction(_config.step))
  882. {
  883. var userStep = _config.step;
  884. _config.step = function(results)
  885. {
  886. _results = results;
  887. if (needsHeaderRow())
  888. processResults();
  889. else // only call user's step function after header row
  890. {
  891. processResults();
  892. // It's possbile that this line was empty and there's no row here after all
  893. if (_results.data.length === 0)
  894. return;
  895. _stepCounter += results.data.length;
  896. if (_config.preview && _stepCounter > _config.preview)
  897. _parser.abort();
  898. else {
  899. _results.data = _results.data[0];
  900. userStep(_results, self);
  901. }
  902. }
  903. };
  904. }
  905. /**
  906. * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
  907. * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
  908. * when an input comes in multiple chunks, like from a file.
  909. */
  910. this.parse = function(input, baseIndex, ignoreLastRow)
  911. {
  912. var quoteChar = _config.quoteChar || '"';
  913. if (!_config.newline)
  914. _config.newline = this.guessLineEndings(input, quoteChar);
  915. _delimiterError = false;
  916. if (!_config.delimiter)
  917. {
  918. var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
  919. if (delimGuess.successful)
  920. _config.delimiter = delimGuess.bestDelimiter;
  921. else
  922. {
  923. _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
  924. _config.delimiter = Papa.DefaultDelimiter;
  925. }
  926. _results.meta.delimiter = _config.delimiter;
  927. }
  928. else if(isFunction(_config.delimiter))
  929. {
  930. _config.delimiter = _config.delimiter(input);
  931. _results.meta.delimiter = _config.delimiter;
  932. }
  933. var parserConfig = copy(_config);
  934. if (_config.preview && _config.header)
  935. parserConfig.preview++; // to compensate for header row
  936. _input = input;
  937. _parser = new Parser(parserConfig);
  938. _results = _parser.parse(_input, baseIndex, ignoreLastRow);
  939. processResults();
  940. return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
  941. };
  942. this.paused = function()
  943. {
  944. return _paused;
  945. };
  946. this.pause = function()
  947. {
  948. _paused = true;
  949. _parser.abort();
  950. // If it is streaming via "chunking", the reader will start appending correctly already so no need to substring,
  951. // otherwise we can get duplicate content within a row
  952. _input = isFunction(_config.chunk) ? "" : _input.substring(_parser.getCharIndex());
  953. };
  954. this.resume = function()
  955. {
  956. if(self.streamer._halted) {
  957. _paused = false;
  958. self.streamer.parseChunk(_input, true);
  959. } else {
  960. // Bugfix: #636 In case the processing hasn't halted yet
  961. // wait for it to halt in order to resume
  962. setTimeout(self.resume, 3);
  963. }
  964. };
  965. this.aborted = function()
  966. {
  967. return _aborted;
  968. };
  969. this.abort = function()
  970. {
  971. _aborted = true;
  972. _parser.abort();
  973. _results.meta.aborted = true;
  974. if (isFunction(_config.complete))
  975. _config.complete(_results);
  976. _input = '';
  977. };
  978. this.guessLineEndings = function(input, quoteChar)
  979. {
  980. input = input.substring(0, 1024 * 1024); // max length 1 MB
  981. // Replace all the text inside quotes
  982. var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
  983. input = input.replace(re, '');
  984. var r = input.split('\r');
  985. var n = input.split('\n');
  986. var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length);
  987. if (r.length === 1 || nAppearsFirst)
  988. return '\n';
  989. var numWithN = 0;
  990. for (var i = 0; i < r.length; i++)
  991. {
  992. if (r[i][0] === '\n')
  993. numWithN++;
  994. }
  995. return numWithN >= r.length / 2 ? '\r\n' : '\r';
  996. };
  997. function testEmptyLine(s) {
  998. return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
  999. }
  1000. function testFloat(s) {
  1001. if (FLOAT.test(s)) {
  1002. var floatValue = parseFloat(s);
  1003. if (floatValue > MIN_FLOAT && floatValue < MAX_FLOAT) {
  1004. return true;
  1005. }
  1006. }
  1007. return false;
  1008. }
  1009. function processResults()
  1010. {
  1011. if (_results && _delimiterError)
  1012. {
  1013. addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \'' + Papa.DefaultDelimiter + '\'');
  1014. _delimiterError = false;
  1015. }
  1016. if (_config.skipEmptyLines)
  1017. {
  1018. _results.data = _results.data.filter(function(d) {
  1019. return !testEmptyLine(d);
  1020. });
  1021. }
  1022. if (needsHeaderRow())
  1023. fillHeaderFields();
  1024. return applyHeaderAndDynamicTypingAndTransformation();
  1025. }
  1026. function needsHeaderRow()
  1027. {
  1028. return _config.header && _fields.length === 0;
  1029. }
  1030. function fillHeaderFields()
  1031. {
  1032. if (!_results)
  1033. return;
  1034. function addHeader(header, i)
  1035. {
  1036. if (isFunction(_config.transformHeader))
  1037. header = _config.transformHeader(header, i);
  1038. _fields.push(header);
  1039. }
  1040. if (Array.isArray(_results.data[0]))
  1041. {
  1042. for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
  1043. _results.data[i].forEach(addHeader);
  1044. _results.data.splice(0, 1);
  1045. }
  1046. // if _results.data[0] is not an array, we are in a step where _results.data is the row.
  1047. else
  1048. _results.data.forEach(addHeader);
  1049. }
  1050. function shouldApplyDynamicTyping(field) {
  1051. // Cache function values to avoid calling it for each row
  1052. if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
  1053. _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
  1054. }
  1055. return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
  1056. }
  1057. function parseDynamic(field, value)
  1058. {
  1059. if (shouldApplyDynamicTyping(field))
  1060. {
  1061. if (value === 'true' || value === 'TRUE')
  1062. return true;
  1063. else if (value === 'false' || value === 'FALSE')
  1064. return false;
  1065. else if (testFloat(value))
  1066. return parseFloat(value);
  1067. else if (ISO_DATE.test(value))
  1068. return new Date(value);
  1069. else
  1070. return (value === '' ? null : value);
  1071. }
  1072. return value;
  1073. }
  1074. function applyHeaderAndDynamicTypingAndTransformation()
  1075. {
  1076. if (!_results || (!_config.header && !_config.dynamicTyping && !_config.transform))
  1077. return _results;
  1078. function processRow(rowSource, i)
  1079. {
  1080. var row = _config.header ? {} : [];
  1081. var j;
  1082. for (j = 0; j < rowSource.length; j++)
  1083. {
  1084. var field = j;
  1085. var value = rowSource[j];
  1086. if (_config.header)
  1087. field = j >= _fields.length ? '__parsed_extra' : _fields[j];
  1088. if (_config.transform)
  1089. value = _config.transform(value,field);
  1090. value = parseDynamic(field, value);
  1091. if (field === '__parsed_extra')
  1092. {
  1093. row[field] = row[field] || [];
  1094. row[field].push(value);
  1095. }
  1096. else
  1097. row[field] = value;
  1098. }
  1099. if (_config.header)
  1100. {
  1101. if (j > _fields.length)
  1102. addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
  1103. else if (j < _fields.length)
  1104. addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
  1105. }
  1106. return row;
  1107. }
  1108. var incrementBy = 1;
  1109. if (!_results.data.length || Array.isArray(_results.data[0]))
  1110. {
  1111. _results.data = _results.data.map(processRow);
  1112. incrementBy = _results.data.length;
  1113. }
  1114. else
  1115. _results.data = processRow(_results.data, 0);
  1116. if (_config.header && _results.meta)
  1117. _results.meta.fields = _fields;
  1118. _rowCounter += incrementBy;
  1119. return _results;
  1120. }
  1121. function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
  1122. var bestDelim, bestDelta, fieldCountPrevRow, maxFieldCount;
  1123. delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
  1124. for (var i = 0; i < delimitersToGuess.length; i++) {
  1125. var delim = delimitersToGuess[i];
  1126. var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
  1127. fieldCountPrevRow = undefined;
  1128. var preview = new Parser({
  1129. comments: comments,
  1130. delimiter: delim,
  1131. newline: newline,
  1132. preview: 10
  1133. }).parse(input);
  1134. for (var j = 0; j < preview.data.length; j++) {
  1135. if (skipEmptyLines && testEmptyLine(preview.data[j])) {
  1136. emptyLinesCount++;
  1137. continue;
  1138. }
  1139. var fieldCount = preview.data[j].length;
  1140. avgFieldCount += fieldCount;
  1141. if (typeof fieldCountPrevRow === 'undefined') {
  1142. fieldCountPrevRow = fieldCount;
  1143. continue;
  1144. }
  1145. else if (fieldCount > 0) {
  1146. delta += Math.abs(fieldCount - fieldCountPrevRow);
  1147. fieldCountPrevRow = fieldCount;
  1148. }
  1149. }
  1150. if (preview.data.length > 0)
  1151. avgFieldCount /= (preview.data.length - emptyLinesCount);
  1152. if ((typeof bestDelta === 'undefined' || delta <= bestDelta)
  1153. && (typeof maxFieldCount === 'undefined' || avgFieldCount > maxFieldCount) && avgFieldCount > 1.99) {
  1154. bestDelta = delta;
  1155. bestDelim = delim;
  1156. maxFieldCount = avgFieldCount;
  1157. }
  1158. }
  1159. _config.delimiter = bestDelim;
  1160. return {
  1161. successful: !!bestDelim,
  1162. bestDelimiter: bestDelim
  1163. };
  1164. }
  1165. function addError(type, code, msg, row)
  1166. {
  1167. var error = {
  1168. type: type,
  1169. code: code,
  1170. message: msg
  1171. };
  1172. if(row !== undefined) {
  1173. error.row = row;
  1174. }
  1175. _results.errors.push(error);
  1176. }
  1177. }
  1178. /** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
  1179. function escapeRegExp(string)
  1180. {
  1181. return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
  1182. }
  1183. /** The core parser implements speedy and correct CSV parsing */
  1184. function Parser(config)
  1185. {
  1186. // Unpack the config object
  1187. config = config || {};
  1188. var delim = config.delimiter;
  1189. var newline = config.newline;
  1190. var comments = config.comments;
  1191. var step = config.step;
  1192. var preview = config.preview;
  1193. var fastMode = config.fastMode;
  1194. var quoteChar;
  1195. var renamedHeaders = null;
  1196. var headerParsed = false;
  1197. if (config.quoteChar === undefined || config.quoteChar === null) {
  1198. quoteChar = '"';
  1199. } else {
  1200. quoteChar = config.quoteChar;
  1201. }
  1202. var escapeChar = quoteChar;
  1203. if (config.escapeChar !== undefined) {
  1204. escapeChar = config.escapeChar;
  1205. }
  1206. // Delimiter must be valid
  1207. if (typeof delim !== 'string'
  1208. || Papa.BAD_DELIMITERS.indexOf(delim) > -1)
  1209. delim = ',';
  1210. // Comment character must be valid
  1211. if (comments === delim)
  1212. throw new Error('Comment character same as delimiter');
  1213. else if (comments === true)
  1214. comments = '#';
  1215. else if (typeof comments !== 'string'
  1216. || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
  1217. comments = false;
  1218. // Newline must be valid: \r, \n, or \r\n
  1219. if (newline !== '\n' && newline !== '\r' && newline !== '\r\n')
  1220. newline = '\n';
  1221. // We're gonna need these at the Parser scope
  1222. var cursor = 0;
  1223. var aborted = false;
  1224. this.parse = function(input, baseIndex, ignoreLastRow)
  1225. {
  1226. // For some reason, in Chrome, this speeds things up (!?)
  1227. if (typeof input !== 'string')
  1228. throw new Error('Input must be a string');
  1229. // We don't need to compute some of these every time parse() is called,
  1230. // but having them in a more local scope seems to perform better
  1231. var inputLen = input.length,
  1232. delimLen = delim.length,
  1233. newlineLen = newline.length,
  1234. commentsLen = comments.length;
  1235. var stepIsFunction = isFunction(step);
  1236. // Establish starting state
  1237. cursor = 0;
  1238. var data = [], errors = [], row = [], lastCursor = 0;
  1239. if (!input)
  1240. return returnable();
  1241. if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
  1242. {
  1243. var rows = input.split(newline);
  1244. for (var i = 0; i < rows.length; i++)
  1245. {
  1246. row = rows[i];
  1247. cursor += row.length;
  1248. if (i !== rows.length - 1)
  1249. cursor += newline.length;
  1250. else if (ignoreLastRow)
  1251. return returnable();
  1252. if (comments && row.substring(0, commentsLen) === comments)
  1253. continue;
  1254. if (stepIsFunction)
  1255. {
  1256. data = [];
  1257. pushRow(row.split(delim));
  1258. doStep();
  1259. if (aborted)
  1260. return returnable();
  1261. }
  1262. else
  1263. pushRow(row.split(delim));
  1264. if (preview && i >= preview)
  1265. {
  1266. data = data.slice(0, preview);
  1267. return returnable(true);
  1268. }
  1269. }
  1270. return returnable();
  1271. }
  1272. var nextDelim = input.indexOf(delim, cursor);
  1273. var nextNewline = input.indexOf(newline, cursor);
  1274. var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
  1275. var quoteSearch = input.indexOf(quoteChar, cursor);
  1276. // Parser loop
  1277. for (;;)
  1278. {
  1279. // Field has opening quote
  1280. if (input[cursor] === quoteChar)
  1281. {
  1282. // Start our search for the closing quote where the cursor is
  1283. quoteSearch = cursor;
  1284. // Skip the opening quote
  1285. cursor++;
  1286. for (;;)
  1287. {
  1288. // Find closing quote
  1289. quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
  1290. //No other quotes are found - no other delimiters
  1291. if (quoteSearch === -1)
  1292. {
  1293. if (!ignoreLastRow) {
  1294. // No closing quote... what a pity
  1295. errors.push({
  1296. type: 'Quotes',
  1297. code: 'MissingQuotes',
  1298. message: 'Quoted field unterminated',
  1299. row: data.length, // row has yet to be inserted
  1300. index: cursor
  1301. });
  1302. }
  1303. return finish();
  1304. }
  1305. // Closing quote at EOF
  1306. if (quoteSearch === inputLen - 1)
  1307. {
  1308. var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
  1309. return finish(value);
  1310. }
  1311. // If this quote is escaped, it's part of the data; skip it
  1312. // If the quote character is the escape character, then check if the next character is the escape character
  1313. if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar)
  1314. {
  1315. quoteSearch++;
  1316. continue;
  1317. }
  1318. // If the quote character is not the escape character, then check if the previous character was the escape character
  1319. if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar)
  1320. {
  1321. continue;
  1322. }
  1323. if(nextDelim !== -1 && nextDelim < (quoteSearch + 1)) {
  1324. nextDelim = input.indexOf(delim, (quoteSearch + 1));
  1325. }
  1326. if(nextNewline !== -1 && nextNewline < (quoteSearch + 1)) {
  1327. nextNewline = input.indexOf(newline, (quoteSearch + 1));
  1328. }
  1329. // Check up to nextDelim or nextNewline, whichever is closest
  1330. var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
  1331. var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
  1332. // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
  1333. if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndDelimiter, delimLen) === delim)
  1334. {
  1335. row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
  1336. cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
  1337. // If char after following delimiter is not quoteChar, we find next quote char position
  1338. if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen] !== quoteChar)
  1339. {
  1340. quoteSearch = input.indexOf(quoteChar, cursor);
  1341. }
  1342. nextDelim = input.indexOf(delim, cursor);
  1343. nextNewline = input.indexOf(newline, cursor);
  1344. break;
  1345. }
  1346. var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
  1347. // Closing quote followed by newline or 'unnecessary spaces + newLine'
  1348. if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline)
  1349. {
  1350. row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
  1351. saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
  1352. nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
  1353. quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line
  1354. if (stepIsFunction)
  1355. {
  1356. doStep();
  1357. if (aborted)
  1358. return returnable();
  1359. }
  1360. if (preview && data.length >= preview)
  1361. return returnable(true);
  1362. break;
  1363. }
  1364. // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
  1365. errors.push({
  1366. type: 'Quotes',
  1367. code: 'InvalidQuotes',
  1368. message: 'Trailing quote on quoted field is malformed',
  1369. row: data.length, // row has yet to be inserted
  1370. index: cursor
  1371. });
  1372. quoteSearch++;
  1373. continue;
  1374. }
  1375. continue;
  1376. }
  1377. // Comment found at start of new line
  1378. if (comments && row.length === 0 && input.substring(cursor, cursor + commentsLen) === comments)
  1379. {
  1380. if (nextNewline === -1) // Comment ends at EOF
  1381. return returnable();
  1382. cursor = nextNewline + newlineLen;
  1383. nextNewline = input.indexOf(newline, cursor);
  1384. nextDelim = input.indexOf(delim, cursor);
  1385. continue;
  1386. }
  1387. // Next delimiter comes before next newline, so we've reached end of field
  1388. if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1))
  1389. {
  1390. row.push(input.substring(cursor, nextDelim));
  1391. cursor = nextDelim + delimLen;
  1392. // we look for next delimiter char
  1393. nextDelim = input.indexOf(delim, cursor);
  1394. continue;
  1395. }
  1396. // End of row
  1397. if (nextNewline !== -1)
  1398. {
  1399. row.push(input.substring(cursor, nextNewline));
  1400. saveRow(nextNewline + newlineLen);
  1401. if (stepIsFunction)
  1402. {
  1403. doStep();
  1404. if (aborted)
  1405. return returnable();
  1406. }
  1407. if (preview && data.length >= preview)
  1408. return returnable(true);
  1409. continue;
  1410. }
  1411. break;
  1412. }
  1413. return finish();
  1414. function pushRow(row)
  1415. {
  1416. data.push(row);
  1417. lastCursor = cursor;
  1418. }
  1419. /**
  1420. * checks if there are extra spaces after closing quote and given index without any text
  1421. * if Yes, returns the number of spaces
  1422. */
  1423. function extraSpaces(index) {
  1424. var spaceLength = 0;
  1425. if (index !== -1) {
  1426. var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
  1427. if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
  1428. spaceLength = textBetweenClosingQuoteAndIndex.length;
  1429. }
  1430. }
  1431. return spaceLength;
  1432. }
  1433. /**
  1434. * Appends the remaining input from cursor to the end into
  1435. * row, saves the row, calls step, and returns the results.
  1436. */
  1437. function finish(value)
  1438. {
  1439. if (ignoreLastRow)
  1440. return returnable();
  1441. if (typeof value === 'undefined')
  1442. value = input.substring(cursor);
  1443. row.push(value);
  1444. cursor = inputLen; // important in case parsing is paused
  1445. pushRow(row);
  1446. if (stepIsFunction)
  1447. doStep();
  1448. return returnable();
  1449. }
  1450. /**
  1451. * Appends the current row to the results. It sets the cursor
  1452. * to newCursor and finds the nextNewline. The caller should
  1453. * take care to execute user's step function and check for
  1454. * preview and end parsing if necessary.
  1455. */
  1456. function saveRow(newCursor)
  1457. {
  1458. cursor = newCursor;
  1459. pushRow(row);
  1460. row = [];
  1461. nextNewline = input.indexOf(newline, cursor);
  1462. }
  1463. /** Returns an object with the results, errors, and meta. */
  1464. function returnable(stopped)
  1465. {
  1466. if (config.header && !baseIndex && data.length && !headerParsed)
  1467. {
  1468. const result = data[0];
  1469. const headerCount = {}; // To track the count of each base header
  1470. const usedHeaders = new Set(result); // To track used headers and avoid duplicates
  1471. let duplicateHeaders = false;
  1472. for (let i = 0; i < result.length; i++) {
  1473. let header = result[i];
  1474. if (isFunction(config.transformHeader))
  1475. header = config.transformHeader(header, i);
  1476. if (!headerCount[header]) {
  1477. headerCount[header] = 1;
  1478. result[i] = header;
  1479. } else {
  1480. let newHeader;
  1481. let suffixCount = headerCount[header];
  1482. // Find a unique new header
  1483. do {
  1484. newHeader = `${header}_${suffixCount}`;
  1485. suffixCount++;
  1486. } while (usedHeaders.has(newHeader));
  1487. usedHeaders.add(newHeader); // Mark this new Header as used
  1488. result[i] = newHeader;
  1489. headerCount[header]++;
  1490. duplicateHeaders = true;
  1491. if (renamedHeaders === null) {
  1492. renamedHeaders = {};
  1493. }
  1494. renamedHeaders[newHeader] = header;
  1495. }
  1496. usedHeaders.add(header); // Ensure the original header is marked as used
  1497. }
  1498. if (duplicateHeaders) {
  1499. console.warn('Duplicate headers found and renamed.');
  1500. }
  1501. headerParsed = true;
  1502. }
  1503. return {
  1504. data: data,
  1505. errors: errors,
  1506. meta: {
  1507. delimiter: delim,
  1508. linebreak: newline,
  1509. aborted: aborted,
  1510. truncated: !!stopped,
  1511. cursor: lastCursor + (baseIndex || 0),
  1512. renamedHeaders: renamedHeaders
  1513. }
  1514. };
  1515. }
  1516. /** Executes the user's step function and resets data & errors. */
  1517. function doStep()
  1518. {
  1519. step(returnable());
  1520. data = [];
  1521. errors = [];
  1522. }
  1523. };
  1524. /** Sets the abort flag */
  1525. this.abort = function()
  1526. {
  1527. aborted = true;
  1528. };
  1529. /** Gets the cursor position */
  1530. this.getCharIndex = function()
  1531. {
  1532. return cursor;
  1533. };
  1534. }
  1535. function newWorker()
  1536. {
  1537. if (!Papa.WORKERS_SUPPORTED)
  1538. return false;
  1539. var workerUrl = getWorkerBlob();
  1540. var w = new global.Worker(workerUrl);
  1541. w.onmessage = mainThreadReceivedMessage;
  1542. w.id = workerIdCounter++;
  1543. workers[w.id] = w;
  1544. return w;
  1545. }
  1546. /** Callback when main thread receives a message */
  1547. function mainThreadReceivedMessage(e)
  1548. {
  1549. var msg = e.data;
  1550. var worker = workers[msg.workerId];
  1551. var aborted = false;
  1552. if (msg.error)
  1553. worker.userError(msg.error, msg.file);
  1554. else if (msg.results && msg.results.data)
  1555. {
  1556. var abort = function() {
  1557. aborted = true;
  1558. completeWorker(msg.workerId, { data: [], errors: [], meta: { aborted: true } });
  1559. };
  1560. var handle = {
  1561. abort: abort,
  1562. pause: notImplemented,
  1563. resume: notImplemented
  1564. };
  1565. if (isFunction(worker.userStep))
  1566. {
  1567. for (var i = 0; i < msg.results.data.length; i++)
  1568. {
  1569. worker.userStep({
  1570. data: msg.results.data[i],
  1571. errors: msg.results.errors,
  1572. meta: msg.results.meta
  1573. }, handle);
  1574. if (aborted)
  1575. break;
  1576. }
  1577. delete msg.results; // free memory ASAP
  1578. }
  1579. else if (isFunction(worker.userChunk))
  1580. {
  1581. worker.userChunk(msg.results, handle, msg.file);
  1582. delete msg.results;
  1583. }
  1584. }
  1585. if (msg.finished && !aborted)
  1586. completeWorker(msg.workerId, msg.results);
  1587. }
  1588. function completeWorker(workerId, results) {
  1589. var worker = workers[workerId];
  1590. if (isFunction(worker.userComplete))
  1591. worker.userComplete(results);
  1592. worker.terminate();
  1593. delete workers[workerId];
  1594. }
  1595. function notImplemented() {
  1596. throw new Error('Not implemented.');
  1597. }
  1598. /** Callback when worker thread receives a message */
  1599. function workerThreadReceivedMessage(e)
  1600. {
  1601. var msg = e.data;
  1602. if (typeof Papa.WORKER_ID === 'undefined' && msg)
  1603. Papa.WORKER_ID = msg.workerId;
  1604. if (typeof msg.input === 'string')
  1605. {
  1606. global.postMessage({
  1607. workerId: Papa.WORKER_ID,
  1608. results: Papa.parse(msg.input, msg.config),
  1609. finished: true
  1610. });
  1611. }
  1612. else if ((global.File && msg.input instanceof File) || msg.input instanceof Object) // thank you, Safari (see issue #106)
  1613. {
  1614. var results = Papa.parse(msg.input, msg.config);
  1615. if (results)
  1616. global.postMessage({
  1617. workerId: Papa.WORKER_ID,
  1618. results: results,
  1619. finished: true
  1620. });
  1621. }
  1622. }
  1623. /** Makes a deep copy of an array or object (mostly) */
  1624. function copy(obj)
  1625. {
  1626. if (typeof obj !== 'object' || obj === null)
  1627. return obj;
  1628. var cpy = Array.isArray(obj) ? [] : {};
  1629. for (var key in obj)
  1630. cpy[key] = copy(obj[key]);
  1631. return cpy;
  1632. }
  1633. function bindFunction(f, self)
  1634. {
  1635. return function() { f.apply(self, arguments); };
  1636. }
  1637. function isFunction(func)
  1638. {
  1639. return typeof func === 'function';
  1640. }
  1641. return Papa;
  1642. }));