12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945 |
- /* @license
- Papa Parse
- v5.5.2
- https://github.com/mholt/PapaParse
- License: MIT
- */
- (function(root, factory)
- {
- /* globals define */
- if (typeof define === 'function' && define.amd)
- {
- // AMD. Register as an anonymous module.
- define([], factory);
- }
- else if (typeof module === 'object' && typeof exports !== 'undefined')
- {
- // Node. Does not work with strict CommonJS, but
- // only CommonJS-like environments that support module.exports,
- // like Node.
- module.exports = factory();
- }
- else
- {
- // Browser globals (root is window)
- root.Papa = factory();
- }
- // in strict mode we cannot access arguments.callee, so we need a named reference to
- // stringify the factory method for the blob worker
- // eslint-disable-next-line func-name
- }(this, function moduleFactory()
- {
- 'use strict';
// Resolve the host environment's global object without resorting to
// `Function('return this')()`, which behaves like `eval` and is therefore
// rejected under a Content Security Policy.
// This must remain a `var`: the probe below reads `typeof global`, which
// would throw for a `let`/`const` binding that is still being initialised.
var global = (function resolveGlobalObject() {
  if (typeof self !== 'undefined') {
    return self;
  }
  if (typeof window !== 'undefined') {
    return window;
  }
  // NOTE(review): inside this scope `global` appears to resolve to the
  // variable being initialised here (hoisted, still undefined), so this
  // probe may never succeed - confirm before relying on it.
  // When running tests none of the candidates may be defined, hence `{}`.
  return typeof global !== 'undefined' ? global : {};
})();
/**
 * Returns an object URL for a Blob containing this module's own source
 * (the stringified factory) preceded by a small bootstrap that defines
 * `global` and sets `global.IS_PAPA_WORKER`.
 * The URL is created once and cached on `Papa.BLOB_URL`.
 *
 * @returns {string} the cached or newly created object URL
 */
function getWorkerBlob() {
  var urlApi = global.URL || global.webkitURL || null;
  var factorySource = moduleFactory.toString();

  if (!Papa.BLOB_URL) {
    var bootstrap = "var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ";
    var workerSource = new Blob([bootstrap, '(', factorySource, ')();'], {type: 'text/javascript'});
    Papa.BLOB_URL = urlApi.createObjectURL(workerSource);
  }
  return Papa.BLOB_URL;
}
// Environment detection: a worker context has postMessage but no document.
var IS_WORKER = !global.document && !!global.postMessage;
// Set by the bootstrap code that getWorkerBlob() prepends to the worker source.
var IS_PAPA_WORKER = global.IS_PAPA_WORKER || false;

// Worker bookkeeping (presumably a registry keyed by worker id plus the id
// generator - both are consumed outside this part of the file).
var workers = {};
var workerIdCounter = 0;

// Public namespace
var Papa = {};

Papa.parse = CsvToJson;
Papa.unparse = JsonToCsv;

// ASCII record separator (30) and unit separator (31)
Papa.RECORD_SEP = String.fromCharCode(30);
Papa.UNIT_SEP = String.fromCharCode(31);
Papa.BYTE_ORDER_MARK = '\ufeff';
// Characters that may not appear in a delimiter and that force quoting in unparse
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker;
// Sentinel input value that asks Papa.parse for a Node duplex stream
Papa.NODE_STREAM_INPUT = 1;

// Configurable chunk sizes for local and remote files, respectively
Papa.LocalChunkSize = 1024 * 1024 * 10;	// 10 MB
Papa.RemoteChunkSize = 1024 * 1024 * 5;	// 5 MB
Papa.DefaultDelimiter = ',';			// Used if not specified and detection fails

// Exposed for testing and development only
Papa.Parser = Parser;
Papa.ParserHandle = ParserHandle;
Papa.NetworkStreamer = NetworkStreamer;
Papa.FileStreamer = FileStreamer;
Papa.StringStreamer = StringStreamer;
Papa.ReadableStreamStreamer = ReadableStreamStreamer;
// The duplex streamer is only exported when no browser-context marker is defined
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
  Papa.DuplexStreamStreamer = DuplexStreamStreamer;
}
if (global.jQuery) {
  var $ = global.jQuery;

  /**
   * jQuery plugin: parses every file selected in the matched file-input
   * elements, strictly one file after another.
   *
   * @param {Object} [options]
   * @param {Object} [options.config] base Papa.parse config; shallow-copied for each file
   * @param {Function} [options.before] called as before(file, inputElem) ahead of each
   *   file. It may return {action: 'abort', reason} to stop the whole queue,
   *   {action: 'skip'} or the string 'skip' to skip the file, or {config: {...}}
   *   to extend that file's config.
   * @param {Function} [options.error] called as error({name}, file, inputElem, reason)
   *   when `before` aborts
   * @param {Function} [options.complete] called without arguments once the queue is empty
   *   (not called after an abort)
   * @returns {jQuery} the matched set, to maintain chainability
   */
  $.fn.parse = function(options) {
    // Tolerate `$(sel).parse()` without an options object
    options = options || {};
    var config = options.config || {};
    var queue = [];

    this.each(function(idx) {
      var $elem = $(this);
      // .attr() yields undefined when the attribute is absent (e.g. a bare
      // input element), so check the type before lower-casing it.
      var typeAttr = $elem.attr('type');
      var supported = $elem.prop('tagName').toUpperCase() === 'INPUT'
        && typeof typeAttr === 'string'
        && typeAttr.toLowerCase() === 'file'
        && global.FileReader;

      if (!supported || !this.files || this.files.length === 0)
        return true;	// continue to next input element

      for (var i = 0; i < this.files.length; i++) {
        queue.push({
          file: this.files[i],
          inputElem: this,
          instanceConfig: $.extend({}, config)
        });
      }
    });

    parseNextFile();	// begin parsing
    return this;		// maintains chainability

    /** Parses the file at the head of the queue, or signals completion when the queue is empty. */
    function parseNextFile() {
      if (queue.length === 0) {
        if (isFunction(options.complete))
          options.complete();
        return;
      }

      var f = queue[0];

      if (isFunction(options.before)) {
        var returned = options.before(f.file, f.inputElem);

        // typeof null is 'object' too; a null return means "no instructions"
        if (returned !== null && typeof returned === 'object') {
          if (returned.action === 'abort') {
            error('AbortError', f.file, f.inputElem, returned.reason);
            return;	// Aborts all queued files immediately
          }
          else if (returned.action === 'skip') {
            fileComplete();	// parse the next file in the queue, if any
            return;
          }
          else if (typeof returned.config === 'object')
            f.instanceConfig = $.extend(f.instanceConfig, returned.config);
        }
        else if (returned === 'skip') {
          fileComplete();	// parse the next file in the queue, if any
          return;
        }
      }

      // Wrap up the user's complete callback, if any, so that ours also gets executed
      var userCompleteFunc = f.instanceConfig.complete;
      f.instanceConfig.complete = function(results) {
        if (isFunction(userCompleteFunc))
          userCompleteFunc(results, f.file, f.inputElem);
        fileComplete();
      };

      Papa.parse(f.file, f.instanceConfig);
    }

    /** Forwards an error to the user's error callback, if one was supplied. */
    function error(name, file, elem, reason) {
      if (isFunction(options.error))
        options.error({name: name}, file, elem, reason);
    }

    /** Drops the finished file from the queue and moves on to the next one. */
    function fileComplete() {
      queue.splice(0, 1);
      parseNextFile();
    }
  };
}
// When this copy of the library runs inside a worker it spawned itself,
// route incoming messages to the worker-side handler.
if (IS_PAPA_WORKER) {
  global.onmessage = workerThreadReceivedMessage;
}
/**
 * Entry point behind Papa.parse: normalises the config, then hands the input
 * to a worker or to the streamer matching the input's type.
 *
 * Note that `_config` is modified in place (dynamicTyping, transform and, on
 * the worker path, the callback slots and the `worker` flag).
 *
 * @param {*} _input a string (CSV text, or a URL when config.download is set),
 *   a readable stream, a File/object, or Papa.NODE_STREAM_INPUT
 * @param {Object} [_config] parse configuration
 * @returns {*} undefined when the work was handed to a worker, the duplex
 *   stream for Papa.NODE_STREAM_INPUT, otherwise whatever the chosen
 *   streamer's stream() returns
 */
function CsvToJson(_input, _config) {
  _config = _config || {};

  var typing = _config.dynamicTyping || false;
  if (isFunction(typing)) {
    _config.dynamicTypingFunction = typing;
    // Will be filled on first row call
    typing = {};
  }
  _config.dynamicTyping = typing;
  _config.transform = isFunction(_config.transform) ? _config.transform : false;

  if (_config.worker && Papa.WORKERS_SUPPORTED) {
    delegateToWorker();
    return;
  }

  var nodeStreamRequested = _input === Papa.NODE_STREAM_INPUT
    && typeof PAPA_BROWSER_CONTEXT === 'undefined';
  if (nodeStreamRequested) {
    // create a node Duplex stream for use with .pipe
    return new DuplexStreamStreamer(_config).getStream();
  }

  var streamer = null;
  if (typeof _input === 'string') {
    _input = withoutBom(_input);
    streamer = _config.download
      ? new NetworkStreamer(_config)
      : new StringStreamer(_config);
  } else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
    streamer = new ReadableStreamStreamer(_config);
  } else if ((global.File && _input instanceof File) || _input instanceof Object) {	// ...Safari. (see issue #106)
    streamer = new FileStreamer(_config);
  }

  return streamer.stream(_input);

  /**
   * Keeps the user's callbacks on the worker object and sends the worker a
   * config in which each callback slot only says whether a callback exists.
   */
  function delegateToWorker() {
    var w = newWorker();

    w.userStep = _config.step;
    w.userChunk = _config.chunk;
    w.userComplete = _config.complete;
    w.userError = _config.error;

    _config.step = isFunction(_config.step);
    _config.chunk = isFunction(_config.chunk);
    _config.complete = isFunction(_config.complete);
    _config.error = isFunction(_config.error);
    delete _config.worker;	// prevent infinite loop

    w.postMessage({
      input: _input,
      config: _config,
      workerId: w.id
    });
  }

  /** Drops a leading byte order mark, which would otherwise end up in the first field. */
  function withoutBom(text) {
    return text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
  }
}
/**
 * Serialises arrays or objects to a CSV string (exposed as Papa.unparse).
 *
 * @param {Array|Object|string} _input an array of arrays, an array of
 *   objects, an object carrying `data` plus optional `fields` (or
 *   `meta.fields`), or a JSON string describing any of those
 * @param {Object} [_config] optional settings: delimiter, quotes,
 *   skipEmptyLines, newline, quoteChar, header, columns, escapeChar,
 *   escapeFormulae. Values of the wrong type are ignored; a null or
 *   non-object config is treated as "no config".
 * @returns {string} the CSV text
 * @throws {Error} when the input shape is not recognised, or when
 *   `columns` is an empty array
 */
function JsonToCsv(_input, _config)
{
  // Default configuration

  /** whether to surround every datum with quotes */
  var _quotes = false;

  /** whether to write headers */
  var _writeHeader = true;

  /** delimiting character(s) */
  var _delimiter = ',';

  /** newline character(s) */
  var _newline = '\r\n';

  /** quote character */
  var _quoteChar = '"';

  /** escaped quote character, either <quoteChar><quoteChar> or <config.escapeChar><quoteChar> */
  var _escapedQuote = _quoteChar + _quoteChar;

  /** whether to skip empty lines */
  var _skipEmptyLines = false;

  /** the columns (keys) we expect when we unparse objects */
  var _columns = null;

  /** whether to prevent outputting cells that can be parsed as formulae by spreadsheet software (Excel and LibreOffice) */
  var _escapeFormulae = false;

  unpackConfig();

  var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');

  if (typeof _input === 'string')
    _input = JSON.parse(_input);

  if (Array.isArray(_input))
  {
    if (!_input.length || Array.isArray(_input[0]))
      return serialize(null, _input, _skipEmptyLines);
    else if (typeof _input[0] === 'object')
      return serialize(_columns || Object.keys(_input[0]), _input, _skipEmptyLines);
  }
  else if (typeof _input === 'object')
  {
    if (typeof _input.data === 'string')
      _input.data = JSON.parse(_input.data);

    if (Array.isArray(_input.data))
    {
      if (!_input.fields)
        _input.fields = _input.meta && _input.meta.fields || _columns;

      if (!_input.fields)
        _input.fields = Array.isArray(_input.data[0])
          ? _input.fields
          : typeof _input.data[0] === 'object'
            ? Object.keys(_input.data[0])
            : [];

      if (!(Array.isArray(_input.data[0])) && typeof _input.data[0] !== 'object')
        _input.data = [_input.data];	// handles input like [1,2,3] or ['asdf']
    }

    return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
  }

  // Default (any valid paths should return before this)
  throw new Error('Unable to serialize unrecognized input');


  /** Copies every well-typed option from _config over the defaults above. */
  function unpackConfig()
  {
    // typeof null is 'object', so null has to be rejected explicitly
    if (typeof _config !== 'object' || _config === null)
      return;

    if (typeof _config.delimiter === 'string'
      && !Papa.BAD_DELIMITERS.filter(function(value) { return _config.delimiter.indexOf(value) !== -1; }).length)
    {
      _delimiter = _config.delimiter;
    }

    if (typeof _config.quotes === 'boolean'
      || typeof _config.quotes === 'function'
      || Array.isArray(_config.quotes))
      _quotes = _config.quotes;

    if (typeof _config.skipEmptyLines === 'boolean'
      || typeof _config.skipEmptyLines === 'string')
      _skipEmptyLines = _config.skipEmptyLines;

    if (typeof _config.newline === 'string')
      _newline = _config.newline;

    if (typeof _config.quoteChar === 'string')
    {
      _quoteChar = _config.quoteChar;
      // Keep the default escape sequence (a doubled quote) in step with the
      // chosen quote character; an explicit escapeChar below still wins.
      _escapedQuote = _quoteChar + _quoteChar;
    }

    if (typeof _config.header === 'boolean')
      _writeHeader = _config.header;

    if (Array.isArray(_config.columns)) {

      if (_config.columns.length === 0) throw new Error('Option columns is empty');

      _columns = _config.columns;
    }

    if (_config.escapeChar !== undefined) {
      _escapedQuote = _config.escapeChar + _quoteChar;
    }

    if (_config.escapeFormulae instanceof RegExp) {
      _escapeFormulae = _config.escapeFormulae;
    } else if (typeof _config.escapeFormulae === 'boolean' && _config.escapeFormulae) {
      _escapeFormulae = /^[=+\-@\t\r].*$/;
    }
  }

  /** The double for loop that iterates the data and writes out a CSV string including header row */
  function serialize(fields, data, skipEmptyLines)
  {
    var csv = '';

    if (typeof fields === 'string')
      fields = JSON.parse(fields);
    if (typeof data === 'string')
      data = JSON.parse(data);

    var hasHeader = Array.isArray(fields) && fields.length > 0;
    var dataKeyedByField = !(Array.isArray(data[0]));

    // If there is a header row, write it first
    if (hasHeader && _writeHeader)
    {
      for (var i = 0; i < fields.length; i++)
      {
        if (i > 0)
          csv += _delimiter;
        csv += safe(fields[i], i);
      }
      if (data.length > 0)
        csv += _newline;
    }

    // Then write out the data
    for (var row = 0; row < data.length; row++)
    {
      var maxCol = hasHeader ? fields.length : data[row].length;

      var emptyLine = false;
      var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
      if (skipEmptyLines && !hasHeader)
      {
        emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
      }
      if (skipEmptyLines === 'greedy' && hasHeader) {
        // Gather the row's cells in header order to see whether they are all blank
        var line = [];
        for (var c = 0; c < maxCol; c++) {
          var cx = dataKeyedByField ? fields[c] : c;
          line.push(data[row][cx]);
        }
        emptyLine = line.join('').trim() === '';
      }
      if (!emptyLine)
      {
        for (var col = 0; col < maxCol; col++)
        {
          if (col > 0 && !nullLine)
            csv += _delimiter;
          var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
          csv += safe(data[row][colIdx], col);
        }
        if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine)))
        {
          csv += _newline;
        }
      }
    }
    return csv;
  }

  /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
  function safe(str, col)
  {
    if (typeof str === 'undefined' || str === null)
      return '';

    if (str.constructor === Date)
      return JSON.stringify(str).slice(1, 25);

    var needsQuotes = false;

    if (_escapeFormulae && typeof str === "string" && _escapeFormulae.test(str)) {
      str = "'" + str;
      needsQuotes = true;
    }

    var escapedQuoteStr = str.toString().replace(quoteCharRegex, _escapedQuote);

    needsQuotes = needsQuotes
            || _quotes === true
            || (typeof _quotes === 'function' && _quotes(str, col))
            || (Array.isArray(_quotes) && _quotes[col])
            || hasAny(escapedQuoteStr, Papa.BAD_DELIMITERS)
            // a value containing the (possibly custom) quote character must be
            // quoted, otherwise its escaped quotes would be read back literally
            || (_quoteChar !== '' && escapedQuoteStr.indexOf(_quoteChar) > -1)
            || escapedQuoteStr.indexOf(_delimiter) > -1
            || escapedQuoteStr.charAt(0) === ' '
            || escapedQuoteStr.charAt(escapedQuoteStr.length - 1) === ' ';

    return needsQuotes ? _quoteChar + escapedQuoteStr + _quoteChar : escapedQuoteStr;
  }

  /** True when `str` contains at least one of `substrings`. */
  function hasAny(str, substrings)
  {
    for (var i = 0; i < substrings.length; i++)
      if (str.indexOf(substrings[i]) > -1)
        return true;
    return false;
  }
}
/**
 * ChunkStreamer is the base prototype for the various streamer
 * implementations. A concrete streamer supplies `_nextChunk` (and usually
 * `stream`); this constructor supplies the shared bookkeeping, `parseChunk`
 * and `_sendError`.
 *
 * @param {Object} config parse configuration; it is copied with copy(), and
 *   the copy - not the caller's object - is stored on `this._config`
 */
function ChunkStreamer(config) {
  this._handle = null;
  this._finished = false;
  this._completed = false;
  this._halted = false;
  this._input = null;
  this._baseIndex = 0;
  this._partialLine = '';
  this._rowCount = 0;
  this._start = 0;
  this._nextChunk = null;
  this.isFirstChunk = true;
  this._completeResults = {
    data: [],
    errors: [],
    meta: {}
  };

  // Copy the config so we can edit it
  var ownConfig = copy(config);
  ownConfig.chunkSize = parseInt(ownConfig.chunkSize);	// parseInt VERY important so we don't concatenate strings!
  if (!config.step && !config.chunk) {
    ownConfig.chunkSize = null;	// disable Range header if not streaming; bad values break IIS - see issue #196
  }
  this._handle = new ParserHandle(ownConfig);
  this._handle.streamer = this;
  this._config = ownConfig;	// persist the copy to the caller

  /** True when the parser handle reports that it was paused or aborted. */
  function wasInterrupted(handle) {
    return handle.paused() || handle.aborted();
  }

  /**
   * Parses one chunk of text and dispatches the outcome (worker message,
   * `chunk` callback, accumulated results, `complete` callback), then asks
   * for the next chunk unless parsing is finished or paused.
   *
   * @param {string} chunk the text to parse
   * @param {boolean} [isFakeChunk] when truthy the `chunk` callback is not invoked
   * @returns {Object|undefined} the parse results; undefined when parsing was
   *   paused/aborted or when the results were handed to the `chunk` callback
   */
  this.parseChunk = function(chunk, isFakeChunk) {
    // First chunk pre-processing
    var linesToSkip = parseInt(this._config.skipFirstNLines) || 0;
    if (this.isFirstChunk && linesToSkip > 0) {
      var lineBreak = this._config.newline;
      if (!lineBreak) {
        lineBreak = this._handle.guessLineEndings(chunk, this._config.quoteChar || '"');
      }
      chunk = chunk.split(lineBreak).slice(linesToSkip).join(lineBreak);
    }
    if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
      var replacement = this._config.beforeFirstChunk(chunk);
      if (replacement !== undefined) {
        chunk = replacement;
      }
    }
    this.isFirstChunk = false;
    this._halted = false;

    // Rejoin the line we likely just split in two by chunking the file
    var aggregate = this._partialLine + chunk;
    this._partialLine = '';

    var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

    if (wasInterrupted(this._handle)) {
      this._halted = true;
      return;
    }

    var lastIndex = results.meta.cursor;
    if (!this._finished) {
      // Keep whatever lies beyond the parser's cursor for the next chunk
      this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
      this._baseIndex = lastIndex;
    }

    if (results && results.data) {
      this._rowCount += results.data.length;
    }

    var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);

    if (IS_PAPA_WORKER) {
      global.postMessage({
        results: results,
        workerId: Papa.WORKER_ID,
        finished: finishedIncludingPreview
      });
    } else if (isFunction(this._config.chunk) && !isFakeChunk) {
      this._config.chunk(results, this._handle);
      if (wasInterrupted(this._handle)) {
        this._halted = true;
        return;
      }
      // The chunk callback has consumed the results; keep nothing around
      results = undefined;
      this._completeResults = undefined;
    }

    if (!this._config.step && !this._config.chunk) {
      // Neither callback is in use, so collect everything for `complete`
      this._completeResults.data = this._completeResults.data.concat(results.data);
      this._completeResults.errors = this._completeResults.errors.concat(results.errors);
      this._completeResults.meta = results.meta;
    }

    var mayComplete = !this._completed
      && finishedIncludingPreview
      && isFunction(this._config.complete)
      && (!results || !results.meta.aborted);
    if (mayComplete) {
      this._config.complete(this._completeResults, this._input);
      this._completed = true;
    }

    if (!finishedIncludingPreview && (!results || !results.meta.paused)) {
      this._nextChunk();
    }

    return results;
  };

  /**
   * Reports an error through the user's `error` callback, or - inside a Papa
   * worker whose config flags an error handler - by posting it to the parent.
   *
   * @param {*} error the error to report
   */
  this._sendError = function(error) {
    if (isFunction(this._config.error)) {
      this._config.error(error);
    } else if (IS_PAPA_WORKER && this._config.error) {
      global.postMessage({
        workerId: Papa.WORKER_ID,
        error: error,
        finished: false
      });
    }
  };
}
/**
 * Streamer that downloads its input with XMLHttpRequest, optionally in
 * byte-range chunks of `config.chunkSize`.
 *
 * @param {Object} [config] parse configuration; `chunkSize` falls back to
 *   Papa.RemoteChunkSize when missing or falsy
 */
function NetworkStreamer(config) {
  config = config || {};
  if (!config.chunkSize) {
    config.chunkSize = Papa.RemoteChunkSize;
  }
  ChunkStreamer.call(this, config);

  var xhr;

  // In a worker the request is made synchronously (see _readChunk), so the
  // response is processed right after the read instead of from onload.
  this._nextChunk = IS_WORKER
    ? function() {
      this._readChunk();
      this._chunkLoaded();
    }
    : function() {
      this._readChunk();
    };

  /** Remembers the URL and starts streaming. */
  this.stream = function(url) {
    this._input = url;
    this._nextChunk();	// Starts streaming
  };

  /** Issues the request for the next chunk (or the whole resource when chunkSize is unset). */
  this._readChunk = function() {
    if (this._finished) {
      this._chunkLoaded();
      return;
    }

    xhr = new XMLHttpRequest();

    if (this._config.withCredentials) {
      xhr.withCredentials = this._config.withCredentials;
    }

    if (!IS_WORKER) {
      xhr.onload = bindFunction(this._chunkLoaded, this);
      xhr.onerror = bindFunction(this._chunkError, this);
    }

    var method = this._config.downloadRequestBody ? 'POST' : 'GET';
    xhr.open(method, this._input, !IS_WORKER);

    // Headers can only be set once the request state is OPENED
    var extraHeaders = this._config.downloadRequestHeaders;
    if (extraHeaders) {
      for (var headerName in extraHeaders) {
        xhr.setRequestHeader(headerName, extraHeaders[headerName]);
      }
    }

    if (this._config.chunkSize) {
      var lastByte = this._start + this._config.chunkSize - 1;	// minus one because byte range is inclusive
      xhr.setRequestHeader('Range', 'bytes=' + this._start + '-' + lastByte);
    }

    try {
      xhr.send(this._config.downloadRequestBody);
    } catch (err) {
      this._chunkError(err.message);
    }

    if (IS_WORKER && xhr.status === 0) {
      this._chunkError();
    }
  };

  /** Handles a finished request: validates the status, advances the offset and parses the text. */
  this._chunkLoaded = function() {
    if (xhr.readyState !== 4) {
      return;
    }

    var status = xhr.status;
    if (status < 200 || status >= 400) {
      this._chunkError();
      return;
    }

    // Advance by chunkSize rather than by the response length, because the
    // two can differ when the text contains multi-byte characters
    this._start += this._config.chunkSize || xhr.responseText.length;
    this._finished = !this._config.chunkSize || this._start >= totalSizeOf(xhr);
    this.parseChunk(xhr.responseText);
  };

  /** Reports a failed request, preferring the HTTP status text over the supplied message. */
  this._chunkError = function(errorMessage) {
    var errorText = xhr.statusText || errorMessage;
    this._sendError(new Error(errorText));
  };

  /**
   * Reads the total size from the Content-Range response header.
   * Returns -1 when the header is absent, which makes the caller treat the
   * download as finished.
   */
  function totalSizeOf(request) {
    var contentRange = request.getResponseHeader('Content-Range');
    if (contentRange === null) {	// no content range, then finish!
      return -1;
    }
    return parseInt(contentRange.substring(contentRange.lastIndexOf('/') + 1));
  }
}
NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
NetworkStreamer.prototype.constructor = NetworkStreamer;
/**
 * Streamer that reads a File/Blob-like object in slices of
 * `config.chunkSize` bytes.
 *
 * @param {Object} [config] parse configuration; `chunkSize` falls back to
 *   Papa.LocalChunkSize when missing or falsy
 */
function FileStreamer(config) {
  config = config || {};
  if (!config.chunkSize) {
    config.chunkSize = Papa.LocalChunkSize;
  }
  ChunkStreamer.call(this, config);

  var reader;
  var sliceFn;

  // FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
  // But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
  var hasAsyncReader = typeof FileReader !== 'undefined';	// Safari doesn't consider it a function - see issue #105

  /** Remembers the file, prepares a reader and starts streaming. */
  this.stream = function(file) {
    this._input = file;
    sliceFn = file.slice || file.webkitSlice || file.mozSlice;

    if (!hasAsyncReader) {
      reader = new FileReaderSync();	// Hack for running in a web worker in Firefox
    } else {
      reader = new FileReader();		// Preferred method of reading files, even in workers
      reader.onload = bindFunction(this._chunkLoaded, this);
      reader.onerror = bindFunction(this._chunkError, this);
    }

    this._nextChunk();	// Starts streaming
  };

  /** Reads another chunk unless the file is exhausted or the preview limit has been reached. */
  this._nextChunk = function() {
    var previewReached = this._config.preview && this._rowCount >= this._config.preview;
    if (!this._finished && !previewReached) {
      this._readChunk();
    }
  };

  /** Reads the next slice of the file (or the whole file when chunkSize is unset) as text. */
  this._readChunk = function() {
    var source = this._input;
    if (this._config.chunkSize) {
      var sliceEnd = Math.min(this._start + this._config.chunkSize, this._input.size);
      source = sliceFn.call(source, this._start, sliceEnd);
    }
    var text = reader.readAsText(source, this._config.encoding);
    if (!hasAsyncReader) {
      // The synchronous reader returns the text directly; mimic the async signature
      this._chunkLoaded({ target: { result: text } });
    }
  };

  /** Advances the read offset and parses the text that was just read. */
  this._chunkLoaded = function(event) {
    // Very important to increment start each time before handling results
    this._start += this._config.chunkSize;
    this._finished = !this._config.chunkSize || this._start >= this._input.size;
    this.parseChunk(event.target.result);
  };

  /** Forwards the reader's error. */
  this._chunkError = function() {
    this._sendError(reader.error);
  };
}
FileStreamer.prototype = Object.create(ChunkStreamer.prototype);
FileStreamer.prototype.constructor = FileStreamer;
/**
 * Streamer for input that is already a string in memory. The string is
 * handed to parseChunk in pieces of `chunkSize` characters, or in one piece
 * when no chunk size is configured.
 *
 * @param {Object} [config] parse configuration
 */
function StringStreamer(config)
{
  config = config || {};
  ChunkStreamer.call(this, config);

  // The part of the input that has not been handed to the parser yet
  var remaining;

  /**
   * Starts parsing the given string.
   * @param {string} s the text to parse
   * @returns {*} whatever parseChunk returns for the first chunk
   */
  this.stream = function(s)
  {
    remaining = s;
    return this._nextChunk();
  };

  /**
   * Cuts the next chunk off the remaining text and parses it.
   * @returns {*} the result of parseChunk, or undefined once finished
   */
  this._nextChunk = function()
  {
    if (this._finished) return;
    var size = this._config.chunkSize;
    var chunk;
    if (size) {
      chunk = remaining.substring(0, size);
      remaining = remaining.substring(size);
    } else {
      chunk = remaining;
      remaining = '';
    }
    // Flag the last chunk before parsing so the parser knows no more input follows
    this._finished = !remaining;
    return this.parseChunk(chunk);
  };
}
// Inherit from ChunkStreamer like every other streamer does (this used to
// derive from StringStreamer's own prototype, leaving it outside the chain).
StringStreamer.prototype = Object.create(ChunkStreamer.prototype);
StringStreamer.prototype.constructor = StringStreamer;
/**
 * Streamer fed by a readable stream (anything offering `on` and
 * `removeListener` for 'data', 'end' and 'error' events). Incoming chunks
 * are queued and parsed one at a time.
 *
 * @param {Object} [config] parse configuration; `encoding` is used to
 *   stringify non-string chunks
 */
function ReadableStreamStreamer(config) {
  config = config || {};

  ChunkStreamer.call(this, config);

  var pending = [];			// chunks received but not parsed yet
  var parseOnData = true;		// whether the next 'data' event should be parsed right away
  var streamHasEnded = false;

  // NOTE(review): no prototype-level pause/resume is visible on ChunkStreamer
  // in this part of the file - confirm these delegations have a target.
  this.pause = function() {
    ChunkStreamer.prototype.pause.apply(this, arguments);
    this._input.pause();
  };

  this.resume = function() {
    ChunkStreamer.prototype.resume.apply(this, arguments);
    this._input.resume();
  };

  /** Subscribes to the stream's data, end and error events. */
  this.stream = function(stream) {
    this._input = stream;

    this._input.on('data', this._streamData);
    this._input.on('end', this._streamEnd);
    this._input.on('error', this._streamError);
  };

  /** Marks the streamer finished once the stream has ended and exactly one chunk is left. */
  this._checkIsFinished = function() {
    if (streamHasEnded && pending.length === 1) {
      this._finished = true;
    }
  };

  /** Parses the next queued chunk, or arranges for the next 'data' event to be parsed directly. */
  this._nextChunk = function() {
    this._checkIsFinished();
    if (pending.length === 0) {
      parseOnData = true;
      return;
    }
    this.parseChunk(pending.shift());
  };

  this._streamData = bindFunction(function(chunk) {
    try {
      var text = typeof chunk === 'string' ? chunk : chunk.toString(this._config.encoding);
      pending.push(text);

      if (parseOnData) {
        parseOnData = false;
        this._checkIsFinished();
        this.parseChunk(pending.shift());
      }
    } catch (error) {
      this._streamError(error);
    }
  }, this);

  this._streamError = bindFunction(function(error) {
    this._streamCleanUp();
    this._sendError(error);
  }, this);

  this._streamEnd = bindFunction(function() {
    this._streamCleanUp();
    streamHasEnded = true;
    // Push one last, empty chunk so the final parse runs with _finished set
    this._streamData('');
  }, this);

  this._streamCleanUp = bindFunction(function() {
    this._input.removeListener('data', this._streamData);
    this._input.removeListener('end', this._streamEnd);
    this._input.removeListener('error', this._streamError);
  }, this);
}
ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
ReadableStreamStreamer.prototype.constructor = ReadableStreamStreamer;
/**
 * Streamer backed by a Node.js Duplex stream.
 *
 * Text written to the stream is queued and fed to parseChunk() one chunk at
 * a time; each parsed row is pushed to the readable (object mode) side.
 * The stream itself is available through getStream().
 *
 * @param {Object} _config Parse configuration; it is copied, and its `step`
 *                         and `complete` callbacks are replaced by the
 *                         handlers defined here.
 */
function DuplexStreamStreamer(_config)
{
	var DuplexStream = require('stream').Duplex;
	var options = copy(_config);
	var canParseImmediately = true;   // true while no queued parse job is running
	var writableSideFinished = false; // set once the stream emitted 'finish'
	var pendingParses = [];           // parse jobs waiting to run, oldest first
	var duplex = null;

	// Step callback: forward the parsed row to the readable side.
	this._onCsvData = function(results)
	{
		var accepted = duplex.push(results.data);
		if (accepted || this._handle.paused())
			return;

		// the readable buffer has filled up, so stop parsing
		// until the consumer asks for more items
		this._handle.pause();
	};

	// Complete callback: pushing null makes node end the readable side.
	this._onCsvComplete = function()
	{
		duplex.push(null);
	};

	options.step = bindFunction(this._onCsvData, this);
	options.complete = bindFunction(this._onCsvComplete, this);
	ChunkStreamer.call(this, options);

	// Runs the next queued parse job, or marks the queue as idle.
	this._nextChunk = function()
	{
		var queued = pendingParses.length;

		// The last remaining job after 'finish' is the final chunk
		if (writableSideFinished && queued === 1)
			this._finished = true;

		if (queued === 0)
		{
			canParseImmediately = true;
			return;
		}

		var job = pendingParses.shift();
		job();
	};

	// Queues a chunk for parsing. The write callback is invoked only after
	// the chunk was parsed; node automatically pauses the incoming stream
	// when too many writes are waiting for their callback.
	this._addToParseQueue = function(chunk, callback)
	{
		var job = bindFunction(function()
		{
			var text = chunk;
			if (typeof text !== 'string')
				text = chunk.toString(options.encoding);

			this.parseChunk(text);

			if (isFunction(callback))
				return callback();
		}, this);

		pendingParses.push(job);

		if (!canParseImmediately)
			return;

		canParseImmediately = false;
		this._nextChunk();
	};

	// Readable side asked for data: the consumer can handle more rows,
	// so resume parsing if it was paused.
	this._onRead = function()
	{
		if (!this._handle.paused())
			return;
		this._handle.resume();
	};

	// Writable side received a chunk.
	this._onWrite = function(chunk, encoding, callback)
	{
		this._addToParseQueue(chunk, callback);
	};

	// Writable side finished: an empty string is queued
	// so the parser knows the input is done.
	this._onWriteComplete = function()
	{
		writableSideFinished = true;
		this._addToParseQueue('');
	};

	this.getStream = function()
	{
		return duplex;
	};

	duplex = new DuplexStream({
		readableObjectMode: true,
		decodeStrings: false,
		read: bindFunction(this._onRead, this),
		write: bindFunction(this._onWrite, this)
	});
	duplex.once('finish', bindFunction(this._onWriteComplete, this));
}

// The prototype chain is only wired up when PAPA_BROWSER_CONTEXT is not defined.
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
	DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
	DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer;
}
// Use one ParserHandle per entire CSV file or string
/**
 * Wraps the core Parser: fills in missing newline/delimiter settings,
 * runs the parse, and post-processes the results (empty-line filtering,
 * header handling, dynamic typing, value transformation). It also exposes
 * pause/resume/abort controls.
 *
 * @param {Object} _config Parse configuration. It is modified in place:
 *                         `step` is wrapped, and `newline` / `delimiter`
 *                         are filled in when they are not set.
 */
function ParserHandle(_config)
{
	// One goal is to minimize the use of regular expressions...
	var MAX_FLOAT = Math.pow(2, 53);
	var MIN_FLOAT = -MAX_FLOAT;
	// Optional sign, digits with optional fraction, optional exponent; surrounding whitespace allowed
	var FLOAT = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/;
	// ISO 8601 date-time with 'T' separator and mandatory zone (offset or Z);
	// seconds and fractional seconds are optional
	var ISO_DATE = /^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/;
	var self = this;
	var _stepCounter = 0; // Number of times step was called (number of rows parsed)
	var _rowCounter = 0; // Number of rows that have been parsed so far
	var _input; // The input being parsed
	var _parser; // The core parser being used
	var _paused = false; // Whether we are paused or not
	var _aborted = false; // Whether the parser has aborted or not
	var _delimiterError; // Temporary state between delimiter detection and processing results
	var _fields = []; // Fields are from the header row of the input, if there is one
	var _results = { // The last results returned from the parser
		data: [],
		errors: [],
		meta: {}
	};

	// Wrap the user's step callback so that every row is post-processed
	// before the user sees it, and so that `preview` is honoured.
	if (isFunction(_config.step))
	{
		var userStep = _config.step;
		_config.step = function(results)
		{
			_results = results;
			if (needsHeaderRow())
				processResults();
			else // only call user's step function after header row
			{
				processResults();
				// It's possible that this line was empty and there's no row here after all
				if (_results.data.length === 0)
					return;
				_stepCounter += results.data.length;
				if (_config.preview && _stepCounter > _config.preview)
					_parser.abort();
				else {
					// hand the user the single row rather than a one-element array
					_results.data = _results.data[0];
					userStep(_results, self);
				}
			}
		};
	}

	/**
	 * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
	 * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
	 * when an input comes in multiple chunks, like from a file.
	 */
	this.parse = function(input, baseIndex, ignoreLastRow)
	{
		var quoteChar = _config.quoteChar || '"';
		if (!_config.newline)
			_config.newline = this.guessLineEndings(input, quoteChar);
		_delimiterError = false;
		if (!_config.delimiter)
		{
			var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
			if (delimGuess.successful)
				_config.delimiter = delimGuess.bestDelimiter;
			else
			{
				_delimiterError = true; // add error after parsing (otherwise it would be overwritten)
				_config.delimiter = Papa.DefaultDelimiter;
			}
			_results.meta.delimiter = _config.delimiter;
		}
		else if(isFunction(_config.delimiter))
		{
			// a delimiter function is called with the input and replaced by its result
			_config.delimiter = _config.delimiter(input);
			_results.meta.delimiter = _config.delimiter;
		}
		var parserConfig = copy(_config);
		if (_config.preview && _config.header)
			parserConfig.preview++; // to compensate for header row
		_input = input;
		_parser = new Parser(parserConfig);
		_results = _parser.parse(_input, baseIndex, ignoreLastRow);
		processResults();
		return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
	};

	/** Returns whether parsing is currently paused. */
	this.paused = function()
	{
		return _paused;
	};

	/** Marks the handle as paused, aborts the core parser and keeps the unparsed remainder of the input. */
	this.pause = function()
	{
		_paused = true;
		_parser.abort();
		// If it is streaming via "chunking", the reader will start appending correctly already so no need to substring,
		// otherwise we can get duplicate content within a row
		_input = isFunction(_config.chunk) ? "" : _input.substring(_parser.getCharIndex());
	};

	/**
	 * Resumes a paused parse by handing the remaining input back to
	 * self.streamer. `streamer` is not assigned in this function;
	 * presumably the owning streamer sets it (verify against caller).
	 */
	this.resume = function()
	{
		if(self.streamer._halted) {
			_paused = false;
			self.streamer.parseChunk(_input, true);
		} else {
			// Bugfix: #636 In case the processing hasn't halted yet
			// wait for it to halt in order to resume
			setTimeout(self.resume, 3);
		}
	};

	/** Returns whether abort() has been called on this handle. */
	this.aborted = function()
	{
		return _aborted;
	};

	/** Aborts parsing, flags the last results as aborted, calls the complete callback (if any) and clears the input. */
	this.abort = function()
	{
		_aborted = true;
		_parser.abort();
		_results.meta.aborted = true;
		if (isFunction(_config.complete))
			_config.complete(_results);
		_input = '';
	};

	/**
	 * Guesses the line ending used by the input from its first 1 MB,
	 * ignoring anything inside quotes.
	 * Returns '\n' when there is no '\r' or when a '\n' comes before the
	 * first '\r'; otherwise '\r\n' when at least half of the '\r'-separated
	 * pieces start with '\n', else '\r'.
	 */
	this.guessLineEndings = function(input, quoteChar)
	{
		input = input.substring(0, 1024 * 1024); // max length 1 MB
		// Replace all the text inside quotes
		var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
		input = input.replace(re, '');
		var r = input.split('\r');
		var n = input.split('\n');
		var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length);
		if (r.length === 1 || nAppearsFirst)
			return '\n';
		var numWithN = 0;
		for (var i = 0; i < r.length; i++)
		{
			if (r[i][0] === '\n')
				numWithN++;
		}
		return numWithN >= r.length / 2 ? '\r\n' : '\r';
	};

	/**
	 * Tells whether a parsed row (array of fields) counts as empty.
	 * With skipEmptyLines === 'greedy' a row of only whitespace is empty;
	 * otherwise only a row consisting of one zero-length field is.
	 */
	function testEmptyLine(s) {
		return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
	}

	/** True when s matches FLOAT and its parsed value lies strictly between -2^53 and 2^53. */
	function testFloat(s) {
		if (FLOAT.test(s)) {
			var floatValue = parseFloat(s);
			if (floatValue > MIN_FLOAT && floatValue < MAX_FLOAT) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Post-processes _results in place: reports a pending delimiter
	 * detection failure, drops empty lines when configured, consumes the
	 * header row when one is still needed, then applies header mapping,
	 * dynamic typing and transformation.
	 */
	function processResults()
	{
		if (_results && _delimiterError)
		{
			addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \'' + Papa.DefaultDelimiter + '\'');
			_delimiterError = false;
		}
		if (_config.skipEmptyLines)
		{
			_results.data = _results.data.filter(function(d) {
				return !testEmptyLine(d);
			});
		}
		if (needsHeaderRow())
			fillHeaderFields();
		return applyHeaderAndDynamicTypingAndTransformation();
	}

	/** True when header mode is on and no field names have been collected yet. */
	function needsHeaderRow()
	{
		return _config.header && _fields.length === 0;
	}

	/** Collects the field names from the first row of _results (through transformHeader, if set) and removes that row. */
	function fillHeaderFields()
	{
		if (!_results)
			return;
		function addHeader(header, i)
		{
			if (isFunction(_config.transformHeader))
				header = _config.transformHeader(header, i);
			_fields.push(header);
		}
		if (Array.isArray(_results.data[0]))
		{
			// the loop stops as soon as a row has produced at least one field name
			for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
				_results.data[i].forEach(addHeader);
			_results.data.splice(0, 1);
		}
		// if _results.data[0] is not an array, we are in a step where _results.data is the row.
		else
			_results.data.forEach(addHeader);
	}

	/** Decides whether dynamic typing applies to a field (column name or index). */
	function shouldApplyDynamicTyping(field) {
		// Cache function values to avoid calling it for each row
		if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
			_config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
		}
		return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
	}

	/**
	 * Converts a string value to boolean, number, Date or null (for the
	 * empty string) when dynamic typing applies to the field; otherwise
	 * returns the value unchanged.
	 */
	function parseDynamic(field, value)
	{
		if (shouldApplyDynamicTyping(field))
		{
			if (value === 'true' || value === 'TRUE')
				return true;
			else if (value === 'false' || value === 'FALSE')
				return false;
			else if (testFloat(value))
				return parseFloat(value);
			else if (ISO_DATE.test(value))
				return new Date(value);
			else
				return (value === '' ? null : value);
		}
		return value;
	}

	/**
	 * Rewrites the rows of _results: in header mode each row becomes an
	 * object keyed by field name (surplus values are collected under
	 * '__parsed_extra'); transform and dynamic typing are applied to every
	 * value. Field-count mismatches are recorded as errors.
	 */
	function applyHeaderAndDynamicTypingAndTransformation()
	{
		if (!_results || (!_config.header && !_config.dynamicTyping && !_config.transform))
			return _results;
		function processRow(rowSource, i)
		{
			var row = _config.header ? {} : [];
			var j;
			for (j = 0; j < rowSource.length; j++)
			{
				var field = j;
				var value = rowSource[j];
				if (_config.header)
					field = j >= _fields.length ? '__parsed_extra' : _fields[j];
				if (_config.transform)
					value = _config.transform(value,field);
				value = parseDynamic(field, value);
				if (field === '__parsed_extra')
				{
					row[field] = row[field] || [];
					row[field].push(value);
				}
				else
					row[field] = value;
			}
			if (_config.header)
			{
				// j now equals the number of values parsed for this row
				if (j > _fields.length)
					addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
				else if (j < _fields.length)
					addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
			}
			return row;
		}
		var incrementBy = 1;
		if (!_results.data.length || Array.isArray(_results.data[0]))
		{
			_results.data = _results.data.map(processRow);
			incrementBy = _results.data.length;
		}
		else
			// _results.data is a single row rather than a list of rows
			_results.data = processRow(_results.data, 0);
		if (_config.header && _results.meta)
			_results.meta.fields = _fields;
		_rowCounter += incrementBy;
		return _results;
	}

	/**
	 * Tries each candidate delimiter on a 10-row preview of the input and
	 * picks one whose rows have a consistent field count (small delta
	 * between consecutive rows) and a high average field count (above 1.99).
	 * Also writes the pick to _config.delimiter.
	 *
	 * @returns {{successful: boolean, bestDelimiter: (string|undefined)}}
	 */
	function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
		var bestDelim, bestDelta, fieldCountPrevRow, maxFieldCount;
		delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
		for (var i = 0; i < delimitersToGuess.length; i++) {
			var delim = delimitersToGuess[i];
			var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
			fieldCountPrevRow = undefined;
			var preview = new Parser({
				comments: comments,
				delimiter: delim,
				newline: newline,
				preview: 10
			}).parse(input);
			for (var j = 0; j < preview.data.length; j++) {
				if (skipEmptyLines && testEmptyLine(preview.data[j])) {
					emptyLinesCount++;
					continue;
				}
				var fieldCount = preview.data[j].length;
				avgFieldCount += fieldCount;
				if (typeof fieldCountPrevRow === 'undefined') {
					fieldCountPrevRow = fieldCount;
					continue;
				}
				else if (fieldCount > 0) {
					delta += Math.abs(fieldCount - fieldCountPrevRow);
					fieldCountPrevRow = fieldCount;
				}
			}
			// up to here avgFieldCount holds the sum of the field counts
			if (preview.data.length > 0)
				avgFieldCount /= (preview.data.length - emptyLinesCount);
			if ((typeof bestDelta === 'undefined' || delta <= bestDelta)
				&& (typeof maxFieldCount === 'undefined' || avgFieldCount > maxFieldCount) && avgFieldCount > 1.99) {
				bestDelta = delta;
				bestDelim = delim;
				maxFieldCount = avgFieldCount;
			}
		}
		_config.delimiter = bestDelim;
		return {
			successful: !!bestDelim,
			bestDelimiter: bestDelim
		};
	}

	/** Appends an error record to _results.errors; `row` is only included when it is given. */
	function addError(type, code, msg, row)
	{
		var error = {
			type: type,
			code: code,
			message: msg
		};
		if(row !== undefined) {
			error.row = row;
		}
		_results.errors.push(error);
	}
}
/**
 * Escapes every character of `string` that has a special meaning in a
 * regular expression, so the result can be embedded in a RegExp literally.
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
 */
function escapeRegExp(string)
{
	var specialChars = /[.*+?^${}()|[\]\\]/g;
	// In the replacement, $& stands for the whole matched string
	var escaped = string.replace(specialChars, '\\$&');
	return escaped;
}
/**
 * The core parser implements speedy and correct CSV parsing.
 *
 * Config keys read here: delimiter, newline, comments, step, preview,
 * fastMode, quoteChar, escapeChar, header and transformHeader.
 * Invalid settings fall back to defaults: delimiter ',', newline '\n',
 * quote character '"', escape character = quote character, comments off.
 *
 * @param {Object} [config] Parser configuration.
 * @throws {Error} When the comment string equals the delimiter.
 */
function Parser(config)
{
	// Unpack the config object
	config = config || {};
	var delim = config.delimiter;
	var newline = config.newline;
	var comments = config.comments;
	var step = config.step;
	var preview = config.preview;
	var fastMode = config.fastMode;
	var quoteChar;
	var renamedHeaders = null; // maps renamed duplicate headers to their original name
	var headerParsed = false;  // the header row is de-duplicated only once per Parser

	if (config.quoteChar === undefined || config.quoteChar === null) {
		quoteChar = '"';
	} else {
		quoteChar = config.quoteChar;
	}
	var escapeChar = quoteChar;
	if (config.escapeChar !== undefined) {
		escapeChar = config.escapeChar;
	}

	// Delimiter must be valid
	if (typeof delim !== 'string'
		|| Papa.BAD_DELIMITERS.indexOf(delim) > -1)
		delim = ',';

	// Comment character must be valid
	if (comments === delim)
		throw new Error('Comment character same as delimiter');
	else if (comments === true)
		comments = '#';
	else if (typeof comments !== 'string'
		|| Papa.BAD_DELIMITERS.indexOf(comments) > -1)
		comments = false;

	// Newline must be valid: \r, \n, or \r\n
	if (newline !== '\n' && newline !== '\r' && newline !== '\r\n')
		newline = '\n';

	// We're gonna need these at the Parser scope
	var cursor = 0;
	var aborted = false;

	/**
	 * Parses `input` and returns { data, errors, meta }.
	 * When a step function is configured it is called once per row and
	 * data/errors are reset after every call.
	 *
	 * @param {string} input           Text to parse.
	 * @param {number} [baseIndex]     Offset added to the reported cursor; header
	 *                                 de-duplication only runs when it is falsy.
	 * @param {boolean} [ignoreLastRow] When truthy, the last (possibly incomplete)
	 *                                 row is not emitted.
	 * @throws {Error} When input is not a string.
	 */
	this.parse = function(input, baseIndex, ignoreLastRow)
	{
		// For some reason, in Chrome, this speeds things up (!?)
		if (typeof input !== 'string')
			throw new Error('Input must be a string');

		// We don't need to compute some of these every time parse() is called,
		// but having them in a more local scope seems to perform better
		var inputLen = input.length,
			delimLen = delim.length,
			newlineLen = newline.length,
			commentsLen = comments.length;
		var stepIsFunction = isFunction(step);

		// Establish starting state
		cursor = 0;
		var data = [], errors = [], row = [], lastCursor = 0;

		if (!input)
			return returnable();

		// Fast path: no quote handling, just split on newline and delimiter
		if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
		{
			var rows = input.split(newline);
			for (var i = 0; i < rows.length; i++)
			{
				row = rows[i];
				cursor += row.length;
				if (i !== rows.length - 1)
					cursor += newline.length;
				else if (ignoreLastRow)
					return returnable();
				if (comments && row.substring(0, commentsLen) === comments)
					continue;
				if (stepIsFunction)
				{
					data = [];
					pushRow(row.split(delim));
					doStep();
					if (aborted)
						return returnable();
				}
				else
					pushRow(row.split(delim));
				if (preview && i >= preview)
				{
					data = data.slice(0, preview);
					return returnable(true);
				}
			}
			return returnable();
		}

		var nextDelim = input.indexOf(delim, cursor);
		var nextNewline = input.indexOf(newline, cursor);
		// Matches an escaped quote inside a quoted field
		var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
		var quoteSearch = input.indexOf(quoteChar, cursor);

		// Parser loop
		for (;;)
		{
			// Field has opening quote
			if (input[cursor] === quoteChar)
			{
				// Start our search for the closing quote where the cursor is
				quoteSearch = cursor;

				// Skip the opening quote
				cursor++;

				for (;;)
				{
					// Find closing quote
					quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);

					//No other quotes are found - no other delimiters
					if (quoteSearch === -1)
					{
						if (!ignoreLastRow) {
							// No closing quote... what a pity
							errors.push({
								type: 'Quotes',
								code: 'MissingQuotes',
								message: 'Quoted field unterminated',
								row: data.length, // row has yet to be inserted
								index: cursor
							});
						}
						return finish();
					}

					// Closing quote at EOF
					if (quoteSearch === inputLen - 1)
					{
						var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
						return finish(value);
					}

					// If this quote is escaped, it's part of the data; skip it
					// If the quote character is the escape character, then check if the next character is the escape character
					if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar)
					{
						quoteSearch++;
						continue;
					}

					// If the quote character is not the escape character, then check if the previous character was the escape character
					if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar)
					{
						continue;
					}

					// Delimiters/newlines before the closing quote belong to the field; look past them
					if(nextDelim !== -1 && nextDelim < (quoteSearch + 1)) {
						nextDelim = input.indexOf(delim, (quoteSearch + 1));
					}
					if(nextNewline !== -1 && nextNewline < (quoteSearch + 1)) {
						nextNewline = input.indexOf(newline, (quoteSearch + 1));
					}
					// Check up to nextDelim or nextNewline, whichever is closest
					var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
					var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);

					// Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
					if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndDelimiter, delimLen) === delim)
					{
						row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
						cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;

						// If char after following delimiter is not quoteChar, we find next quote char position
						if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen] !== quoteChar)
						{
							quoteSearch = input.indexOf(quoteChar, cursor);
						}
						nextDelim = input.indexOf(delim, cursor);
						nextNewline = input.indexOf(newline, cursor);
						break;
					}

					var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);

					// Closing quote followed by newline or 'unnecessary spaces + newLine'
					if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline)
					{
						row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
						saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
						nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
						quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line

						if (stepIsFunction)
						{
							doStep();
							if (aborted)
								return returnable();
						}

						if (preview && data.length >= preview)
							return returnable(true);

						break;
					}

					// Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
					errors.push({
						type: 'Quotes',
						code: 'InvalidQuotes',
						message: 'Trailing quote on quoted field is malformed',
						row: data.length, // row has yet to be inserted
						index: cursor
					});

					quoteSearch++;
					continue;
				}

				continue;
			}

			// Comment found at start of new line
			if (comments && row.length === 0 && input.substring(cursor, cursor + commentsLen) === comments)
			{
				if (nextNewline === -1) // Comment ends at EOF
					return returnable();
				cursor = nextNewline + newlineLen;
				nextNewline = input.indexOf(newline, cursor);
				nextDelim = input.indexOf(delim, cursor);
				continue;
			}

			// Next delimiter comes before next newline, so we've reached end of field
			if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1))
			{
				row.push(input.substring(cursor, nextDelim));
				cursor = nextDelim + delimLen;
				// we look for next delimiter char
				nextDelim = input.indexOf(delim, cursor);
				continue;
			}

			// End of row
			if (nextNewline !== -1)
			{
				row.push(input.substring(cursor, nextNewline));
				saveRow(nextNewline + newlineLen);

				if (stepIsFunction)
				{
					doStep();
					if (aborted)
						return returnable();
				}

				if (preview && data.length >= preview)
					return returnable(true);

				continue;
			}

			break;
		}

		return finish();

		/** Appends a finished row to data and remembers the cursor position after it. */
		function pushRow(row)
		{
			data.push(row);
			lastCursor = cursor;
		}

		/**
		 * checks if there are extra spaces after closing quote and given index without any text
		 * if Yes, returns the number of spaces
		 */
		function extraSpaces(index) {
			var spaceLength = 0;
			if (index !== -1) {
				var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
				if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
					spaceLength = textBetweenClosingQuoteAndIndex.length;
				}
			}
			return spaceLength;
		}

		/**
		 * Appends the remaining input from cursor to the end into
		 * row, saves the row, calls step, and returns the results.
		 */
		function finish(value)
		{
			if (ignoreLastRow)
				return returnable();
			if (typeof value === 'undefined')
				value = input.substring(cursor);
			row.push(value);
			cursor = inputLen; // important in case parsing is paused
			pushRow(row);
			if (stepIsFunction)
				doStep();
			return returnable();
		}

		/**
		 * Appends the current row to the results. It sets the cursor
		 * to newCursor and finds the nextNewline. The caller should
		 * take care to execute user's step function and check for
		 * preview and end parsing if necessary.
		 */
		function saveRow(newCursor)
		{
			cursor = newCursor;
			pushRow(row);
			row = [];
			nextNewline = input.indexOf(newline, cursor);
		}

		/**
		 * Returns an object with the results, errors, and meta.
		 * In header mode the first row of the first chunk is rewritten once
		 * (in place): transformHeader is applied and duplicate names get a
		 * numeric suffix; the renames are reported in meta.renamedHeaders.
		 */
		function returnable(stopped)
		{
			if (config.header && !baseIndex && data.length && !headerParsed)
			{
				const result = data[0];
				// To track the count of each base header. A null-prototype object is
				// used so that header names such as "constructor" or "toString" are
				// not mistaken for duplicates through members inherited from
				// Object.prototype.
				const headerCount = Object.create(null);
				const usedHeaders = new Set(result); // To track used headers and avoid duplicates
				let duplicateHeaders = false;

				for (let i = 0; i < result.length; i++) {
					let header = result[i];
					if (isFunction(config.transformHeader))
						header = config.transformHeader(header, i);

					if (!headerCount[header]) {
						headerCount[header] = 1;
						result[i] = header;
					} else {
						let newHeader;
						let suffixCount = headerCount[header];

						// Find a unique new header
						do {
							newHeader = `${header}_${suffixCount}`;
							suffixCount++;
						} while (usedHeaders.has(newHeader));

						usedHeaders.add(newHeader); // Mark this new Header as used
						result[i] = newHeader;
						headerCount[header]++;
						duplicateHeaders = true;
						if (renamedHeaders === null) {
							renamedHeaders = {};
						}
						renamedHeaders[newHeader] = header;
					}

					usedHeaders.add(header); // Ensure the original header is marked as used
				}
				if (duplicateHeaders) {
					console.warn('Duplicate headers found and renamed.');
				}
				headerParsed = true;
			}
			return {
				data: data,
				errors: errors,
				meta: {
					delimiter: delim,
					linebreak: newline,
					aborted: aborted,
					truncated: !!stopped,
					cursor: lastCursor + (baseIndex || 0),
					renamedHeaders: renamedHeaders
				}
			};
		}

		/** Executes the user's step function and resets data & errors. */
		function doStep()
		{
			step(returnable());
			data = [];
			errors = [];
		}
	};

	/** Sets the abort flag */
	this.abort = function()
	{
		aborted = true;
	};

	/** Gets the cursor position */
	this.getCharIndex = function()
	{
		return cursor;
	};
}
/**
 * Creates a worker from the worker blob URL, gives it the next worker id
 * and stores it in the `workers` registry.
 *
 * @returns {Worker|boolean} The new worker, or false when Papa.WORKERS_SUPPORTED is falsy.
 */
function newWorker()
{
	if (Papa.WORKERS_SUPPORTED)
	{
		var worker = new global.Worker(getWorkerBlob());
		var id = workerIdCounter++;
		worker.onmessage = mainThreadReceivedMessage;
		worker.id = id;
		workers[id] = worker;
		return worker;
	}
	return false;
}
/**
 * Callback when main thread receives a message from a worker.
 *
 * Errors go to the worker's userError callback. Results go row by row to
 * userStep or, failing that, as a whole to userChunk. When the message is
 * flagged as finished and the user did not abort, the worker is completed.
 */
function mainThreadReceivedMessage(e)
{
	var msg = e.data;
	var worker = workers[msg.workerId];
	var aborted = false;

	if (msg.error)
	{
		worker.userError(msg.error, msg.file);
	}
	else if (msg.results && msg.results.data)
	{
		// Handle passed to the user's callbacks; only abort is supported here
		var handle = {
			abort: function() {
				aborted = true;
				completeWorker(msg.workerId, { data: [], errors: [], meta: { aborted: true } });
			},
			pause: notImplemented,
			resume: notImplemented
		};

		if (isFunction(worker.userStep))
		{
			var results = msg.results;
			// stop delivering rows as soon as the user aborts
			for (var rowIndex = 0; rowIndex < results.data.length && !aborted; rowIndex++)
			{
				worker.userStep({
					data: results.data[rowIndex],
					errors: results.errors,
					meta: results.meta
				}, handle);
			}
			delete msg.results; // free memory ASAP
		}
		else if (isFunction(worker.userChunk))
		{
			worker.userChunk(msg.results, handle, msg.file);
			delete msg.results;
		}
	}

	if (!aborted && msg.finished)
		completeWorker(msg.workerId, msg.results);
}
/**
 * Finishes a worker: calls its userComplete callback (if it is a function)
 * with the results, terminates it and removes it from the registry.
 */
function completeWorker(workerId, results) {
	var finished = workers[workerId];
	var hasCompleteCallback = isFunction(finished.userComplete);

	if (hasCompleteCallback) {
		finished.userComplete(results);
	}

	finished.terminate();
	delete workers[workerId];
}
/** Placeholder for operations that are not available; always throws. */
function notImplemented() {
	var unsupported = new Error('Not implemented.');
	throw unsupported;
}
/**
 * Callback when worker thread receives a message.
 *
 * Records the worker id on first use, parses the received input and posts
 * the results back. String input is always answered; File/object input is
 * answered only when Papa.parse returns results.
 */
function workerThreadReceivedMessage(e)
{
	var msg = e.data;

	if (msg && typeof Papa.WORKER_ID === 'undefined')
		Papa.WORKER_ID = msg.workerId;

	var isText = typeof msg.input === 'string';
	if (!isText)
	{
		// thank you, Safari (see issue #106)
		var isFileOrObject = (global.File && msg.input instanceof File) || msg.input instanceof Object;
		if (!isFileOrObject)
			return;
	}

	var results = Papa.parse(msg.input, msg.config);
	if (!isText && !results)
		return;

	global.postMessage({
		workerId: Papa.WORKER_ID,
		results: results,
		finished: true
	});
}
/**
 * Makes a deep copy of an array or object (mostly).
 * Anything that is not a non-null object is returned as is; arrays become
 * new arrays and every other object becomes a new plain object holding a
 * copy of each enumerable property (inherited ones included).
 */
function copy(obj)
{
	var isContainer = obj !== null && typeof obj === 'object';
	if (!isContainer)
		return obj;

	var clone;
	if (Array.isArray(obj))
		clone = [];
	else
		clone = {};

	for (var key in obj)
	{
		clone[key] = copy(obj[key]);
	}
	return clone;
}
/**
 * Returns a function that calls `f` with `this` fixed to `self`, forwarding
 * all arguments.
 *
 * The wrapper hands back whatever `f` returns, so a bound callback that
 * returns a value (for example `return callback()`) no longer loses it.
 *
 * @param {Function} f    Function to bind.
 * @param {*}        self Value to use as `this` when calling `f`.
 * @returns {Function} The bound wrapper.
 */
function bindFunction(f, self)
{
	return function() { return f.apply(self, arguments); };
}
/** Tells whether `func` is callable, i.e. `typeof` reports it as a function. */
function isFunction(func)
{
	var kind = typeof func;
	return kind === 'function';
}
- return Papa;
- }));
|