Blazing fast and Comprehensive CSV Parser for Node.JS / Browser / Command Line.
The all-in-one Node.js CSV to JSON converter.
Here is a free online csv to json service utilising the latest csvtojson module.
GitHub: https://github.com/Keyang/node-csvtojson
npm install -g csvtojson
npm install csvtojson --save
You can use File stream
// Converter class: parse a CSV file by piping a file read stream into it.
var Converter = require("csvtojson").Converter;
var csvConverter = new Converter({});
// "end_parsed" fires exactly once, with the complete JSON array, when parsing finishes.
csvConverter.on("end_parsed", function (parsedRows) {
  console.log(parsedRows); // here is your result jsonarray
});
// Stream the CSV file into the converter.
require("fs").createReadStream("./file.csv").pipe(csvConverter);
Or use fromFile convenient function
// Converter class: parse a CSV file with the fromFile convenience method.
var Converter = require("csvtojson").Converter;
var csvConverter = new Converter({});
// Callback receives (err, result) once the whole file has been parsed.
csvConverter.fromFile("./file.csv", function (err, result) {
});
To convert any CSV data from readable stream just simply pipe in the data.
// Converter class: convert CSV arriving on any readable stream by piping it in.
var Converter = require("csvtojson").Converter;
// constructResult:false avoids buffering the whole result — suitable for big csv data.
var csvConverter = new Converter({constructResult:false});
// "record_parsed" fires once per CSV row with that row's JSON object.
csvConverter.on("record_parsed", function (rowJson) {
  console.log(rowJson); // here is your result json object
});
require("request").get("http://csvwebserver").pipe(csvConverter);
// Convert an in-memory CSV string with fromString.
var Converter = require("csvtojson").Converter;
var csvConverter = new Converter({});
csvConverter.fromString(csvString, function (err, result) {
  // result is the parsed JSON array; check err first in real code.
});
csvtojson
Example
csvtojson ./myCSVFile <option1=value>
Or use pipe:
cat myCSVFile | csvtojson
Check current version:
csvtojson version
Advanced usage with parameters support, check help:
csvtojson --help
The constructor of csv Converter allows parameters:
// The Converter constructor accepts a parameter object.
// FIX: parenthesize so `new` applies to Converter — the original
// `new require("csvtojson").Converter({...})` constructs `require` itself
// and then calls Converter as a plain function.
var converter = new (require("csvtojson").Converter)({
  constructResult: false, // do not build the final in-memory result array
  workerNum: 4,           // number of parser workers
  noheader: true          // treat the first CSV row as data, not a header
});
Following parameters are supported:
All parameters can be used in the Command Line tool. See
csvtojson --help
To transform JSON result, (e.g. change value of one column), just simply add ‘transform handler’.
// Attach a synchronous transform handler to modify every result row.
var Converter = require("csvtojson").Converter;
var csvConverter = new Converter({});
csvConverter.transform = function (json, row, index) {
  // Tag each row with its CSV row number.
  json["rowIndex"] = index;
  /* some other examples:
  delete json["myfield"]; //remove a field
  json["dateOfBirth"]=new Date(json["dateOfBirth"]); // convert a field type
  */
};
csvConverter.fromString(csvString, function (err, result) {
  // Every result row now carries a 'rowIndex' field indicating its row number:
  /*
  [{
    field1:value1,
    rowIndex: 0
  }]
  */
});
As shown in example above, it is able to apply any changes to the result json which will be pushed to down stream and “record_parsed” event.
Asynchronous transformation can be achieved either through the “record_parsed” event or by creating a Writable stream.
To transform data asynchronously, it is suggested to use csvtojson with Async Queue.
This is mainly used when the transformation of each csv row needs to be merged with data retrieved from external sources such as a database / server / file system.
However this approach will not change the json result pushed to downstream.
Here is an example:
// Asynchronous per-row transformation using csvtojson + async.queue:
// each parsed row is queued, and up to 10 workers fetch related user data
// over HTTP and merge it into the row.
var Conv = require("csvtojson").Converter;
var async = require("async");
var rs = require("fs").createReadStream("path/to/csv"); // or any readable stream to csv data.
var q = async.queue(function (json, callback) {
  // process the json asynchronously.
  require("request").get("http://myserver/user/" + json.userId, function (err, user) {
    // do the data mash here
    json.user = user;
    callback();
  });
}, 10); // 10 concurrent workers at the same time
q.saturated = function () {
  rs.pause(); // if queue is full, pause the read stream so csvtojson suspends populating json data. It is ok not to do so if CSV data is not very large.
};
q.empty = function () {
  rs.resume(); // Resume the paused readable stream. You may need to check isPaused() (since node 0.12) or whether the stream already finished.
};
// FIX: the documented flag is constructResult (the original example's
// `construct:false` is not a recognised parameter).
var conv = new Conv({constructResult:false});
conv.transform = function (json) {
  q.push(json);
};
conv.on("end_parsed", function () {
  q.drain = function () {
    // code when Queue process finished.
  };
});
rs.pipe(conv);
In the example above, the transformation happens as each csv row is processed. The related user info will be pulled from a web server and merged into the json result.
There will be at most 10 data transformation workers working concurrently with the help of Async Queue.
It is able to create a Writable stream (or Transform) which process data asynchronously. See Here for more details.
Below is an example of result transformation which converts csv data to a column array rather than a JSON.
// Transform the result into column arrays: each CSV column becomes an array
// keyed by its header name, rather than one JSON object per row.
var Converter = require("csvtojson").Converter;
var columArrData = __dirname + "/data/columnArray";
// FIX: the original used a bare `fs` that was never required.
var rs = require("fs").createReadStream(columArrData);
var result = {};
var csvConverter = new Converter();
// end_parsed will be emitted once parsing finished
csvConverter.on("end_parsed", function (jsonObj) {
  console.log(result);
  console.log("Finished parsing");
  done();
});
// record_parsed will be emitted each time a row has been parsed.
csvConverter.on("record_parsed", function (resultRow, rawRow, rowIndex) {
  for (var key in resultRow) {
    // FIX: the original `!result[key] || !result[key] instanceof Array` applied
    // `!` before `instanceof`, so the Array check could never be true.
    if (!Array.isArray(result[key])) {
      result[key] = [];
    }
    result[key][rowIndex] = resultRow[key];
  }
});
rs.pipe(csvConverter);
Here is an example:
TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED
1395426422,n,10028,1213,5461
1395426422,n,10013,9954,13560
1395426422,n,10109,221391500,141836
1395426422,n,10007,53448,308549
1395426422,n,10022,15506,72125
It will be converted to:
{
"TIMESTAMP": ["1395426422", "1395426422", "1395426422", "1395426422", "1395426422"],
"UPDATE": ["n", "n", "n", "n", "n"],
"UID": ["10028", "10013", "10109", "10007", "10022"],
"BYTES SENT": ["1213", "9954", "221391500", "53448", "15506"],
"BYTES RCVED": ["5461", "13560", "141836", "308549", "72125"]
}
This hook is called when parser received any data from upper stream and allow developers to change it. e.g.
/*
CSV data:
a,b,c,d,e
12,e3,fb,w2,dd
*/
// preProcessRaw lets you rewrite a raw data chunk before the parser sees it.
var conv = new Converter();
conv.preProcessRaw = function (data, cb) {
  // change all 12 to 23
  var patched = data.replace("12", "23");
  cb(patched);
};
conv.fromString(csv, function (err, json) {
  // json:{a:23 ....}
});
By default, the preProcessRaw just returns the data from the source
// Default implementation: hand the chunk straight through unchanged.
Converter.prototype.preProcessRaw = function (data, cb) {
  cb(data);
};
It is also very good to sanitise/prepare the CSV data stream.
// Strip leading whitespace from the head of the CSV stream only once,
// then pass all subsequent chunks through untouched.
var headWhiteSpaceRemoved = false;
conv.preProcessRaw = function (data, cb) {
  if (!headWhiteSpaceRemoved) {
    // FIX: the original never set this flag, so every chunk was re-trimmed
    // and the flag (and the else branch) were dead code.
    headWhiteSpaceRemoved = true;
    cb(data.replace(/^\s+/, ""));
  } else {
    cb(data);
  }
};
this hook is called when a file line is emitted. It is called with two parameters fileLineData,lineNumber
. The lineNumber
is starting from 1.
/*
CSV data:
a,b,c,d,e
12,e3,fb,w2,dd
*/
// preProcessLine rewrites one file line at a time; lineNumber starts at 1.
var conv = new Converter();
conv.preProcessLine = function (line, lineNumber) {
  // only change 12 to 23 for line 2
  return lineNumber === 2 ? line.replace("12", "23") : line;
};
conv.fromString(csv, function (err, json) {
  // json:{a:23 ....}
});
Notice that preProcessLine does not support asynchronous changes, unlike the preProcessRaw hook.
Following events are used for Converter class:
To subscribe the event:
//Converter Class
var Converter = require("csvtojson").Converter;
// FIX: the original example subscribed on `csvConverter` without ever
// creating the instance.
var csvConverter = new Converter();
//end_parsed will be emitted once parsing finished
csvConverter.on("end_parsed", function (jsonObj) {
  console.log(jsonObj); //here is your result json object
});
//record_parsed will be emitted each time a row has been parsed.
csvConverter.on("record_parsed", function (resultRow, rawRow, rowIndex) {
  console.log(resultRow); //here is your result json object
});
There are flags in the library:
*omit*: Omit a column. The values in the column will not be built into JSON result.
*flat*: Mark a head column as is the key of its JSON result.
Example:
*flat*user.name, user.age, *omit*user.gender
Joe , 40, Male
It will be converted to:
[{
"user.name":"Joe",
"user":{
"age":40
}
}]
csvtojson library was designed to accept big csv file converting. To avoid memory consumption, it is recommended to use a read stream and a write stream.
var Converter = require("csvtojson").Converter;
// constructResult:false turns off final result construction, avoiding huge
// memory consumption while parsing; the trade-off is that end_parsed will
// not be populated with the final result.
var csvConverter = new Converter({constructResult:false});
var readStream = require("fs").createReadStream("inputData.csv");
// FIX: "outpuData.json" → "outputData.json" (typo in the original example).
var writeStream = require("fs").createWriteStream("outputData.json");
readStream.pipe(csvConverter).pipe(writeStream);
The constructResult:false will tell the constructor not to combine the final result which would drain the memory as progressing. The output is piped directly to writeStream.
csvtojson command line tool supports streaming in big csv file and stream out json file.
It is very convenient to process any kind of big csv file. It has proven to process csv files of over 3,000,000 lines (over 500MB) without issue, with memory usage under 30MB.
Once you have installed csvtojson, you could use the tool with command:
csvtojson [path to bigcsvdata] > converted.json
Or if you prefer streaming data in from another application:
cat [path to bigcsvdata] | csvtojson > converted.json
They will do the same job.
To parse a string, simply call fromString(csvString,callback) method. The callback parameter is optional.
For example:
// Parse a CSV string read from a file; the fromString callback is optional.
var testData = __dirname + "/data/testData";
// FIX: the original used a bare `fs` that was never required.
var data = require("fs").readFileSync(testData).toString();
// FIX: the original instantiated an undefined `CSVConverter`.
var csvConverter = new (require("csvtojson").Converter)();
//end_parsed will be emitted once parsing finished
csvConverter.on("end_parsed", function (jsonObj) {
  //final result populated here as normal.
});
csvConverter.fromString(data, function (err, jsonObj) {
  if (err) {
    //err handle
  }
  console.log(jsonObj);
});
Note: If you want to maintain the original CSV data header values as JSON keys “as is” without being
interpreted as (complex) JSON structures you can set the option --flatKeys=true
.
Since version 0.3.8, csvtojson now can replicate any complex JSON structure.
As we know, JSON object represents a graph while CSV is only 2-dimension data structure (table).
To make JSON and CSV containing same amount information, we need “flatten” some information in JSON.
Here is an example. Original CSV:
fieldA.title, fieldA.children[0].name, fieldA.children[0].id,fieldA.children[1].name, fieldA.children[1].employee[].name,fieldA.children[1].employee[].name, fieldA.address[],fieldA.address[], description
Food Factory, Oscar, 0023, Tikka, Tim, Joe, 3 Lame Road, Grantstown, A fresh new food factory
Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle
The data above contains nested JSON including nested array of JSON objects and plain texts.
Using csvtojson to convert, the result would be like:
[{
"fieldA": {
"title": "Food Factory",
"children": [{
"name": "Oscar",
"id": "0023"
}, {
"name": "Tikka",
"employee": [{
"name": "Tim"
}, {
"name": "Joe"
}]
}],
"address": ["3 Lame Road", "Grantstown"]
},
"description": "A fresh new food factory"
}, {
"fieldA": {
"title": "Kindom Garden",
"children": [{
"name": "Ceil",
"id": "54"
}, {
"name": "Pillow",
"employee": [{
"name": "Amst"
}, {
"name": "Tom"
}]
}],
"address": ["24 Shaker Street", "HelloTown"]
},
"description": "Awesome castle"
}]
Here is the rule for CSV data headers:
Since 0.3.8, JSON parser is the default parser. It does not need to add “*json*” to column titles. Theoretically, the JSON parser now should have functionality of “Array” parser, “JSONArray” parser, and old “JSON” parser.
This is mainly intended for the next few versions, where csvtojson could convert a JSON object back to CSV format without losing information.
It can be used to process JSON data exported from no-sql database like MongoDB.
From version 0.3.14, type of fields are supported by csvtojson.
The parameter checkType is used to control whether to check and convert the field type.
See here for the parameter usage.
Thank all who have contributed to ticket #20.
When checkType is turned on, parser will try to convert value to its implicit type if it is not explicitly specified.
For example, csv data:
name, age, married, msg
Tom, 12, false, {"hello":"world","total":23}
Will be converted into:
{
"name":"Tom",
"age":12,
"married":false,
"msg":{
"hello":"world",
"total":"23"
}
}
If checkType is turned OFF, it will be converted to:
{
"name":"Tom",
"age":"12",
"married":"false",
"msg":"{\"hello\":\"world\",\"total\":23}"
}
CSV header column can explicitly define the type of the field.
Simply add the type before the column name with a hash and exclamation mark (#!).
To define the field type, see following example
string#!appNumber, string#!finished, *flat*string#!user.msg, unknown#!msg
201401010002, true, {"hello":"world","total":23},a message
The data will be converted to:
{
"appNumber":"201401010002",
"finished":"true",
"user.msg":"{\"hello\":\"world\",\"total\":23}"
}
Since version 0.4.0, csvtojson supports multiple CPU cores to process large csv files.
The implementation and benchmark result can be found here.
To enable multi-core, just pass the worker number as parameter of constructor:
// Enable multi-core parsing by passing the worker count to the constructor.
var Converter = require("csvtojson").Converter;
var converter = new Converter({
  workerNum: 2 //use two cores
});
The minimum worker number is 1. When worker number is larger than 1, the parser will balance the job load among workers.
For command line, to use worker just use --workerNum
argument:
csvtojson --workerNum=3 ./myfile.csv
It is worth mentioning that for small CSV files it actually costs more time to create processes and keep the communication between them. Therefore, use fewer workers for small CSV files.
*Node.JS runs on a single thread. You will not want to convert a large csv file in the same process where your node.js webserver is running. csvtojson gives an option to fork the whole conversion process to a new system process, while the original process only pipes the input and result in and out. It is very simple to enable this feature:
// Fork the conversion into a child process; the current process only pipes data.
var Converter = require("csvtojson").Converter;
var converter = new Converter({
  fork: true //use child process to convert
});
Same as multi-workers, fork a new process will cause extra cost on process communication and life cycle management. Use it wisely.*
Since 0.5.0, fork=true is the same as workerNum=2.
CSV header row can be configured programmatically.
the noheader parameter indicates if the first row of csv is a header row or not. e.g. CSV data:
CC102-PDMI-001,eClass_5.1.3,10/3/2014,12,40,green,40
CC200-009-001,eClass_5.1.3,11/3/2014,5,3,blue,38,extra field!
With noheader=true
csvtojson ./test/data/noheadercsv --noheader=true
we can get following result:
[
{"field1":"CC102-PDMI-001","field2":"eClass_5.1.3","field3":"10/3/2014","field4":"12","field5":"40","field6":"green","field7":"40"},
{"field1":"CC200-009-001","field2":"eClass_5.1.3","field3":"11/3/2014","field4":"5","field5":"3","field6":"blue","field7":"38","field8":"extra field!"}
]
or we can use it in code:
// FIX: parenthesize so `new` constructs Converter rather than `require`.
var converter = new (require("csvtojson").Converter)({noheader:true});
the headers parameter specifies the header row in an array. If noheader is false, this value will override the csv header row. With the csv data above, run command:
csvtojson ./test/data/noheadercsv --noheader=true --headers='["hell","csv"]'
we get following results:
[
{"hell":"CC102-PDMI-001","csv":"eClass_5.1.3","field3":"10/3/2014","field4":"12","field5":"40","field6":"green","field7":"40"},
{"hell":"CC200-009-001","csv":"eClass_5.1.3","field3":"11/3/2014","field4":"5","field5":"3","field6":"blue","field7":"38","field8":"extra field!"}
]
If the length of the headers array is smaller than the number of columns in the csv, the converter will automatically fill the missing headers with “field*”, where * is the current column index starting from 1.
Also we can use it in code:
// FIX: parenthesize so `new` constructs Converter rather than `require`.
var converter = new (require("csvtojson").Converter)({headers:["my header1","hello world"]});
Since version 0.4.4, the parser detects CSV data corruption. It is important to catch those errors if CSV data is not guaranteed correct. Just simply register a listener to the error event:
// Subscribe to "error" to catch CSV data corruption errors.
// FIX: parenthesize so `new` constructs Converter rather than `require`.
var converter = new (require("csvtojson").Converter)();
converter.on("error", function (errMsg, errData) {
  //do error handling here
});
Once an error is emitted, the parser will continue parsing csv data if the upstream is still populating data. Therefore, a general practice is to close / destroy the upstream once an error is captured.
Here are built-in error messages and corresponding error data:
** Parser will be replaced by Result Transform and Flags **
This feature will be disabled in future.
CSVTOJSON allows adding customised parsers which concentrate on what to parse and how to parse.
It is the main power of the tool that developer only needs to concentrate on how to deal with the data and other concerns like streaming, memory, web, cli etc are done automatically.
How to add a customised parser:
// Parser Manager: register a custom column parser by name, match-pattern
// and parse callback.
var parserMgr = require("csvtojson").parserMgr;
parserMgr.addParser("myParserName", /^\*parserRegExp\*/, function (params) {
  var columnTitle = params.head; // params.head be like: *parserRegExp*ColumnName;
  // this.regExp is the regular expression registered above.
  var fieldName = columnTitle.replace(this.regExp, "");
  params.resultRow[fieldName] = "Hello my parser" + params.item;
});
parserMgr’s addParser function takes three parameters:
parser name: the name of your parser. It should be unique.
Regular Expression: It is used to test if a column of CSV data is using this parser. In the example above any column’s first row starting with parserRegExp will be using it.
Parse function callback: It is where the parsing happens. The converter works row by row and therefore the function will be called each time it needs to parse a cell in the CSV data.
The parameter of Parse function is a JSON object. It contains following fields:
head: The column’s first row’s data. It generally contains field information. e.g. arrayitems
item: The data inside current cell. e.g. item1
itemIndex: the index of current cell of a row. e.g. 0
rawRow: the reference of current row in array format. e.g. [“item1”, 23 ,“hello”]
resultRow: the reference of result row in JSON format. e.g. {“name”:“Joe”}
rowIndex: the index of current row in CSV data. start from 1 since 0 is the head. e.g. 1
resultObject: the reference of result object in JSON format. It always has a field called csvRows which is in Array format. It changes as parsing going on. e.g.
{
"csvRows":[
{
"itemName":"item1",
"number":10
},
{
"itemName":"item2",
"number":4
}
]
}
#Stream Options
Since version 1.0.0, the Converter constructor takes stream options as second parameter.
// Since 1.0.0 the Converter constructor accepts stream options as a second argument.
const conv = new Converter(params, {
  objectMode: true,    // stream down JSON object instead of JSON array
  highWaterMark: 65535 // Buffer level
});
See more detailed information here.
#Change Log
new Converter(true)
Since version 0.3, the core class of csvtojson has been inheriting from stream.Transform class. Therefore, it will behave like a normal Stream object and CSV features will not be available any more. Now the usage is like:
// Converter Class (since 0.3 it inherits from stream.Transform).
var fs = require("fs");
var Converter = require("csvtojson").Converter;
var fileStream = fs.createReadStream("./file.csv");
// new converter instance
var csvConverter = new Converter({constructResult:true});
// "end_parsed" fires once with the full result when parsing completes.
csvConverter.on("end_parsed", function (jsonObj) {
  console.log(jsonObj); //here is your result json object
});
// Stream the CSV file into the converter.
fileStream.pipe(csvConverter);
To convert from a string, previously the code was:
csvConverter.from(csvString);
Now it is:
csvConverter.fromString(csvString, callback);
The callback function above is optional. see Parse String.
After version 0.3, csvtojson requires node 0.10 and above.
-+ ↩︎