Parsing .csv
in
Programming Questions
•
1 year ago
I am working on cleaning up some data and wanted to replace all the commas with tabs and save the result as a .tsv file, but for some reason the code didn't replace every comma with a tab, and I can't find a pattern as to why it missed a few. Has anyone had this problem?
For example:
# 2051 Marshfield Hills in the .tsv file has no tab or comma but in the original .csv file there was a comma there...why wasn't it replaced by a tab?
.csv file
2035,42.062204,-71.235774,FOXBORO,25
2038,42.08868,-71.404814,FRANKLIN,25
2040,41.970474,-70.701357,GREENBUSH,25
2041,42.069642,-70.649075,GREEN HARBOR,25
2043,42.212105,-70.884989,HINGHAM,25
2044,41.970474,-70.701357,HINGHAM,25
2045,42.284413,-70.873659,HULL,25
2047,42.142836,-70.69353,HUMAROCK,25
2048,42.013182,-71.218373,MANSFIELD,25
2050,42.111805,-70.710744,MARSHFIELD,25
2051,42.151202,-70.734146,MARSHFIELD HILLS,25
2052,42.181265,-71.309934,MEDFIELD,25
2053,42.156282,-71.427663,MEDWAY,25
2054,42.165249,-71.36126,MILLIS,25
.tsv file
2040 41.970474 -70.701357 GREENBUSH 25
2041 42.069642 -70.649075 GREEN HARBOR 25
2043 42.212105 -70.884989 HINGHAM 25
2044 41.970474 -70.701357 HINGHAM 25
2045 42.284413 -70.873659 HULL 25
2047 42.142836 -70.69353 HUMAROCK 25
2048 42.013182 -71.218373 MANSFIELD 25
2050 42.111805 -70.710744 MARSHFIELD 25
2051 42.151202 -70.734146 MARSHFIELD HILLS 25
2052 42.181265 -71.309934 MEDFIELD 25
2053 42.156282 -71.427663 MEDWAY 25
2054 42.165249 -71.36126 MILLIS 25
2055 41.970474 -70.701357 MINOT 25
For example:
# 2051 Marshfield Hills in the .tsv file has no tab or comma but in the original .csv file there was a comma there...why wasn't it replaced by a tab?
.csv file
2035,42.062204,-71.235774,FOXBORO,25
2038,42.08868,-71.404814,FRANKLIN,25
2040,41.970474,-70.701357,GREENBUSH,25
2041,42.069642,-70.649075,GREEN HARBOR,25
2043,42.212105,-70.884989,HINGHAM,25
2044,41.970474,-70.701357,HINGHAM,25
2045,42.284413,-70.873659,HULL,25
2047,42.142836,-70.69353,HUMAROCK,25
2048,42.013182,-71.218373,MANSFIELD,25
2050,42.111805,-70.710744,MARSHFIELD,25
2051,42.151202,-70.734146,MARSHFIELD HILLS,25
2052,42.181265,-71.309934,MEDFIELD,25
2053,42.156282,-71.427663,MEDWAY,25
2054,42.165249,-71.36126,MILLIS,25
.tsv file
2040 41.970474 -70.701357 GREENBUSH 25
2041 42.069642 -70.649075 GREEN HARBOR 25
2043 42.212105 -70.884989 HINGHAM 25
2044 41.970474 -70.701357 HINGHAM 25
2045 42.284413 -70.873659 HULL 25
2047 42.142836 -70.69353 HUMAROCK 25
2048 42.013182 -71.218373 MANSFIELD 25
2050 42.111805 -70.710744 MARSHFIELD 25
2051 42.151202 -70.734146 MARSHFIELD HILLS 25
2052 42.181265 -71.309934 MEDFIELD 25
2053 42.156282 -71.427663 MEDWAY 25
2054 42.165249 -71.36126 MILLIS 25
2055 41.970474 -70.701357 MINOT 25
- //Clean zipnov99.csv and save as cleanZips.tsv
- //load each line into an array of strings
- String[] zipLines;
- void setup() {
- zipLines = loadStrings("zipnov99.csv");
- PrintWriter tsv = createWriter("zips.tsv");
- makeTabs(zipLines);
- for(int i=0; i < zipLines.length; i++) {
- println(zipLines[i]);
- tsv.println(zipLines[i]);
- }
- }
- //change commas (",") to tabs (\t) and save as .tsv
- void makeTabs(String[] dataArray) {
- for(int i=0; i < dataArray.length; i++) {
- dataArray[i] = dataArray[i].replaceAll(",","\t");
- }
- }
Thank you!
1