Loading...
Logo
Processing Forum
So I've written this program that takes in 6 datasheets and graphs them as you click on the relevant tab. I thought that the plots looked a little strange before, but now that I look at what my program is printing to the console, I can see that something is wrong here...

I'm definitely looking at the same data that's supplied to my program, but for whatever reason the figures that populate the cells are coming back just a bit off. In the worst case, there are entire rows that won't show up in my graph.

If it's any help, I got these data sheets from a government website that allows you to download their data in the form of an Excel sheet. I was told to load it into Excel, then save as a tab delimited text file. So I've done all of that and here I am.

Any ideas on why many of my figures are just slightly off?

Replies(1)

Here is the class that's used to clean up and hold the data. It was originally made to store floats, but I tailored it to hold Strings.

// first line of the file should be the column headers
// first column should be the row titles
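// all other values are stored as Strings; the min/max/sum helpers parse them
// as floats on demand and skip any cell that does not parse as a number
// getEntry(0, 0) returns the first data value in the upper lefthand corner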
// files should be saved as "text, tab-delimited"
// empty rows are ignored
// extra whitespace is ignored


class EntryTable {
  int rowCount;
  int columnCount;
  //float[][] data;  Must make it hold Strings instead
  float[][] sumData;
  String[][] data;
  String[] rowNames;
  String[] columnNames;
  
  
  EntryTable(String filename) {
    String[] rows = loadStrings(filename);
    
    String[] columns = split(rows[0], TAB);
    columnNames = subset(columns, 1); // upper-left corner ignored
    scrubQuotes(columnNames);
    columnCount = columnNames.length;

    rowNames = new String[rows.length-1];
    //data = new float[rows.length-1][];  Must replace with String holding equivalent
    data = new String[rows.length-1][];

    // start reading at row 1, because the first row was only the column headers
    for (int i = 1; i < rows.length; i++) {
      if (trim(rows[i]).length() == 0) {
        continue; // skip empty rows
      }
      if (rows[i].startsWith("#")) {
        continue;  // skip comment lines
      }

      // split the row on the tabs
      String[] pieces = split(rows[i], TAB);
      scrubQuotes(pieces);
      
      // copy row title
      rowNames[rowCount] = pieces[0];
      // copy data into the table starting at pieces[1]
      data[rowCount] = subset(pieces, 1);

      // increment the number of valid rows found so far
      rowCount++;      
    }
    // resize the 'data' array as necessary
    data = (String[][]) subset(data, 0, rowCount);
  }
  
  
  void scrubQuotes(String[] array) {
    for (int i = 0; i < array.length; i++) {
      if (array[i].length() > 2) {
        // remove quotes at start and end, if present
        if (array[i].startsWith("\"") && array[i].endsWith("\"")) {
          array[i] = array[i].substring(1, array[i].length() - 1);
        }
      }
      // make double quotes into single quotes
      array[i] = array[i].replaceAll("\"\"", "\"");
    }
  }
  
  
  int getRowCount() {
    return rowCount;
  }
  
  
  String getRowName(int rowIndex) {
    return rowNames[rowIndex];
  }
  
  
  String[] getRowNames() {
    return rowNames;
  }

  
  // Find a row by its name, returns -1 if no row found. 
  // This will return the index of the first row with this name.
  // A more efficient version of this function would put row names
  // into a Hashtable (or HashMap) that would map to an integer for the row.
  int getRowIndex(String name) {
    for (int i = 0; i < rowCount; i++) {
      if (rowNames[i].equals(name)) {
        return i;
      }
    }
    //println("No row named '" + name + "' was found");
    return -1;
  }
  
  
  // technically, this only returns the number of columns 
  // in the very first row (which will be most accurate)
  int getColumnCount() {
    return columnCount;
  }
  
  
  String getColumnName(int colIndex) {
    return columnNames[colIndex];
  }
  
  
  String[] getColumnNames() {
    return columnNames;
  } 
  
  
  String getEntry(int rowIndex, int col){
  //Some training wheels
  if((rowIndex < 0) || (rowIndex >= data.length)){
    throw new RuntimeException("There is no row " + rowIndex);
  }
  if((col < 0) || (col >= data[rowIndex].length)) {
  throw new RuntimeException("Row " + rowIndex + " does not have a column " + col);    
  }
  //End of training wheels
  
  return data[rowIndex][col];
  }
 
 /*
//Can't use
  boolean isValid(int row, int col) {
    if (row < 0) return false;
    if (row >= rowCount) return false;
    //if (col >= columnCount) return false;
    if (col >= data[row].length) return false;
    if (col < 0) return false;
    return !Float.isNaN(data[row][col]);
  } */
  
  //This is the string version... use when checking for entries that arent necessarily numbers
 boolean isValid(int row, int col) {
    if (row < 0) return false;
    if (row >= rowCount) return false;
    //if (col >= columnCount) return false;
    if (col >= data[row].length) return false;
    if (col < 0) return false;
    return true;
    //return (!Float.isNaN(float(data[row][col])) || data[row][col] == "--" || data[row][col] == "NA");
 }
    
  float getColumnMin(int col) {
    float m = Float.MAX_VALUE;
    for (int i = 0; i < rowCount; i++) {
      if (!Float.isNaN(float(data[i][col]))) {
        if (float(data[i][col]) < m) {
          m = float(data[i][col]);
        }
      }
    }
    return m;
  }

  
  float getColumnMax(int col) {
    float m = -Float.MAX_VALUE;
    for (int i = 0; i < rowCount; i++) {
      if (isValid(i, col) && !Float.isNaN(float(data[i][col]))) {
        if (float(data[i][col]) > m) {
          m = float(data[i][col]);
        }
      }
    }
    return m;
  }

  
  float getRowMin(int row) {
    float m = Float.MAX_VALUE;
    for (int i = 0; i < columnCount; i++) {
      if (isValid(row, i) && !Float.isNaN(float(data[row][i]))) {
        if (float(data[row][i]) < m) {
          m = float(data[row][i]);
        }
      }
    }
    return m;
  } 

  
  float getRowMax(int row) {
    float m = -Float.MAX_VALUE;
    for (int i = 1; i < columnCount; i++) {
      if (!Float.isNaN(float(data[row][i]))) {
        if (float(data[row][i]) > m) {
          m = float(data[row][i]);
        }
      }
    }
    return m;
  }
  
  
  float getTableMin() {
    float m = Float.MAX_VALUE;
    for (int i = 0; i < rowCount; i++) {
      for (int j = 0; j < columnCount; j++) {
        if (isValid(i, j) && !Float.isNaN(float(data[i][j]))) {
          if (float(data[i][j]) < m) {
            m = float(data[i][j]);
          }
        }
      }
    }
    return m;
  }

  
  float getTableMax() {
    float m = -Float.MAX_VALUE;
    for (int i = 0; i < rowCount; i++) {
      for (int j = 0; j < columnCount; j++) {
        if (isValid(i, j) && !Float.isNaN(float(data[i][j]))) {
          if (float(data[i][j]) > m) {
            m = float(data[i][j]);
          }
        }
      }
    }
    return m;
  }


void calculateSortSumData(){
   int rows = data.length;
   
   sumData = new float[rows][2];   //first create new data structure to hold sums of data(col = 1) and their respctive country indices(col = 0)
   
  float sumOfRow;
  float holder;
  
  for(int i = 0; i < rows; i++){
    sumOfRow = 0; //reset the sum variable so that it may house the upcoming country's sum of data
    sumData[i][0] = i; //define which row(country) id you are storing
  
    for(int j = 0; j < columnCount; j++){
     if (isValid(i, j) && !Float.isNaN(float(data[i][j]))){ //if data is valid at each column check & does not contain  '--' or 'NaN'
     holder = float(getEntry(i, j));
     sumOfRow += holder; //add to sum variable
     }
    sumData[i][1] =sumOfRow;
  // println("Total for Row " + i + ": " + sumData[i][1]);
    }
 }
 //Now reorganize the new array's rows from smallest to largest sum value.
  
 sortSumData();

 }


void sortSumData() {
  int rows = data.length;
  
  float[] temp = new float[2];
  
  for(int completeness = 0; completeness < rowCount; completeness++){
    for(int i = 0; i < rows - 1; i++){//Need to sort by whole rows so that we can remember the original index number and therefore, it's respective country
      if(sumData[i][1] < sumData[i + 1][1]) {
        temp = sumData[i+1];
        sumData[i+1] = sumData[i];
        sumData[i] = temp;
    }
  }
}
}
}//End of class