Matching Strings
in
Programming Questions
•
1 year ago
I must match entries coming from arrays of Strings, strings are Author names, but the code doesn't work, as processing recognises as different equal strings
for instance the same author name written in the same way, parsed via a regex that makes sure spaces are not there, are not seen as equal, but they should be recognised as coincident (FROM LINE 170 to the end)
I've uploaded the file "MyEndNoteCopy.txt" on the following link:
https://rapidshare.com/files/3286203740/MyEndNoteCopy.txt
- String rows[] = loadStrings("MyEndNoteCopy.txt");
- String [][] csv; // initialize csv file
- //String [][] csv2; // initialize reduced csv file
- int csvWidth=0;
- String auToMatch; // Authors to Match: from citations
- String [] auToBeMatched; // Authors to be matched: from articles
- int colCit = 11; // column in which citations starts
- //calculate max width of csv file
- String regexSplit = "[;\\t]";
- for (int i=0; i < rows.length; i++)
- {
- rows[i] = rows[i].replaceAll("\t", ";");
- String [] columns = split(rows[i],";"); //separatore ;
- if (columns.length>csvWidth)
- {
- csvWidth=columns.length;
- }
- //print(colonna.length);
- }
- //print(csvWidth);
- //create csv array based on # of rows and columns in csv file
- csv = new String [rows.length][csvWidth];
- //parse values into 2d array
- for (int i=0; i < rows.length; i++) {
- String [] temp = new String [csvWidth];
- temp= split(rows[i], ';');
- for (int j=0; j < temp.length; j++){
- temp[j]=temp[j].replaceAll(" ", ""); //toglie gli spazi che appaiono all'inizio di ogni citazione
- temp[j] = temp[j].toLowerCase();
- csv[i][j]=temp[j];
- }
- }
- //test
- println(csv[0][0]); //colonna 11 è la prima citazione!
- //println(csv.length);
- //println(csv[0].length);
- //println(((row-1)/2));
- int col = csv[0].length;
- int row = csv.length;
- int newR;
- //print(csv2[303][1]);
- //print(csv2[1].length);
- // references authors and matching
- //
- String wordRegex = "\\b\\w+\\b"; //regex to match
- String digitRegex = "\\b(\\d+)\\b";
- //
- //
- // CREATE LISTS OF Key 1stAu Jou Page nRef ((REMEMBER to change ROW var name)
- //
- //
- int [] keyNo = new int[row];
- String [] firstAuthor = new String[row];
- String [] yr = new String[row];
- String [] jou = new String[row];
- String [] page = new String[row];
- int [] nRef = new int[row]; //Numero cit: utilizzato come lunghezza vettore citazioni
- for (int r =0; r<row; r++)
- {
- keyNo[r] = r;
- String [] temp = split(csv[r][0],',');
- firstAuthor[r] = temp[0];
- firstAuthor[r] = trim(firstAuthor[r]);
- //year yr
- yr[r] = trim(csv[r][1]);
- //journal jou
- String temp1 = csv[r][2];
- temp1 = temp1.replace(".", "");
- jou[r] = trim(temp1);
- //page page
- if (csv[r][4] != null){
- String [] temp2 = split(csv[r][4],'-');
- page[r] = temp2[0];
- }
- //number of References nRef
- int refNo =0;
- for (int c = colCit; c<col; c++)
- {
- if (csv[r][c] != null)
- {
- String [] cell = split(csv[r][c],',');
- if (cell.length>2)
- {
- refNo = refNo+1;
- }
- }
- nRef[r] = refNo;
- }
- //TEST
- //println(keyNo[r] + "," + firstAuthor[r] + ", yr: " + yr[r] + ", jou: " + jou[r] + ", page: " + page[r] + ", # ref: " + nRef[r]);
- }
- String [] correggiErrore = match(firstAuthor[0],wordRegex);
- print (correggiErrore[0]);
- firstAuthor[0] = correggiErrore[0];
- //
- //
- // CREATE ARRAYS OF CITATIONS (auth, yr, jou, page) FOR EACH ROW
- //
- //
- String [] authorM;
- String [] yrM;
- String [] jouM;
- String [] pageM;
- int [] KeyCit; //qui inserisco le chiavi degli articoli citati
- //println("REFERENZA: "+ nRef[1]);
- for (int r = 0; r<1; r++){
- int noRef = nRef[r]; // numero referenza
- authorM = new String [noRef]; // definire la lunghezza del vettore
- yrM = new String [noRef];
- jouM = new String [noRef];
- // fill vectors
- int position = 0; //position of the citation in the vector
- for (int c = colCit; c<col; c++) {
- if (csv[r][c] != null)
- {
- String [] cell = split(csv[r][c],',');
- if (cell.length>2)
- {
- cell[0]= trim(cell[0]); //remove white spaces at the beginning of the string
- cell[0] = cell[0].replaceAll("\\s", "#");
- String [] tempAuthor = split(cell[0],'#');
- String tempYr = trim(cell[1]);
- String tempJou = trim(cell[2]);
- //println(tempAuthor[0]);
- authorM [position] = tempAuthor[0];
- yrM[position] = tempYr;
- jouM[position] = tempJou;
- position = position +1;
- }
- }
- }
- //println("AUTORI CITATI "+authorM[1]+","+yrM[1]+","+jouM[1]);
- int comuni = 0;
- // MATCHING VALUES (and decide whether to add new lines)
- for (int cit = 0; cit<noRef; cit++)
- {
- String [] temp1 = match(authorM[cit],wordRegex);
- println("citation author: " + temp1[0]);
- for (int paper =0; paper<1; paper++)
- {
- String [] temp = match(firstAuthor[paper],wordRegex);
- println("article author: " + temp[0]);
- println(temp1[0] == temp[0] + ",");
- //if (common[0] != null)
- //{
- // println(common[0]);
- //}
- //String [] temp= match(authorM[cit],firstAuthor[paper]); //&& yrM[cit] == yr[paper] && jouM[cit] == jou[paper])
- // print(temp[0]);
- /*
- if (temp[0] != null){
- comuni = comuni +1;
- }
- */
- }
- }
- }
1