Loading...
Logo
Processing Forum

Dear all readers,


I'm currently working on a tool that looks up related topics with the Wikipedia API. 

The idea:

Search for one topic to get back its related topics. Each of these related topics should then be used as the keyword for a new search, which in turn returns its own related topics.

The following code does only the first part (getting all the results for one keyword):
Copy code

  1. XML myXML;
  2. XML[] links;

  3. String keyword = "anders";

  4. String query;

  5. String search1;
  6. String search2;



  7. XML myXML2;
  8. XML[] links2;





  9. void setup() {

  10. }


  11. void draw() {

  12.   query = "http://nl.wikipedia.org/w/api.php?titles="+keyword+"&format=xml&action=query&prop=links&pllimit=500";
  13.   
  14.   try {
  15.     myXML = loadXML(query);
  16.     links = myXML.getChildren("query/pages/page/links/pl"); 

  17.     for (int i = 0; i < links.length; i++) {
  18.       String title = links[i].getString("title");
  19.       
  20.      search1  = search1+","+title;


  21.     }
  22.   } 
  23.   catch (Exception exception) {
  24.     println(exception);
  25.   }
  26.   
  27. println(search1);
  28.   
  29.     noLoop();
  30. }

This code should get the results for the first word and then use those results to search again, so that every result of the first search becomes a keyword for a new search.


Copy code


  1. XML myXML;
  2. XML[] links;

  3. String keyword = "anders";

  4. String query;

  5. String search1;
  6. String search2;



  7. XML myXML2;
  8. XML[] links2;





  9. void setup() {

  10. }


  11. void draw() {

  12.   query = "http://nl.wikipedia.org/w/api.php?titles="+keyword+"&format=xml&action=query&prop=links&pllimit=500";
  13.   
  14.   try {
  15.     myXML = loadXML(query);
  16.     links = myXML.getChildren("query/pages/page/links/pl"); 

  17.     for (int i = 0; i < links.length; i++) {
  18.       String title = links[i].getString("title");
  19.       
  20.      search1  = search1+","+title;


  21.     }
  22.   } 
  23.   catch (Exception exception) {
  24.     println(exception);
  25.   }
  26.   
  27.   
  28.   String[] search1_results = split(search1, ',');

  29.  

  30.    
  31.     for (int i = 0; i < search1_results.length; i++) {
  32.       

  33.     query = "http://nl.wikipedia.org/w/api.php?titles="+search1_results[i]+"&format=xml&action=query&prop=links&pllimit=500";

  34.        myXML2 = loadXML(query);
  35.        links = myXML.getChildren("query/pages/page/links/pl"); 

  36.        for (int i2 = 0; i2 < links.length; i2++) {

  37.       String title = links[i2].getString("title");
  38.       
  39.   
  40.       search2  = search1+","+title;
  41.    
  42.           }
  43.     }
  44.   
  45.   
  46.   
  47.       println(search1);

  48.   
  49.   
  50. }


The code above, however, is not working. I can't see what I'm doing wrong at this point. Hope you can help me out with this code.


Best,


Joshua


Replies(7)


here is a working version

it still throws a lot of exceptions

I caught them by
  • checking search1_results[i] for ""
  • and myXML2 for null

Copy code
  1. // This code should get the result of the first word, and then uses the results to search again, so every result of the first search should be a keyword to get results from.
  2. XML myXML;
  3. XML[] links;
  4. String keyword = "anders";
  5. String query;
  6. String search1;
  7. String search2;
  8. XML myXML2;
  9. XML[] links2 = new XML[0];
  10. void setup() {
  11.   noLoop();
  12. }
  13. void draw() {
  14.   query = "http://nl.wikipedia.org/w/api.php?titles="+keyword+"&format=xml&action=query&prop=links&pllimit=500";
  15.   try {
  16.     myXML = loadXML(query);
  17.     links = myXML.getChildren("query/pages/page/links/pl");
  18.     for (int i = 0; i < links.length; i++) {
  19.       String title = links[i].getString("title");
  20.       search1  = search1+","+title;
  21.     }
  22.   }
  23.   catch (Exception exception) {
  24.     println(exception);
  25.   }
  26.   String[] search1_results = split(search1, ',');
  27.   // second loop
  28.   search2  = "______________________________________\n";
  29.   for (int i = 0; i < search1_results.length; i++) {
  30.     // println ( search1_results[i] );
  31.     search2  = search2 + "For "+search1_results[i]+":\n";
  32.     if (!search1_results[i].equals("")) {
  33.       query = "http://nl.wikipedia.org/w/api.php?titles="+search1_results[i]+"&format=xml&action=query&prop=links&pllimit=500";
  34.       myXML2 = loadXML(query);
  35.       if (myXML2!=null) {
  36.         links = myXML2.getChildren("query/pages/page/links/pl");
  37.         for (int i2 = 0; i2 < links.length; i2++) {
  38.           String title = links[i2].getString("title");
  39.           search2  = search2+","+title;
  40.         } // for
  41.         search2  = search2+"\n";
  42.       } // if
  43.       else println ("xml was null");
  44.     } // if
  45.     else println ("search1_results[i] was empty");
  46.   } // for
  47.   println(search2);
  48. }

Greetings, Chrisir   

If you need an answer, please send me a personal message since this forum doesn't notify.
thank you! 

I also cleared out the exceptions:

Copy code
  1. // This code should get the result of the first word, and then uses the results to search again, so every result of the first search should be a keyword to get results from.
  2. XML myXML;
  3. XML[] links;
  4. String keyword = "anders";
  5. String query;
  6. String search1;
  7. String search2;
  8. XML myXML2;
  9. XML[] links2 = new XML[0];
  10. void setup() {
  11.   noLoop();
  12. }
  13. void draw() {
  14.   query = "http://nl.wikipedia.org/w/api.php?titles="+keyword+"&format=xml&action=query&prop=links&pllimit=500";
  15.   try {
  16.     myXML = loadXML(query);
  17.     links = myXML.getChildren("query/pages/page/links/pl"); 
  18.     for (int i = 0; i < links.length; i++) {
  19.       String title = links[i].getString("title");
  20.       search1  = search1+","+title;
  21.     }
  22.   } 
  23.   catch (Exception exception) {
  24.     println(exception);
  25.   }
  26.   String[] search1_results = split(search1, ',');
  27.   // second loop
  28.   search2  = "______________________________________\n";
  29.   for (int i = 0; i < search1_results.length; i++) {
  30.     // println ( search1_results[i] );
  31.     search2  = search2 + "For "+search1_results[i]+":\n"; 
  32.     if (!search1_results[i].equals("")) { 
  33.       query = "http://nl.wikipedia.org/w/api.php?titles="+search1_results[i]+"&format=xml&action=query&prop=links&pllimit=500";
  34.       
  35.   query = query.replaceAll(" ","&20");
  36.       
  37.       myXML2 = loadXML(query);
  38.       if (myXML2!=null) {
  39.         links = myXML2.getChildren("query/pages/page/links/pl"); 
  40.         for (int i2 = 0; i2 < links.length; i2++) {
  41.           String title = links[i2].getString("title");
  42.           
  43.             title = title.replaceAll("Wikipedia:Doorverwijspagina","");
  44.           
  45.           search2  = search2+","+title;
  46.         } // for 
  47.         search2  = search2+"\n";
  48.       } // if
  49.       else println ("xml was null");
  50.     } // if
  51.     else println ("search1_results[i] was empty");
  52.   } // for 
  53.   println(search2);
  54. }

This was the trick:

  query = query.replaceAll(" ","&20");

The wikisearch doesn't support spaces. They need to be properly replaced by &20. 


Thank you so much!

nice!

I also have an entry with your program
For W?adys?aw Anders:

Maybe you need to check how the ? comes in and replace it equally?
like   query = query.replaceAll(" ","&20");

could be & or å or so.....

ah, I looked it up, it's Władysław
so it's the ł here

but it's wrong in the xml already, so no use

  query = query.replaceAll(" ","&20");

no idea...

Greetings, Chrisir     


I think it's an encoding error. There's a fix for this. I had the same problem a year ago; I need to look up how I solved it.


Still one error though:

First query is NULL, so it looks for everything with NULL in wiki. Weird. 
The &20 sounds suspicious. In HTML encoding, a space would be &#32; (or &#x20;); in URL encoding (more likely for a query), it would be %20.
The page is probably encoded in UTF-8, so the special characters are decomposed in several 8-bit chars, and recomposed to UTF-16 Java chars thereafter. But if the font used to display them doesn't support this code page, you won't see them properly.
About the null query, I don't know where you see that. Can you show the current state of your code, if different from above? Is the whole query variable null, or only the keyword? Since it is initialized at declaration, I don't see why it would be null.

he is searching for null (as a string: the keyword has the String entry "null")
so maybe the api is giving "null" at 1st entry?
It's a String, not a var that is not defined imho



______________________________________
For null:
,/dev/null,ACID,Aggregaatfunctie,Alter (SQL),Axioma's van Armstrong,CRUD,C (programmeertaal),Commit,Create (SQL),Cursor (databank),Data Control Language,Data Definition Language,Data Manipulation Language,Databankmodel,Database,Databasemanagementsysteem,Databasenormalisatie,Datatype,Delete (SQL),Drop (SQL),From (SQL),Index (databank),Informatica,Insert (SQL),Join (SQL),Kandidaatsleutel,Lijst van relationele databases,Merge (SQL),Opgeslagen procedure,Partitie (databank),Pointer (programmeerconcept),Primaire sleutel,Programmeertaal,Referentiële integriteit,Relatie (databank),Relationeel model,Relationele algebra,Relationele database,Rollback (dataopslag),SQL,Select (SQL),Tabel (databank),Transactie (dataopslag),Trigger (databank),Truncate (SQL),Union (SQL),Unix,Update (SQL),Variabele (informatica),View (databank),Vreemde sleutel,Where (SQL)
For Anders (voornaam):
,Anders (voornaam),Anders Hechtel-Eksel,Andersen,Anderson,Andrea Anders,Andrea Anders (James Bond),Brouwerij Anders!,David Anders,Hans Anders,Hendrik Anders,Samuel Anders,Thomas Anders,Wiktionary,William Anders,W?adys?aw Anders,
For Anders Hechtel-Eksel:



as if null would be a search result:
see line 34



Copy code
  1. // This code should get the result of the first word, and then uses the results to search again,
  2. // so every result of the first search should be a keyword to get results from.
  3. XML myXML;
  4. XML[] links;
  5. String keyword = "anders";
  6. String query;
  7. String search1;
  8. String search2;
  9. XML myXML2;
  10. XML[] links2 = new XML[0];
  11. void setup() {
  12.   noLoop();
  13. }
  14. void draw() {
  15.   query = "http://nl.wikipedia.org/w/api.php?titles="+keyword+"&format=xml&action=query&prop=links&pllimit=500";
  16.   try {
  17.     myXML = loadXML(query);
  18.     links = myXML.getChildren("query/pages/page/links/pl");
  19.     //println(links );
  20.     for (int i = 0; i < links.length; i++) {
  21.       String title = links[i].getString("title");
  22.       //println(title);
  23.       search1  = search1+","+title;
  24.     }
  25.   }
  26.   catch (Exception exception) {
  27.     println(exception);
  28.   }
  29.   String[] search1_results = split(search1, ',');
  30.   // second loop
  31.   search2  = "______________________________________\n";
  32.   for (int i = 0; i < search1_results.length; i++) {
  33.     // println ( search1_results[i] );
  34.     search2  = search2 + "For "+search1_results[i]+":\n";
  35.     if (!search1_results[i].equals("")) {
  36.       query = "http://nl.wikipedia.org/w/api.php?titles="+search1_results[i]+"&format=xml&action=query&prop=links&pllimit=500";
  37.       query = query.replaceAll(" ", "&20");
  38.       myXML2 = loadXML(query);
  39.       if (myXML2!=null) {
  40.         links = myXML2.getChildren("query/pages/page/links/pl");
  41.         for (int i2 = 0; i2 < links.length; i2++) {
  42.           String title = links[i2].getString("title");
  43.           title = title.replaceAll("Wikipedia:Doorverwijspagina", "");
  44.           search2  = search2+","+title;
  45.         } // for
  46.         search2  = search2+"\n";
  47.       } // if
  48.       else println ("xml was null");
  49.     } // if
  50.     else println ("search1_results[i] was empty");
  51.   } // for
  52.   println(search2);
  53. }



Greetings, Chrisir


Ah, he wrote "first query", so the wording was ambiguous.
The null is there because search1 is not initialized, so it is null when entering the loop, in line 23. In Java, concatenating a null String with another string produces the text "null", which then becomes the first keyword after the split.
I suggest making an array of size  links.length , filling it with the titles, and then using join() to build search1.
Uh, no, I don't see the point of building a string from an array only to split it afterwards to get an array again!
Just use the resulting array as search1_results.