large-scale data visualization
in
Programming Questions
•
5 months ago
Hi all,
I have a large scale data (~2,000,000) lines in a text file. Each line reads like this:
id hr mt t x y
where id is a number from 1 to ~10,000 representing a particular vehicle in the fleet, hr and mt represent the time (hour and minute), t =hr*60+mt, x and y are the location of the vehicle at this time. For example:
1 0 0 0 x1 y1
1 0 10 10 x2 y2
showing vehicle #1 is at [x1,y1] when time is 00:00 and is at [x2,y2] when time is 00:10.
The text file is sorted according to vehicle id then time (fourth column), such as:
/////////////////////////////////////////////////////////////
1 15 36 936 116.5117 39.9212
1 15 46 946 116.5114 39.9388
....
2 13 33 813 116.3642 39.8878
...
9611 23 48 1428 116.4150 40.0216
/////////////////////////////////////////////////////////
I worked with Processing to visualize the trajectories of these vehicles. Essentially I created a class "car". Each object of the car class stores the portion of data from the large text file (e.g., all lines starting with same id). Then the object will determine if it is running according to the time (global variable, from 00:00 to 24:00). If it runs, then calculate the position of next move and draw a line between the current position and the next position.
When I let the program runs, it doesn't show any lines on the screen, until the time reaches 24:00, then all lines are shown on the screen at once. First I thought it was because the data were too large. But using a small portion of it (100, 1,000 lines instead of 2,000,000 lines), it still does not show any visualizations until the end. I couldn't figure out what was the reason. Was it because I used class? I post the code here and hope you guys can help me take a look. Thanks a lot.
// raw data: X3 (id from 1, hr, mt, time from 1, width, height) sorted by id and then time
int control=0; //0->stop; 1->pause; 2->start
int time=1; //from 1 to largest time
int time_limit=30;
int time_max=630;
car[] taxi;
void setup(){
size(800,800);
background(0);
smooth();
frameRate(1);
loading();
loadingData();
background(0);
draw_buttons();
}
void draw(){
MouseControl();
if (control==2){
//start
while (time<=time_max){
fill(0,50);
noStroke();
rect(0,30,width,height-30);
strokeWeight(1);
stroke(10,250,250);
for (int i=0; i<taxi.length; i++){
taxi[i].update_run();
taxi[i].move();
taxi[i].update_idx();
if (taxi[i].run==true){
line(taxi[i].source.x,taxi[i].source.y,taxi[i].target.x,taxi[i].target.y);
}
}
timer(time);
time++;
}
control=1;
}
if (control==0){
//end
time=1;
background(0);
draw_buttons();
}
}
void timer(int f){
textSize(20);
textAlign(CENTER,TOP);
stroke(0);
fill(0);
rect(0,0,width-85,50);
fill(255);
String[] tm=new String[2];
tm[1]=str((30+time)-60*floor((30+time)/60));
tm[0]=str(13+floor((30+time)/60));
for (int i=0; i<tm.length; i++){
if (tm[i].length()==1){
tm[i]="0"+tm[i];
}
}
text(tm[0]+":"+tm[1],width/2,30);
}
void loadingData(){
String[] raw=loadStrings("Sa3.txt");
String[] col=splitTokens(raw[raw.length-1]);
float nn=float(col[0]);
int n=int(nn);
taxi= new car[n];
int j=0;
int test=1;
int end=0;
for (int i=0; i<n; i++){
int start=j;
while (((i+1)==test)&&(j<raw.length-1)){
j++;
col=splitTokens(raw[j]);
float temp=float(col[0]);
test=int(temp);
}
if (j!=raw.length-1){
end=j-1;
}
else{
end=j;
}
float[][] data=new float[end-start+1][5];
for (int k=0; k<=end-start; k++){
col=splitTokens(raw[k+start]);
data[k][0]=float(col[1]);
data[k][1]=float(col[2]);
data[k][2]=float(col[3]);
//data[k][3]=width*0.9*(float(col[4])-116.05)/0.69+20;
//data[k][4]=height*0.9*(float(col[5])-116.05)/0.69+20;
data[k][3]=width*0.9*(float(col[4])-114.0891)/3.9016+20;
data[k][4]=height*0.9*(float(col[5])-37.1406)/3.9016+20;
//data[k][3]=float(col[4]);
//data[k][4]=float(col[5]);
}
taxi[i]=new car(data);
}
}
class car{
float[][] data;//data: (hr, mt, continouse time, width, height)
int idx=1; //time is between data[idx-1][2] and data[idx][2]
boolean run=false;
PVector source=new PVector(0,0);
PVector target=new PVector(0,0);
car(float[][] t_data){
data=t_data;
}
void update_run(){
if ((idx>=data.length)||(time<=data[0][2])){
run=false;
}
else{
if ((data[idx][2]-data[idx-1][2])>time_limit){
run=false;
}
else{
run=true;
}
}
}
void move(){
if (run==true){
/////////////////////////
float dx=(data[idx][3]-data[idx-1][3])/(data[idx][2]-data[idx-1][2]);
float dy=(data[idx][4]-data[idx-1][4])/(data[idx][2]-data[idx-1][2]);
source.x=data[idx-1][3]+(time-data[idx-1][2]-1)*dx;
target.x=data[idx-1][3]+(time-data[idx-1][2])*dx;
source.y=data[idx-1][4]+(time-data[idx-1][2]-1)*dy;
target.y=data[idx-1][4]+(time-data[idx-1][2])*dy;
}
}
void update_idx(){
if ((idx<data.length)&&(time==data[idx][2])){
idx++;
}
}
}
void loading(){
textSize(20);
textAlign(CENTER,CENTER);
stroke(255);
text("loading ...",width/2,height/2);
}
void draw_buttons(){
//three squares, width=height=20
stroke(255);
strokeWeight(1);
fill(0);
pushMatrix();
translate((width-80),5);
rect(0,0,20,20);//start
triangle(5,5,5,15,15,10);
rect(20,0,20,20);//pause
line(27,5,27,15);
line(33,5,33,15);
rect(40,0,20,20);//stop
rect(45,5,10,10);
popMatrix();
textSize(20);
textAlign(CENTER,TOP);
//stroke(255);
fill(255);
text("00:00",width/2,30);
}
void MouseControl(){
//control: 0->stop; 1->pause; 2->start
int x=width-80;
int y=5; //top-left point of the three-botton block
if ((mouseX>x)&&(mouseX<(x+20))&&(mouseY>y)&&(mouseY<(y+20))){
control=2;
}
if ((mouseX>(x+20))&&(mouseX<(x+40))&&(mouseY>y)&&(mouseY<(y+20))){
control=1;
}
if ((mouseX>(x+40))&&(mouseX<(x+60))&&(mouseY>y)&&(mouseY<(y+20))){
control=0;
}
}
I have a large scale data (~2,000,000) lines in a text file. Each line reads like this:
id hr mt t x y
where id is a number from 1 to ~10,000 representing a particular vehicle in the fleet, hr and mt represent the time (hour and minute), t =hr*60+mt, x and y are the location of the vehicle at this time. For example:
1 0 0 0 x1 y1
1 0 10 10 x2 y2
showing vehicle #1 is at [x1,y1] when time is 00:00 and is at [x2,y2] when time is 00:10.
The text file is sorted according to vehicle id then time (fourth column), such as:
/////////////////////////////////////////////////////////////
1 15 36 936 116.5117 39.9212
1 15 46 946 116.5114 39.9388
....
2 13 33 813 116.3642 39.8878
...
9611 23 48 1428 116.4150 40.0216
/////////////////////////////////////////////////////////
I worked with Processing to visualize the trajectories of these vehicles. Essentially I created a class "car". Each object of the car class stores the portion of data from the large text file (e.g., all lines starting with same id). Then the object will determine if it is running according to the time (global variable, from 00:00 to 24:00). If it runs, then calculate the position of next move and draw a line between the current position and the next position.
When I let the program runs, it doesn't show any lines on the screen, until the time reaches 24:00, then all lines are shown on the screen at once. First I thought it was because the data were too large. But using a small portion of it (100, 1,000 lines instead of 2,000,000 lines), it still does not show any visualizations until the end. I couldn't figure out what was the reason. Was it because I used class? I post the code here and hope you guys can help me take a look. Thanks a lot.
// raw data: X3 (id from 1, hr, mt, time from 1, width, height) sorted by id and then time
int control=0; //0->stop; 1->pause; 2->start
int time=1; //from 1 to largest time
int time_limit=30;
int time_max=630;
car[] taxi;
void setup(){
size(800,800);
background(0);
smooth();
frameRate(1);
loading();
loadingData();
background(0);
draw_buttons();
}
void draw(){
MouseControl();
if (control==2){
//start
while (time<=time_max){
fill(0,50);
noStroke();
rect(0,30,width,height-30);
strokeWeight(1);
stroke(10,250,250);
for (int i=0; i<taxi.length; i++){
taxi[i].update_run();
taxi[i].move();
taxi[i].update_idx();
if (taxi[i].run==true){
line(taxi[i].source.x,taxi[i].source.y,taxi[i].target.x,taxi[i].target.y);
}
}
timer(time);
time++;
}
control=1;
}
if (control==0){
//end
time=1;
background(0);
draw_buttons();
}
}
void timer(int f){
textSize(20);
textAlign(CENTER,TOP);
stroke(0);
fill(0);
rect(0,0,width-85,50);
fill(255);
String[] tm=new String[2];
tm[1]=str((30+time)-60*floor((30+time)/60));
tm[0]=str(13+floor((30+time)/60));
for (int i=0; i<tm.length; i++){
if (tm[i].length()==1){
tm[i]="0"+tm[i];
}
}
text(tm[0]+":"+tm[1],width/2,30);
}
void loadingData(){
String[] raw=loadStrings("Sa3.txt");
String[] col=splitTokens(raw[raw.length-1]);
float nn=float(col[0]);
int n=int(nn);
taxi= new car[n];
int j=0;
int test=1;
int end=0;
for (int i=0; i<n; i++){
int start=j;
while (((i+1)==test)&&(j<raw.length-1)){
j++;
col=splitTokens(raw[j]);
float temp=float(col[0]);
test=int(temp);
}
if (j!=raw.length-1){
end=j-1;
}
else{
end=j;
}
float[][] data=new float[end-start+1][5];
for (int k=0; k<=end-start; k++){
col=splitTokens(raw[k+start]);
data[k][0]=float(col[1]);
data[k][1]=float(col[2]);
data[k][2]=float(col[3]);
//data[k][3]=width*0.9*(float(col[4])-116.05)/0.69+20;
//data[k][4]=height*0.9*(float(col[5])-116.05)/0.69+20;
data[k][3]=width*0.9*(float(col[4])-114.0891)/3.9016+20;
data[k][4]=height*0.9*(float(col[5])-37.1406)/3.9016+20;
//data[k][3]=float(col[4]);
//data[k][4]=float(col[5]);
}
taxi[i]=new car(data);
}
}
class car{
float[][] data;//data: (hr, mt, continouse time, width, height)
int idx=1; //time is between data[idx-1][2] and data[idx][2]
boolean run=false;
PVector source=new PVector(0,0);
PVector target=new PVector(0,0);
car(float[][] t_data){
data=t_data;
}
void update_run(){
if ((idx>=data.length)||(time<=data[0][2])){
run=false;
}
else{
if ((data[idx][2]-data[idx-1][2])>time_limit){
run=false;
}
else{
run=true;
}
}
}
void move(){
if (run==true){
/////////////////////////
float dx=(data[idx][3]-data[idx-1][3])/(data[idx][2]-data[idx-1][2]);
float dy=(data[idx][4]-data[idx-1][4])/(data[idx][2]-data[idx-1][2]);
source.x=data[idx-1][3]+(time-data[idx-1][2]-1)*dx;
target.x=data[idx-1][3]+(time-data[idx-1][2])*dx;
source.y=data[idx-1][4]+(time-data[idx-1][2]-1)*dy;
target.y=data[idx-1][4]+(time-data[idx-1][2])*dy;
}
}
void update_idx(){
if ((idx<data.length)&&(time==data[idx][2])){
idx++;
}
}
}
void loading(){
textSize(20);
textAlign(CENTER,CENTER);
stroke(255);
text("loading ...",width/2,height/2);
}
void draw_buttons(){
//three squares, width=height=20
stroke(255);
strokeWeight(1);
fill(0);
pushMatrix();
translate((width-80),5);
rect(0,0,20,20);//start
triangle(5,5,5,15,15,10);
rect(20,0,20,20);//pause
line(27,5,27,15);
line(33,5,33,15);
rect(40,0,20,20);//stop
rect(45,5,10,10);
popMatrix();
textSize(20);
textAlign(CENTER,TOP);
//stroke(255);
fill(255);
text("00:00",width/2,30);
}
void MouseControl(){
//control: 0->stop; 1->pause; 2->start
int x=width-80;
int y=5; //top-left point of the three-botton block
if ((mouseX>x)&&(mouseX<(x+20))&&(mouseY>y)&&(mouseY<(y+20))){
control=2;
}
if ((mouseX>(x+20))&&(mouseX<(x+40))&&(mouseY>y)&&(mouseY<(y+20))){
control=1;
}
if ((mouseX>(x+40))&&(mouseX<(x+60))&&(mouseY>y)&&(mouseY<(y+20))){
control=0;
}
}
1