Combining Voice Recognition Code with Sound Visualisation
in
Contributed Library Questions
•
1 year ago
Hi All,
I'm in the process of combining a sound visualiser called fft (
http://www.youtube.com/watch?v=RBVseisC-VQ) and the STT library (
http://stt.getflourish.com/)
Initially I had taken all of the main functions and combined the code, which worked, although the speech recognition did not record and produce in a typographic format as it should.
I went back and cleaned up the code, however now I have received an error. I have attached a screenshot of both windows in order to display what I am working with.
I think where there may be an error due to the audioinput doing two things at one time, although I am unsure how to resolve this. Also, I would like the text on the visualisation to be deleted.
If anyone could help that would be greatly appreciated.
Kind regards,
- /**
- z_fft by zambari
- ** parts based on* Get Line In by Damien Di Fede.
- *
- early, not very developed version
- */
- import ddf.minim.analysis.*;
- import ddf.minim.*;
- import processing.opengl.*;
- import com.getflourish.stt.*;
- STT stt;
- String result;
- PFont fontA;
- Minim minim;
- Minim minim2;
- FFT fft;
- FFT fft2;
- AudioInput in;
- AudioInput in2;
- int nrOfIterations=100; //=29 fps on windows
- int iterationDistance=80;
- int bufferSizeSmall=512;
- int fftRatio=16; // how many times bigger is the big buffer for detailed analisis
- int bufferSizeBig=bufferSizeSmall*fftRatio;
- int fftHistSize=512;
- float[] logPos=new float[fftHistSize];
- float[][] fftHistory=new float[nrOfIterations][fftHistSize];
- int nextBuffer=0;
- Zcam myCamera;
- LFO lfo1;
- void setup()
- {
- size(1024, 576, OPENGL);
- textFont(createFont("SanSerif", 27));
- minim = new Minim(this);
- minim2 = new Minim(this);
- minim.debugOn();
- minim2.debugOn();
- in = minim.getLineIn(Minim.STEREO, bufferSizeBig);
- in2 = minim2.getLineIn(Minim.STEREO, bufferSizeSmall);
- fft = new FFT(in.bufferSize(), in.sampleRate());
- fft2 = new FFT(in2.bufferSize(), in2.sampleRate());
- myCamera = new Zcam();
- lfo1=new LFO(6000);
- addMouseWheelListener(new java.awt.event.MouseWheelListener() {
- public void mouseWheelMoved(java.awt.event.MouseWheelEvent evt) {
- mouseWheel(evt.getWheelRotation());
- }});
- for (int i=0;i<fftHistSize;i++) { logPos[i]=log(i)*40;};
- float fftMin=log(1);
- float fftMax=1/log(bufferSizeBig);
- stt = new STT(this);
- stt.enableDebug();
- stt.setLanguage("en");
- stt.enableAutoRecord();
- }
- void draw()
- {
- background(0);
- text(result, mouseX, mouseY);
- }
- // Method is called if transcription was successfull
- void transcribe (String utterance, float confidence)
- {
- println(utterance);
- result = utterance;
- }
- {
- myCamera.placeCam();
- scale(0.1);
- background(color(0,0,0,15));
- stroke(255);
- // draw the waveforms
- // for(int i = 0; i < in.bufferSize() - 1; i++)
- // {
- // line(i, 50 + in.left.get(i)*50, i+1, 50 + in.left.get(i+1)*50);
- // line(i, 150 + in.right.get(i)*50, i+1, 150 + in.right.get(i+1)*50);
- // }
- pushMatrix();
- scale(4);
- for(int i = 0; i < in2.bufferSize() - 1; i++)
- {
- line(i, 200+50 + in2.left.get(i)*50, i+1, 200+60 + in2.left.get(i+1)*50);
- line(i, 200+80 + in2.right.get(i)*50, i+1, 200+90 + in2.right.get(i+1)*50);
- }
- popMatrix();
- fft.forward(in.mix);
- fft2.forward(in2.mix);
- //fft2.forward(in2.mix);
- //void logAverages(int minBandwidth, int bandsPerOctave)
- //fft.logAverages(10, 2); //use once??
- // scene.beginDraw();
- float blendratio;
- for (int k=nrOfIterations-1;k>0;k--)
- // for(int i = 0; i < 172; i++) //buahahah dirty!!!
- for(int i = 0; i < 272; i++) //buahahah dirty!!!
- {
- // arrayCopy(fftHistory[k-1], fftHistory[k]);
- fftHistory[k][i]=fftHistory[k][i]*0.5+fftHistory[k-1][i]*0.5;
- }
- int n=0;
- for(int i = 1; i <fftHistSize ; i++)
- { blendratio=(iÿtRatio)/(fftRatio*1.0);
- fftHistory[0][n]=(fft2.getBand(i/(fftRatio))*(1-blendratio)+
- fft2.getBand(i/(fftRatio)+1)*(blendratio));
- // fftHistory[0][n]+=log(fftHistory[0][n])*10;
- fftHistory[0][n]=fft.getBand(i)*4;
- n++;
- // fftHistory[0][i]=fft.getBand(floor(map(1/log(i),fftMin,fftMax,0,bufferSizeBig)))*9;
- // fftHistory[0][i]=fft.getBand(i)*2;
- // line(i*20,(int)-fft.getBand(i)*4,(i+1)*20,(int)-fft.getBand(i+1)*4);
- if (i>50) i++;
- if (i>100) i++;
- if (i>200) i++;
- if (i>300) i++;
- if (i>400) i++;
- if (i>500) i++;
- }
- println(frameRate);
- float x=0;
- float oldx=0;
- for (int k=1;k<nrOfIterations;k++)
- {
- stroke(255-255*k/nrOfIterations);
- for(int i = 0; i < n-1; i++)
- { // fftHistory[k][i]=fftHistory[k-1][i]; // there must be a quicker way // circular buffer bayve?
- // line(i, -fftHistory[k-1][i],-k*30, i, -fftHistory[k][i],-k*20);
- oldx=x;
- // x=log(i)*40.0;
- x=logPos[i];
- // line(x*20, -fftHistory[k][i],-k*50, (x+1)*20, -fftHistory[k][i+1],-k*50);
- line(oldx*20, -fftHistory[k][i],-k*iterationDistance, x*20,-fftHistory[k][i+1],-k*iterationDistance);
- if (i==235)
- {
- // line(i*20,10,i*20,-20);
- }
- // if (i==0)
- // { line(i*20, -fftHistory[k-1][i],-k*50, (i)*20, -fftHistory[k][i],-(k+1)*50);
- // }
- if ((i==0)&&(k==1))
- text(i,x*20,10);
- }
- // line(i*20, -fftHistory[k][i],-k*30, i*20, -fftHistory[k][i+1],-k*30);
- }
- fill(255);
- resetMatrix();
- text("FFt1 val " + "ddD", 5, 20);
- text("The window being used is: ", 5, 40);
- // fftLin.linAverages(30);
- // fftLog = new FFT(jingle.bufferSize(), jingle.sampleRate());
- // calculate averages based on a miminum octave width of 22 Hz
- // split each octave into three bands
- // this should result in 30 averages
- }
- void stop()
- {
- //original comment : always close Minim audio classes when you are done with them
- in.close();
- minim.stop();
- super.stop();
- }
1