An audio beacon is generating different frequencies between 18 kHz and 19 kHz. I'm trying to record all of the frequencies using the AudioTrack API. I referred to this link: How to get frequency from fft result?. After applying the Hanning window function, all of my data is 0. 1) How do I apply a Hanning window? 2) How do I filter frequencies? 3) I recorded audio covering a range of frequencies and saved it in .wav format. I read that audio file and convert it to frequencies, but I only get the high frequency. How can I get multiple peak frequencies?
int fftSize = 1024;
/**
 * Reads PCM samples from {@code audioRecord} and passes every completely
 * filled frame of {@code 2 * fftSize} samples to {@code fft(short[])} for as
 * long as {@code started} is true.
 *
 * <p>{@code AudioRecord.read} may return fewer samples than requested (or 0),
 * so samples are accumulated until the frame is full; the FFT therefore never
 * runs on a buffer that still contains samples from the previous frame.
 */
public void startRecord() {
    short[] frame = new short[2 * fftSize];
    int filled = 0;
    while (started) {
        int readCount = audioRecord.read(frame, filled, frame.length - filled);
        if (readCount < 0) {
            // Negative results are AudioRecord error codes: discard the partial
            // frame and try again, exactly as the loop did before.
            filled = 0;
            continue;
        }
        filled += readCount;
        if (filled == frame.length) {
            fft(frame);
            filled = 0;
        }
    }
}
/**
 * Applies a Hann window to one frame of 16-bit PCM samples, runs a complex
 * forward FFT over it and logs the frequency of the strongest bin below the
 * Nyquist frequency.
 *
 * <p>The windowed samples are written straight into the {@code double} FFT
 * work array. The previous version cast each windowed sample to {@code byte}
 * and stored it back into the input, which truncated the 16-bit values and
 * overwrote the caller's buffer; the input array is no longer modified.
 *
 * @param bufferByte one frame of PCM samples; frames with fewer than two
 *                   samples are ignored
 */
public void fft(short[] bufferByte) {
    final int sampleRateHz = 44100;
    final int n = bufferByte.length;
    if (n < 2) {
        return; // nothing meaningful to analyse
    }

    DoubleFFT_1D fft1d = new DoubleFFT_1D(n);
    // Interleaved complex input: [re0, im0, re1, im1, ...]
    double[] spectrum = new double[2 * n];
    for (int i = 0; i < n; i++) {
        double hann = 0.5 * (1.0 - Math.cos(2.0 * Math.PI * i / n));
        spectrum[2 * i] = bufferByte[i] * hann;
        spectrum[2 * i + 1] = 0.0;
    }
    fft1d.complexForward(spectrum);

    // Only the first n / 2 bins are inspected; the input is real, so the
    // remaining bins mirror them.
    double maxMagnitude = -1;
    int maxIndex = -1;
    for (int i = 0; i < n / 2; i++) {
        double real = spectrum[2 * i];
        double imaginary = spectrum[2 * i + 1];
        double magnitude = Math.sqrt(real * real + imaginary * imaginary);
        if (magnitude > maxMagnitude) {
            maxMagnitude = magnitude;
            maxIndex = i;
        }
    }

    // long arithmetic: maxIndex * 44100 overflows int for large frames
    int freq = (int) ((long) maxIndex * sampleRateHz / n);
    Log.e("AudioBEacon", "---" + freq);
}
You have a bad cast here:
bufferByte[i] = (byte) (bufferByte[i] * ...
It should be:
bufferByte[i] = (short) (bufferByte[i] * ...
Related
in order to interpolate 2 values, I can use
// Returns the midpoint of a and b, i.e. linear interpolation at t = 0.5 only.
// The division is an integer division, so the result is truncated, and
// (a + b) can overflow for large int values.
lerp(int a, int b) {
return (a + b) / 2;
}
Now imagine I have an array (1, 30, 100, 300) and I want to interpolate it into an array of size N (N = 10, for example).
If N == 7, then:
1,15,30,65,100,200,300
I've no idea how to interpolate 4 values to be 10. I need a method that looks like:
interpolate(fina int[] input, final int newSize) {
int[] res = new int[newSize];
...
return res;
}
that works even on my example above with newSize of 7, 10 or whatever.
Any idea how to implement it?
SOLVED.
/**
 * Resamples {@code x} to {@code newLength} values by linear interpolation at
 * evenly spaced positions, keeping the first and the last input value.
 *
 * <p>Output sample {@code i} is taken at input position
 * {@code i * (x.length - 1) / (newLength - 1)}. For example, resampling
 * {@code {1, 30, 100, 300}} to 7 values yields
 * {@code {1, 15.5, 30, 65, 100, 200, 300}}. (The previous version copied
 * {@code x[0]} and {@code x[1]} verbatim and spread only the remaining inputs
 * over the output, so the spacing was not uniform.)
 *
 * @param x         the input samples; must contain at least one value
 * @param newLength the number of output samples
 * @return a new array of {@code newLength} values; {@code null} when
 *         {@code newLength <= 0} (kept for compatibility with existing
 *         callers). When {@code newLength == 1} the middle input element is
 *         returned; when {@code x} has a single element it is repeated.
 * @throws IllegalArgumentException if {@code newLength > 0} and {@code x} is empty
 */
public static double[] interpolate(double[] x, int newLength) {
    if (newLength <= 0) {
        return null;
    }
    final int n = x.length;
    if (n == 0) {
        throw new IllegalArgumentException("x must contain at least one sample");
    }

    double[] y = new double[newLength];
    if (newLength == 1) {
        // Pick the (1-based, rounded) middle element, clamped to the array.
        int middle = (int) Math.floor(n * 0.5 + 0.5);
        middle = Math.min(Math.max(1, middle), n);
        y[0] = x[middle - 1];
        return y;
    }
    if (n == 1) {
        for (int i = 0; i < newLength; i++) {
            y[i] = x[0];
        }
        return y;
    }

    final double step = (n - 1.0) / (newLength - 1.0);
    for (int i = 0; i < newLength - 1; i++) {
        double position = i * step;
        int lower = Math.min((int) position, n - 2); // position >= 0, so the cast floors
        double fraction = position - lower;
        // Copy exact grid hits so no arithmetic touches the neighbouring sample.
        y[i] = (fraction == 0.0)
                ? x[lower]
                : (1.0 - fraction) * x[lower] + fraction * x[lower + 1];
    }
    // Assign the end point directly so rounding in i * step can never miss it.
    y[newLength - 1] = x[n - 1];
    return y;
}
/**
 * Returns the largest element of {@code data}, skipping every NaN entry.
 *
 * @param data the values to scan
 * @return the greatest non-NaN value, or {@code Double.NaN} when the array is
 *         empty or holds only NaN values
 */
public static double max(double[] data) {
    double best = Double.NaN;
    for (double candidate : data) {
        boolean usable = !Double.isNaN(candidate);
        if (usable && (Double.isNaN(best) || candidate > best)) {
            best = candidate;
        }
    }
    return best;
}
/**
 * Returns the largest element of {@code data}.
 *
 * <p>The first element seeds the search, so an empty array fails with an
 * {@code ArrayIndexOutOfBoundsException}.
 *
 * @param data the values to scan; must hold at least one element
 * @return the greatest value in the array
 */
public static int max(int[] data) {
    int best = data[0];
    for (int idx = data.length - 1; idx >= 1; idx--) {
        best = Math.max(best, data[idx]);
    }
    return best;
}
Currently, I'm using JTransforms to perform FFTs on audio samples I duplicated off the MediaCodecAudioRenderer, customized with a hook to duplicate the decoded buffer, from an ExoPlayer player using an RTMP MediaSource.
What I got is a 4096 (or a 4608... yeah, some MP3s strangely had non-power-of-2 sample sizes and I don't know why)-length ByteBuffer. That's what I have to put into the FloatFFT_1D or the DoubleFFT_1D object, correct?
Right now my code is as follows:
crf.setHook((dupe, format) -> {
if(currentMediaSource == mMediaSourceAudio) {
byte[] data = new byte[dupe.limit()];
dupe.position(0);
dupe.get(data);
if(format != null) {
new FFTTask(data, format).execute();
}
Log.i("straight_from_renderer", data.length+" "+format);
}
});
...
...
private class FFTTask extends AsyncTask<Void, Void, float[]> {
byte[] bufferContents;
MediaFormat format;
FFTTask(byte[] samples, MediaFormat format) {
this.bufferContents = samples;
this.format = format;
}
float[] floatMe(short[] pcms) {
float[] floaters = new float[pcms.length];
for (int i = 0; i < pcms.length; i++) {
floaters[i] = pcms[i];
}
return floaters;
}
short[] shortMe(byte[] bytes) {
short[] out = new short[bytes.length / 2]; // will drop last byte if odd number
ByteBuffer bb = ByteBuffer.wrap(bytes);
for (int i = 0; i < out.length; i++) {
out[i] = bb.getShort();
}
return out;
}
float[] directFloatMe(byte[] bytes) {
float[] out = new float[bytes.length / 2]; // will drop last byte if odd number
ByteBuffer bb = ByteBuffer.wrap(bytes);
for (int i = 0; i < out.length; i++) {
out[i] = bb.getFloat();
}
return out;
}
private double db2(double r, double i, double maxSquared) {
return 5.0 * Math.log10((r * r + i * i) / maxSquared);
}
double[] convertToDb(double[] data, double maxSquared) {
data[0] = db2(data[0], 0.0, maxSquared);
int j = 1;
for (int i=1; i < data.length - 1; i+=2, j++) {
data[j] = db2(data[i], data[i+1], maxSquared);
}
data[j] = data[0];
return data;
}
#Override
protected float[] doInBackground(Void... voids) {
//WARNING: bufferContents is from a 2-channel 48k bitrate audio, so convert to mono first?
/*
byte[] oneChannel = new byte[bufferContents.length/2];
for(int i = 0; i < oneChannel.length; i+=2) {
oneChannel[i] = bufferContents[i*2+2];
oneChannel[i+1] = bufferContents[i*2+3];
}
*/
float[] dataAsFloats = floatMe(shortMe(bufferContents));
int fftLen = dataAsFloats.length/2;
fft = new FloatFFT_1D(fftLen);
fft.complexForward(dataAsFloats);
String log = "";
float[] magnitudes = new float[dataAsFloats.length/2];
float magMax = 0;
int maxIndex = 0;
float dominantFreq = 0;
for(int i = 0; i < dataAsFloats.length/2; i++) {
float re = dataAsFloats[2*i];
float im = dataAsFloats[2*i+1];
magnitudes[i] = (float)(Math.sqrt(re * re + im * im) / 1e7);
//log += re+" "+im+" "+magnitudes[i]+"\n";
if(magnitudes[i] > magMax) {
magMax = (float)(magnitudes[i]);
maxIndex = i;
}
}
dominantFreq = format.getInteger(MediaFormat.KEY_SAMPLE_RATE) * maxIndex / fftLen;
Log.i("fft_results", magMax+" "+dominantFreq);
return magnitudes;
}
#Override
protected void onPostExecute(float[] res) {
super.onPostExecute(res);
//fftListener.onFFTResultsAvailable(res);
caView.feedFFTMagnitudes(res);
}
}
I commented out the channel splitting code because I'm not sure whether to put that byte array in wholesale, or split it by channel then put the data from one of the channels in.
But then the final res values are really noisy - as in, the magnitudes are chaotic and display no obvious pattern as it should be from the usual audio analysis images - instead, it's just a tight zigzag, with high values from frequencies above 20000 Hz.
What am I doing wrong?
What I did wrong was the bytes-to-short conversion, turns out. Now I just use this function to convert two bytes to a short:
/**
 * Combines two bytes into one signed 16-bit sample.
 *
 * @param high the most significant byte (its sign carries over to the result)
 * @param low  the least significant byte, treated as unsigned
 * @return the sample {@code (high << 8) | (low & 0xff)} narrowed to a short
 */
private short getSixteenBitSample(byte high, byte low) {
    final int upperBits = high << 8;
    final int lowerBits = low & 0xff; // mask off the sign extension
    return (short) (upperBits | lowerBits);
}
Then cast the short to a float in the conversion array. THEN put that conversion array through the FFT.
I want to calculate Haralick features on Android using JFeatureLib (which is basically for Java), but I found out that Android has no implementation of ImageIO or BufferedImage, which the code below uses to calculate the Haralick features. These are only available in pure Java.
/**
 * Loads {@code test.jpg} from the classpath, runs the Haralick descriptor on
 * it and prints every resulting feature vector to standard output.
 *
 * <p>The resource stream is closed when the method finishes, whether or not
 * an error occurs.
 *
 * @throws IllegalStateException if {@code test.jpg} is not on the classpath
 * @throws java.io.UncheckedIOException if the image cannot be read
 */
public void haralickFeatures(){
    final String resource = "test.jpg";
    try (InputStream stream = HaralickDemo.class.getClassLoader().getResourceAsStream(resource)) {
        if (stream == null) {
            throw new IllegalStateException("resource not found on classpath: " + resource);
        }
        ColorProcessor image = new ColorProcessor(ImageIO.read(stream));
        // initialize the descriptor
        Haralick descriptor = new Haralick();
        // run the descriptor and extract the features
        descriptor.run(image);
        // obtain the features
        List<double[]> features = descriptor.getFeatures();
        // print the features to system out
        for (double[] feature : features) {
            System.out.println(Arrays2.join(feature, ", ", "%.5f"));
        }
    } catch (java.io.IOException e) {
        // The method declares no checked exceptions, so keep the cause and rethrow unchecked.
        throw new java.io.UncheckedIOException("failed to read " + resource, e);
    }
}
Is there a way to calculate haralick features in android. Any code example will be great help. Thanks in advance.
As you mentioned, you cannot use JFeatureLib to calculate Haralick features on Android, because the library uses certain classes that are only implemented in pure Java and not on Android.
You can use my code, which I took from JFeatureLib and modified to work on Android.
First, create a Java class in your Android project and name it whatever you want (in my case I named it GLCM):
/**
 * Computes 14 texture features from the gray-level co-occurrence matrix
 * (GLCM) of an Android {@code Bitmap}.
 *
 * <p>Usage: set {@link #haralickDist}, call {@link #process(Bitmap)} and read
 * {@link #features}. Each call to {@code process} starts from freshly reset
 * statistics, so one instance can be reused for several images.
 *
 * <p>Gray levels are taken from the static {@link #imageArray} when it holds
 * exactly one byte per pixel of the bitmap; otherwise they are derived from
 * the bitmap's own pixels.
 */
public class GLCM {
    /** Pixel count of the bitmap most recently handed to a {@link Coocurrence}. */
    static int totalPixels=0;
    /**
     * The number of gray values for the textures
     */
    private final int NUM_GRAY_VALUES = 32;
    /**
     * p_(x+y) statistics
     */
    private final double[] p_x_plus_y = new double[2 * NUM_GRAY_VALUES - 1];
    /**
     * p_(x-y) statistics
     */
    private final double[] p_x_minus_y = new double[NUM_GRAY_VALUES];
    /**
     * row mean value
     */
    private double mu_x = 0;
    /**
     * column mean value
     */
    private double mu_y = 0;
    /**
     * row variance
     */
    private double var_x = 0;
    /**
     * column variance
     */
    private double var_y = 0;
    /**
     * HX statistics: entropy of p_x
     */
    private double hx = 0;
    /**
     * HY statistics: entropy of p_y
     */
    private double hy = 0;
    /**
     * HXY1 statistics
     */
    private double hxy1 = 0;
    /**
     * HXY2 statistics
     */
    private double hxy2 = 0;
    /**
     * p_x statistics
     */
    private final double[] p_x = new double[NUM_GRAY_VALUES];
    /**
     * p_y statistics
     */
    private final double[] p_y = new double[NUM_GRAY_VALUES];
    // -
    /** Feature vectors collected through {@link #addData(double[])}. */
    public List<double[]> data = new java.util.ArrayList<>();
    /** Pixel distance between the two pixels of a co-occurrence pair. */
    public int haralickDist;
    /** The 14 features computed by the last {@link #process(Bitmap)} call. */
    double[] features = null;
    /**
     * Optional 8-bit gray plane, one byte per pixel in row-major order. It is
     * only used when its length equals the pixel count of the processed bitmap.
     */
    static byte[] imageArray;

    /**
     * Appends one feature vector to {@link #data}.
     *
     * @param data the feature vector to store
     */
    public void addData(double[] data) {
        this.data.add(data);
    }

    /**
     * @return the list of feature vectors stored so far
     */
    public List<double[]> getFeatures() {
        return data;
    }

    /**
     * Builds the co-occurrence matrix of {@code b} and fills {@link #features}
     * with 14 values derived from it.
     *
     * @param b the image to analyse
     */
    public void process(Bitmap b) {
        // All statistics are accumulated with +=, so they must start from zero
        // on every call; otherwise a second call would add to the first result.
        resetStatistics();
        features = new double[14];
        Coocurrence coocurrence = new Coocurrence(b, NUM_GRAY_VALUES, this.haralickDist);
        coocurrence.calculate();
        double[][] cooccurrenceMatrix = coocurrence.getCooccurrenceMatrix();
        double meanGrayValue = coocurrence.getMeanGrayValue();
        normalize(cooccurrenceMatrix, coocurrence.getCooccurenceSums());
        calculateStatistics(cooccurrenceMatrix);
        double[][] p = cooccurrenceMatrix;
        double[][] Q = new double[NUM_GRAY_VALUES][NUM_GRAY_VALUES];

        // Sum over j of j * p_(x-y)[j]; it does not depend on the row, so it is
        // computed once instead of once per row.
        double sum_j_p_x_minus_y = 0;
        for (int j = 0; j < NUM_GRAY_VALUES; j++) {
            sum_j_p_x_minus_y += j * p_x_minus_y[j];
        }

        for (int i = 0; i < NUM_GRAY_VALUES; i++) {
            for (int j = 0; j < NUM_GRAY_VALUES; j++) {
                double p_ij = p[i][j];
                features[0] += p_ij * p_ij;
                features[2] += i * j * p_ij - mu_x * mu_y;
                features[3] += (i - meanGrayValue) * (i - meanGrayValue) * p_ij;
                features[4] += p_ij / (1 + (i - j) * (i - j));
                features[8] += p_ij * log(p_ij);
                // feature 13
                // NOTE(review): Haralick's definition of Q appears to sum
                // p(i,k) * p(j,k); this sums p(i,j) * p(j,k) as in the code it
                // was derived from. Confirm before relying on feature 13.
                if (p_ij != 0 && p_x[i] != 0) { // would result in 0
                    for (int k = 0; k < NUM_GRAY_VALUES; k++) {
                        if (p_y[k] != 0 && p[j][k] != 0) { // would result in NaN
                            Q[i][j] += (p_ij * p[j][k]) / (p_x[i] * p_y[k]);
                        }
                    }
                }
            }
            features[1] += i * i * p_x_minus_y[i];
            features[9] += (i - sum_j_p_x_minus_y) * (i - sum_j_p_x_minus_y) * p_x_minus_y[i];
            features[10] += p_x_minus_y[i] * log(p_x_minus_y[i]);
        }
        // feature 13: Max Correlation Coefficient
        double[] realEigenvaluesOfQ = new Matrix(Q).eig().getRealEigenvalues();
        Arrays2.abs(realEigenvaluesOfQ);
        Arrays.sort(realEigenvaluesOfQ);
        // square root of the second largest absolute eigenvalue
        features[13] = Math.sqrt(realEigenvaluesOfQ[realEigenvaluesOfQ.length - 2]);
        features[2] /= Math.sqrt(var_x * var_y);
        features[8] *= -1;
        features[10] *= -1;
        double maxhxhy = Math.max(hx, hy);
        if (Math.signum(maxhxhy) == 0) {
            features[11] = 0;
        } else {
            features[11] = (features[8] - hxy1) / maxhxhy;
        }
        features[12] = Math.sqrt(1 - Math.exp(-2 * (hxy2 - features[8])));

        // Sum over j of j * p_(x+y)[j]; loop-invariant, so computed once.
        double sum_j_p_x_plus_y = 0;
        for (int j = 0; j < 2 * NUM_GRAY_VALUES - 1; j++) {
            sum_j_p_x_plus_y += j * p_x_plus_y[j];
        }
        for (int i = 0; i < 2 * NUM_GRAY_VALUES - 1; i++) {
            features[5] += i * p_x_plus_y[i];
            features[7] += p_x_plus_y[i] * log(p_x_plus_y[i]);
            features[6] += (i - sum_j_p_x_plus_y) * (i - sum_j_p_x_plus_y) * p_x_plus_y[i];
        }
        features[7] *= -1;
    }

    /** Clears every accumulator that {@link #calculateStatistics} adds to. */
    private void resetStatistics() {
        Arrays.fill(p_x, 0.0);
        Arrays.fill(p_y, 0.0);
        Arrays.fill(p_x_plus_y, 0.0);
        Arrays.fill(p_x_minus_y, 0.0);
        mu_x = 0;
        mu_y = 0;
        var_x = 0;
        var_y = 0;
        hx = 0;
        hy = 0;
        hxy1 = 0;
        hxy2 = 0;
    }

    /**
     * Calculates the statistical properties.
     */
    private void calculateStatistics(double[][] cooccurrenceMatrix) {
        // p_x, p_y, p_x+y, p_x-y
        for (int i = 0; i < NUM_GRAY_VALUES; i++) {
            for (int j = 0; j < NUM_GRAY_VALUES; j++) {
                double p_ij = cooccurrenceMatrix[i][j];
                p_x[i] += p_ij;
                p_y[j] += p_ij;
                p_x_plus_y[i + j] += p_ij;
                p_x_minus_y[Math.abs(i - j)] += p_ij;
            }
        }
        // mean and variance values
        double[] meanVar;
        meanVar = meanVar(p_x);
        mu_x = meanVar[0];
        var_x = meanVar[1];
        meanVar = meanVar(p_y);
        mu_y = meanVar[0];
        var_y = meanVar[1];
        for (int i = 0; i < NUM_GRAY_VALUES; i++) {
            // hx and hy
            hx += p_x[i] * log(p_x[i]);
            hy += p_y[i] * log(p_y[i]);
            // hxy1 and hxy2
            for (int j = 0; j < NUM_GRAY_VALUES; j++) {
                double p_ij = cooccurrenceMatrix[i][j];
                hxy1 += p_ij * log(p_x[i] * p_y[j]);
                hxy2 += p_x[i] * p_y[j] * log(p_x[i] * p_y[j]);
            }
        }
        hx *= -1;
        hy *= -1;
        hxy1 *= -1;
        hxy2 *= -1;
    }

    /**
     * Compute mean and variance of the given array
     *
     * @param a input values
     * @return array{mean, variance}
     */
    private double[] meanVar(double[] a) {
        // Two passes: first the mean, then the squared deviations from it.
        double ex = 0;
        for (int i = 0; i < NUM_GRAY_VALUES; i++) {
            ex += a[i];
        }
        ex /= a.length;
        double var = 0;
        for (int i = 0; i < NUM_GRAY_VALUES; i++) {
            var += (a[i] - ex) * (a[i] - ex);
        }
        var /= (a.length - 1);
        return new double[]{ex, var};
    }

    /**
     * Returns the bound logarithm of the specified value.
     *
     * If Math.log would be Double.NEGATIVE_INFINITY, 0 is returned
     *
     * @param value the value for which the logarithm should be returned
     * @return the logarithm of the specified value
     */
    private double log(double value) {
        double log = Math.log(value);
        if (log == Double.NEGATIVE_INFINITY) {
            log = 0;
        }
        return log;
    }

    /**
     * Normalizes the array by the given sum, dividing each second-dimension
     * array componentwise by the sum.
     *
     * @param A   the matrix to normalize in place
     * @param sum the divisor applied to every element
     */
    private void normalize(double[][] A, double sum) {
        for (double[] A1 : A) {
            Arrays2.div(A1, sum);
        }
    }

    //<editor-fold defaultstate="collapsed" desc="getter/Setter">
    /**
     * Getter for the Haralick distance
     *
     * @return the Haralick distance
     */
    public int getHaralickDist() {
        return haralickDist;
    }

    /**
     * Setter for the Haralick distance
     *
     * @param haralickDist int for the Haralick distance (must be >= 1)
     * @throws IllegalArgumentException if {@code haralickDist} is smaller than 1
     */
    public void setHaralickDist(int haralickDist) {
        if (haralickDist <= 0) {
            throw new IllegalArgumentException("the distance for haralick must be >= 1 but was " + haralickDist);
        }
        this.haralickDist = haralickDist;
    }
    //</editor-fold>

    /**
     * Builds the symmetric gray-level co-occurrence matrix of one bitmap for
     * the horizontal, vertical and both diagonal neighbour directions.
     */
    static class Coocurrence {
        /**
         * The number of gray values for the textures
         */
        private final int NUM_GRAY_VALUES;
        /**
         * The number of gray levels in an image
         */
        int GRAY_RANGES = 256;
        /**
         * The scale for the gray values for conversion rgb to gray values.
         */
        double GRAY_SCALE;
        /**
         * gray histogram of the image.
         */
        double[] grayHistogram;
        /**
         * Quantized gray values of each pixel of the image.
         *
         * Use int instead of byte as there is no unsigned byte in Java.
         * Otherwise you'll have a hard time using white = 255.
         */
        private final int[] grayValue;
        /**
         * mean gray value
         */
        private double meanGrayValue = 0;
        /**
         * The cooccurrence matrix
         */
        private final double[][] cooccurrenceMatrices;
        /**
         * The pixel distance between the two pixels of a pair.
         */
        private final int HARALICK_DIST;
        private final Bitmap image;

        /**
         * @param b             the image to analyse
         * @param numGrayValues number of quantized gray levels (matrix size)
         * @param haralickDist  pixel distance between paired neighbours
         */
        public Coocurrence(Bitmap b, int numGrayValues, int haralickDist) {
            this.NUM_GRAY_VALUES = numGrayValues;
            this.HARALICK_DIST = haralickDist;
            this.cooccurrenceMatrices = new double[NUM_GRAY_VALUES][NUM_GRAY_VALUES];
            this.image = b;
            totalPixels=b.getHeight()*b.getWidth();
            this.grayValue = new int[totalPixels];
        }

        /** Quantizes the gray values and fills the co-occurrence matrix. */
        void calculate() {
            this.GRAY_SCALE = (double) GRAY_RANGES / (double) NUM_GRAY_VALUES;
            this.grayHistogram = new double[GRAY_RANGES];
            calculateGreyValues();
            final int imageWidth = image.getWidth();
            final int imageHeight = image.getHeight();
            final int d = HARALICK_DIST;
            final int yOffset = d * imageWidth;
            int i, j, pos;
            // image is not empty per default
            for (int y = 0; y < imageHeight; y++) {
                for (int x = 0; x < imageWidth; x++) {
                    pos = imageWidth * y + x;
                    // horizontal neighbor: 0 degrees
                    i = x - d;
                    if (i >= 0) {
                        increment(grayValue[pos], grayValue[pos - d]);
                    }
                    // vertical neighbor: 90 degree
                    j = y - d;
                    if (j >= 0) {
                        increment(grayValue[pos], grayValue[pos - yOffset]);
                    }
                    // 45 degree diagonal neighbor
                    i = x + d;
                    j = y - d;
                    if (i < imageWidth && j >= 0) {
                        increment(grayValue[pos], grayValue[pos + d - yOffset]);
                    }
                    // 135 degree diagonal neighbor
                    i = x - d;
                    j = y - d;
                    if (i >= 0 && j >= 0) {
                        increment(grayValue[pos], grayValue[pos - d - yOffset]);
                    }
                }
            }
        }

        /**
         * Fills {@link #grayValue}, {@link #grayHistogram} and
         * {@link #meanGrayValue}.
         *
         * <p>The static {@code imageArray} is used only when it contains
         * exactly one byte per pixel. Anything else (for example an encoded
         * PNG/JPEG byte stream, whose length is unrelated to the pixel count)
         * is ignored and the gray value is computed from the bitmap pixels as
         * the unweighted mean of the red, green and blue channels.
         */
        private void calculateGreyValues() {
            final int size = grayValue.length;
            final byte[] plane = imageArray;
            final boolean usePlane = plane != null && plane.length == size;
            int[] argb = null;
            if (!usePlane) {
                argb = new int[size];
                image.getPixels(argb, 0, image.getWidth(), 0, 0, image.getWidth(), image.getHeight());
            }
            double graySum = 0;
            for (int pos = 0; pos < size; pos++) {
                int gray;
                if (usePlane) {
                    gray = plane[pos]&0xff;
                } else {
                    int c = argb[pos];
                    gray = (((c >> 16) & 0xff) + ((c >> 8) & 0xff) + (c & 0xff)) / 3;
                }
                graySum += gray;
                grayValue[pos] = (int) (gray / GRAY_SCALE); // quantized for texture analysis
                assert grayValue[pos] >= 0 : grayValue[pos] + " > 0 violated";
                grayHistogram[gray]++;
            }
            Arrays2.div(grayHistogram, size);
            meanGrayValue = Math.floor(graySum / size / GRAY_SCALE)*GRAY_SCALE;
        }

        /**
         * Increments the co-occurrence matrix at the positions (g1,g2) and
         * (g2,g1), which keeps the matrix symmetric.
         *
         * @param g1 the gray value of the first pixel
         * @param g2 the gray value of the second pixel
         */
        private void increment(int g1, int g2) {
            cooccurrenceMatrices[g1][g2]++;
            cooccurrenceMatrices[g2][g1]++;
        }

        /** @return the mean gray value computed by {@link #calculate()} */
        public double getMeanGrayValue() {
            return this.meanGrayValue;
        }

        /** @return the (mutable) co-occurrence matrix */
        public double[][] getCooccurrenceMatrix() {
            return this.cooccurrenceMatrices;
        }

        /**
         * @return the normalisation divisor: this image's pixel count times 8
         */
        public double getCooccurenceSums() {
            // divide by R=8 neighbours
            // see p.613, §2 of Haralick paper
            // Uses this instance's own pixel count rather than the shared static.
            return grayValue.length * 8.0;
        }
    }
}
Now create object of that GLCM class in your main activity or in activity you want
GLCM glcm=new GLCM();
The next step is to copy and paste this function into your main activity, or into whichever activity you want. Pass the image to it as a Bitmap; the function computes the 14 Haralick features and stores them in the float array featureFlot. Here is that function:
/**
 * Computes the Haralick features of {@code b} with {@code glcm} (distance 1),
 * stores the formatted feature list in {@code featureString} and converts the
 * features to a float array.
 *
 * <p>{@code GLCM.imageArray} is filled with one gray byte per pixel, which is
 * what {@code GLCM} indexes by pixel position. The previous version stored the
 * PNG-compressed byte stream there, whose bytes are not pixel values and whose
 * length is unrelated to the pixel count.
 *
 * @param b the image to analyse
 * @throws IOException declared for compatibility with existing callers
 */
public void haralickFeatures(Bitmap b) throws IOException {
    glcm.haralickDist=1;

    // Build the 8-bit gray plane (row-major) from the bitmap's ARGB pixels.
    final int width = b.getWidth();
    final int height = b.getHeight();
    int[] argb = new int[width * height];
    b.getPixels(argb, 0, width, 0, 0, width, height);
    byte[] gray = new byte[argb.length];
    for (int i = 0; i < argb.length; i++) {
        int c = argb[i];
        // unweighted mean of red, green and blue
        gray[i] = (byte) ((((c >> 16) & 0xff) + ((c >> 8) & 0xff) + (c & 0xff)) / 3);
    }
    GLCM.imageArray = gray;

    glcm.process(b);
    glcm.data = new ArrayList<>(1);
    glcm.addData(glcm.features);
    List<double[]> featuresHar=glcm.getFeatures();
    for (double[] feature : featuresHar) {
        featureString=Arrays2.join(feature, ",", "%.5f");
    }

    // Convert the doubles directly instead of splitting featureString on ","
    // and re-parsing it: if the formatter emits a decimal comma (locale
    // dependent), splitting would break every number apart.
    float[] featureFlot = new float[glcm.features.length];
    for (int i=0;i<featureFlot.length;i++){
        featureFlot[i]=(float) glcm.features[i];
    }
    //featureFlot is array that contain all 14 haralick features
}
I have searched on stackoverflow for the fastest FFT algorithm, and I found the following:
/**
 * In-place radix-2 complex FFT with precomputed twiddle-factor tables.
 *
 * <p>The tables hold {@code cos(-2πi/n)} and {@code sin(-2πi/n)}, and the
 * transform applies no scaling to the result.
 */
public class FFT {
    /** Transform length {@code n} and its base-2 logarithm {@code m}. */
    int n, m;
    // Lookup tables. Only need to recompute when size of FFT changes.
    double[] cos;
    double[] sin;

    /**
     * Creates a transform of the given length and precomputes its tables.
     *
     * @param n the transform length; must be a positive power of two
     * @throws RuntimeException if {@code n} is not a positive power of two
     */
    public FFT(int n) {
        // Make sure n is a power of 2. Bit tests are exact, whereas truncating
        // Math.log(n) / Math.log(2) depends on floating-point rounding.
        if (n <= 0 || (n & (n - 1)) != 0)
            throw new RuntimeException("FFT length must be power of 2");
        this.n = n;
        this.m = Integer.numberOfTrailingZeros(n);
        // precompute tables
        cos = new double[n / 2];
        sin = new double[n / 2];
        for (int i = 0; i < n / 2; i++) {
            cos[i] = Math.cos(-2 * Math.PI * i / n);
            sin[i] = Math.sin(-2 * Math.PI * i / n);
        }
    }

    /**
     * Transforms the first {@code n} complex values in place.
     *
     * @param x real parts on input, real parts of the spectrum on output
     * @param y imaginary parts on input, imaginary parts of the spectrum on output
     * @throws IllegalArgumentException if either array holds fewer than
     *         {@code n} values (checked up front so that a short array cannot
     *         leave the data half-permuted)
     */
    public void fft(double[] x, double[] y) {
        if (x.length < n || y.length < n) {
            throw new IllegalArgumentException(
                    "x and y must each hold at least " + n + " values but had "
                            + x.length + " and " + y.length);
        }
        int i, j, k, n1, n2, a;
        double c, s, t1, t2;
        // Bit-reverse
        j = 0;
        n2 = n / 2;
        for (i = 1; i < n - 1; i++) {
            n1 = n2;
            while (j >= n1) {
                j = j - n1;
                n1 = n1 / 2;
            }
            j = j + n1;
            if (i < j) {
                t1 = x[i];
                x[i] = x[j];
                x[j] = t1;
                t1 = y[i];
                y[i] = y[j];
                y[j] = t1;
            }
        }
        // FFT: m butterfly stages, the butterfly span doubling each stage
        n1 = 0;
        n2 = 1;
        for (i = 0; i < m; i++) {
            n1 = n2;
            n2 = n2 + n2;
            a = 0;
            for (j = 0; j < n1; j++) {
                c = cos[a];
                s = sin[a];
                a += 1 << (m - i - 1); // table stride for this stage
                for (k = j; k < n; k = k + n2) {
                    t1 = c * x[k + n1] - s * y[k + n1];
                    t2 = s * x[k + n1] + c * y[k + n1];
                    x[k + n1] = x[k] - t1;
                    y[k + n1] = y[k] - t2;
                    x[k] = x[k] + t1;
                    y[k] = y[k] + t2;
                }
            }
        }
    }
}
My question is that I have a MediaRecorder Object to capture audio as follows:
if (mRecorder == null) {
mRecorder = new MediaRecorder();
mRecorder.setAudioSource(MediaRecorder.AudioSource.MIC);
mRecorder.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP);
mRecorder.setAudioEncoder(MediaRecorder.AudioEncoder.AMR_NB);
mRecorder.setOutputFile("/dev/null");
try {
mRecorder.prepare();
} catch (IllegalStateException e) {
Log.e("error", "IllegalStateException");
} catch (IOException e) {
Log.e("error", "IOException");
;
}
mRecorder.start();
}
Now I want to use this FFT algorithm on my captured audio and show the results on an equalizer or something. How I can do that?
MediaRecorder doesn't give you access directly to the audio buffer, but if you use AudioRecord you can. If you must use MediaRecorder then perhaps save it to a file and then re-read the file back in again.
Someone has worked on an example here Capturing Sound for Analysis and Visualizing Frequencies in Android
I'm new in Android development. I'm looking for any method that applies pitch shifting to output sound (in real-time). But I couldn't find any point to start.
I've found this topic but I still don't know how can I apply this.
Any suggestions?
In general, the algorithm is called a phase vocoder -- searching for that on the Internets should get you started.
There are a few open source phase vocoders out there, you should be able to use those for reference too.
You can do phase vocoder in real-time -- the main component used is the FFT, so you'll need a fast FFT. The Android libraries can do this for you, see this documentation: http://developer.android.com/reference/android/media/audiofx/Visualizer.html
As it happens, I'm about to release an open source FFT for ARM that is faster than Apple's vDSP library (which was hitherto the fastest). I'll post back in a few days when I've uploaded it to github.com.
Good luck.
There is no built-in pitch shifting algorithm in the Android SDK. You have to code your own. Pitch shifting is a real hardcore DSP algorithm; good sounding algorithms are results of many months or rather years of development...
I personally do not know any Java implementation so I suggest you to adopt some of the free C++ PS algorithms, the best one - which I use in my audio applications, is SoundTouch:
http://www.surina.net/soundtouch/
I played with its code a little and it seems it would not be too much complicated to rewrite it in Java.
HOME URL: http://www.dspdimension.com
/**
 * FFT-based pitch shifter operating on float samples.
 *
 * All working buffers and the FIFO position (gRover) are static, so state is
 * carried over from one PitchShift call to the next and is shared by every
 * caller of this class.
 */
public class AudioPitch{
//region Private Static Members
// Upper bound for fftFrameSize: every work buffer below is sized from it.
private static int MAX_FRAME_LENGTH = 8192;
private static double M_PI = 3.14159265358979323846;
// Input/output FIFOs holding one analysis frame each.
private static float[] gInFIFO = new float[MAX_FRAME_LENGTH];
private static float[] gOutFIFO = new float[MAX_FRAME_LENGTH];
// Interleaved (re, im) FFT work buffer.
private static float[] gFFTworksp = new float[2 * MAX_FRAME_LENGTH];
// Per-bin phase of the previous frame (analysis) and accumulated phase (synthesis).
private static float[] gLastPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
private static float[] gSumPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
// Overlap-add accumulator for the synthesized frames.
private static float[] gOutputAccum = new float[2 * MAX_FRAME_LENGTH];
// Per-bin frequency/magnitude from analysis (gAna*) and for synthesis (gSyn*).
private static float[] gAnaFreq = new float[MAX_FRAME_LENGTH];
private static float[] gAnaMagn = new float[MAX_FRAME_LENGTH];
private static float[] gSynFreq = new float[MAX_FRAME_LENGTH];
private static float[] gSynMagn = new float[MAX_FRAME_LENGTH];
// Current write position in gInFIFO; persists across calls.
private static long gRover;
//endregion
/**
 * Pitch-shifts numSampsToProcess samples of indata in place: outdata is an
 * alias of indata, so each input sample is overwritten with an output sample
 * after it has been copied into the input FIFO.
 *
 * Output is delayed by inFifoLatency = fftFrameSize - stepSize samples.
 *
 * @param pitchShift        factor applied to each bin's frequency and bin index
 * @param numSampsToProcess number of samples of indata to process
 * @param fftFrameSize      frame length; must not exceed MAX_FRAME_LENGTH because
 *                          the work buffers are sized from it (presumably a
 *                          power of two, as the transform derives its stage
 *                          count from log2 of this value; TODO confirm)
 * @param osamp             overlap factor; stepSize = fftFrameSize / osamp
 * @param sampleRate        sample rate used to convert bins to frequencies
 * @param indata            samples to process; overwritten with the result
 */
public static void PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize/*(long)2048*/, long osamp/*(long)10*/, float sampleRate, float[] indata)
{
double magn, phase, tmp, window, real, imag;
double freqPerBin, expct;
long i, k, qpd, index, inFifoLatency, stepSize, fftFrameSize2;
// in-place processing: output is written back into the input array
float[] outdata = indata;
/* set up some handy variables */
fftFrameSize2 = fftFrameSize / 2;
stepSize = fftFrameSize / osamp;
freqPerBin = sampleRate / (double)fftFrameSize;
expct = 2.0 * M_PI * (double)stepSize / (double)fftFrameSize;
inFifoLatency = fftFrameSize - stepSize;
// gRover == 0 only before the first frame: start writing after the latency gap
if (gRover == 0) gRover = inFifoLatency;
/* main processing loop */
for (i = 0; i < numSampsToProcess; i++)
{
/* As long as we have not yet collected enough data just read in */
gInFIFO[(int) gRover] = indata[(int) i];
outdata[(int) i] = gOutFIFO[(int) (gRover - inFifoLatency)];
gRover++;
/* now we have enough data for processing */
if (gRover >= fftFrameSize)
{
gRover = inFifoLatency;
/* do windowing and re,im interleave */
for (k = 0; k < fftFrameSize; k++)
{
window = -.5 * Math.cos(2.0 * M_PI * (double)k / (double)fftFrameSize) + .5;
gFFTworksp[(int) (2 * k)] = (float)(gInFIFO[(int) k] * window);
gFFTworksp[(int) (2 * k + 1)] = 0.0F;
}
/* ***************** ANALYSIS ******************* */
/* do transform */
ShortTimeFourierTransform(gFFTworksp, fftFrameSize, -1);
/* this is the analysis step */
for (k = 0; k <= fftFrameSize2; k++)
{
/* de-interlace FFT buffer */
real = gFFTworksp[(int) (2 * k)];
imag = gFFTworksp[(int) (2 * k + 1)];
/* compute magnitude and phase */
magn = 2.0 * Math.sqrt(real * real + imag * imag);
phase = smbAtan2(imag, real);
/* compute phase difference */
tmp = phase - gLastPhase[(int) k];
gLastPhase[(int) k] = (float)phase;
/* subtract expected phase difference */
tmp -= (double)k * expct;
/* map delta phase into +/- Pi interval */
qpd = (long)(tmp / M_PI);
if (qpd >= 0) qpd += qpd & 1;
else qpd -= qpd & 1;
tmp -= M_PI * (double)qpd;
/* get deviation from bin frequency from the +/- Pi interval */
tmp = osamp * tmp / (2.0 * M_PI);
/* compute the k-th partials' true frequency */
tmp = (double)k * freqPerBin + tmp * freqPerBin;
/* store magnitude and true frequency in analysis arrays */
gAnaMagn[(int) k] = (float)magn;
gAnaFreq[(int) k] = (float)tmp;
}
/* ***************** PROCESSING ******************* */
/* this does the actual pitch shifting */
for (int zero = 0; zero < fftFrameSize; zero++)
{
gSynMagn[zero] = 0;
gSynFreq[zero] = 0;
}
for (k = 0; k <= fftFrameSize2; k++)
{
// move bin k to bin k * pitchShift; bins mapped above fftFrameSize2 are dropped
index = (long)(k * pitchShift);
if (index <= fftFrameSize2)
{
gSynMagn[(int) index] += gAnaMagn[(int) k];
gSynFreq[(int) index] = gAnaFreq[(int) k] * pitchShift;
}
}
/* ***************** SYNTHESIS ******************* */
/* this is the synthesis step */
for (k = 0; k <= fftFrameSize2; k++)
{
/* get magnitude and true frequency from synthesis arrays */
magn = gSynMagn[(int) k];
tmp = gSynFreq[(int) k];
/* subtract bin mid frequency */
tmp -= (double)k * freqPerBin;
/* get bin deviation from freq deviation */
tmp /= freqPerBin;
/* take osamp into account */
tmp = 2.0 * M_PI * tmp / osamp;
/* add the overlap phase advance back in */
tmp += (double)k * expct;
/* accumulate delta phase to get bin phase */
gSumPhase[(int) k] += (float)tmp;
phase = gSumPhase[(int) k];
/* get real and imag part and re-interleave */
gFFTworksp[(int) (2 * k)] = (float)(magn * Math.cos(phase));
gFFTworksp[(int) (2 * k + 1)] = (float)(magn * Math.sin(phase));
}
/* zero negative frequencies */
for (k = fftFrameSize + 2; k < 2 * fftFrameSize; k++) gFFTworksp[(int) k] = 0.0F;
/* do inverse transform */
ShortTimeFourierTransform(gFFTworksp, fftFrameSize, 1);
/* do windowing and add to output accumulator */
for (k = 0; k < fftFrameSize; k++)
{
window = -.5 * Math.cos(2.0 * M_PI * (double)k / (double)fftFrameSize) + .5;
gOutputAccum[(int) k] += (float)(2.0 * window * gFFTworksp[(int) (2 * k)] / (fftFrameSize2 * osamp));
}
for (k = 0; k < stepSize; k++) gOutFIFO[(int) k] = gOutputAccum[(int) k];
/* shift accumulator */
//memmove(gOutputAccum, gOutputAccum + stepSize, fftFrameSize * sizeof(float));
for (k = 0; k < fftFrameSize; k++)
{
gOutputAccum[(int) k] = gOutputAccum[(int) (k + stepSize)];
}
/* move input FIFO */
for (k = 0; k < inFifoLatency; k++) gInFIFO[(int) k] = gInFIFO[(int) (k + stepSize)];
}
}
}
//endregion
//region Private Static Methods
/**
 * In-place FFT over an interleaved (re, im) buffer: a bit-reversal
 * permutation followed by butterfly passes. No scaling is applied here.
 *
 * @param fftBuffer    interleaved complex data; the first 2 * fftFrameSize
 *                     floats are read and overwritten
 * @param fftFrameSize number of complex values in the buffer
 * @param sign         sign applied to the sine of the rotation step; PitchShift
 *                     passes -1 for the analysis transform and 1 for the
 *                     transform back to the time domain
 */
public static void ShortTimeFourierTransform(float[] fftBuffer, long fftFrameSize, long sign)
{
float wr, wi, arg, temp;
float tr, ti, ur, ui;
long i, bitm, j, le, le2, k;
// bit-reversal permutation of the complex pairs
for (i = 2; i < 2 * fftFrameSize - 2; i += 2)
{
for (bitm = 2, j = 0; bitm < 2 * fftFrameSize; bitm <<= 1)
{
if ((i & bitm) != 0) j++;
j <<= 1;
}
if (i < j)
{
temp = fftBuffer[(int) i];
fftBuffer[(int) i] = fftBuffer[(int) j];
fftBuffer[(int) j] = temp;
temp = fftBuffer[(int) (i + 1)];
fftBuffer[(int) (i + 1)] = fftBuffer[(int) (j + 1)];
fftBuffer[(int) (j + 1)] = temp;
}
}
// number of butterfly passes: log2(fftFrameSize), rounded to nearest
long max = (long)(Math.log(fftFrameSize) / Math.log(2.0) + .5);
for (k = 0, le = 2; k < max; k++)
{
le <<= 1;
le2 = le >> 1;
ur = 1.0F;
ui = 0.0F;
arg = (float)M_PI / (le2 >> 1);
wr = (float)Math.cos(arg);
wi = (float)(sign * Math.sin(arg));
for (j = 0; j < le2; j += 2)
{
for (i = j; i < 2 * fftFrameSize; i += le)
{
tr = fftBuffer[(int) (i + le2)] * ur - fftBuffer[(int) (i + le2 + 1)] * ui;
ti = fftBuffer[(int) (i + le2)] * ui + fftBuffer[(int) (i + le2 + 1)] * ur;
fftBuffer[(int) (i + le2)] = fftBuffer[(int) i] - tr;
fftBuffer[(int) (i + le2 + 1)] = fftBuffer[(int) (i + 1)] - ti;
fftBuffer[(int) i] += tr;
fftBuffer[(int) (i + 1)] += ti;
}
// rotate (ur, ui) by the step (wr, wi) for the next butterfly column
tr = ur * wr - ui * wi;
ui = ur * wi + ui * wr;
ur = tr;
}
}
}
//endregion
/**
 * atan2 wrapper with special cases: returns 0 when x is 0, and +/- Pi/2
 * (sign taken from x) when y is 0; otherwise Math.atan2(x, y).
 *
 * Note the argument order: PitchShift calls this as smbAtan2(imag, real).
 * NOTE(review): for x == 0 and y < 0 this returns 0 where Math.atan2 would
 * return Pi; confirm that is intended.
 */
private static double smbAtan2(double x, double y)
{
double signx;
if (x > 0.) signx = 1.;
else signx = -1.;
if (x == 0.) return 0.;
if (y == 0.) return signx * M_PI / 2.;
return Math.atan2(x, y);
}
}
This code works too, but it uses a lot of CPU.
pitchShift between 0.5 -2.0
call this class as below:
// Record -> pitch shift -> play loop. Only the samples actually delivered by
// read() are processed and played, and the float result is clamped before it
// is narrowed to short: a value of +1.0 scales to 32768, which would otherwise
// wrap around to -32768.
int maxValueOFShort = 32768;
short [] buffer = new short[800];
float[] inData = new float[buffer.length];
while (audiorackIsRun)
{
    int m = recorder.read(buffer, 0, buffer.length);
    if (m <= 0)
    {
        // error code or nothing read: there is no new audio to process
        continue;
    }
    for(int n=0; n<m;n++)
        inData[n] = buffer[n]/(float)maxValueOFShort;
    AudioPitch.PitchShift(1, m, 4096, 4, 44100, inData);
    for(int n=0; n<m;n++)
    {
        float scaled = inData[n]*maxValueOFShort;
        if (scaled > Short.MAX_VALUE) scaled = Short.MAX_VALUE;
        else if (scaled < Short.MIN_VALUE) scaled = Short.MIN_VALUE;
        buffer[n] = (short)scaled;
    }
    player.write(buffer, 0, m);
}