Related
Note: All info in my post only goes for Samsung Galaxy S7 device. I do not know how emulators and other devices behave.
In onImageAvailable I convert continuously each image to a NV21 byte array and forward it to an API expecting raw NV21 format.
This is how I initialize the image reader and receive the images:
// Creates the ImageReader that receives YUV_420_888 camera frames and registers
// the per-frame listener on the background handler. The rest of the camera
// setup is elided ("...") in this excerpt.
private void openCamera() {
...
// The last argument is maxImages: only one image buffer is requested.
// NOTE(review): the ImageReader docs suggest maxImages >= 2 for acquireLatestImage()
// to behave differently from acquireNextImage() - confirm that 1 is intended.
mImageReader = ImageReader.newInstance(WIDTH, HEIGHT,
ImageFormat.YUV_420_888, 1); // only 1 for best performance
// Frames are delivered to mOnImageAvailableListener via mBackgroundHandler.
mImageReader.setOnImageAvailableListener(
mOnImageAvailableListener, mBackgroundHandler);
...
}
private final ImageReader.OnImageAvailableListener mOnImageAvailableListener
= new ImageReader.OnImageAvailableListener() {
#Override
public void onImageAvailable(ImageReader reader) {
Image image = reader.acquireLatestImage();
if (image != null) {
byte[] data = convertYUV420ToNV21_ALL_PLANES(image); // this image is turned 90 deg using front cam in portrait mode
byte[] data_rotated = rotateNV21_working(data, WIDTH, HEIGHT, 270);
ForwardToAPI(data_rotated); // image data is being forwarded to api and received later on
image.close();
}
}
};
This is the function that converts the image to raw NV21 (from here). It works fine, but the resulting image is turned by 90 degrees (due to Android?) when using the front camera in portrait mode:
(I modified it, slightly according to comments of Alex Cohn)
/**
 * Builds an NV21-style byte array from the three planes of a YUV_420_888 image
 * using a shortcut: the whole of plane 0, then the FIRST byte of plane 2, then
 * the whole of plane 1.
 *
 * NOTE(review): this presumably relies on plane 1 / plane 2 being overlapping
 * views of one interleaved chroma buffer (pixelStride == 2) with no row padding,
 * as observed on the author's device - it is not guaranteed by the
 * YUV_420_888 contract; verify before using on other devices.
 *
 * @param imgYUV420 image whose plane buffers are read (their positions advance)
 * @return plane 0 bytes, one byte of plane 2, then plane 1 bytes
 */
private byte[] convertYUV420ToNV21_ALL_PLANES(Image imgYUV420) {
    ByteBuffer buffer0 = imgYUV420.getPlanes()[0].getBuffer();
    ByteBuffer buffer1 = imgYUV420.getPlanes()[1].getBuffer();
    ByteBuffer buffer2 = imgYUV420.getPlanes()[2].getBuffer();

    int buffer0_size = buffer0.remaining();
    int buffer1_size = buffer1.remaining();
    // Only a single byte is taken from plane 2.
    int buffer2_size = 1;

    // Read straight into the result: no intermediate arrays, no stream, and
    // therefore no checked IOException to swallow.
    byte[] rez = new byte[buffer0_size + buffer2_size + buffer1_size];
    buffer0.get(rez, 0, buffer0_size);
    // Planes 1 and 2 are written in swapped order because the consumer expects
    // the chroma bytes the other way round (blue/red were swapped otherwise).
    buffer2.get(rez, buffer0_size, buffer2_size);
    buffer1.get(rez, buffer0_size + buffer2_size, buffer1_size);
    return rez;
}
Hence "data" needs to be rotated. Using this function (from here), I get a weird 3-times interlaced picture error:
/**
 * Remaps an NV21 frame for the given rotation while keeping the output indexed
 * with the INPUT width and height.
 *
 * For 90/270 the source coordinates are rescaled (x by width/height, y by
 * height/width) instead of swapping the output dimensions, so the result is a
 * true rotation only for square frames. The vertical flip is always applied.
 *
 * @param input    NV21 data: width*height luma bytes followed by interleaved chroma pairs
 * @param width    frame width in pixels
 * @param height   frame height in pixels
 * @param rotation 90/270 enable the coordinate swap, 180/270 enable the horizontal flip
 * @return a new array of the same length as {@code input}
 */
public static byte[] rotateNV21(byte[] input, int width, int height, int rotation) {
    final byte[] output = new byte[input.length];
    final boolean transpose = rotation == 90 || rotation == 270;
    // In portrait mode with the front camera this has to stay enabled.
    final boolean flipVertical = true;
    final boolean flipHorizontal = rotation == 270 || rotation == 180;

    final int lumaSize = width * height;
    final int chromaWidth = width >> 1;

    for (int dstX = 0; dstX < width; dstX++) {
        for (int dstY = 0; dstY < height; dstY++) {
            int srcX = dstX;
            int srcY = dstY;
            if (transpose) {
                srcX = width * dstY / height;
                srcY = height * dstX / width;
            }
            if (flipVertical) {
                srcY = height - srcY - 1;
            }
            if (flipHorizontal) {
                srcX = width - srcX - 1;
            }

            // Luma sample.
            output[width * dstY + dstX] = input[width * srcY + srcX];

            // Chroma pair of the 2x2 cell; "* 2" accounts for the interleave.
            final int srcChroma = lumaSize + (chromaWidth * (srcY >> 1) + (srcX >> 1)) * 2;
            final int dstChroma = lumaSize + (chromaWidth * (dstY >> 1) + (dstX >> 1)) * 2;
            output[dstChroma] = input[srcChroma];
            output[dstChroma + 1] = input[srcChroma + 1];
        }
    }
    return output;
}
Resulting into this picture:
Note: it is still the same cup, however you see it 3-4 times.
Using another suggested rotate function from here gives the proper result:
/**
 * Rotates an NV21 frame clockwise by 90, 180 or 270 degrees.
 *
 * For 90/270 the output has swapped dimensions (height x width). A rotation of
 * 0 returns the input array itself, not a copy.
 *
 * @param yuv      NV21 data: width*height luma bytes followed by interleaved chroma pairs
 * @param width    input frame width in pixels
 * @param height   input frame height in pixels
 * @param rotation one of 0, 90, 180, 270
 * @return the rotated frame (or {@code yuv} itself when rotation is 0)
 * @throws IllegalArgumentException if rotation is not one of 0, 90, 180, 270
 */
public static byte[] rotateNV21_working(final byte[] yuv,
                                        final int width,
                                        final int height,
                                        final int rotation)
{
    if (rotation == 0) {
        return yuv;
    }
    if (rotation % 90 != 0 || rotation < 0 || rotation > 270) {
        throw new IllegalArgumentException("0 <= rotation < 360, rotation % 90 == 0");
    }

    final byte[] rotated = new byte[yuv.length];
    final int lumaSize = width * height;
    final boolean transpose = rotation % 180 != 0;
    final boolean mirrorX = rotation % 270 != 0;
    final boolean mirrorY = rotation >= 180;
    // Output geometry does not depend on the pixel, so compute it once.
    final int outWidth = transpose ? height : width;
    final int outHeight = transpose ? width : height;

    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            // Source offsets: luma sample and the chroma pair of its 2x2 cell.
            final int srcLuma = row * width + col;
            final int srcChroma = lumaSize + (row >> 1) * width + (col & ~1);

            // Destination coordinates after optional transpose and mirroring.
            int outCol = transpose ? row : col;
            int outRow = transpose ? col : row;
            if (mirrorX) {
                outCol = outWidth - outCol - 1;
            }
            if (mirrorY) {
                outRow = outHeight - outRow - 1;
            }

            final int dstLuma = outRow * outWidth + outCol;
            final int dstChroma = lumaSize + (outRow >> 1) * outWidth + (outCol & ~1);

            rotated[dstLuma] = yuv[srcLuma];
            rotated[dstChroma] = yuv[srcChroma];
            rotated[dstChroma + 1] = yuv[srcChroma + 1];
        }
    }
    return rotated;
}
The result is fine now:
The top image shows the direct stream using a texture view's surface and adding it to the captureRequestBuilder. The bottom image shows the raw image data after rotating.
The questions are:
Does this hack in "convertYUV420ToNV21_ALL_PLANES" work on any
device/emulator?
Why does rotateNV21 not work, while rotateNV21_working works fine.
Edit: The mirror issue is fixed, see code comment. The squeeze issue is fixed, it was caused by the API it gets forwarded.
The remaining open issue is to find a proper, not too expensive function that converts and rotates an image into raw NV21 and works on any device.
Here is the code to convert the Image to NV21 byte[]. This will work when the imgYUV420 U and V planes have pixelStride=1 (as on emulator) or pixelStride=2 (as on Nexus):
/**
 * Converts a YUV_420_888 image into a tightly packed NV21 byte array:
 * width*height luma bytes followed by interleaved V,U pairs.
 *
 * Row stride and pixel stride of every plane are honoured, so padded rows and
 * both planar (pixelStride 1) and semi-planar (pixelStride 2) chroma layouts are
 * handled. The plane buffers are read with absolute indexing; their positions
 * are not changed.
 *
 * @param imgYUV420 image in ImageFormat.YUV_420_888
 * @return NV21 data of length width * height * 3 / 2
 */
private byte[] convertYUV420ToNV21_ALL_PLANES(Image imgYUV420) {
    assert(imgYUV420.getFormat() == ImageFormat.YUV_420_888);
    final int width = imgYUV420.getWidth();
    final int height = imgYUV420.getHeight();
    final Image.Plane[] planes = imgYUV420.getPlanes();

    Log.d(TAG, "image: " + width + "x" + height + " " + imgYUV420.getFormat());
    Log.d(TAG, "planes: " + planes.length);
    for (int nplane = 0; nplane < planes.length; nplane++) {
        Log.d(TAG, "plane[" + nplane + "]: length " + planes[nplane].getBuffer().remaining() + ", strides: " + planes[nplane].getPixelStride() + " " + planes[nplane].getRowStride());
    }

    byte[] rez = new byte[width * height * 3 / 2];
    int n = 0;

    // Luma plane: copy only the visible pixels of every row, skipping row padding.
    final ByteBuffer yBuffer = planes[0].getBuffer();
    final int yBase = yBuffer.position();
    final int yRowStride = planes[0].getRowStride();
    final int yPixelStride = planes[0].getPixelStride();
    for (int row = 0; row < height; row++) {
        final int rowStart = yBase + row * yRowStride;
        for (int col = 0; col < width; col++) {
            rez[n++] = yBuffer.get(rowStart + col * yPixelStride);
        }
    }

    // Chroma planes: plane 1 is U (Cb), plane 2 is V (Cr).
    final ByteBuffer uBuffer = planes[1].getBuffer();
    final ByteBuffer vBuffer = planes[2].getBuffer();
    final int uBase = uBuffer.position();
    final int vBase = vBuffer.position();
    final int uRowStride = planes[1].getRowStride();
    final int vRowStride = planes[2].getRowStride();
    final int uPixelStride = planes[1].getPixelStride();
    final int vPixelStride = planes[2].getPixelStride();
    for (int row = 0; row < height; row += 2) {
        final int uRowStart = uBase + (row / 2) * uRowStride;
        final int vRowStart = vBase + (row / 2) * vRowStride;
        for (int col = 0; col < width; col += 2) {
            // NV21 stores V before U in each chroma pair.
            rez[n++] = vBuffer.get(vRowStart + (col / 2) * vPixelStride);
            rez[n++] = uBuffer.get(uRowStart + (col / 2) * uPixelStride);
        }
    }

    Log.w(TAG, "total: " + rez.length);
    return rez;
}
optimized Java code is available here.
As you can see, it is very easy to change this code to produce a rotated image in a single step:
/**
 * Converts a YUV_420_888 image into a tightly packed NV21 byte array that is
 * rotated 90 degrees clockwise in the same pass (output is height x width).
 *
 * Row stride and pixel stride of every plane are honoured. The plane buffers
 * are read with absolute indexing; their positions are not changed.
 *
 * @param imgYUV420 image in ImageFormat.YUV_420_888
 * @return rotated NV21 data of length width * height * 3 / 2
 */
private byte[] rotateYUV420ToNV21(Image imgYUV420) {
    final int srcWidth = imgYUV420.getWidth();
    final int srcHeight = imgYUV420.getHeight();
    final Image.Plane[] planes = imgYUV420.getPlanes();

    Log.d(TAG, "image: " + srcWidth + "x" + srcHeight + " " + imgYUV420.getFormat());
    Log.d(TAG, "planes: " + planes.length);
    for (int nplane = 0; nplane < planes.length; nplane++) {
        Log.d(TAG, "plane[" + nplane + "]: length " + planes[nplane].getBuffer().remaining() + ", strides: " + planes[nplane].getPixelStride() + " " + planes[nplane].getRowStride());
    }

    byte[] rez = new byte[srcWidth * srcHeight * 3 / 2];
    // After the rotation every output row is as long as the source is high.
    final int width = srcHeight;

    // Luma: source row r becomes output column (srcHeight - 1 - r).
    final ByteBuffer yBuffer = planes[0].getBuffer();
    final int yBase = yBuffer.position();
    final int yRowStride = planes[0].getRowStride();
    final int yPixelStride = planes[0].getPixelStride();
    for (int srcRow = 0; srcRow < srcHeight; srcRow++) {
        final int dstCol = srcHeight - 1 - srcRow;
        final int rowStart = yBase + srcRow * yRowStride;
        for (int col = 0; col < srcWidth; col++) {
            rez[col * width + dstCol] = yBuffer.get(rowStart + col * yPixelStride);
        }
    }

    // Chroma: plane 1 is U (Cb), plane 2 is V (Cr); each source chroma row
    // becomes one pair-wide output column, filled from right to left.
    final int uv_offset = srcWidth * srcHeight;
    final ByteBuffer uBuffer = planes[1].getBuffer();
    final ByteBuffer vBuffer = planes[2].getBuffer();
    final int uBase = uBuffer.position();
    final int vBase = vBuffer.position();
    final int uRowStride = planes[1].getRowStride();
    final int vRowStride = planes[2].getRowStride();
    final int uPixelStride = planes[1].getPixelStride();
    final int vPixelStride = planes[2].getPixelStride();
    for (int dstCol = srcHeight - 2, srcRow = 0; dstCol >= 0; dstCol -= 2, srcRow++) {
        final int uRowStart = uBase + srcRow * uRowStride;
        final int vRowStart = vBase + srcRow * vRowStride;
        for (int col = 0; col < srcWidth; col += 2) {
            final int dst = uv_offset + col / 2 * width + dstCol;
            // NV21 stores V before U in each chroma pair.
            rez[dst] = vBuffer.get(vRowStart + (col / 2) * vPixelStride);
            rez[dst + 1] = uBuffer.get(uRowStart + (col / 2) * uPixelStride);
        }
    }

    Log.w(TAG, "total rotated: " + rez.length);
    return rez;
}
I sincerely recommend the site http://rawpixels.net/ to see the actual structure of your raw images.
With OpenCV and the Android Camera2 API this task is very fast: you don't need a YUV420-to-NV21 conversion in Java, and with OpenCV the conversion is about 4x faster:
Java side:
//Starts a builtin camera with api camera 2
/**
 * Starts the built-in camera through the camera2 API and creates the
 * YUV_420_888 ImageReader (4 buffers) whose frames go to
 * onImageAvailableListener.
 *
 * A CameraAccessException is logged together with its stack trace; it is not
 * rethrown.
 */
public void startCamera() {
    CameraManager manager = (CameraManager) AppData.getAppContext().getSystemService(Context.CAMERA_SERVICE);
    try {
        String pickedCamera = getCamera(manager);
        manager.openCamera(pickedCamera, cameraStateCallback, null);
        // set image format on YUV
        mImageReader = ImageReader.newInstance(mWidth, mHeight, ImageFormat.YUV_420_888, 4);
        mImageReader.setOnImageAvailableListener(onImageAvailableListener, null);
        Log.d(TAG, "imageReader created");
    } catch (CameraAccessException e) {
        // Pass the throwable itself: keeps the stack trace and avoids handing a
        // possibly-null getMessage() to the logger.
        Log.e(TAG, "Failed to open camera", e);
    }
}
//Listens for frames and send them to be processed
protected ImageReader.OnImageAvailableListener onImageAvailableListener = new ImageReader.OnImageAvailableListener() {
#Override
public void onImageAvailable(ImageReader reader) {
Image image = null;
try {
image = reader.acquireLatestImage();
ByteBuffer buffer = image.getPlanes()[0].getBuffer();
byte[] frameData = new byte[buffer.capacity()];
buffer.get(frameData);
// Native process (see below)
processAndRotateFrame(frameData);
image.close();
} catch (Exception e) {
Logger.e(TAG, "imageReader exception: "+e.getMessage());
} finally {
if (image != null) {
image.close();
}
}
}
};
Native side (NDK or Cmake):
// JNI entry point: wraps the Java byte array as a single-channel 8-bit Mat of
// height x width, rotates it by `rotation` degrees, runs your_function() on it
// and returns that function's result. Returns 0 when the array elements cannot
// be obtained.
JNIEXPORT jint JNICALL com_android_mvf_Utils_ProccessAndRotateFrame
        (JNIEnv *env, jobject object, jint width, jint height, jbyteArray frame, jint rotation) {
    // load data from JAVA side
    jbyte *pFrameData = env->GetByteArrayElements(frame, 0);
    if (pFrameData == NULL) {
        // Elements could not be obtained; do not dereference.
        return 0;
    }
    // The Mat does not copy: it is a header over pFrameData. The third
    // constructor argument is a matrix TYPE, so use CV_8UC1 (8-bit, 1 channel)
    // rather than an imread flag.
    Mat mGray(height, width, CV_8UC1, (unsigned char *)pFrameData);
    // rotate image
    rotateMat(mGray, rotation);
    int objects = your_function(env, mGray);
    // Mode 0: copy back any changes and free the native buffer.
    env->ReleaseByteArrayElements(frame, pFrameData, 0);
    return objects;
}
// Rotates matImage in place by rotFlag degrees. Handles 90, 180 and 270 (or
// -90); any other value, including 0 and 360, leaves the matrix untouched.
void rotateMat(cv::Mat &matImage, int rotFlag) {
    switch (rotFlag) {
        case 90:
            // transpose + flip around the vertical axis
            cv::transpose(matImage, matImage);
            cv::flip(matImage, matImage, 1);
            break;
        case 270:
        case -90:
            // transpose + flip around the horizontal axis
            cv::transpose(matImage, matImage);
            cv::flip(matImage, matImage, 0);
            break;
        case 180:
            // flip around both axes
            cv::flip(matImage, matImage, -1);
            break;
        default:
            break;
    }
}
I am trying to encode an .h264 video by using MediaCodec and Camera (onPreviewFrame). I got stuck converting color space from YV12 (from camera) to COLOR_FormatYUV420SemiPlanar (needed by the encoder).
Edit: I noticed this can be a bug on MediaCodec since the following code works on other devices:
/**
 * Repacks a YV12 frame (Y plane, then V plane, then U plane) into a
 * semi-planar frame: Y plane followed by interleaved U,V pairs (NV12, which is
 * what COLOR_TI_FormatYUV420PackedSemiPlanar expects).
 *
 * @param input  YV12 source of at least width*height*3/2 bytes
 * @param output destination of at least width*height*3/2 bytes
 * @param width  frame width in pixels
 * @param height frame height in pixels
 * @return {@code output}
 */
public static byte[] YV12toYUV420PackedSemiPlanar(final byte[] input, final byte[] output, final int width, final int height) {
    final int lumaSize = width * height;
    final int chromaSize = lumaSize / 4;

    // Y plane is identical in both layouts.
    System.arraycopy(input, 0, output, 0, lumaSize);

    int vSrc = lumaSize;              // V plane comes first in YV12
    int uSrc = lumaSize + chromaSize; // U plane follows it
    int dst = lumaSize;
    for (int remaining = chromaSize; remaining > 0; remaining--) {
        output[dst++] = input[uSrc++]; // Cb (U)
        output[dst++] = input[vSrc++]; // Cr (V)
    }
    return output;
}
This is the result I get (seems like color bits have some offset):
Edit 2: Frame size is 1280x720, device is Samsung s5(SM-G900V) with OMX.qcom.video.encoder.avc running Android Lollipop 5.0 (API 21).
Note: I know about COLOR_FormatSurface but I need to make this work on API 16.
If this is running on a Qualcomm device prior to Android 4.3, you need to align the start of the U/V plane to a 2048 byte boundary. Something like this might work:
/**
 * Repacks a YV12 frame (Y, then V plane, then U plane) into Y followed by
 * interleaved U,V pairs, with the chroma data starting at the luma size rounded
 * up to the next multiple of 2048 bytes.
 *
 * @param input  YV12 source of at least width*height*3/2 bytes
 * @param output destination; must be large enough for the aligned chroma offset
 *               plus width*height/2 bytes
 * @param width  frame width in pixels
 * @param height frame height in pixels
 * @return {@code output}
 */
public static byte[] YV12toYUV420PackedSemiPlanar(final byte[] input, final byte[] output, final int width, final int height) {
    final int lumaSize = width * height;
    final int chromaStart = ((lumaSize + 2047) / 2048) * 2048;
    final int chromaSize = lumaSize / 4;

    // Y plane is copied unchanged to the start of the output.
    System.arraycopy(input, 0, output, 0, lumaSize);

    int vSrc = lumaSize;              // V plane comes first in YV12
    int uSrc = lumaSize + chromaSize; // U plane follows it
    int dst = chromaStart;
    for (int remaining = chromaSize; remaining > 0; remaining--) {
        output[dst++] = input[uSrc++]; // Cb (U)
        output[dst++] = input[vSrc++]; // Cr (V)
    }
    return output;
}
This is a pretty well-known issue; prior to Android 4.3, the input formats to encoders weren't really tested strictly, so encoders could basically do whatever they wanted. (Beware, Samsung's encoders will behave even worse.) See https://code.google.com/p/android/issues/detail?id=37769 for a collection of other known issues.
You can try this
/**
 * Stride-aware variant: copies the Y rows unchanged and interleaves the two
 * chroma planes of the input into U,V pairs placed right after ySize bytes.
 *
 * Uses yStride, ySize, cStride, cSize, halfWidth and halfHeight, which are
 * defined outside this method.
 *
 * @return {@code output}
 */
public byte[] YV12toYUV420PackedSemiPlanar(final byte[] input, final byte[] output, final int width, final int height)
{
    // Y: one stride-long row at a time, same offset in source and destination.
    for (int row = 0; row < height; row++) {
        final int rowOffset = yStride * row;
        System.arraycopy(input, rowOffset, output, rowOffset, yStride);
    }

    for (int cRow = 0; cRow < halfHeight; cRow++) {
        for (int cCol = 0; cCol < halfWidth; cCol++) {
            final int dst = ySize + (cRow * halfWidth + cCol) * 2;
            final int srcInPlane = cRow * cStride + cCol;
            output[dst] = input[ySize + cSize + srcInPlane]; // Cb (U)
            output[dst + 1] = input[ySize + srcInPlane];     // Cr (V)
        }
    }
    return output;
}
I set up an int array of length 8 to store the binary digits of a number converted to a string. X ranges from 0 to 255. If X is less than 128 (7 bits or fewer), the conversion does not emit the leading 0 bits, so I preset the array to all 0's and expected the next loop to overwrite only the digits that are present. The code compiles, but the array does not contain the expected bits for the value of x.
int x = 10;
string = Integer.toBinaryString(x);
int[] array = new int[8];
// Preset the first seven slots to 0.
for (int slot = 6; slot >= 0; slot--) {
    array[slot] = 0;
}
// Copy each binary digit into the slot with the same index as its position
// in the string (digits are written starting at index 0).
for (int pos = 0; pos < string.length(); pos++) {
    array[pos] = Integer.parseInt(string.substring(pos, pos + 1));
}
// Log all eight slots as one run of digits.
StringBuilder digits = new StringBuilder("Data ");
for (int slot = 0; slot < 8; slot++) {
    digits.append(array[slot]);
}
Log.d("TAG", digits.toString());
int x = 10;
String s = Integer.toBinaryString(x);
// A new int[] is already filled with zeros, so no clearing loop is needed.
int[] array = new int[8];
// The string may be shorter than the array; shifting by the difference
// right-aligns the digits so the leading slots keep their zeros.
int offset = array.length - s.length();
for (int pos = 0; pos < s.length(); pos++) {
    String digit = s.substring(pos, pos + 1);
    array[offset + pos] = Integer.parseInt(digit);
}
This will produce the following array for x = 10:
[0, 0, 0, 0, 1, 0, 1, 0]
I'm new in Android development. I'm looking for any method that applies pitch shifting to output sound (in real-time). But I couldn't find any point to start.
I've found this topic but I still don't know how can I apply this.
Any suggestions?
In general, the algorithm is called a phase vocoder -- searching for that on the Internets should get you started.
There are a few open source phase vocoders out there, you should be able to use those for reference too.
You can do phase vocoder in real-time -- the main component used is the FFT, so you'll need a fast FFT. The Android libraries can do this for you, see this documentation: http://developer.android.com/reference/android/media/audiofx/Visualizer.html
As it happens, I'm about to release an open source FFT for ARM that is faster than Apple's vDSP library (which was hitherto the fastest). I'll post back in a few days when I've uploaded it to github.com.
Good luck.
There is no built-in pitch shifting algorithm in the Android SDK. You have to code your own. Pitch shifting is a real hardcore DSP algorithm; good sounding algorithms are results of many months or rather years of development...
I personally do not know any Java implementation so I suggest you to adopt some of the free C++ PS algorithms, the best one - which I use in my audio applications, is SoundTouch:
http://www.surina.net/soundtouch/
I played with its code a little and it seems it would not be too much complicated to rewrite it in Java.
HOME URL: http://www.dspdimension.com
/**
 * STFT-based pitch shifter: frames of the input are windowed, transformed,
 * have their per-bin magnitude/frequency moved to new bins, and are
 * re-synthesised by overlap-add (see the ANALYSIS / PROCESSING / SYNTHESIS
 * sections in PitchShift).
 *
 * All working buffers and the FIFO position are static fields, so state is
 * carried over between calls and shared by every caller; the class contains no
 * synchronisation.
 */
public class AudioPitch{
//region Private Static Members
// Upper bound for fftFrameSize: every working buffer below is sized from it.
private static int MAX_FRAME_LENGTH = 8192;
private static double M_PI = 3.14159265358979323846;
// Input / output sample FIFOs.
private static float[] gInFIFO = new float[MAX_FRAME_LENGTH];
private static float[] gOutFIFO = new float[MAX_FRAME_LENGTH];
// Interleaved (re, im) FFT work buffer.
private static float[] gFFTworksp = new float[2 * MAX_FRAME_LENGTH];
// Per-bin phase of the previous frame (analysis) and accumulated phase (synthesis).
private static float[] gLastPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
private static float[] gSumPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
// Overlap-add accumulator for the synthesised frames.
private static float[] gOutputAccum = new float[2 * MAX_FRAME_LENGTH];
// Per-bin frequency / magnitude from analysis and for synthesis.
private static float[] gAnaFreq = new float[MAX_FRAME_LENGTH];
private static float[] gAnaMagn = new float[MAX_FRAME_LENGTH];
private static float[] gSynFreq = new float[MAX_FRAME_LENGTH];
private static float[] gSynMagn = new float[MAX_FRAME_LENGTH];
// Current write position in gInFIFO; persists across calls.
private static long gRover;
//endregion
/**
 * Pitch-shifts numSampsToProcess samples of indata in place (outdata is an
 * alias of indata).
 *
 * @param pitchShift        factor applied to bin indices and frequencies
 * @param numSampsToProcess number of samples of indata to process
 * @param fftFrameSize      frame length; it indexes buffers of length
 *                          MAX_FRAME_LENGTH, so it must not exceed that
 *                          (presumably a power of two - TODO confirm)
 * @param osamp             overlap factor; the step size is fftFrameSize / osamp
 * @param sampleRate        sample rate used to derive the frequency per bin
 * @param indata            input samples; overwritten with the output
 */
public static void PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize/*(long)2048*/, long osamp/*(long)10*/, float sampleRate, float[] indata)
{
double magn, phase, tmp, window, real, imag;
double freqPerBin, expct;
long i, k, qpd, index, inFifoLatency, stepSize, fftFrameSize2;
// Output is written into the same array the input is read from.
float[] outdata = indata;
/* set up some handy variables */
fftFrameSize2 = fftFrameSize / 2;
stepSize = fftFrameSize / osamp;
freqPerBin = sampleRate / (double)fftFrameSize;
expct = 2.0 * M_PI * (double)stepSize / (double)fftFrameSize;
inFifoLatency = fftFrameSize - stepSize;
// gRover == 0 is treated as "not started yet".
if (gRover == 0) gRover = inFifoLatency;
/* main processing loop */
for (i = 0; i < numSampsToProcess; i++)
{
/* As long as we have not yet collected enough data just read in */
gInFIFO[(int) gRover] = indata[(int) i];
outdata[(int) i] = gOutFIFO[(int) (gRover - inFifoLatency)];
gRover++;
/* now we have enough data for processing */
if (gRover >= fftFrameSize)
{
gRover = inFifoLatency;
/* do windowing and re,im interleave */
for (k = 0; k < fftFrameSize; k++)
{
window = -.5 * Math.cos(2.0 * M_PI * (double)k / (double)fftFrameSize) + .5;
gFFTworksp[(int) (2 * k)] = (float)(gInFIFO[(int) k] * window);
gFFTworksp[(int) (2 * k + 1)] = 0.0F;
}
/* ***************** ANALYSIS ******************* */
/* do transform */
ShortTimeFourierTransform(gFFTworksp, fftFrameSize, -1);
/* this is the analysis step */
for (k = 0; k <= fftFrameSize2; k++)
{
/* de-interlace FFT buffer */
real = gFFTworksp[(int) (2 * k)];
imag = gFFTworksp[(int) (2 * k + 1)];
/* compute magnitude and phase */
magn = 2.0 * Math.sqrt(real * real + imag * imag);
phase = smbAtan2(imag, real);
/* compute phase difference */
tmp = phase - gLastPhase[(int) k];
gLastPhase[(int) k] = (float)phase;
/* subtract expected phase difference */
tmp -= (double)k * expct;
/* map delta phase into +/- Pi interval */
qpd = (long)(tmp / M_PI);
if (qpd >= 0) qpd += qpd & 1;
else qpd -= qpd & 1;
tmp -= M_PI * (double)qpd;
/* get deviation from bin frequency from the +/- Pi interval */
tmp = osamp * tmp / (2.0 * M_PI);
/* compute the k-th partials' true frequency */
tmp = (double)k * freqPerBin + tmp * freqPerBin;
/* store magnitude and true frequency in analysis arrays */
gAnaMagn[(int) k] = (float)magn;
gAnaFreq[(int) k] = (float)tmp;
}
/* ***************** PROCESSING ******************* */
/* this does the actual pitch shifting */
// Clear the synthesis arrays before accumulating into them.
for (int zero = 0; zero < fftFrameSize; zero++)
{
gSynMagn[zero] = 0;
gSynFreq[zero] = 0;
}
for (k = 0; k <= fftFrameSize2; k++)
{
// Target bin for source bin k; bins mapped beyond fftFrameSize2 are dropped.
index = (long)(k * pitchShift);
if (index <= fftFrameSize2)
{
gSynMagn[(int) index] += gAnaMagn[(int) k];
gSynFreq[(int) index] = gAnaFreq[(int) k] * pitchShift;
}
}
/* ***************** SYNTHESIS ******************* */
/* this is the synthesis step */
for (k = 0; k <= fftFrameSize2; k++)
{
/* get magnitude and true frequency from synthesis arrays */
magn = gSynMagn[(int) k];
tmp = gSynFreq[(int) k];
/* subtract bin mid frequency */
tmp -= (double)k * freqPerBin;
/* get bin deviation from freq deviation */
tmp /= freqPerBin;
/* take osamp into account */
tmp = 2.0 * M_PI * tmp / osamp;
/* add the overlap phase advance back in */
tmp += (double)k * expct;
/* accumulate delta phase to get bin phase */
gSumPhase[(int) k] += (float)tmp;
phase = gSumPhase[(int) k];
/* get real and imag part and re-interleave */
gFFTworksp[(int) (2 * k)] = (float)(magn * Math.cos(phase));
gFFTworksp[(int) (2 * k + 1)] = (float)(magn * Math.sin(phase));
}
/* zero negative frequencies */
for (k = fftFrameSize + 2; k < 2 * fftFrameSize; k++) gFFTworksp[(int) k] = 0.0F;
/* do inverse transform */
ShortTimeFourierTransform(gFFTworksp, fftFrameSize, 1);
/* do windowing and add to output accumulator */
for (k = 0; k < fftFrameSize; k++)
{
window = -.5 * Math.cos(2.0 * M_PI * (double)k / (double)fftFrameSize) + .5;
gOutputAccum[(int) k] += (float)(2.0 * window * gFFTworksp[(int) (2 * k)] / (fftFrameSize2 * osamp));
}
// The first stepSize accumulated samples are finished: hand them to the output FIFO.
for (k = 0; k < stepSize; k++) gOutFIFO[(int) k] = gOutputAccum[(int) k];
/* shift accumulator */
// Java replacement for the C call:
//memmove(gOutputAccum, gOutputAccum + stepSize, fftFrameSize * sizeof(float));
for (k = 0; k < fftFrameSize; k++)
{
gOutputAccum[(int) k] = gOutputAccum[(int) (k + stepSize)];
}
/* move input FIFO */
for (k = 0; k < inFifoLatency; k++) gInFIFO[(int) k] = gInFIFO[(int) (k + stepSize)];
}
}
}
//endregion
//region Static Methods
/**
 * In-place transform of fftBuffer, which holds fftFrameSize complex values as
 * interleaved (re, im) floats. PitchShift calls it with sign -1 for the
 * analysis transform and sign 1 for the inverse transform.
 *
 * @param fftBuffer    interleaved complex data, at least 2 * fftFrameSize floats
 * @param fftFrameSize number of complex values
 * @param sign         sign applied to the sine term of the twiddle factor
 */
public static void ShortTimeFourierTransform(float[] fftBuffer, long fftFrameSize, long sign)
{
float wr, wi, arg, temp;
float tr, ti, ur, ui;
long i, bitm, j, le, le2, k;
// Reordering pass: j is built from the bits of i in reverse order, and the
// complex pair at i is swapped with the pair at j when i < j.
for (i = 2; i < 2 * fftFrameSize - 2; i += 2)
{
for (bitm = 2, j = 0; bitm < 2 * fftFrameSize; bitm <<= 1)
{
if ((i & bitm) != 0) j++;
j <<= 1;
}
if (i < j)
{
temp = fftBuffer[(int) i];
fftBuffer[(int) i] = fftBuffer[(int) j];
fftBuffer[(int) j] = temp;
temp = fftBuffer[(int) (i + 1)];
fftBuffer[(int) (i + 1)] = fftBuffer[(int) (j + 1)];
fftBuffer[(int) (j + 1)] = temp;
}
}
// Number of stages: log2(fftFrameSize), rounded to the nearest integer.
long max = (long)(Math.log(fftFrameSize) / Math.log(2.0) + .5);
for (k = 0, le = 2; k < max; k++)
{
le <<= 1;
le2 = le >> 1;
// (ur, ui) is the running twiddle factor, advanced by (wr, wi) per inner step.
ur = 1.0F;
ui = 0.0F;
arg = (float)M_PI / (le2 >> 1);
wr = (float)Math.cos(arg);
wi = (float)(sign * Math.sin(arg));
for (j = 0; j < le2; j += 2)
{
for (i = j; i < 2 * fftFrameSize; i += le)
{
// Butterfly between the pair at i and the pair at i + le2.
tr = fftBuffer[(int) (i + le2)] * ur - fftBuffer[(int) (i + le2 + 1)] * ui;
ti = fftBuffer[(int) (i + le2)] * ui + fftBuffer[(int) (i + le2 + 1)] * ur;
fftBuffer[(int) (i + le2)] = fftBuffer[(int) i] - tr;
fftBuffer[(int) (i + le2 + 1)] = fftBuffer[(int) (i + 1)] - ti;
fftBuffer[(int) i] += tr;
fftBuffer[(int) (i + 1)] += ti;
}
tr = ur * wr - ui * wi;
ui = ur * wi + ui * wr;
ur = tr;
}
}
}
//endregion
/**
 * atan2 wrapper with explicit special cases: returns 0 when x is 0, +/- Pi/2
 * (sign taken from x) when y is 0, and Math.atan2(x, y) otherwise.
 */
private static double smbAtan2(double x, double y)
{
double signx;
if (x > 0.) signx = 1.;
else signx = -1.;
if (x == 0.) return 0.;
if (y == 0.) return signx * M_PI / 2.;
return Math.atan2(x, y);
}
}
This code works too, but it uses a lot of CPU.
The pitchShift value should be between 0.5 and 2.0.
Call this class as shown below:
// Record -> pitch shift -> playback loop. Only the samples actually returned
// by read() are converted, processed and written.
int maxValueOFShort = 32768;
short [] buffer = new short[800];
float[] inData = new float[buffer.length];
while (audiorackIsRun)
{
    int m = recorder.read(buffer, 0, buffer.length);
    if (m < 0) {
        // Negative values are error codes, not sample counts: stop instead of
        // replaying stale buffer contents.
        break;
    }
    if (m == 0) {
        continue;
    }
    // 16-bit PCM -> float in [-1, 1)
    for (int n = 0; n < m; n++)
        inData[n] = buffer[n] / (float) maxValueOFShort;
    AudioPitch.PitchShift(1, m, 4096, 4, 44100, inData);
    // float -> 16-bit PCM, clamped so that values at or beyond full scale do
    // not wrap around when narrowed to short.
    for (int n = 0; n < m; n++) {
        float scaled = inData[n] * maxValueOFShort;
        if (scaled > Short.MAX_VALUE) {
            scaled = Short.MAX_VALUE;
        } else if (scaled < Short.MIN_VALUE) {
            scaled = Short.MIN_VALUE;
        }
        buffer[n] = (short) scaled;
    }
    player.write(buffer, 0, m);
}
I need to display a JPEG picture and convert it to YUV420SP. First I use SkBitmap to decode the JPEG and display it, then I use the code below to convert RGB565 to YUV420SP on Android. However, it takes 75 ms to convert a 640*480 RGB565 picture. Does anybody know a faster way to convert RGB565 to YUV420SP on Android, or a faster way to convert a JPEG file to YUV420SP on Android?
// Convert from RGB to YUV420
// Per-channel contribution tables for the RGB -> YUV conversion. Entry i holds
// coefficient * (i << 8), truncated to int; UBVR is shared by the B term of U
// and the R term of V (both use the coefficient 112).
int RGB2YUV_YR[256], RGB2YUV_YG[256], RGB2YUV_YB[256];
int RGB2YUV_UR[256], RGB2YUV_UG[256], RGB2YUV_UBVR[256];
int RGB2YUV_VG[256], RGB2YUV_VB[256];

//
// Fills the tables used for RGB to YUV420 conversion. Only the first call does
// any work; later calls return immediately.
//
void InitLookupTable()
{
    static bool hasInited = false;
    if (!hasInited) {
        hasInited = true;
        for (int level = 0; level < 256; ++level) {
            const int scaled = level << 8;
            RGB2YUV_YR[level] = (float) 65.481 * scaled;
            RGB2YUV_YG[level] = (float) 128.553 * scaled;
            RGB2YUV_YB[level] = (float) 24.966 * scaled;
            RGB2YUV_UR[level] = (float) 37.797 * scaled;
            RGB2YUV_UG[level] = (float) 74.203 * scaled;
            RGB2YUV_VG[level] = (float) 93.786 * scaled;
            RGB2YUV_VB[level] = (float) 18.214 * scaled;
            RGB2YUV_UBVR[level] = (float) 112 * scaled;
        }
    }
}
// Converts a w x h RGB565 image (bmp) into semi-planar YUV 4:2:0 (yuv):
// w*h luma bytes followed by interleaved chroma pairs, U at the even and V at
// the odd offsets. Chroma is the average of each 2x2 block of full-resolution
// U/V values. Timing of each phase is printed to stdout.
//
// Returns 1 on success, 0 when yuv is NULL or the scratch buffers are missing.
// NOTE(review): the 2x2 averaging reads a right and a lower neighbour for every
// sample, so w and h are presumably expected to be even - confirm with callers.
int ConvertRGB5652YUV420SP(int w, int h, unsigned char *bmp, unsigned char *yuv)
{
    unsigned char *u, *v, *y, *uu, *vv;
    unsigned char *pu1, *pu2, *pu3, *pu4;
    unsigned char *pv1, *pv2, *pv3, *pv4;
    unsigned char rValue = 0, gValue = 0, bValue = 0;
    uint16_t* bmpPtr;
    int i, j;
    printf("ConvertRGB5652YUV420SP begin,w=%d,h=%d,bmp=%p,yuv=%p\n", w, h, bmp, yuv);
    struct timeval tpstart,tpend;
    gettimeofday(&tpstart,NULL);
    InitLookupTable();
    gettimeofday(&tpend,NULL);
    float timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;
    printf("InitLookupTable used time=%f\n", timeuse);
    gettimeofday(&tpstart,NULL);

    // Reject a missing destination before allocating, so nothing can leak.
    if (yuv == NULL)
        return 0;
    // Full-resolution U and V scratch planes, subsampled further below.
    uu = new unsigned char[w * h];
    vv = new unsigned char[w * h];
    if (uu == NULL || vv == NULL) {
        delete[] uu;
        delete[] vv;
        return 0;
    }
    y = yuv;
    u = uu;
    v = vv;
    // Get r,g,b pointers from bmp image data....
    bmpPtr = (uint16_t*)bmp;
    //Get YUV values for rgb values...
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            uint16_t color = *bmpPtr;
            unsigned int r = (color>>11) & 0x1f;
            unsigned int g = (color>> 5) & 0x3f;
            unsigned int b = (color ) & 0x1f;
            // Expand 5/6/5-bit channels to 8 bits by replicating the top bits.
            rValue = (r<<3) | (r>>2);
            gValue = (g<<2) | (g>>4);
            bValue = (b<<3) | (b>>2);
            // 1048576 = 16 << 16 and 8388608 = 128 << 16: the Y and U/V offsets
            // in the tables' 16-bit fixed-point scale.
            *y++ = (RGB2YUV_YR[rValue] + RGB2YUV_YG[gValue] + RGB2YUV_YB[bValue] +
                    1048576) >> 16;
            *u++ = (-RGB2YUV_UR[rValue] - RGB2YUV_UG[gValue] + RGB2YUV_UBVR[bValue] +
                    8388608) >> 16;
            *v++ = (RGB2YUV_UBVR[rValue] - RGB2YUV_VG[gValue] - RGB2YUV_VB[bValue] +
                    8388608) >> 16;
            bmpPtr++;
        }
    }
    gettimeofday(&tpend,NULL);
    timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;
    printf("Get YUV values used time=%f\n", timeuse);
    gettimeofday(&tpstart,NULL);
    // Now sample the U & V to obtain YUV 4:2:0 format
    // Get the right pointers...
    u = yuv + w * h;
    v = u + 1;
    // For U: the four corners of the current 2x2 block
    pu1 = uu;
    pu2 = pu1 + 1;
    pu3 = pu1 + w;
    pu4 = pu3 + 1;
    // For V
    pv1 = vv;
    pv2 = pv1 + 1;
    pv3 = pv1 + w;
    pv4 = pv3 + 1;
    // Do sampling....
    for (i = 0; i < h; i += 2) {
        for (j = 0; j < w; j += 2) {
            *u = (*pu1 + *pu2 + *pu3 + *pu4) >> 2;
            u += 2;
            *v = (*pv1 + *pv2 + *pv3 + *pv4) >> 2;
            v += 2;
            pu1 += 2;
            pu2 += 2;
            pu3 += 2;
            pu4 += 2;
            pv1 += 2;
            pv2 += 2;
            pv3 += 2;
            pv4 += 2;
        }
        // Skip the second row of the block pair that was just consumed.
        pu1 += w;
        pu2 += w;
        pu3 += w;
        pu4 += w;
        pv1 += w;
        pv2 += w;
        pv3 += w;
        pv4 += w;
    }
    gettimeofday(&tpend,NULL);
    timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;
    printf("Do sampling used time=%f\n", timeuse);
    // Memory from new[] must be released with delete[].
    delete[] uu;
    delete[] vv;
    return 1;
}
// Benchmark driver: converts one all-zero 640x480 RGB565 frame and prints the
// elapsed wall-clock time in milliseconds.
int main(int argc, char **argv) {
    unsigned char bmp[640*480*2] = {0};
    unsigned char yuv[(640*480*3)/2] = {0};

    struct timeval started, finished;
    gettimeofday(&started, NULL);
    ConvertRGB5652YUV420SP(640, 480, bmp, yuv);
    gettimeofday(&finished, NULL);

    // Microseconds between the two timestamps, then scaled to milliseconds.
    float elapsedMs = 1000000*(finished.tv_sec-started.tv_sec)+finished.tv_usec-started.tv_usec;
    elapsedMs /= 1000;
    printf("ConvertARGB2YUV420SP used time=%f\n", elapsedMs);
    return 0;
}
output on android(armv6):
ConvertRGB5652YUV420SP begin,w=640,h=480,bmp=0xbe7314fc,yuv=0xbe7c74fc
InitLookupTable used time=0.383000
Get YUV values used time=61.394001
Do sampling used time=11.918000
ConvertARGB2YUV420SP used time=74.596001
cpu info:
$ cat /proc/cpuinfo
cat /proc/cpuinfo
Processor : ARMv6-compatible processor rev 5 (v6l)
BogoMIPS : 791.34
Features : swp half thumb fastmult vfp edsp java
CPU implementer : 0x41
CPU architecture: 6TEJ
CPU variant : 0x1
CPU part : 0xb36
CPU revision : 5
Hardware : IMAPX200
Revision : 0000
Serial : 0000000000000000
On ARMv7, use NEON. It will do the job in less than 1ms. (VGA)
If you are stuck with ARMv6, optimize it in ARM assembly. (about 8ms on VGA)
Use fixed-point arithmetic instead of the lookup tables. Get rid of them.
make two masks :
0x001f001f : mask1
0x003f003f : mask2
then load two pixels at once into a 32bit register (which is a lot faster than 16bit read)
and red, mask1, pixel, lsr #11
and grn, mask2, pixel, lsr #5
and blu, mask1, pixel
now you have three registers, each containing two values - one in the lower, and the other in the upper 16 bits.
smulxy instructions will do some miracles from here on. (16bit multiply)
Good luck.
PS : your lookup table isn't that good either. Why are they all in length of 256?
You could reduce them to 32 (r and b related) and 64 (g related) Which will increase the cache hit rate.
Probably that will just do for the targeted 40ms without resorting to assembly.
Yes, cache-misses are THAT painful.
I have found a faster way in skia, it runs about 40ms.
#include "SkColorPriv.h"
#include "SkBitmap.h"
#include "SkCanvas.h"
#include "SkStream.h"
using namespace android;
// taken from jcolor.c in libjpeg
// Fixed-point RGB -> YUV coefficients. Each C?? value is the decimal weight
// shown in its trailing comment scaled by 2^CSHIFT; results are shifted right
// by CSHIFT after the multiply-accumulate. The "#if 0" disables the 16-bit set,
// so the 8-bit set in the #else branch is the one that is compiled.
#if 0 // 16bit - precise but slow
#define CYR 19595 // 0.299
#define CYG 38470 // 0.587
#define CYB 7471 // 0.114
#define CUR -11059 // -0.16874
#define CUG -21709 // -0.33126
#define CUB 32768 // 0.5
#define CVR 32768 // 0.5
#define CVG -27439 // -0.41869
#define CVB -5329 // -0.08131
#define CSHIFT 16
#else // 8bit - fast, slightly less precise
#define CYR 77 // 0.299
#define CYG 150 // 0.587
#define CYB 29 // 0.114
#define CUR -43 // -0.16874
#define CUG -85 // -0.33126
#define CUB 128 // 0.5
#define CVR 128 // 0.5
#define CVG -107 // -0.41869
#define CVB -21 // -0.08131
#define CSHIFT 8
#endif
// Converts one 32-bit packed colour to Y, U, V and stores them in dst[0..2].
// U and V are offset by 128.
static void rgb2yuv_32(uint8_t dst[], SkPMColor c) {
    const int red = SkGetPackedR32(c);
    const int green = SkGetPackedG32(c);
    const int blue = SkGetPackedB32(c);

    const int luma = (CYR*red + CYG*green + CYB*blue) >> CSHIFT;
    const int cb = (CUR*red + CUG*green + CUB*blue) >> CSHIFT;
    const int cr = (CVR*red + CVG*green + CVB*blue) >> CSHIFT;

    dst[0] = SkToU8(luma);
    dst[1] = SkToU8(cb + 128);
    dst[2] = SkToU8(cr + 128);
}
// Converts one 32-bit packed colour and writes only the requested components:
// each of py / pu / pv is written when it is non-NULL and skipped otherwise.
// U and V are offset by 128.
static void rgb2yuv_32_x(uint8_t *py, uint8_t *pu, uint8_t *pv, SkPMColor c) {
    const int red = SkGetPackedR32(c);
    const int green = SkGetPackedG32(c);
    const int blue = SkGetPackedB32(c);

    if (py != NULL) {
        *py = SkToU8((CYR*red + CYG*green + CYB*blue) >> CSHIFT);
    }
    if (pu != NULL) {
        const int cb = (CUR*red + CUG*green + CUB*blue) >> CSHIFT;
        *pu = SkToU8(cb + 128);
    }
    if (pv != NULL) {
        const int cr = (CVR*red + CVG*green + CVB*blue) >> CSHIFT;
        *pv = SkToU8(cr + 128);
    }
}
// Converts one 4444 packed colour to Y, U, V and stores them in dst[0..2].
// The shift is reduced by 4 compared with the 32-bit variant; U and V are
// offset by 128.
static void rgb2yuv_4444(uint8_t dst[], U16CPU c) {
    const int red = SkGetPackedR4444(c);
    const int green = SkGetPackedG4444(c);
    const int blue = SkGetPackedB4444(c);

    const int luma = (CYR*red + CYG*green + CYB*blue) >> (CSHIFT - 4);
    const int cb = (CUR*red + CUG*green + CUB*blue) >> (CSHIFT - 4);
    const int cr = (CVR*red + CVG*green + CVB*blue) >> (CSHIFT - 4);

    dst[0] = SkToU8(luma);
    dst[1] = SkToU8(cb + 128);
    dst[2] = SkToU8(cr + 128);
}
// Converts one 4444 packed colour and writes only the requested components:
// each of py / pu / pv is written when it is non-NULL and skipped otherwise.
// U and V are offset by 128.
static void rgb2yuv_4444_x(uint8_t *py, uint8_t *pu, uint8_t *pv, U16CPU c) {
    const int red = SkGetPackedR4444(c);
    const int green = SkGetPackedG4444(c);
    const int blue = SkGetPackedB4444(c);

    if (py != NULL) {
        *py = SkToU8((CYR*red + CYG*green + CYB*blue) >> (CSHIFT - 4));
    }
    if (pu != NULL) {
        const int cb = (CUR*red + CUG*green + CUB*blue) >> (CSHIFT - 4);
        *pu = SkToU8(cb + 128);
    }
    if (pv != NULL) {
        const int cr = (CVR*red + CVG*green + CVB*blue) >> (CSHIFT - 4);
        *pv = SkToU8(cr + 128);
    }
}
// Converts one 16-bit packed colour to Y, U, V and stores them in dst[0..2].
// The red and blue terms are doubled and the shift is reduced by 2; U and V
// are offset by 128.
static void rgb2yuv_16(uint8_t dst[], U16CPU c) {
    const int red = SkGetPackedR16(c);
    const int green = SkGetPackedG16(c);
    const int blue = SkGetPackedB16(c);

    const int luma = (2*CYR*red + CYG*green + 2*CYB*blue) >> (CSHIFT - 2);
    const int cb = (2*CUR*red + CUG*green + 2*CUB*blue) >> (CSHIFT - 2);
    const int cr = (2*CVR*red + CVG*green + 2*CVB*blue) >> (CSHIFT - 2);

    dst[0] = SkToU8(luma);
    dst[1] = SkToU8(cb + 128);
    dst[2] = SkToU8(cr + 128);
}
// Converts one 16-bit packed colour and writes only the requested components:
// each of py / pu / pv is written when it is non-NULL and skipped otherwise.
// The red and blue terms are doubled and the shift is reduced by 2; U and V
// are offset by 128.
static void rgb2yuv_16_x(uint8_t *py, uint8_t *pu, uint8_t *pv, U16CPU c) {
    const int red = SkGetPackedR16(c);
    const int green = SkGetPackedG16(c);
    const int blue = SkGetPackedB16(c);

    if (py != NULL) {
        *py = SkToU8((2*CYR*red + CYG*green + 2*CYB*blue) >> (CSHIFT - 2));
    }
    if (pu != NULL) {
        const int cb = (2*CUR*red + CUG*green + 2*CUB*blue) >> (CSHIFT - 2);
        *pu = SkToU8(cb + 128);
    }
    if (pv != NULL) {
        const int cr = (2*CVR*red + CVG*green + 2*CVB*blue) >> (CSHIFT - 2);
        *pv = SkToU8(cr + 128);
    }
}
// Converts an RGB565 SkBitmap into semi-planar YUV 4:2:0 written to dst:
// width*height luma bytes followed by interleaved chroma bytes. The first
// chroma byte of each pair is sampled from the even source rows (at even
// columns), the second from the odd source rows; no averaging is done.
//
// Returns 0 on success and -1 when bmp or dst is NULL, the bitmap is not
// kRGB_565_Config, or the bitmap has no pixel memory.
int ConvertRGB5652YUV420SPBySkia(SkBitmap* bmp, unsigned char* dst) {
    if(!bmp || !dst || bmp->getConfig() != SkBitmap::kRGB_565_Config)
        return -1;
    int width = bmp->width();
    int height = bmp->height();
    // Address rows through byte pointers; casting pointers to unsigned int
    // would truncate them on 64-bit targets.
    const uint8_t *src = (const uint8_t *)bmp->getPixels();
    if (src == NULL)
        return -1;
    int src_rowbytes = bmp->rowBytes();
    int stride = width;
    int dstheight = height;
    int i, j;
    uint8_t *y_base = (uint8_t *)dst;
    uint8_t *cb_base = y_base + stride * dstheight;
    uint8_t *cb = NULL, *cr = NULL;
    for(i=0; i<height; i++){
        const uint16_t *rgb = (const uint16_t *)(src + i * src_rowbytes);
        uint8_t *y = y_base + i * stride;
        if((i & 0x1) == 0){
            // Each pair of source rows shares one chroma row; cr is kept for
            // the following odd row.
            cb = cb_base + ((i>>1) * stride);
            cr = cb + 1;
        }
        for(j=0; j<width; j++){
            if(i & 0x1){// valid y and cr
                if(j & 0x01){ // only y
                    rgb2yuv_16_x(y++, NULL, NULL, *rgb++);
                }else{ // both y and cr
                    rgb2yuv_16_x(y++, NULL, cr, *rgb++);
                    cr += 2; // next interleaved pair
                }
            }else{// valid y and cb
                if(j & 0x01){ // only y
                    rgb2yuv_16_x(y++, NULL, NULL, *rgb++);
                }else{ // both y and cb
                    rgb2yuv_16_x(y++, cb, NULL, *rgb++);
                    cb += 2; // next interleaved pair
                }
            }
        }
    }
    return 0;
}