RenderScript one input- and two output-Allocations - android

I managed to write a Kernel that transforms an input-Bitmap to a float[] of Sobel gradients (two separate Kernels for SobelX and SobelY). I did this by assigning the input-Bitmap as a global variable, launching the Kernel over the Output allocation, and referencing the neighbors of the Input-Bitmap via rsGetElementAt. Since I actually want to calculate the Magnitude (hypot(Sx,Sy)) AND the Direction (atan2(Sy,Sx)), it would be nice to do the whole thing in one Kernel-pass. If I only had to calculate the Magnitude Array, this could be done with the same structure (1 input Bitmap, 1 Output float[]). Now I wonder whether it is possible to just add an additional Allocation for the Direction Output (also a float[]). I tried this with the rs-function rsSetElementAt() as follows:
#pragma version(1)
#pragma rs java_package_name(com.example.xxx)
#pragma rs_fp_relaxed
rs_allocation gIn, direction;
int32_t width;
int32_t height;
// Sobel gradient: magnitude and direction in a single pass.
//
// Launched over the magnitude output allocation; (x, y) is the element being
// produced. The kernel reads the 3x3 neighbourhood of (x, y) from the script
// global gIn (red channel only), writes the gradient angle atan2(outY, outX)
// into the script global `direction` at (x, y), and returns the gradient
// magnitude hypot(outX, outY) as the output element.
//
// The script globals gIn, direction, width and height are read here and are
// expected to be assigned by the host before the kernel is launched.
//
// Pixels on the image border have no complete 3x3 neighbourhood; for those
// both outputs are set to 0 so that every path returns a defined value and
// every element of `direction` is written.
float __attribute__((kernel)) sobel_XY(uint32_t x, uint32_t y) {
    // Border guard (negation of: x>0 && y>0 && x<width-1 && y<height-1).
    if (x == 0 || y == 0 || x >= (width - 1) || y >= (height - 1)) {
        rsSetElementAt_float(direction, 0.0f, x, y);
        return 0.0f;
    }

    // Neighbourhood, named fRC by the first/second index used in the
    // original code: first index follows x (1 = x-1, 3 = x+1), second
    // index follows y (1 = y-1, 3 = y+1).
    float4 f11 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x - 1, y - 1));
    float4 f12 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x - 1, y));
    float4 f13 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x - 1, y + 1));
    float4 f21 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x, y - 1));
    float4 f23 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x, y + 1));
    float4 f31 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x + 1, y - 1));
    float4 f32 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x + 1, y));
    float4 f33 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x + 1, y + 1));

    // Sobel responses along the two axes, computed on the red channel.
    float outX = f11.r - f31.r + 2 * (f12.r - f32.r) + f13.r - f33.r;
    float outY = f11.r - f13.r + 2 * (f21.r - f23.r) + f31.r - f33.r;

    // Second output: gradient direction goes to the global allocation.
    rsSetElementAt_float(direction, atan2(outY, outX), x, y);

    // First output: gradient magnitude is the kernel's return value.
    return hypot(outX, outY);
}
And the corresponding Java code:
// Host-side setup for the sobel script: binds the input bitmap, creates the
// two float outputs, launches the kernel and copies the results back.
ScriptC_sobel script;
script=new ScriptC_sobel(rs);
// The input image is handed to the script through the global gIn.
script.set_gIn(Allocation.createFromBitmap(rs, bmpGray));
// Magnitude output: one F32 element per pixel (width x height).
Type.Builder TypeOut = new Type.Builder(rs, Element.F32(rs));
TypeOut.setX(width).setY(height);
Allocation outAllocation = Allocation.createTyped(rs, TypeOut.create());
// Direction output: a second F32 allocation with the same dimensions.
Type.Builder TypeDir = new Type.Builder(rs, Element.F32(rs));
TypeDir.setX(width).setY(height);
Allocation dirAllocation = Allocation.createTyped(rs, TypeDir.create());
// NOTE(review): in this snippet dirAllocation is never passed to the script,
// and no call assigning the script globals width/height is shown either.
// The kernel is launched over the magnitude allocation only.
script.forEach_sobel_XY(outAllocation);
// Copy both results back; gm and gd are declared outside this snippet.
outAllocation.copyTo(gm) ;
dirAllocation.copyTo(gd);
Unfortunately this does not work. I am not sure whether the problem is with the structural logic of the rs-kernel, or whether it is because I cannot use a second Type.Builder assignment within the Java code (because the kernel is already tied to the Magnitude Output-allocation). Any help is highly appreciated.
PS: I see that there is no link between the second Type.Builder assignment and the "direction" allocation in rs - but how can this be achieved?

The outAllocation is passed as a parameter to the kernel. But the existence and location of dirAllocation also has to be communicated to the Renderscript side. Do this just before starting the script:
script.set_direction(dirAllocation);
Also, read about memory allocation in Renderscript.

Related

Optimizing renderscript summation of row and column cells

Any advice on optimizing the following code? The code first grayscales, inverts and then thresholds the image (code not included, because it is trivial). It then sums the elements of each row and column (all elements are either 1 or 0). It then finds the row and column index of the row and column with the highest value.
The code is supposed to find the centroid of the image and it works, but I want to make it faster.
I'm developing for API 23, so a reduction kernel cannot be used.
Java snippet:
// Host-side arrays that receive the contents of the two index allocations.
private int[] sumValueY = new int[640];
private int[] sumValueX = new int[480];
// One I32 element per image row (height) and one per image column (width);
// the two kernels below are launched over these allocations.
rows_indices_alloc = Allocation.createSized( rs, Element.I32(rs), height, Allocation.USAGE_SCRIPT);
col_indices_alloc = Allocation.createSized( rs, Element.I32(rs), width, Allocation.USAGE_SCRIPT);
// Excerpt of the processor: binds the input image, runs both kernels and
// copies the allocations back into the Java arrays.
public RenderscriptProcessor(RenderScript rs, int width, int height)
{
// Image the kernels read through the script global gIn.
mScript.set_gIn(mIntermAllocation);
// detectX is launched once per row element, detectY once per column element.
mScript.forEach_detectX(rows_indices_alloc);
mScript.forEach_detectY(col_indices_alloc);
// NOTE(review): the kernels shown in the .rs snippet do not write to these
// allocations, so what is copied back here is not the computed sums.
rows_indices_alloc.copyTo(sumValueX);
col_indices_alloc.copyTo(sumValueY);
}
Renderscript.rs snippet:
#pragma version(1)
#pragma rs java_package_name(org.gearvrf.renderscript)
#include "rs_debug.rsh"
#pragma rs_fp_relaxed
const int mImageWidth=640;
const int mImageHeight=480;
int32_t maxsX=-1;
int32_t maxIndexX;
int32_t maxsY=-1;
int32_t maxIndexY;
rs_allocation gIn;
// Sums the red channel over one image row and tracks the row with the
// largest sum in the script globals maxsX / maxIndexX.
//
// x selects the row: the loop reads gIn at (i, x) for i in [0, mImageWidth).
// v_in and y are not used.
void detectX(int32_t v_in, int32_t x, int32_t y) {
int32_t sum=0;
for ( int i = 0; i < (mImageWidth); i++) {
// Unpack to float, then truncate the red channel back to int.
float4 f4 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, i, x));
sum+=(int)f4.r;
}
// NOTE(review): this compare-and-store on script globals is unsynchronised;
// if invocations run in parallel the recorded maximum may be wrong.
if((sum>maxsX)){
maxsX=sum;
maxIndexX = x;
}
}
// Sums the red channel over one image column and tracks the column with the
// largest sum in the script globals maxsY / maxIndexY.
//
// x selects the column: the loop reads gIn at (x, i) for i in
// [0, mImageHeight). v_in and y are not used.
void detectY(int32_t v_in, int32_t x, int32_t y) {
int32_t sum=0;
for ( int i = 0; i < (mImageHeight); i++) {
// Unpack to float, then truncate the red channel back to int.
float4 f4 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x, i));
sum+=(int)f4.r;
}
// NOTE(review): this compare-and-store on script globals is unsynchronised;
// if invocations run in parallel the recorded maximum may be wrong.
if((sum>maxsY)){
maxsY=sum;
maxIndexY = x;
}
}
Any help would be appreciated
float4 f4 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x, i));
sum+=(int)f4.r;
This converts from int to float and then back to int again. I think you can simplify by just doing this:
sum += rsGetElementAt_uchar4(gIn, x, i).r;
I don't know exactly how your previous stages work because you haven't posted them, but you should try generating packed values to read here. So either put your grayscale channels in .rgba or use a single channel format and then use rsAllocationVLoad_uchar4 to fetch 4 values at once.
Also, try combining previous stages with this one, if you don't need the intermediate results of those calculations it may be cheaper to do the memory load once and then do those transformations in registers.
You might also play with how many values your threads operate on. You could try having each kernel processing width/2, width/4, width/8 elements and see how they perform. This will give GPUs more threads to play with especially on lower-resolution images but with the trade off of having more reduction steps.
You also have a multiple-writers race condition on the maxsX/maxsY and maxIndexX/maxIndexY variables. All those writes need to use atomics if you care about the exact right answer. I think maybe you posted the wrong code because you don't store to the *_indices_alloc but you copy from them at the end. So, actually you should store all the sums to those and then use either a single threaded function or a kernel with atomics to get the absolute max and max index.

Can I set input and output allocations on Renderscript to be of different sizes/dimensions?

Background
I'm trying to learn Renderscript, so I wish to try to do some simple operations that I think about.
The problem
I thought of rotating a bitmap, which is something that's simple enough to manage.
on C/C++, it's a simple thing to do (search for "jniRotateBitmapCw90") :
https://github.com/AndroidDeveloperLB/AndroidJniBitmapOperations/blob/master/JniBitmapOperationsLibrary/jni/JniBitmapOperationsLibrary.cpp
Thing is, when I try this on Renderscript, I get this error:
android.support.v8.renderscript.RSRuntimeException: Dimension mismatch
between parameters ain and aout!
Here's what I do:
RS:
void rotate90CW(const uchar4 *in, uchar4 *out, uint32_t x, uint32_t y) {
// XY. ..X ... ...
// ...>..Y>...>Y..
// ... ... .YX X..
out[...]=in[...] ...
}
Java:
mRenderScript = RenderScript.create(this);
mInBitmap = BitmapFactory.decodeResource(getResources(), R.drawable.sample_photo);
mOutBitmap = Bitmap.createBitmap(mInBitmap.getHeight(), mInBitmap.getWidth(), mInBitmap.getConfig());
final Allocation input = Allocation.createFromBitmap(mRenderScript, mInBitmap, Allocation.MipmapControl.MIPMAP_NONE, Allocation.USAGE_SCRIPT);
final Allocation output = Allocation.createFromBitmap(mRenderScript, mOutBitmap, Allocation.MipmapControl.MIPMAP_NONE, Allocation.USAGE_SCRIPT);
ScriptC_test script = new ScriptC_test(mRenderScript, getResources(), R.raw.test);
...
script.forEach_rotate90CW(input, output);
output.copyTo(mOutBitmap);
Even when I do set both allocations to be of the same size (squared bitmap), and I just set the output to be the input:
out[width * y + x] = in[width * y+x];
then what I get is a bitmap with holes... How come?
This is what I get:
The questions
Does this mean I can't do this kind of operation?
Does it mean that I can't use allocations of various sizes/dimensions?
Is it possible to overcome this issue (and still use Renderscript, of course) ? If so, how?
Maybe I could add an array variable inside the RS side, and set the allocation to it, instead?
Why do I get holes in the bitmap, for the case of a square input&output?
EDIT:This is my current code:
RS
rs_allocation *in;
uchar4 attribute((kernel)) rotate90CW(uint32_t x, uint32_t y){
// XY. ..X ... ...
// ...>..Y>...>Y..
// ... ... .YX X..
uchar4 curIn =rsGetElementAt_uchar4(in, 0, 0);
return curIn; //just for testing...
}
Java:
// Host-side driver for the rotate90CW test kernel.
mRenderScript = RenderScript.create(this);
mInBitmap = BitmapFactory.decodeResource(getResources(), R.drawable.sample_photo);
// Output bitmap has width and height swapped relative to the input.
mOutBitmap = Bitmap.createBitmap(mInBitmap.getHeight(), mInBitmap.getWidth(), mInBitmap.getConfig());
final Allocation input = Allocation.createFromBitmap(mRenderScript, mInBitmap, Allocation.MipmapControl.MIPMAP_NONE, Allocation.USAGE_SCRIPT);
final Allocation output = Allocation.createFromBitmap(mRenderScript, mOutBitmap, Allocation.MipmapControl.MIPMAP_NONE, Allocation.USAGE_SCRIPT);
ScriptC_test script = new ScriptC_test(mRenderScript, getResources(), R.raw.test);
// The input is attached to the script global `in` instead of being passed
// as a kernel argument; only the output allocation drives the launch.
script.bind_in(input);
script.forEach_rotate90CW(output);
// Copy the result into the bitmap and display it.
output.copyTo(mOutBitmap);
mImageView.setImageBitmap(mOutBitmap);
Here goes:
Does this mean I can't do this kind of operation?
No, not really. You just have to craft things correctly.
Does it mean that I can't use allocations of various sizes/dimensions?
No, but it does mean you can't use different size allocations in the way you currently are doing things. The default kernel in/out mechanism expects the input and output sizes to match so it can iterate over all of the elements correctly. If you need something different, it's up to you to manage it. More on that below.
Is it possible to overcome this issue...how?
The easiest solution would be to create an Allocation for input and bind it to the renderscript instance rather than pass it as a parameter. Then your RS would only need an output allocation (and your kernel only take output, x and y). From there you can determine which coordinate within the input allocation you want and place it directly into the output location:
int inX = ...;
int inY = ...;
uchar4 curIn = rsGetElementAt_uchar4(inAlloc, inX, inY);
*out = curIn;
Why do I get holes in the bitmap, for the case of a square input&output?
It's because you cannot use the x and y parameters to offset into the input and output allocation. Those in/out parameters are already pointing to the correct (same) location in both the input and output. The indexing you're doing is unnecessary and not really supported. Each time your kernel is called, it is being called for 1 element location within the allocation. This is why the input and output sizes must be the same when provided as parameters.
This should solve your problem
RS
rs_allocation *in;
uchar4 attribute((kernel)) rotate90CW(uint32_t x, uint32_t y){
...
uchar4 curIn =rsGetElementAt_uchar4(in, x, y);
return curIn;
}

Renderscript: 3D lookup table to convert an RGB triplet to a byte value

I load a 3D lookup map which associates triplets of RGB byte values to a single byte value. I define my allocation like this:
// Builds a 256 x 256 x 256 allocation of unsigned bytes, so that it can be
// indexed by (r, g, b), and hands it to the script as the global gLookup.
Type.Builder tbLookup = new Type.Builder(rs, Element.U8(rs));
tbLookup.setX(256);
tbLookup.setY(256);
tbLookup.setZ(256);
tbLookup.setMipmaps(false);
tbLookup.setFaces(false);
// NOTE(review): the allocation is created with USAGE_GRAPHICS_CONSTANTS
// although it is read from a compute kernel - confirm the intended usage flag.
lookup = Allocation.createTyped(rs, tbLookup.create(), Allocation.MipmapControl.MIPMAP_NONE, Allocation.USAGE_GRAPHICS_CONSTANTS);
// One table entry per possible RGB triplet.
int ncolors = 256*256*256;
// NOTE(review): as shown, the array is never filled, so the table uploaded
// here is all zeros; presumably the real data load was omitted from the snippet.
byte[] sampledata = new byte[ncolors];
lookup.copyFrom(sampledata);
script.set_gLookup(lookup); //global variable gLookup in renderscript
Then I define my kernel in renderscript:
rs_allocation gLookup;
// Looks up the byte stored in the 3D table gLookup at (in.r, in.g, in.b) and
// returns a pixel whose red, green and blue channels all carry that byte.
// The alpha channel of the input pixel is passed through unchanged.
uchar4 __attribute__((kernel)) lookItUp(const uchar4 in, uint32_t x, uint32_t y)
{
    const uchar mapped = rsGetElementAt_uchar(gLookup, in.r, in.g, in.b);

    uchar4 result;
    result.r = mapped;
    result.g = mapped;
    result.b = mapped;
    result.a = in.a;
    return result;
}
This doesn't work, it outputs zero values (black image) and works extremely slow. If I don't do the rsGetElementAt_uchar, then it works fast (I can assign a fixed value and it's okay). So I must be doing something wrong with the lookup table type. Any clue?
Thank you!
P.S: A 3d lookup table is not a crazy idea, there already is an Intrinsic function for converting RGB to RGBA by using a 3D lookup table. But I need my own lookup table.
That should be USAGE_SCRIPT. I don't know what USAGE_GRAPHICS_CONSTANTS will do there, but it's definitely not what you want.

How to use RenderScript with multiple input allocations?

Recently, I found render script is a better choice for image processing on Android. The performance is wonderful. But there are not many documents on it. I am wondering if I can merge multiple photos into a result photo by render script.
http://developer.android.com/guide/topics/renderscript/compute.html says:
A kernel may have an input Allocation, an output Allocation, or both. A kernel may not have more than one input or one output Allocation. If more than one input or output is required, those objects should be bound to rs_allocation script globals and accessed from a kernel or invokable function via rsGetElementAt_type() or rsSetElementAt_type().
Is there any code example for this issue?
For the kernel with multiple inputs you would have to manually handle additional inputs.
Let's say you want 2 inputs.
example.rs:
rs_allocation extra_alloc;
uchar4 __attribute__((kernel)) kernel(uchar4 i1, uint32_t x, uint32_t y)
{
// Manually getting current element from the extra input
uchar4 i2 = rsGetElementAt_uchar4(extra_alloc, x, y);
// Now process i1 and i2 and generate out
uchar4 out = ...;
return out;
}
Java:
Bitmap bitmapIn = ...;
Bitmap bitmapInExtra = ...;
Bitmap bitmapOut = Bitmap.createBitmap(bitmapIn.getWidth(),
bitmapIn.getHeight(), bitmapIn.getConfig());
RenderScript rs = RenderScript.create(this);
ScriptC_example script = new ScriptC_example(rs);
Allocation inAllocation = Allocation.createFromBitmap(rs, bitmapIn);
Allocation inAllocationExtra = Allocation.createFromBitmap(rs, bitmapInExtra);
Allocation outAllocation = Allocation.createFromBitmap(rs, bitmapOut);
// Execute this kernel on two inputs
script.set_extra_alloc(inAllocationExtra);
script.forEach_kernel(inAllocation, outAllocation);
// Get the data back into bitmap
outAllocation.copyTo(bitmapOut);
you want to do something like
rs_allocation input1;
rs_allocation input2;
uchar4 __attribute__((kernel)) kernel() {
... // body of kernel goes here
uchar4 out = ...;
return out;
}
Call set_input1 and set_input2 from your Java code to set those to the appropriate Allocations, then call forEach_kernel with your output Allocation.
This is how you do it :
in the .rs file :
uchar4 RS_KERNEL myKernel(float4 in1, int in2, uint32_t x, uint32_t y)
{
//My code
}
in java :
myScript.forEach_myKernel(allocationInput1, allocationInput2, allocationOutput);
uchar4, float4, and int are used as example. It works for me, you can add more than 2 inputs.

Create image with dots in android [duplicate]

This question already has answers here:
Can I convert an image into a grid of dots?
(3 answers)
Closed 10 years ago.
I would like to create something similar to this question Can I convert an image into a grid of dots? but I cannot find any answer for my problem. The basic idea is to load a picture from the phone and apply this grid of dots. I would appreciate any suggestions to this.
As others may suggest, your problem can also be solved using a fragment shader in OpenGL Shading Language (GLSL). GLSL might require painful setup.
Here is my solution using Android Renderscript (a lot like GLSL, but specifically designed for Android. It isn't used much). First, setup the Renderscript > Hello Compute sample from inside the official Android SDK samples. Next, replace mono.rs with the following:
#pragma version(1)
#pragma rs java_package_name(com.android.example.hellocompute)
rs_allocation gIn;
rs_allocation gOut;
rs_script gScript;
static int mImageWidth;
const uchar4 *gPixels;
const float4 kBlack = {
0.0f, 0.0f, 0.0f, 1.0f
};
// There are two radius's for each circle for anti-aliasing reasons.
const static uint32_t radius = 15;
const static uint32_t smallerRadius = 13;
// Used so that we have smooth circle edges
// Smoothstep ramp between two thresholds (http://en.wikipedia.org/wiki/Smoothstep).
// Returns 0 for values below start_threshold, 1 for values above
// end_threshold, and t*t*(3 - 2t) in between, where t is the position of
// value within [start_threshold, end_threshold] rescaled to [0, 1].
static float smooth_step(float start_threshold, float end_threshold, float value) {
    // Outside the ramp: saturate. The lower bound is tested first.
    if (value < start_threshold || value > end_threshold) {
        return (value < start_threshold) ? 0.0f : 1.0f;
    }

    // Inside the ramp: normalise, then apply the cubic.
    const float t = (value - start_threshold) / (end_threshold - start_threshold);
    return t * t * (3.0f - 2.0f * t);
}
// Per-pixel kernel that renders the image as a grid of dots.
//
// The image is divided into square cells of side 2 * radius. Every pixel takes
// the colour of the input pixel at the centre of its cell, blended towards
// kBlack according to its distance from that centre: fully coloured inside
// smallerRadius, fully black beyond radius, with a smooth edge in between.
//
// u_x, u_y are the coordinates of the pixel being written to *v_out. v_in is
// not used; the input is read through the global gPixels using mImageWidth as
// the row stride. Both are expected to describe the same image as gIn.
void root(const uchar4 *v_in, uchar4 *v_out, uint32_t u_x, uint32_t u_y) {
    const uint32_t diameter = radius * 2;

    // Offset of this pixel from the centre of its cell, and its distance.
    const int32_t x = (int32_t)(u_x % diameter) - (int32_t)radius;
    const int32_t y = (int32_t)(u_y % diameter) - (int32_t)radius;
    const float dist = hypot((float)x, (float)y);

    // Centre of the cell this pixel belongs to.
    uint32_t center_x = u_x / diameter * diameter + radius;
    uint32_t center_y = u_y / diameter * diameter + radius;

    // When the image size is not a multiple of the cell size, the centre of
    // a partial cell on the right or bottom edge lies outside the image.
    // Clamp it to the last valid column/row so the read stays in bounds.
    const uint32_t last_x = rsAllocationGetDimX(gIn) - 1;
    const uint32_t last_y = rsAllocationGetDimY(gIn) - 1;
    if (center_x > last_x) {
        center_x = last_x;
    }
    if (center_y > last_y) {
        center_y = last_y;
    }

    const float4 centerColor = rsUnpackColor8888(gPixels[center_x + center_y * mImageWidth]);

    // 0 inside the inner radius, 1 outside the outer radius, smooth between.
    const float amount = smooth_step(smallerRadius, radius, dist);
    *v_out = rsPackColorTo8888(mix(centerColor, kBlack, amount));
}
// Invokable entry point: stores the X dimension of gIn in mImageWidth (used
// by root() as the row stride), then runs gScript over gIn with gOut as the
// output allocation.
void filter() {
mImageWidth = rsAllocationGetDimX(gIn);
rsForEach(gScript, gIn, gOut); // You may need a fourth parameter, depending on your target SDK.
}
Inside HelloCompute.java, replace createScript() with the following:
/**
 * Creates the RenderScript context and allocations, wires them to the mono
 * script's globals, runs the script's filter function and copies the result
 * into {@code mBitmapOut}.
 */
private void createScript() {
mRS = RenderScript.create(this);
// Input allocation backed by the source bitmap.
mInAllocation = Allocation.createFromBitmap(mRS, mBitmapIn,
Allocation.MipmapControl.MIPMAP_NONE,
Allocation.USAGE_SCRIPT);
// Output allocation with the same type (element and dimensions) as the input.
mOutAllocation = Allocation.createTyped(mRS, mInAllocation.getType());
mScript = new ScriptC_mono(mRS, getResources(), R.raw.mono);
// The same input allocation is exposed to the script twice: as the pointer
// gPixels and as the allocation handle gIn.
mScript.bind_gPixels(mInAllocation);
mScript.set_gIn(mInAllocation);
mScript.set_gOut(mOutAllocation);
// The script receives a handle to itself in the global gScript.
mScript.set_gScript(mScript);
// Run filter() on the script side, then fetch the result.
mScript.invoke_filter();
mOutAllocation.copyTo(mBitmapOut);
}
The end result will look like this
ALTERNATIVE
If you don't care about having each dot a solid color, you can do the following:
There is a very easy way to do this. You need a BitmapDrawable for the picture and a BitmapDrawable for the overlay tile (lets call it overlayTile). On overlayTile, call
overlayTile.setTileModeX(Shader.TileMode.REPEAT);
overlayTile.setTileModeY(Shader.TileMode.REPEAT);
Next, combine the two Drawable's into a single Drawable using LayerDrawable. You can use the resulting LayerDrawable as src for some ImageView, if you wish. Or, you can convert the Drawable to a Bitmap and save it to disk.
I think studying OpenGL might help in what you want to achieve.
You may want to go through the basics of Displaying Graphics with OpenGL ES
Hope that helps. :)

Categories

Resources