OpenGL ES: Bad performance when calculating vertex position in vertex shader - android

I'm a beginner at OpenGL and I'm trying to animate a number of "objects" from one position to another every 5 seconds. If I calculate the position in the vertex shader, the fps drops drastically - shouldn't this type of calculation be done on the GPU?
This is the vertex shader code:
#version 300 es
precision highp float;
precision highp int;
layout(location = 0) in vec3 vertexData;
layout(location = 1) in vec3 colourData;
layout(location = 2) in vec3 normalData;
layout(location = 3) in vec3 personPosition;
layout(location = 4) in vec3 oldPersonPosition;
layout(location = 5) in int start;
layout(location = 6) in int duration;
layout(std140, binding = 0) uniform Matrices
{                    // base // offset
    mat4 projection; // 64   // 0
    mat4 view;       // 64   // 0 + 64 = 64
    int time;        // 4    // 64 + 64 = 128
    bool shade;      // 4    // 128 + 4 = 132, two empty slots after this
    vec3 midPoint;   // 16   // 128 + 16 = 144
    vec3 cameraPos;  // 16   // 144 + 16 = 160
    // size = 160 + 16 = 176. Aligned to 16, becomes 176.
};
out vec3 vertexColour;
out vec3 vertexNormal;
out vec3 fragPos;
void main() {
    vec3 scalePos;
    scalePos.x = vertexData.x * 3.0;
    scalePos.y = vertexData.y * 3.0;
    scalePos.z = vertexData.z * 3.0;
    vertexColour = colourData;
    vertexNormal = normalData;
    float startFloat = float(start);
    float durationFloat = float(duration);
    float timeFloat = float(time);
    // Wrap around catch to avoid start being close to 1M but time has wrapped around to 0
    if (startFloat > timeFloat) {
        startFloat = startFloat - 1000000.0;
    }
    vec3 movePos;
    float elapsedTime = timeFloat - startFloat;
    if (elapsedTime > durationFloat) {
        movePos = personPosition;
    } else {
        vec3 moveVector = personPosition - oldPersonPosition;
        float moveBy = elapsedTime / durationFloat;
        movePos = oldPersonPosition + moveVector * moveBy;
    }
    fragPos = movePos;
    gl_Position = projection * view * vec4(scalePos + movePos, 1.0);
}
Every 5 seconds the buffers are updated:
glBindBuffer(GL_ARRAY_BUFFER, this->personPositionsVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(float) * this->persons.size() * 3, this->positions, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, this->personOldPositionsVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(float) * this->persons.size() * 3, this->oldPositions, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, this->timeStartVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(int) * this->persons.size(), animiationStart, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, this->timeDurationVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(int) * this->persons.size(), animiationDuration, GL_STATIC_DRAW);
I did a test calculating the positions on the CPU and updating the position buffer every draw call. That doesn't give me a performance drop, but it feels fundamentally wrong?
void PersonView::animatePositions() {
    float duration = 1500;
    double currentTime = now_ms();
    double elapsedTime = currentTime - animationStartTime;
    if (elapsedTime > duration) {
        return;
    }
    for (int i = 0; i < this->persons.size() * 3; i++) {
        float moveDistance = this->positions[i] - this->oldPositions[i];
        float moveBy = (float)(elapsedTime / duration);
        this->moveByPositions[i] = this->oldPositions[i] + moveDistance * moveBy;
    }
    glBindBuffer(GL_ARRAY_BUFFER, this->personMoveByPositionsVBO);
    glBufferData(GL_ARRAY_BUFFER, sizeof(float) * this->persons.size() * 3, this->moveByPositions, GL_STATIC_DRAW);
}
On devices with a better SoC (Snapdragon 835, etc.) the frame drop isn't as drastic as on devices with a mid-range SoC (Snapdragon 625).

Right off the bat, I can see that you're multiplying the projection and view matrices in the vertex shader, but there are no places where you rely on the view or projection matrix independently.
Multiplying two 4x4 matrices involves a large number of arithmetic operations, and here it is done for every vertex you're drawing. In your case, it seems you can avoid this altogether.
Instead of your current implementation, try multiplying the view and projection matrices outside of the shader, then bind the result as a single viewProjection matrix:
Old:
gl_Position = projection * view * vec4(scalePos + movePos, 1.0);
New:
gl_Position = projectionView * vec4(scalePos + movePos, 1.0);
This way, the projection and view matrices are multiplied once per frame instead of once per vertex. This change should drastically improve performance - especially if you have a large number of vertices.
Generally speaking, the GPU is indeed a lot more efficient than the CPU at arithmetic like this, but you should also consider the number of calculations. The vertex shader is executed per vertex - and should only calculate things that differ between vertices.
Performing a one-time calculation on the CPU is always better than performing the same calculation on the GPU n times (n = total vertices).
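For completeness, here is a minimal CPU-side sketch of that idea in Android Java using android.opengl.Matrix and GLES30 (your buffer code looks like C++, but the equivalent calls exist in the NDK). The matricesUbo handle, the float[] copies of projection and view, and the assumption that a single projectionView mat4 now sits at byte offset 0 of your std140 Matrices block are illustrative, not taken from your code:
// Multiply projection * view once per frame on the CPU.
float[] projectionView = new float[16];
Matrix.multiplyMM(projectionView, 0, projection, 0, view, 0);

// Upload the combined matrix into the uniform block (first mat4, byte offset 0).
FloatBuffer projViewBuf = ByteBuffer.allocateDirect(16 * 4)
        .order(ByteOrder.nativeOrder())
        .asFloatBuffer();
projViewBuf.put(projectionView).position(0);
GLES30.glBindBuffer(GLES30.GL_UNIFORM_BUFFER, matricesUbo);
GLES30.glBufferSubData(GLES30.GL_UNIFORM_BUFFER, 0, 16 * 4, projViewBuf);
The Matrices block in the shader would then declare one mat4 projectionView where projection and view used to be, and gl_Position would use it as shown above.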

Related

Uniform value is wrong on Adreno 305 Qualcomm device but right in Adreno Profiler

I'm writing my own game engine. The next step is to build my fragment shader for multiple light sources.
I found a very strange behaviour I can't understand. On my Moto G 2014 with an Adreno 305 GPU, a GLSL length() call on an ambient lighting uniform gives me an incorrect value, resulting in wrong scene lighting.
First, let's look at the fragment shader code:
#define numLights x
#pragma glsl
precision lowp float;
struct LightSourceParameters {
vec3 ambient;
vec3 lightColor;
vec4 position;
float spotExponent;
float spotCutoff; // (range: [0.0,90.0], 180.0)
vec3 spotDirection;
float constantAttenuation;
float linearAttenuation;
float quadraticAttenuation;
};
uniform LightSourceParameters LightSource[numLights];
struct MaterialParameters {
vec4 emission;
vec4 ambient;
vec4 diffuse;
vec4 specular;
float shininess;
bool hasDiffuseTexture;
bool hasSpecularTexture;
bool hasEmissionTexture;
bool hasAmbientTexture;
bool hasNormalTexture;
sampler2D emissionTexture;
sampler2D diffuseTexture;
sampler2D specularTexture;
sampler2D ambientTexture;
sampler2D normalTexture;
};
uniform MaterialParameters Material;
precision lowp float;
varying vec2 varyingTextcood;
varying vec3 varyingNormalDirection;
varying vec3 varyingViewDirection;
varying vec3 outLightVector[numLights];
void main()
{
vec3 normalDirection = normalize(varyingNormalDirection);
vec3 viewDirection = normalize(varyingViewDirection);
vec3 lightDirection;
float attenuation;
// initialize total lighting with ambient lighting
vec4 totalLighting;
vec4 emissionTerm;
if ((length(Material.emission) != 0.0) || (Material.hasEmissionTexture)) {
/* The material has an emissive term, i.e. it emits light. We add it to the total computed colour. */
if (!Material.hasEmissionTexture) {
emissionTerm = Material.emission.rgba;
}
else {
emissionTerm = texture2D(Material.emissionTexture, varyingTextcood).rgba;
}
if (emissionTerm.a > 0.0){
totalLighting = emissionTerm;
}
}
for (int index = 0; index < numLights; index++) // for all light sources
{
vec4 ambientalTerm;
vec4 specularReflection;
vec4 diffuseReflection;
if (length(LightSource[index].ambient.rgb) > 0.0){
// this is ambient light
if (Material.hasAmbientTexture){
ambientalTerm = vec4(LightSource[index].ambient, 1.0) * texture2D(Material.ambientTexture, varyingTextcood);
}
else {
ambientalTerm = vec4(LightSource[index].ambient, 1.0) * vec4(Material.ambient);
}
//totalLighting = vec4(0.0,1.0,0.0,1.0);
}
else {
if (0.0 == LightSource[index].position.w) // directional light?
{
attenuation = 1.0; // no attenuation
lightDirection = normalize(outLightVector[index]);
}
else // point light or spotlight (or other kind of light)
{
vec3 positionToLightSource = outLightVector[index];
float distance = length(positionToLightSource);
lightDirection = normalize(positionToLightSource);
attenuation = 1.0 / (LightSource[index].constantAttenuation
+ LightSource[index].linearAttenuation * distance
+ LightSource[index].quadraticAttenuation * distance * distance);
if (LightSource[index].spotCutoff <= 90.0) // spotlight?
{
float clampedCosine = max(0.0, dot(-lightDirection, normalize(LightSource[index].spotDirection)));
if (clampedCosine < cos(radians(LightSource[index].spotCutoff))) // outside of spotlight cone?
{
attenuation = 0.0;
}
else
{
attenuation = attenuation * pow(clampedCosine, LightSource[index].spotExponent);
}
}
}
vec4 diffuseTerm;
if (Material.hasDiffuseTexture){
diffuseTerm = texture2D(Material.diffuseTexture,varyingTextcood);
}
else {
diffuseTerm = Material.diffuse;
}
if (diffuseTerm.a > 0.0){
diffuseReflection = attenuation
* vec4(LightSource[index].lightColor, 1.0) * diffuseTerm
* max(0.0, dot(normalDirection, lightDirection));
}
if (dot(normalDirection, lightDirection) < 0.0) // light source on the wrong side?
{
specularReflection = vec4(0.0, 0.0, 0.0, 0.0); // no specular reflection
}
else // light source on the right side
{
vec4 specularTerm;
if (Material.hasSpecularTexture){
specularTerm = texture2D(Material.specularTexture,varyingTextcood);
}
else {
specularTerm = Material.specular;
}
if (specularTerm.a > 0.0){
/* OPTION WITHOUT HALF VECTOR
specularReflection = attenuation * vec4(LightSource[index].lightColor,1.0) * specularTerm
* pow(max(0.0, dot(reflect(-lightDirection, normalDirection), viewDirection)), Material.shininess);
*/
// OPTION WITH HALF VECTOR
vec3 light_half_vector = normalize(outLightVector[index] + viewDirection);
specularReflection = attenuation * vec4(LightSource[index].lightColor,1.0) * specularTerm
* pow(max(dot(light_half_vector, normalDirection), 0.0), Material.shininess);
}
}
}
totalLighting = totalLighting + ambientalTerm + diffuseReflection + specularReflection;
}
gl_FragColor = clamp(totalLighting, 0.0, 1.0);
}
Well, inside the main function, if we look at the for loop, we have this line:
if (length(LightSource[index].ambient.rgb) > 0.0){
Debugging on my Moto G, I found that the shader always enters this branch, even though the scene has no ambient light source. I can test this easily by writing totalLighting = vec4(1.0) inside the if branch.
This does not happen in the Adreno Profiler. I can't understand what's wrong, because the profiler connects to the Moto G GPU and retrieves all the uniform values, and ambient is a vec3 of all 0.0. Even if I take a screen capture of the same device in the profiler, I get the expected lighting behaviour in the profiler and wrong behaviour on the phone. Shouldn't it be the same in both places if they are connected?
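(For reference, each element of the LightSource array has its own uniform location and is set by its full member name; the snippet below is only an illustrative sketch of that pattern with made-up names, not my actual upload code.)
// Illustrative only: struct-array members are addressed per element by their full name.
int ambient0 = GLES20.glGetUniformLocation(programId, "LightSource[0].ambient");
GLES20.glUniform3fv(ambient0, 1, new float[] { 0.0f, 0.0f, 0.0f }, 0);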
As a curiosity, if I change the order of the declarations inside LightSourceParameters I get very different results, and I can't understand why. For example, take a look at the screen capture I uploaded: the whole scene turns red, which is the colour I'm using to clear the screen when the scene is rendered, with
GLES20.glClear(GLES20.GL_DEPTH_BUFFER_BIT | GLES20.GL_COLOR_BUFFER_BIT);
It's red for debugging purposes.
This is the original image on the Moto G phone, with the usual declaration order of the uniforms:
The next one is the capture I get if I move the vec3 ambient declaration to the end of the LightSourceParameters struct:
And this is the profiler capture, where you can see the value of the uniform. It doesn't matter whether the ambient declaration is at the beginning of the struct or at the end; the result is the same, as I would expect it to be on the phone:
Does somebody know what is wrong here, or what I'm misunderstanding?
Edit 1:
Commenting out the if statement of the ambient light:
//if (0.0 < length(LightSource[index].ambient)){
I allow the flow to go ahead and calculate the diffuse and specular light/material data. This is not optimal, but it's a way to debug, so I'm going to use it for now. The scene stays black except for the yellow sun rays (like the first image) until I substitute the light colour in the diffuse calculation with a vec3(1.0), like this:
/*
diffuseReflection = attenuation
* vec4(LightSource[index].lightColor, 1.0) * diffuseTerm
* max(0.0, dot(normalDirection, lightDirection));
*/
diffuseReflection = attenuation
* vec4(1.0,1.0,1.0, 1.0) * diffuseTerm
* max(0.0, dot(normalDirection, lightDirection));
This way the diffuse term is calculated as it is in the Adreno Profiler, and the image renders correctly. So my supposition is that the whole array of light structs contains garbage or wrong data, but I can't understand why this only happens on the Moto G.

GLSL ES fragment shader produces very different results on different devices

I am developing a game for Android using OpenGL ES 2.0 and have a problem with a fragment shader for drawing stars in the background. I've got the following code:
precision mediump float;
varying vec2 transformed_position;
float rand(vec2 co) {
return fract(sin(dot(co.xy ,vec2(12.9898,78.233))) * 43758.5453);
}
void main(void) {
float distance = 10.0;
float quantization_strength = 4.0;
vec3 background_color = vec3(0.09, 0.0, 0.288);
vec2 zero = vec2(0.0, 0.0);
vec2 distance_vec = vec2(distance, distance);
vec2 quantization_vec = vec2(quantization_strength, quantization_strength);
vec2 new_vec = floor(transformed_position / quantization_vec) * quantization_vec;
if(all(equal(mod(new_vec, distance_vec), zero))) {
float rand_val = rand(new_vec);
vec3 current_color = background_color * (1.0 + rand_val);
gl_FragColor = vec4(current_color.x, current_color.y, current_color.z, 1.0);
} else {
gl_FragColor = vec4(background_color.x, background_color.y, background_color.z, 1.0 );
}
}
My aim is to 'quantize' the fragment coordinates, so the 'stars' are not 1px in size, and then light up quantized pixels that are distant enough from each other by a random amount. This code, however, produces different results depending on where it is executed. I used GLSL Sandbox (http://glsl.heroku.com), a Nexus 7 and an HTC Desire S to create a comparison:
As you can see, GLSL Sandbox produces a dense grid with many stars visible. On the Nexus 7 the stars are much fewer and are distributed along lines (which may not be obvious in this small image) - the rand function does not work as expected. The Desire S draws no stars at all.
Why does the rand function work so strangely on the Nexus 7 (if I modify the vector used for the dot product, the stars are distributed along lines at a different angle)? And what might cause the Desire S not to render the stars?
I would also appreciate any optimization tips for this shader, as I am very inexperienced with GLSL. Or perhaps there is a better way to draw 'stars' in a fragment shader?
UPDATE
I changed the code to this (I used http://glsl.heroku.com/e#9364.0 as reference):
precision mediump float;
varying highp vec2 transformed_position;
highp float rand(vec2 co) {
highp float a = 1e3;
highp float b = 1e-3;
highp float c = 1e5;
return fract(sin((co.x+co.y*a)*b)*c);
}
void main(void) {
float size = 15.0;
float prob = 0.97;
lowp vec3 background_color = vec3(0.09, 0.0, 0.288);
highp vec2 world_pos = transformed_position;
vec2 pos = floor(1.0 / size * world_pos);
float color = 0.0;
highp float starValue = rand(pos);
if(starValue > prob) {
vec2 center = size * pos + vec2(size, size) * 0.5;
float xy_dist = abs(world_pos.x - center.x) * abs(world_pos.y - center.y) / 5.0;
color = 0.6 - distance(world_pos, center) / (0.5 * size) * xy_dist;
}
if(starValue < prob || color < 0.0) {
gl_FragColor = vec4(background_color, 1.0);
} else {
float starIntensity = fract(100.0 * starValue);
gl_FragColor = vec4(background_color * (1.0 + color * 3.0 * starIntensity), 1.0);
}
}
The Desire S now gives me very nice, uniformly distributed stars. But the problem with the Nexus 7 is still there. With prob = 0.97, no stars are displayed, and with a very low prob = 0.01 they appear very sparsely, placed along horizontal lines. Why does Tegra 3 behave so strangely?

GLSL performance issue in fragment shader on vec4 add

I have a fairly simple fragment shader used to handle a situation with multiple lights (code below trimmed down for clarity, only two lights shown).
The broad idea is to sum the various lighting contributions for each fragment, and it works fine; however, I have found that it is unstable on my hardware (an Android HTC Desire X).
Measuring FPS, it becomes apparent that there is a single vec4 addition line which is causing the FPS to drop by 10.
What could be causing this performance hit on such an apparently simple operation?
void main (void)
{
vec4 v = u_ViewModelMatrix * vec4(v_Vertex, 1.0);
vec3 nv = normalize(-v.xyz);
vec3 normalVector = normalize((u_ViewModelTransposeMatrix * vec4(normalize(v_Normal), 0.0)).xyz);
vec4 finalColour = vec4(0.0, 0.0, 0.0, 1.0);
// LIGHT 0
lightPosition = vec4(u_LightData[2], u_LightData[3], u_LightData[4], 1);
lightColour = vec4(u_LightData[5], u_LightData[6], u_LightData[7], 1.0) * u_LightData[0];
lightVector = normalize((u_ViewMatrix * lightPosition).xyz - v.xyz);
halfwayVector = normalize(lightVector + nv);
facing = dot(normalVector, lightVector);
if (facing >= 0.0) {
finalColour = finalColour + diffuseColour * facing * lightColour;
}
// LIGHT 1
lightPosition = vec4(u_LightData[LIGHTS_FLOATS_PER_LIGHT*1+2],
u_LightData[LIGHTS_FLOATS_PER_LIGHT*1+3],
u_LightData[LIGHTS_FLOATS_PER_LIGHT*1+4],
1);
lightColour = vec4(u_LightData[LIGHTS_FLOATS_PER_LIGHT*1+5],
u_LightData[LIGHTS_FLOATS_PER_LIGHT*1+6],
u_LightData[LIGHTS_FLOATS_PER_LIGHT*1+7],
1.0) * u_LightData[LIGHTS_FLOATS_PER_LIGHT*1];
lightVector = normalize((u_ViewMatrix * lightPosition).xyz - v.xyz);
halfwayVector = normalize(lightVector + nv);
facing = dot(normalVector, lightVector);
if (facing >= 0.01) {
vec4 qwe = diffuseColour * facing * lightColour;
// HERE .............
finalColour = finalColour + qwe; // takes 10 fps
// HERE ^^^^^^^^^^^^^
}
gl_FragColor = finalColour;
}
Branching causes this. Avoid using ifs and for loops.
Replace
if (facing >= 0.0) {
finalColour = finalColour + diffuseColour * facing * lightColour;
}
with
finalColour += max(0.0, facing) * diffuseColour * lightColour;
and
if (facing >= 0.01) {
vec4 qwe = diffuseColour * facing * lightColour;
// HERE .............
finalColour = finalColour + qwe; // takes 10 fps
// HERE ^^^^^^^^^^^^^
}
with
finalColour += step(0.01, facing) * facing * diffuseColour * lightColour;
Don't worry about calculating some values even when you don't need them. Since shaders are executed in parallel, you can't get much faster than the slowest instance.
Also, you should move as much as possible to the vertex shader, since it is executed once per vertex rather than once per fragment; basically, calculate everything that interpolates well in the vertex shader and pass it as varyings:
Position and color of the lights
Vectors L, V and H (in this example at least)

Simple textured quad rotation in OpenGL ES 2.0

Edit 6 - Complete re-write in relation to comments/ongoing research
Edit 7 - Added projection / view matrix.....
As I'm not getting far with this, I added the view/projection matrix from the Google demo - please see the code below. If anyone can point out where I'm going wrong it would really be appreciated, as I'm still getting a blank screen when I put "gl_Position = a_Position * uMVPMatrix;" into my vertex shader (with "gl_Position = a_position;" my quad is displayed, at least...).
Declared at class level: (Quad class)
private final float[] rotationMat = new float[16];
private FloatBuffer flotRotBuf;
ByteBuffer rotBuf;
private int muRotationHandle = -1; // Handle to the rotation matrix in the vertex shader called "uRotate"
Declared at class level: (Renderer class)
private final float[] mVMatrix = new float[16];
private final float[] mProjMatrix = new float[16];
private final float[] mMVPMatrix = new float[16];
Routine that sets the texture and does (or is supposed to do) the rotation (this is in my Quad class):
public void setTexture(GLSurfaceView view, Bitmap imgTexture, float[] mvpMatrix){
this.imgTexture=imgTexture;
// get handle to shape's transformation matrix
mMVPMatrixHandle = GLES20.glGetUniformLocation(iProgId, "uMVPMatrix");
// Apply the projection and view transformation
GLES20.glUniformMatrix4fv(mMVPMatrixHandle, 1, false, mvpMatrix, 0);
// Matrix.setRotateM(rotationMat, 0, 45f, 0, 0, 1.0f); //Set rotation matrix with angle and (z) axis
// rotBuf = ByteBuffer.allocateDirect(rotationMat.length * 4);
// use the device hardware's native byte order
// rotBuf.order(ByteOrder.nativeOrder());
// create a floating point buffer from the ByteBuffer
// flotRotBuf = rotBuf.asFloatBuffer();
// add the coordinates to the FloatBuffer
// flotRotBuf.put(rotationMat);
// set the buffer to read the first coordinate
// flotRotBuf.position(0);
// muRotationHandle = GLES20.glGetUniformLocation(iProgId, "uRotation"); // grab the variable from the shader
// GLES20.glUniformMatrix4fv(muRotationHandle, 1, false, flotRotBuf); //Pass floatbuffer contraining rotation matrix info into vertex shader
//GLES20.glUniformMatrix4fv(muRotationHandle, 1, false, rotationMat, 1); //Also tried this ,not use floatbuffer
//Vertex shader
String strVShader =
// "uniform mat4 uRotation;" +
"uniform mat4 uMVPMatrix;" +
"attribute vec4 a_position;\n"+
"attribute vec2 a_texCoords;" +
"varying vec2 v_texCoords;" +
"void main()\n" +
"{\n" +
"gl_Position = a_Position * uMVPMatrix;"+ //This is where it all goes wrong....
"v_texCoords = a_texCoords;" +
"}";
//Fragment shader
String strFShader =
"precision mediump float;" +
"varying vec2 v_texCoords;" +
"uniform sampler2D u_baseMap;" +
"void main()" +
"{" +
"gl_FragColor = texture2D(u_baseMap, v_texCoords);" +
"}";
iProgId = Utils.LoadProgram(strVShader, strFShader);
iBaseMap = GLES20.glGetUniformLocation(iProgId, "u_baseMap");
iPosition = GLES20.glGetAttribLocation(iProgId, "a_position");
iTexCoords = GLES20.glGetAttribLocation(iProgId, "a_texCoords");
texID = Utils.LoadTexture(view, imgTexture);
}
From my renderer class:
public void onSurfaceChanged(GL10 gl, int width, int height) {
// TODO Auto-generated method stub
//Set viewport size based on screen dimensions
GLES20.glViewport(0, 0, width, height);
float ratio = (float) width / height;
Matrix.frustumM(mProjMatrix, 0, -ratio, ratio, -1, 1, 3, 7);
}
public void onDrawFrame(GL10 gl) {
// TODO Auto-generated method stub
//Paint the screen the colour defined in onSurfaceCreated
GLES20.glClear(GLES20.GL_COLOR_BUFFER_BIT);
// Set the camera position (View matrix)
Matrix.setLookAtM(mVMatrix, 0, 0, 0, -3, 0f, 0f, 0f, 0f, 1.0f, 0.0f);
// Calculate the projection and view transformation
Matrix.multiplyMM(mMVPMatrix, 0, mProjMatrix, 0, mVMatrix, 0);
quad1.setTexture(curView, myBitmap, mMVPMatrix); //SetTexture now modified to take a float array (See above) - Note I know it's not a good idea to have this in my onDrawFrame method - will move it once I have it working!
quad1.drawBackground();
}
I've now removed all rotation related stuff and am now just attempting to get a static quad to display after applying the uMVPMatrix in the vertex shader. But still nothing :-(
If I simply change that line back to the 'default' :
"gl_Position = a_position;\n"+
Then I at least get my textured quad displayed (Obviously no rotation and I would expect that).
Also, just to point out that the mvpMatrix being received in the setTexture method is definitely valid (it contains the same data that appears when I log the contents of mvpMatrix from the Google developers' code). I'm not sure how to check whether the shader is receiving it intact, though - I have no reason to believe it isn't.
Really do appreciate any and all help - I must be going very wrong somewhere, but I just can't spot it. Thank you!
EDIT 2: Having added a bounty to this question, I would just like to know how to rotate my textured quad sprite (2D), keeping the code I have to render it as a base (i.e., what do I need to add to it in order to rotate, and why). Thanks!
EDIT 3 N/A
EDIT 4 Re-worded / simplified question
EDIT 5 Added error screenshot
Edit: Edited to support Java using Android SDK.
As Tobias indicated, the idiomatic solution to any vertex transformation in OpenGL is matrix operations. If you plan to continue developing with OpenGL, it is important that you (eventually) understand the underlying linear algebra, but it is often best to use a math library that abstracts the linear algebra into a more readable form. In the Android environment, you manipulate float arrays with the Matrix class to create a rotation matrix like this:
// initialize rotation matrix
float[] rotationMat = new float[16];
Matrix.setIdentityM(rotationMat, 0);
// angle in degrees to rotate
float angle = 90;
// axis to rotate about (the z axis in your case)
float[] axis = { 0.0f, 0.0f, 1.0f };
// rotate by angle (in degrees) about the z axis
Matrix.rotateM(rotationMat, 0, angle, axis[0], axis[1], axis[2]);
Then you can bind the rotation Matrix to a shader program like this:
// assuming shader program is currently bound ...
GLES20.glUniformMatrix4fv(GLES20.glGetUniformLocation(shaderProgramID, "uRotation"), 1, false, rotationMat, 0);
Where your vertex shader (of the program being passed rotationMat) would look something like:
precision mediump float;
uniform mat4 uMVPMatrix;
uniform mat4 uRotation;
attribute vec2 a_texCoords;
attribute vec3 a_position;
varying vec2 v_texCoord;
void main(void)
{
    v_texCoord = a_texCoords;
    gl_Position = uMVPMatrix * uRotation * vec4(a_position, 1.0);
}
Alternatively, you could premultiply uMVPMatrix * uRotation outside of the shader and pass the single result to your shader program, which avoids duplicating that computation for every vertex.
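A minimal sketch of that premultiplication using the same android.opengl.Matrix class, reusing the mMVPMatrix, rotationMat and mMVPMatrixHandle names from the code above (assuming the combined matrix is uploaded to the uMVPMatrix uniform):
// Combine the two matrices once per frame on the CPU instead of once per vertex in the shader.
float[] mvpRotation = new float[16];
Matrix.multiplyMM(mvpRotation, 0, mMVPMatrix, 0, rotationMat, 0); // uMVPMatrix * uRotation
// Upload the single combined matrix; the uRotation uniform and the extra multiply
// can then be removed from the vertex shader.
GLES20.glUniformMatrix4fv(mMVPMatrixHandle, 1, false, mvpRotation, 0);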
Once you are comfortable using this higher level API for matrix operations you can investigate how the internal operation is performed by reading this fantastic tutorial written by Nicol Bolas.
Rotation matrix for rotation around z:
cos a -sin a 0
sin a cos a 0
0 0 1
How to remember how to construct it:
a is the angle in radians; for a = 0 the matrix yields the identity matrix. cos has to be on the diagonal. There has to be a minus sign in front of one of the sin terms; switching the signs reverses the rotation's direction.
Likewise, rotations around x and y can be constructed:
Around x:
1     0      0
0     cos a  sin a
0    -sin a  cos a
Around y:
cos a   0   sin a
0       1   0
-sin a  0   cos a
If you are not familiar with matrix-arithmetic, here is some code:
for (int i=0; i<4; i++) {
vertices_new[i*5+0] = cos(a) * vertices[i*5+0] - sin(a) * vertices[i*5+1]; // cos(a) * v[i].x - sin(a) * v[i].y + 0 * v[i].z
vertices_new[i*5+1] = sin(a) * vertices[i*5+0] + cos(a) * vertices[i*5+1]; // sin(a) * v[i].x + cos(a) * v[i].y + 0 * v[i].z
vertices_new[i*5+2] = vertices[i*5+2]; // 0 * v[i].x + 0 * v[i].y + 1 * v[i].z
vertices_new[i*5+3] = vertices[i*5+3]; // copy texture u
vertices_new[i*5+4] = vertices[i*5+4]; // copy texture v
}

Fragment shader with multiple floating operations slow on Xoom (tegra2)

I am trying to create a "burning star" impression in an Android game I have been developing, with a little help from a noise function (simplex noise in this case). Unfortunately, I can't use 3D textures, as they are a GLES extension and the Android packages don't include them.
The only option left for me is therefore to calculate the noise function in the fragment shader. The code provided below runs smoothly or acceptably (20-60 fps) on an HTC Desire Z and an LG Optimus One. With the same program on a Motorola XOOM (which has a Tegra 2 chipset), however, I get a fraction (1-3) of that fps, even when displaying only a small part of the object.
Things we have tried so far:
meddling with precision (lowp-highp), both in the first-line directive and by specifying it for each occurrence of float/vec separately
commenting out parts of the noise function - it seems there isn't any particular bottleneck; it's the combination of everything together
googling problems related to Tegra, floating point in shaders, etc.
This is the stripped-down part of the code needed to reproduce this behaviour. Note that on the XOOM there are some artifacts, which we believe are caused by 16-bit floating-point operations on Tegra.
precision mediump float;
#define pi 3.141592653589793238462643383279
//
// Description : Array and textureless GLSL 2D/3D/4D simplex
// noise functions.
// Author : Ian McEwan, Ashima Arts.
// Maintainer : ijm
// Lastmod : 20110822 (ijm)
// License : Copyright (C) 2011 Ashima Arts. All rights reserved.
// Distributed under the MIT License. See LICENSE file.
// https://github.com/ashima/webgl-noise
//
vec3 mod289(vec3 x) {
return x - floor(x * (1.0 / 289.0)) * 289.0;
}
vec4 mod289(vec4 x) {
return x - floor(x * (1.0 / 289.0)) * 289.0;
}
vec4 permute(vec4 x) {
return mod289(((x*34.0)+1.0)*x);
}
vec4 taylorInvSqrt(vec4 r)
{
return 1.79284291400159 - 0.85373472095314 * r;
}
float snoise(vec3 v)
{
const vec2 C = vec2(1.0/6.0, 1.0/3.0) ;
const vec4 D = vec4(0.0, 0.5, 1.0, 2.0);
// First corner
vec3 i = floor(v + dot(v, C.yyy) );
vec3 x0 = v - i + dot(i, C.xxx) ;
// Other corners
vec3 g = step(x0.yzx, x0.xyz);
vec3 l = 1.0 - g;
vec3 i1 = min( g.xyz, l.zxy );
vec3 i2 = max( g.xyz, l.zxy );
// x0 = x0 - 0.0 + 0.0 * C.xxx;
// x1 = x0 - i1 + 1.0 * C.xxx;
// x2 = x0 - i2 + 2.0 * C.xxx;
// x3 = x0 - 1.0 + 3.0 * C.xxx;
vec3 x1 = x0 - i1 + C.xxx;
vec3 x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
vec3 x3 = x0 - D.yyy; // -1.0+3.0*C.x = -0.5 = -D.y
// Permutations
i = mod289(i);
vec4 p = permute( permute( permute(
i.z + vec4(0.0, i1.z, i2.z, 1.0 ))
+ i.y + vec4(0.0, i1.y, i2.y, 1.0 ))
+ i.x + vec4(0.0, i1.x, i2.x, 1.0 ));
// Gradients: 7x7 points over a square, mapped onto an octahedron.
// The ring size 17*17 = 289 is close to a multiple of 49 (49*6 = 294)
float n_ = 0.142857142857; // 1.0/7.0
vec3 ns = n_ * D.wyz - D.xzx;
vec4 j = p - 49.0 * floor(p * ns.z * ns.z); // mod(p,7*7)
vec4 x_ = floor(j * ns.z);
vec4 y_ = floor(j - 7.0 * x_ ); // mod(j,N)
vec4 x = x_ *ns.x + ns.yyyy;
vec4 y = y_ *ns.x + ns.yyyy;
vec4 h = 1.0 - abs(x) - abs(y);
vec4 b0 = vec4( x.xy, y.xy );
vec4 b1 = vec4( x.zw, y.zw );
//vec4 s0 = vec4(lessThan(b0,0.0))*2.0 - 1.0;
//vec4 s1 = vec4(lessThan(b1,0.0))*2.0 - 1.0;
vec4 s0 = floor(b0)*2.0 + 1.0;
vec4 s1 = floor(b1)*2.0 + 1.0;
vec4 sh = -step(h, vec4(0.0));
vec4 a0 = b0.xzyw + s0.xzyw*sh.xxyy ;
vec4 a1 = b1.xzyw + s1.xzyw*sh.zzww ;
vec3 p0 = vec3(a0.xy,h.x);
vec3 p1 = vec3(a0.zw,h.y);
vec3 p2 = vec3(a1.xy,h.z);
vec3 p3 = vec3(a1.zw,h.w);
//Normalise gradients
vec4 norm = taylorInvSqrt(vec4(dot(p0,p0), dot(p1,p1), dot(p2, p2), dot(p3,p3)));
p0 *= norm.x;
p1 *= norm.y;
p2 *= norm.z;
p3 *= norm.w;
// Mix final noise value
vec4 m = max(0.6 - vec4(dot(x0,x0), dot(x1,x1), dot(x2,x2), dot(x3,x3)), 0.0);
m = m * m;
return 42.0 * dot( m*m, vec4( dot(p0,x0), dot(p1,x1),
dot(p2,x2), dot(p3,x3) ) );
}
uniform vec3 color1;
uniform vec3 color2;
uniform float t;
varying vec3 vTextureCoord;
void main()
{
float t = 0.5; //mod(t, 3.0);
float x = (vTextureCoord.x)*2.0;
float y = -(vTextureCoord.y)*2.0;
float r = sqrt(x * x + y * y);
gl_FragColor = vec4(0.0,0.0,0.0,0.0);
if(r<= 1.0){
float n = snoise( vec3(vec2(x, y), t * 3.3) );
gl_FragColor = vec4( mix(color1,color2, abs(n) ) ,1.0);
}
}
I was facing the same problem on Tegra 2 a while ago. Read section 1.3 of http://www.opengl.org/wiki/GLSL_:_common_mistakes and feel the pain. In my case the fps doubled, but it still sucked.
