Speex echo cancellation configuration - android

I am making an Android-to-Android VoIP (loudspeaker) app using Android's AudioRecord and AudioTrack classes, along with Speex via the NDK for echo cancellation. I was able to successfully pass data into and retrieve data from Speex's speex_echo_cancellation() function, but the echo remains.
Here is the relevant Android thread code that records/sends and receives/plays audio:
// Stores the socket and creates the native Speex echo state through JNI.
// frameSize and filterLength are forwarded unchanged to nativeMethod_initEchoState.
public MyThread(DatagramSocket socket, int frameSize, int filterLength){
this.socket = socket;
nativeMethod_initEchoState(frameSize, filterLength);
// Thread body: records one initial buffer, then loops receive -> play -> record -> echo-cancel -> send
// until the thread is interrupted.
// NOTE(review): the socket send/receive calls and the short<->byte copies for outgoing data are not
// visible in this excerpt.
public void run(){
short[] audioShorts, recvShorts, recordedShorts, filteredShorts;
byte[] audioBytes, recvBytes;
int shortsRead;
DatagramPacket packet;
//initialize recorder and player (8000 Hz, mono, 16-bit PCM, 2000-byte buffers)
int samplingRate = 8000;
int managerBufferSize = 2000;
AudioTrack player = new AudioTrack(AudioManager.STREAM_MUSIC, samplingRate, AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT, managerBufferSize, AudioTrack.MODE_STREAM);
recorder = new AudioRecord(MediaRecorder.AudioSource.MIC, samplingRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, managerBufferSize);
//record first packet (up to 1000 shorts)
audioShorts = new short[1000];
shortsRead = recorder.read(audioShorts, 0, audioShorts.length);
//convert shorts to bytes to send
audioBytes = new byte[shortsRead*2];
//send bytes
packet = new DatagramPacket(audioBytes, audioBytes.length);
while (!this.isInterrupted()){
//receive packet/bytes (received audio data should already have its echo cancelled)
recvBytes = new byte[2000];
packet = new DatagramPacket(recvBytes, recvBytes.length);
//convert bytes to shorts (payload is read as little-endian 16-bit samples)
recvShorts = new short[packet.getLength()/2];
ByteBuffer.wrap(packet.getData(), 0, packet.getLength()).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(recvShorts);
//play shorts
player.write(recvShorts, 0, recvShorts.length);
//record shorts
recordedShorts = new short[1000];
shortsRead = recorder.read(recordedShorts, 0, recordedShorts.length);
//send played and recorded shorts into speex,
//returning audio data with the echo removed
//NOTE(review): recordedShorts always has 1000 elements while recvShorts has packet.getLength()/2,
//so the two frames handed to the native call can differ in length.
//NOTE(review): the far-end frame passed here is the one written to the player immediately before
//this read; no delay buffer between playback and capture is visible.
filteredShorts = nativeMethod_speexEchoCancel(recordedShorts, recvShorts);
//convert filtered shorts to bytes
audioBytes = new byte[shortsRead*2];
//send off bytes
packet = new DatagramPacket(audioBytes, audioBytes.length);
}//end of while loop
Here is the relevant NDK / JNI code:
/* Creates the Speex echo-canceller state from the frame size and filter length passed from Java
 * and stores it in echo_state (declared outside this excerpt). */
void nativeMethod_initEchoState(JNIEnv *env, jobject jobj, jint frameSize, jint filterLength){
echo_state = speex_echo_state_init(frameSize, filterLength);
/* Runs speex_echo_cancellation on one recorded frame (input_frame) against one played frame
 * (echo_frame) and returns a new Java short[] holding the filtered samples.
 * The returned array has the same length as input_frame. */
jshortArray nativeMethod_speexEchoCancel(JNIEnv *env, jobject jObj, jshortArray input_frame, jshortArray echo_frame){
//create native shorts from java shorts
jshort *native_input_frame = (*env)->GetShortArrayElements(env, input_frame, NULL);
jshort *native_echo_frame = (*env)->GetShortArrayElements(env, echo_frame, NULL);
//allocate memory for output data
//NOTE(review): length comes from input_frame only; the length of echo_frame is never checked.
jint length = (*env)->GetArrayLength(env, input_frame);
//temp is a scratch Java array whose elements serve as the native output buffer
jshortArray temp = (*env)->NewShortArray(env, length);
jshort *native_output_frame = (*env)->GetShortArrayElements(env, temp, 0);
//call echo cancellation
speex_echo_cancellation(echo_state, native_input_frame, native_echo_frame, native_output_frame);
//convert native output to java layer output
//NOTE(review): a second array is created and filled from the scratch buffer; temp itself is not returned.
jshortArray output_shorts = (*env)->NewShortArray(env, length);
(*env)->SetShortArrayRegion(env, output_shorts, 0, length, native_output_frame);
//cleanup and return
(*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, 0);
(*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, 0);
(*env)->ReleaseShortArrayElements(env, temp, native_output_frame, 0);
return output_shorts;
This code runs fine, and audio data is definitely being sent/received/processed/played from Android to Android. Given an audio sample rate of 8000 Hz and a packet size of 2000 bytes/1000 shorts, I've found that a frameSize of 1000 is needed for the played audio to be smooth. Most values of filterLength (a.k.a. tail length according to the Speex docs) will run, but they seem to have no effect on the echo removal.
Does anyone understand AEC well enough to give me some pointers on implementing or configuring Speex? Thanks for reading.

Your code is right, but something is missing in the native code. I modified the init method and added the Speex preprocessor after echo cancellation, and then your code worked well (I tried it on Windows).
Here is the native code:
#include <jni.h>
#include "speex/speex_echo.h"
#include "speex/speex_preprocess.h"
#include "EchoCanceller_jniHeader.h"
SpeexEchoState *st;
SpeexPreprocessState *den;
/*
 * JNI entry point: creates the Speex echo-canceller state (st) and the preprocessor state (den).
 * jBufSize is the first argument to both init calls; jTotalSize is the second argument of
 * speex_echo_state_init; jSampleRate is given to the preprocessor and to the echo canceller.
 */
JNIEXPORT void JNICALL Java_speex_EchoCanceller_open
(JNIEnv *env, jobject jObj, jint jSampleRate, jint jBufSize, jint jTotalSize)
int sampleRate=jSampleRate;
st = speex_echo_state_init(jBufSize, jTotalSize);
den = speex_preprocess_state_init(jBufSize, sampleRate);
speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate);
//attach the echo state to the preprocessor
speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_STATE, st);
/*
 * JNI entry point: runs echo cancellation on input_frame against echo_frame, then runs the
 * preprocessor in place on the result, and returns a new Java short[] with the processed samples.
 * The returned array has the same length as input_frame.
 */
JNIEXPORT jshortArray JNICALL Java_speex_EchoCanceller_process
(JNIEnv * env, jobject jObj, jshortArray input_frame, jshortArray echo_frame)
//create native shorts from java shorts
jshort *native_input_frame = (*env)->GetShortArrayElements(env, input_frame, NULL);
jshort *native_echo_frame = (*env)->GetShortArrayElements(env, echo_frame, NULL);
//allocate memory for output data
//NOTE(review): length comes from input_frame only; the length of echo_frame is never checked.
jint length = (*env)->GetArrayLength(env, input_frame);
jshortArray temp = (*env)->NewShortArray(env, length);
jshort *native_output_frame = (*env)->GetShortArrayElements(env, temp, 0);
//call echo cancellation
speex_echo_cancellation(st, native_input_frame, native_echo_frame, native_output_frame);
//preprocess output frame (operates on native_output_frame directly)
speex_preprocess_run(den, native_output_frame);
//convert native output to java layer output
jshortArray output_shorts = (*env)->NewShortArray(env, length);
(*env)->SetShortArrayRegion(env, output_shorts, 0, length, native_output_frame);
//cleanup and return
(*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, 0);
(*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, 0);
(*env)->ReleaseShortArrayElements(env, temp, native_output_frame, 0);
return output_shorts;
/* JNI entry point close(). NOTE(review): no body is visible in this excerpt; presumably it
 * releases st and den — confirm against the full source. */
JNIEXPORT void JNICALL Java_speex_EchoCanceller_close
(JNIEnv *env, jobject jObj)
You can find useful samples (encoding, decoding, echo cancellation) in the Speex library's source distribution (http://www.speex.org/downloads/).

Are you properly aligning the far-end signal (what you call recv) and the near-end signal (what you call record)? There is always some playback/record latency which needs to be accounted for. This generally requires buffering the far-end signal in a ring buffer for some specified period of time. On PCs this is usually about 50 - 120 ms. On Android I suspect it's much higher, probably in the range of 150 - 400 ms. I would recommend using a 100 ms tail length with Speex and adjusting the size of your far-end buffer until the AEC converges. These changes should allow the AEC to converge, independently of the inclusion of the preprocessor, which is not required here.


Statement has no effect 'AVPacket'

I am developing a decoder using the Android NDK and the FFmpeg native libraries. I have added native support to the project using Android Tools, and I have the C code in the file videodecoder.cpp. In that file, the following function gives me this problem:
// JNI entry point: wraps num_bytes of NAL-unit data from a direct ByteBuffer in an AVPacket
// (with pts = pkt_pts) and passes it to avcodec_decode_video2.
// Returns -1 if the direct buffer address cannot be obtained; otherwise returns the decoder's result.
// Sets ctx->frame_ready to 1 when the decoder reports a finished frame.
JNIEXPORT jint Java_ssrp_android_ffmpegdecoder_H264Decoder_consumeNalUnitsFromDirectBuffer(
JNIEnv* env, jobject thiz, jobject nal_units, jint num_bytes,
jlong pkt_pts) {
DecoderContext *ctx = get_ctx(env, thiz);
void *buf = NULL;
if (nal_units == NULL) {
// buf stays NULL in this branch, so the packet built below has no data pointer
D("Received null buffer, sending empty packet to decoder");
} else {
buf = env->GetDirectBufferAddress(nal_units);
if (buf == NULL) {
D("Error getting direct buffer address");
return -1;
// NOTE(review): designated initializers are C99 syntax; this file is compiled as C++
// (videodecoder.cpp), where they are not standard before C++20 — confirm whether this is
// what triggers the reported diagnostic.
AVPacket packet = {.data = (uint8_t*) buf, .size = num_bytes, .pts = pkt_pts };
int frameFinished = 0;
int res = avcodec_decode_video2(ctx->codec_ctx, ctx->src_frame,&frameFinished, &packet);
if (frameFinished)
ctx->frame_ready = 1;
return res;
At the line AVPacket packet = {.data = (uint8_t*) buf, .size = num_bytes, .pts = pkt_pts };
it reports: Statement has no effect 'AVPacket'.
At the line int res = avcodec_decode_video2(ctx->codec_ctx, ctx->src_frame,&frameFinished, &packet);
it reports: Invalid arguments '
Candidates are:
int avcodec_decode_video2(AVCodecContext *, AVFrame *, int *, const AVPacket *)'
The problem is the line
AVPacket packet = {.data = (uint8_t*) buf, .size = num_bytes, .pts = pkt_pts }
because the compiler does not understand the type or its initialization.
This in turn leads to the invalid-arguments error.
Try splitting the line into:
// Declare the packet first, then assign the three fields one statement at a time.
// NOTE(review): with this form the fields that are not assigned below are left uninitialized,
// whereas the brace initializer zeroed them — confirm whether av_init_packet or zero-initialization
// is needed first.
AVPacket packet;
packet.data = (uint8_t*) buf;
packet.size = num_bytes;
packet.pts = pkt_pts;
This should produce clearer error output.

How to copy decoded frame from C to Android

I used the ffmpeg library to decode a video and obtained the frame buffer data.
I want to copy the frame buffer into an Android byte array (the format is RGB565).
How can I copy the frame buffer data from C into an Android byte array?
Can anyone give me an example or some advice?
You could use java.nio.ByteBuffer for that:
// Allocate a direct buffer of frameSize bytes; the native side obtains its address with GetDirectBufferAddress.
ByteBuffer theVideoFrame = ByteBuffer.allocateDirect(frameSize);
And the native code could be something like:
// JNI entry point: copies frameSize bytes from theNativeVideoFrame into the direct ByteBuffer
// passed from Java. theNativeVideoFrame and frameSize are defined outside this excerpt.
JNIEXPORT void JNICALL Java_blah_blah_blah_CopyFrame(JNIEnv *ioEnv, jobject ioThis, jobject byteBuffer)
char *buffer;
buffer = (char*)(ioEnv->GetDirectBufferAddress(byteBuffer));
if (buffer == NULL) {
__android_log_write(ANDROID_LOG_VERBOSE, "foo", "failed to get NIO buffer address");
// NOTE(review): no return is visible after the log call, so the memcpy below would still run
// with a NULL destination — confirm against the full source.
memcpy(buffer, theNativeVideoFrame, frameSize);
To copy the data from the ByteBuffer to a byte[] you'd then use something like:
byte[] frameBytes = new byte[frameSize];
theVideoFrame.rewind();
theVideoFrame.get(frameBytes);

use ffmpeg api to convert audio files. crash on avcodec_encode_audio2

From the examples I got the basic idea for this code.
However, I am not sure what I am missing, as muxing.c, demuxing.c and decoding_encoding.c
all use different approaches.
The process of converting an audio file to another file should go roughly like this:
inputfile -demux-> audiostream -read-> inPackets -decode2frames->
-encode2packets-> outPackets -write-> audiostream -mux-> outputfile
However I found the following comment in demuxing.c:
/* Write the raw audio data samples of the first plane. This works
* fine for packed formats (e.g. AV_SAMPLE_FMT_S16). However,
* most audio decoders output planar audio, which uses a separate
* plane of audio samples for each channel (e.g. AV_SAMPLE_FMT_S16P).
* In other words, this code will write only the first audio channel
* in these cases.
* You should use libswresample or libavfilter to convert the frame
* to packed data. */
My questions about this are:
Can I expect a frame retrieved by calling one of the decoder functions, e.g.
avcodec_decode_audio4, to hold suitable values to pass directly to an encoder, or is
the resampling step mentioned in the comment mandatory?
Am I taking the right approach? ffmpeg is very asymmetric, i.e. if there is a function
open_file_for_input, there might not be a function open_file_for_output. Also, there are different versions of many functions (avcodec_decode_audio[1-4]) and different naming
schemes, so it's very hard to tell whether the general approach is right or actually an
ugly mixture of techniques that were used at different version bumps of ffmpeg.
ffmpeg uses a lot of specific terms, like 'planar sampling' or 'packed format', and I am having a hard time finding definitions for these terms. Is it possible to write working code without deep knowledge of audio?
Here is my code so far that right now crashes at avcodec_encode_audio2
and I don't know why.
/*
 * JNI entry point: opens jsource for input and jdest for output (format jformat, codec jcodec),
 * then reads packets from the input, decodes each one and re-encodes the decoded frame into the
 * output file. Returns 0 at the end of the visible code.
 * NOTE(review): open_input, open_output, the DIE_IF_* macros, e2s, LOGI, `logging` and `error`
 * are defined outside this excerpt.
 */
int Java_com_fscz_ffmpeg_Audio_convert(JNIEnv * env, jobject this, jstring jformat, jstring jcodec, jstring jsource, jstring jdest) {
jboolean isCopy;
/* NOTE(review): JNI FindClass expects a slash-separated name ("com/fscz/ffmpeg/Config");
 * confirm that this dotted form resolves. */
jclass configClass = (*env)->FindClass(env, "com.fscz.ffmpeg.Config");
jfieldID fid = (*env)->GetStaticFieldID(env, configClass, "ffmpeg_logging", "I");
logging = (*env)->GetStaticIntField(env, configClass, fid);
/// open input
/* NOTE(review): none of the GetStringUTFChars results below are released in the visible code. */
const char* sourceFile = (*env)->GetStringUTFChars(env, jsource, &isCopy);
AVFormatContext* pInputCtx;
AVStream* pInputStream;
open_input(sourceFile, &pInputCtx, &pInputStream);
// open output
const char* destFile = (*env)->GetStringUTFChars(env, jdest, &isCopy);
const char* cformat = (*env)->GetStringUTFChars(env, jformat, &isCopy);
const char* ccodec = (*env)->GetStringUTFChars(env, jcodec, &isCopy);
AVFormatContext* pOutputCtx;
AVOutputFormat* pOutputFmt;
AVStream* pOutputStream;
open_output(cformat, ccodec, destFile, &pOutputCtx, &pOutputFmt, &pOutputStream);
/// decode/encode
error = avformat_write_header(pOutputCtx, NULL);
DIE_IF_LESS_ZERO(error, "error writing output stream header to file: %s, error: %s", destFile, e2s(error));
AVFrame* frame = avcodec_alloc_frame();
DIE_IF_UNDEFINED(frame, "Could not allocate audio frame");
frame->pts = 0;
LOGI("allocate packet");
/* NOTE(review): pktIn and pktOut are never passed to av_init_packet in the visible code; only
 * pktOut.data and pktOut.size are set before encoding, so its other fields are indeterminate.
 * This is a plausible cause of the crash in avcodec_encode_audio2 — confirm. */
AVPacket pktIn;
AVPacket pktOut;
int got_frame, got_packet, len, frame_count = 0;
int64_t processed_time = 0, duration = pInputStream->duration;
while (av_read_frame(pInputCtx, &pktIn) >= 0) {
/* inner loop: keep decoding until the whole input packet has been consumed */
do {
len = avcodec_decode_audio4(pInputStream->codec, frame, &got_frame, &pktIn);
DIE_IF_LESS_ZERO(len, "Error decoding frame: %s", e2s(len));
if (len < 0) break;
len = FFMIN(len, pktIn.size);
/* NOTE(review): unpadded_linesize is computed but not used in the visible code. */
size_t unpadded_linesize = frame->nb_samples * av_get_bytes_per_sample(frame->format);
LOGI("audio_frame n:%d nb_samples:%d pts:%s\n", frame_count++, frame->nb_samples, av_ts2timestr(frame->pts, &(pInputStream->codec->time_base)));
if (got_frame) {
/* re-encode the same decoded frame until the encoder reports a packet */
do {
pktOut.data = NULL;
pktOut.size = 0;
LOGI("encode frame");
DIE_IF_UNDEFINED(pOutputStream->codec, "no output codec");
DIE_IF_UNDEFINED(frame->nb_samples, "no nb samples");
DIE_IF_UNDEFINED(pOutputStream->codec->internal, "no internal");
LOGI("tests done");
/* NOTE(review): the decoded frame is handed to the encoder unchanged; if the decoder's sample
 * format or frame size differs from what the encoder expects, a resampling step would be
 * needed — confirm. */
len = avcodec_encode_audio2(pOutputStream->codec, &pktOut, frame, &got_packet);
LOGI("encode done");
DIE_IF_LESS_ZERO(len, "Error (re)encoding frame: %s", e2s(len));
} while (!got_packet);
// write packet;
LOGI("write packet");
/* Write the compressed frame to the media file. */
error = av_interleaved_write_frame(pOutputCtx, &pktOut);
DIE_IF_LESS_ZERO(error, "Error while writing audio frame: %s", e2s(error));
/* advance past the bytes accounted for by len */
pktIn.data += len;
pktIn.size -= len;
} while (pktIn.size > 0);
LOGI("write trailer");
/// close resources
return 0;
Meanwhile I have figured this out and written an Android Library Project that does this
(for audio files). https://github.com/fscz/FFmpeg-Android
See the file /jni/audiodecoder.c for details

Not able to read audio streams with ffmpeg

I am trying to solve a big problem but am stuck on a very small issue. I am trying to read the audio streams inside a video file with the help of ffmpeg, but the loop that should traverse all of the file's streams only runs a couple of times. I cannot figure out what the issue is, as others have used very similar code.
Following is my code; please check:
/*
 * JNI entry point: opens the file named by `filename` with av_open_input_file, loops over its
 * streams while logging, and returns the loop index i.
 * NOTE(review): several statements (the body of the open check, the stream-info call, the loop
 * body conditions) are not visible in this excerpt.
 */
JNIEXPORT jint JNICALL Java_ru_dzakhov_ffmpeg_test_MainActivity_logFileInfo
(JNIEnv * env,
jobject this,
jstring filename
AVFormatContext *pFormatCtx;
int i,j,k, videoStream, audioStream;
AVCodecContext *pCodecCtx;
AVCodec *pCodec;
AVFrame *pFrame;
AVPacket packet;
int frameFinished;
float aspect_ratio;
AVCodecContext *aCodecCtx;
AVCodec *aCodec;
// NOTE(review): the UTF string is stored in a non-const char* and is never released in the visible code.
char *str = (*env)->GetStringUTFChars(env, filename, 0);
// Open video file
if(av_open_input_file(&pFormatCtx, str, NULL, 0, NULL)!=0)
// Retrieve stream information
// Find the first video stream
// NOTE(review): `&pFormatCtx->nb_streams` is the address of the field, not its value, so the
// loop bound is a pointer; the intended bound is presumably pFormatCtx->nb_streams.
for(i=0; i<&pFormatCtx->nb_streams; i++) {
LOGI("Audio Stream");
LOGI("Video stream is -1");
LOGI("Audio stream is -1");
return i;}
You may be having an issue related to library loading and unloading and how that relates to repeated calls through JNI. I'm not sure from your symptoms, but if you have no solution, try reading:
and here

Android : Playing MP3 files with AudioTrack using ffmpeg

I have integrated the ffmpeg lib into my project, and I can already get the information of media files. But now I have to play mp3 files using the AudioTrack class in Android with the ffmpeg lib.
For this I have to pass a byte buffer to AudioTrack, but I don't know how to get a byte buffer from ffmpeg and use it with AudioTrack. I also want to play the file instantly, without delay.
Here is my AudioTrack code in Java:
// Creates a 44100 Hz, mono, 16-bit PCM AudioTrack sized with the minimum buffer size and writes
// decoded bytes to it.
// NOTE(review): bufferSize, mode, buffer and readBytes are declared outside this excerpt, and no
// track.play() call is visible here.
AudioTrack track;
bufferSize = AudioTrack.getMinBufferSize(44100,AudioFormat.CHANNEL_CONFIGURATION_MONO, AudioFormat.ENCODING_PCM_16BIT)
track = new AudioTrack(AudioManager.STREAM_MUSIC, 44100, AudioFormat.CHANNEL_CONFIGURATION_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize, mode);
//Play audio clip
//Copy the decoded raw buffer from native code to "buffer" .....
track.write(buffer, 0, readBytes);
Can anyone please give me working code to play mp3 files with AudioTrack? I have searched a lot but haven't found any correct answer.
I managed this by creating a buffer of the audio file's data and then playing it with the AudioTrack class on the fly.
Now I'm trying to pause/stop the audio file, because stopping or pausing AudioTrack is not working.
Here is my code to pass the byte buffer to my Java class:
#include <assert.h>
#include <jni.h>
#include <string.h>
#include <android/log.h>
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#define LOG_TAG "mylib"
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
#define AUDIO_INBUF_SIZE 20480
/* JNI entry point createEngine; no body statements are visible in this excerpt. */
void Java_ru_dzakhov_ffmpeg_test_MainActivity_createEngine(JNIEnv* env,
jclass clazz) {
/*
 * JNI entry point: decodes the audio in `file` and, for every decoded chunk, copies the PCM bytes
 * into `array` and calls the Java method playSound(byte[], int) on obj with the byte count.
 * NOTE(review): inbuf and AUDIO_REFILL_THRESH are not declared in this excerpt, outbuf is never
 * allocated in the visible code, and no return statement for the jstring result is visible.
 */
jstring Java_ru_dzakhov_ffmpeg_test_MainActivity_loadFile(JNIEnv* env,
jobject obj, jstring file, jbyteArray array) {
jboolean isfilenameCopy;
/* NOTE(review): the remaining argument(s) of this call are missing from the excerpt. */
const char * filename = (*env)->GetStringUTFChars(env, file,
int audioStreamIndex;
AVCodec *codec;
AVCodecContext *c = NULL;
AVFormatContext * pFormatCtx;
AVCodecContext * aCodecCtx;
int out_size, len, audioStream = -1, i, err;
FILE *f, *outfile;
uint8_t *outbuf;
AVPacket avpkt;
/* look up the Java callback playSound(byte[], int) once, before decoding starts */
jclass cls = (*env)->GetObjectClass(env, obj);
jmethodID play = (*env)->GetMethodID(env, cls, "playSound", "([BI)V"); //At the beginning of your main function
LOGE("source file name is %s", filename);
LOGE("Stage 1");
/* read the source file's format information into an AVFormatContext */
int lError;
if ((lError = av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL))
!= 0) {
LOGE("Error open source file: %d", lError);
if ((lError = av_find_stream_info(pFormatCtx)) < 0) {
LOGE("Error find stream information: %d", lError);
LOGE("Stage 1.5");
LOGE("audio format: %s", pFormatCtx->iformat->name);
LOGE("audio bitrate: %d", pFormatCtx->bit_rate);
audioStreamIndex = av_find_best_stream(pFormatCtx, AVMEDIA_TYPE_AUDIO,
-1, -1, &codec, 0);
LOGE("audio codec: %s", codec->name);
/* take the audio stream's codec context from the format context */
aCodecCtx = pFormatCtx->streams[audioStreamIndex]->codec;
if (avcodec_open(aCodecCtx, codec) < 0) {
LOGE("cannot open the audio codec!");
printf("Audio decoding\n");
LOGE("Stage 1.7");
codec = avcodec_find_decoder(aCodecCtx->codec_id);
LOGE("Stage 1.8");
if (!codec) {
LOGE("codec not found\n");
LOGE("Stage 2");
// c= avcodec_alloc_context();
LOGE("Stage 3");
/* open it */
/* NOTE(review): avcodec_open is called on aCodecCtx a second time here (first call above). */
if (avcodec_open(aCodecCtx, codec) < 0) {
LOGE("could upper");
fprintf(stderr, "could not open codec\n");
LOGE("could not open codec");
LOGE("Stage 4");
/* NOTE(review): the file is read raw with fopen/fread here, independently of the
 * AVFormatContext opened above. */
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "could not open %s\n", filename);
LOGE("could not open");
/* decode until eof */
avpkt.data = inbuf;
avpkt.size = fread(inbuf, 1, AUDIO_INBUF_SIZE, f);
LOGE("Stage 5");
while (avpkt.size > 0) {
// LOGE("Stage 6");
out_size = (AVCODEC_MAX_AUDIO_FRAME_SIZE / 3) * 2;
len = avcodec_decode_audio3(aCodecCtx, (int16_t *) outbuf,
&out_size, &avpkt);
LOGE("data_size %d len %d", out_size, len);
if (len < 0) {
fprintf(stderr, "Error while decoding\n");
// LOGE("Stage 7");
if (out_size > 0) {
/* if a frame has been decoded, output it */
// LOGE("Stage 8");
/* NOTE(review): out_size bytes are copied into `array` without checking the array's length. */
jbyte *bytes = (*env)->GetByteArrayElements(env, array, NULL);
memcpy(bytes, outbuf, out_size); //
(*env)->ReleaseByteArrayElements(env, array, bytes, 0);
(*env)->CallVoidMethod(env, obj, play, array, out_size);
LOGE("Stage 9");
/* advance past the bytes the decoder consumed */
avpkt.size -= len;
avpkt.data += len;
if (avpkt.size < AUDIO_REFILL_THRESH) {
/* Refill the input buffer, to avoid trying to decode
* incomplete frames. Instead of this, one could also use
* a parser, or use a proper container format through
* libavformat. */
memmove(inbuf, avpkt.data, avpkt.size);
avpkt.data = inbuf;
len = fread(avpkt.data + avpkt.size, 1,
AUDIO_INBUF_SIZE - avpkt.size, f);
if (len > 0)
avpkt.size += len;
LOGE("Stage 12");
I have no clue about programming on Android, but Google just introduced a new low-level media API at I/O 2012.
Here's the link to the yt video: http://www.youtube.com/watch?v=YmCqJlzIUXs

