I am encoding raw audio data on Android using the FFmpeg libraries. The native code reads the audio data from an external device and encodes it into AAC format in an MP4 container. I am finding that the audio data is successfully encoded (I can play it with Groove Music, my default Windows audio player), but the metadata, as reported by ffprobe, shows an incorrect duration of 0.05 secs - the recording is actually several seconds long. The bitrate is also reported incorrectly as around 65 kbps even though I specified 192 kbps.
I've tried recordings of various durations, but the result is always similar: the same (very small) duration and wrong bitrate. I've also tried various other audio players, such as QuickTime, but they play only the first 0.05 secs or so of the audio.
I've removed error-checking from the following. The actual code checks every call and no problems are reported.
Initialisation:
void AudioWriter::initialise( const char *filePath )
{
    AVCodecID avCodecID = AVCodecID::AV_CODEC_ID_AAC;
    int bitRate = 192000;
    const char *containerFormat = "mp4";
    int sampleRate = 48000;
    int nChannels = 2;

    mAvCodec = avcodec_find_encoder(avCodecID);
    mAvCodecContext = avcodec_alloc_context3(mAvCodec);
    mAvCodecContext->codec_id = avCodecID;
    mAvCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
    mAvCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
    mAvCodecContext->bit_rate = bitRate;
    mAvCodecContext->sample_rate = sampleRate;
    mAvCodecContext->channels = nChannels;
    mAvCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
    avcodec_open2( mAvCodecContext, mAvCodec, nullptr );

    avformat_alloc_output_context2(&mAvFormatContext, nullptr, containerFormat, nullptr);
    mAvFormatContext->audio_codec = mAvCodec;
    mAvFormatContext->audio_codec_id = avCodecID;

    mAvOutputStream = avformat_new_stream(mAvFormatContext, mAvCodec);
    avcodec_parameters_from_context(mAvOutputStream->codecpar, mAvCodecContext);

    if (!(mAvFormatContext->oformat->flags & AVFMT_NOFILE))
    {
        avio_open(&mAvFormatContext->pb, filePath, AVIO_FLAG_WRITE);
    }
    if ( mAvFormatContext->oformat->flags & AVFMT_GLOBALHEADER )
    {
        mAvCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
    avformat_write_header(mAvFormatContext, NULL);

    mAvAudioFrame = av_frame_alloc();
    mAvAudioFrame->nb_samples = mAvCodecContext->frame_size;
    mAvAudioFrame->format = mAvCodecContext->sample_fmt;
    mAvAudioFrame->channel_layout = mAvCodecContext->channel_layout;
    av_frame_get_buffer(mAvAudioFrame, 0);
    av_frame_make_writable(mAvAudioFrame);

    mAvPacket = av_packet_alloc();
}
Encoding:
// SoundRecording is a custom class with the raw samples to be encoded
bool AudioWriter::encodeToContainer( SoundRecording *soundRecording )
{
    int frameCount = mAvCodecContext->frame_size;
    int nChannels = mAvCodecContext->channels;
    float *buf = new float[frameCount*nChannels];
    // The frame's data pointers are the per-channel (planar) buffers
    float **samples = (float**)mAvAudioFrame->data;
    while ( soundRecording->hasReadableData() )
    {
        // Populate the frame: de-interleave the raw samples into planar data
        int samplesRead = soundRecording->read( buf, frameCount*nChannels );
        int nFrames = samplesRead/nChannels;
        for ( int i = 0; i < nFrames; ++i )
        {
            for ( int c = 0; c < nChannels; ++c )
            {
                samples[c][i] = buf[nChannels*i + c];
            }
        }
        // Fill a gap at the end with silence
        if ( samplesRead < frameCount*nChannels )
        {
            for ( int i = nFrames; i < frameCount; ++i )
            {
                for ( int c = 0; c < nChannels; ++c )
                {
                    samples[c][i] = 0.0f;
                }
            }
        }
        encodeFrame( mAvAudioFrame );
    }
    delete[] buf;
    finish();
    return true;
}
bool AudioWriter::encodeFrame( AVFrame *frame )
{
    // Send the frame for encoding (a null frame flushes the encoder)
    if ( frame != nullptr )
    {
        frame->pts = mAudFrameCounter++;
    }
    avcodec_send_frame( mAvCodecContext, frame );
    int ret = 0;
    while ( ret >= 0 )
    {
        ret = avcodec_receive_packet( mAvCodecContext, mAvPacket );
        if ( ret == AVERROR(EAGAIN) || ret == AVERROR_EOF )
        {
            break;
        }
        else if ( ret < 0 )
        {
            return false;
        }
        // Convert packet timestamps from the codec time base to the
        // stream time base, then write the packet to the container
        av_packet_rescale_ts( mAvPacket, mAvCodecContext->time_base, mAvOutputStream->time_base );
        mAvPacket->stream_index = mAvOutputStream->index;
        av_interleaved_write_frame( mAvFormatContext, mAvPacket );
        av_packet_unref( mAvPacket );
    }
    return true;
}
void AudioWriter::finish()
{
    // Flush by sending a null frame
    encodeFrame( nullptr );
    av_write_trailer(mAvFormatContext);
}
Since the resultant file contains the recorded music, the code to manipulate the audio data seems to be correct (unless I am overwriting other memory somehow).
The inaccurate duration and bitrate suggest that information concerning time is not being properly managed. I set the pts of the frames using a simple increasing integer. I'm unclear what the code that sets the timestamp and stream index achieves - and whether it's even necessary: I copied it from supposedly working code but I've seen other code without it.
Can anyone see what I'm doing wrong?
The timestamps need to be correct. Set the time_base to 1/sample_rate and increment the timestamp by 1024 for each frame. Note: 1024 is AAC-specific; if you change codecs, you need to change the frame size.
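As a sketch, using the member names from the question's code, that change could look like this:
// In initialise(), before avcodec_open2(): count pts in units of samples
mAvCodecContext->time_base = { 1, sampleRate };

// In encodeFrame(): advance the pts by one frame's worth of samples
// (frame_size is 1024 for AAC) instead of incrementing by 1
if ( frame != nullptr )
{
    frame->pts = mAudFrameCounter;
    mAudFrameCounter += mAvCodecContext->frame_size;
}
With that time_base, the existing av_packet_rescale_ts() call converts the sample-counted timestamps into the output stream's time base, so the container gets the real duration.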
Using FFmpeg 4.0.2 and calling its ffmpeg.c main function twice causes an Android app crash (using FFmpeg shared libs and JNI):
A/libc: Fatal signal 11 (SIGSEGV), code 1, fault addr 0x0 in tid 20153
It works OK with FFmpeg 3.2.5, though.
FFmpeg 4.0.2 main
int main(int argc, char **argv) {
    int i, ret;
    int64_t ti;

    init_dynload();

    register_exit(ffmpeg_cleanup);

    setvbuf(stderr, NULL, _IONBF, 0); /* win32 runtime needs this */

    av_log_set_flags(AV_LOG_SKIP_REPEATED);
    parse_loglevel(argc, argv, options);

    if (argc > 1 && !strcmp(argv[1], "-d")) {
        run_as_daemon = 1;
        av_log_set_callback(log_callback_null);
        argc--;
        argv++;
    }

#if CONFIG_AVDEVICE
    avdevice_register_all();
#endif
    avformat_network_init();

    show_banner(argc, argv, options);

    /* parse options and open all input/output files */
    ret = ffmpeg_parse_options(argc, argv);
    if (ret < 0)
        exit_program(1);

    if (nb_output_files <= 0 && nb_input_files == 0) {
        show_usage();
        av_log(NULL, AV_LOG_WARNING, "Use -h to get full help or, even better, run 'man %s'\n", program_name);
        exit_program(1);
    }

    /* file converter / grab */
    if (nb_output_files <= 0) {
        av_log(NULL, AV_LOG_FATAL, "At least one output file must be specified\n");
        exit_program(1);
    }

//    if (nb_input_files == 0) {
//        av_log(NULL, AV_LOG_FATAL, "At least one input file must be specified\n");
//        exit_program(1);
//    }

    for (i = 0; i < nb_output_files; i++) {
        if (strcmp(output_files[i]->ctx->oformat->name, "rtp"))
            want_sdp = 0;
    }

    current_time = ti = getutime();
    if (transcode() < 0)
        exit_program(1);
    ti = getutime() - ti;
    if (do_benchmark) {
        av_log(NULL, AV_LOG_INFO, "bench: utime=%0.3fs\n", ti / 1000000.0);
    }
    av_log(NULL, AV_LOG_DEBUG, "%"PRIu64" frames successfully decoded, %"PRIu64" decoding errors\n",
           decode_error_stat[0], decode_error_stat[1]);
    if ((decode_error_stat[0] + decode_error_stat[1]) * max_error_rate < decode_error_stat[1])
        exit_program(69);

    ffmpeg_cleanup(received_nb_signals ? 255 : main_return_code);
    return main_return_code;
}
FFmpeg 3.2.5 main
int main(int argc, char **argv) {
    av_log(NULL, AV_LOG_WARNING, " Command start");
    int i, ret;
    int64_t ti;

    init_dynload();

    register_exit(ffmpeg_cleanup);

    setvbuf(stderr, NULL, _IONBF, 0); /* win32 runtime needs this */

    av_log_set_flags(AV_LOG_SKIP_REPEATED);
    parse_loglevel(argc, argv, options);

    if (argc > 1 && !strcmp(argv[1], "-d")) {
        run_as_daemon = 1;
        av_log_set_callback(log_callback_null);
        argc--;
        argv++;
    }

    avcodec_register_all();
#if CONFIG_AVDEVICE
    avdevice_register_all();
#endif
    avfilter_register_all();
    av_register_all();
    avformat_network_init();
    av_log(NULL, AV_LOG_WARNING, " Register to complete the codec");

    show_banner(argc, argv, options);

    /* parse options and open all input/output files */
    ret = ffmpeg_parse_options(argc, argv);
    if (ret < 0)
        exit_program(1);

    if (nb_output_files <= 0 && nb_input_files == 0) {
        show_usage();
        av_log(NULL, AV_LOG_WARNING, "Use -h to get full help or, even better, run 'man %s'\n",
               program_name);
        exit_program(1);
    }

    /* file converter / grab */
    if (nb_output_files <= 0) {
        av_log(NULL, AV_LOG_FATAL, "At least one output file must be specified\n");
        exit_program(1);
    }

//    if (nb_input_files == 0) {
//        av_log(NULL, AV_LOG_FATAL, "At least one input file must be specified\n");
//        exit_program(1);
//    }

    for (i = 0; i < nb_output_files; i++) {
        if (strcmp(output_files[i]->ctx->oformat->name, "rtp"))
            want_sdp = 0;
    }

    current_time = ti = getutime();
    if (transcode() < 0)
        exit_program(1);
    ti = getutime() - ti;
    if (do_benchmark) {
        av_log(NULL, AV_LOG_INFO, "bench: utime=%0.3fs\n", ti / 1000000.0);
    }
    av_log(NULL, AV_LOG_DEBUG, "%"PRIu64" frames successfully decoded, %"PRIu64" decoding errors\n",
           decode_error_stat[0], decode_error_stat[1]);
    if ((decode_error_stat[0] + decode_error_stat[1]) * max_error_rate < decode_error_stat[1])
        exit_program(69);

    exit_program(received_nb_signals ? 255 : main_return_code);

    nb_filtergraphs = 0;
    nb_input_streams = 0;
    nb_input_files = 0;
    progress_avio = NULL;
    input_streams = NULL;
    nb_input_streams = 0;
    input_files = NULL;
    nb_input_files = 0;
    output_streams = NULL;
    nb_output_streams = 0;
    output_files = NULL;
    nb_output_files = 0;

    return main_return_code;
}
So what could the issue be? It seems FFmpeg 4.0.2 doesn't release something after the first command (resources, or static variables that need resetting to their initial values).
Adding the following lines from FFmpeg 3.2.5 to the end of FFmpeg 4.0.2's main function solved the problem. (I got FFmpeg 3.2.5 from someone's Android project, so those lines had been added by that user.)
nb_filtergraphs = 0;
nb_input_streams = 0;
nb_input_files = 0;
progress_avio = NULL;
input_streams = NULL;
nb_input_streams = 0;
input_files = NULL;
nb_input_files = 0;
output_streams = NULL;
nb_output_streams = 0;
output_files = NULL;
nb_output_files = 0;
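Concretely, in the 4.0.2 main this would look like the following sketch (the duplicated assignments in the list above are collapsed; it also assumes exit_program()/ffmpeg_cleanup() have been patched not to terminate the process, as is usual when embedding ffmpeg.c in an Android app):
    ffmpeg_cleanup(received_nb_signals ? 255 : main_return_code);
    /* reset the globals so a second invocation starts from a clean state */
    nb_filtergraphs = 0;
    input_streams = NULL;
    nb_input_streams = 0;
    input_files = NULL;
    nb_input_files = 0;
    output_streams = NULL;
    nb_output_streams = 0;
    output_files = NULL;
    nb_output_files = 0;
    progress_avio = NULL;
    return main_return_code;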
We are working on a project that uses the FFmpeg library for video frame extraction on the Android platform.
On Windows, we have observed:
Using the CLI, ffmpeg is capable of extracting frames at 30 fps using the command ffmpeg -i input.flv -vf fps=1 out%d.png.
Using Xuggler, we are able to extract frames at 30 fps.
Using the FFmpeg APIs directly in code, we are getting frames at 30 fps.
But when we use the FFmpeg APIs directly on Android (see Hardware Details), we are getting the following results:
720p video (1280 x 720) - 16 fps (approx. 60 ms/frame)
1080p video (1920 x 1080) - 7 fps (approx. 140 ms/frame)
We haven't tested Xuggler/CLI on Android yet.
Ideally, we should be able to get the data in constant time (approx. 30 ms/frame).
How can we get 30 fps on Android?
Code being used on Android:
if (avformat_open_input(&pFormatCtx, pcVideoFile, NULL, NULL)) {
    iError = -1; //Couldn't open file
}

if (!iError) {
    //Retrieve stream information
    if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
        iError = -2; //Couldn't find stream information
}

//Find the first video stream
if (!iError) {
    for (i = 0; i < pFormatCtx->nb_streams; i++) {
        if (AVMEDIA_TYPE_VIDEO
                == pFormatCtx->streams[i]->codec->codec_type) {
            iFramesInVideo = pFormatCtx->streams[i]->nb_index_entries;
            duration = pFormatCtx->streams[i]->duration;
            begin = pFormatCtx->streams[i]->start_time;
            time_base = (pFormatCtx->streams[i]->time_base.num * 1.0f)
                    / pFormatCtx->streams[i]->time_base.den;

            pCodecCtx = avcodec_alloc_context3(NULL);
            if (!pCodecCtx) {
                iError = -6;
                break;
            }

            AVCodecParameters params = { 0 };
            iReturn = avcodec_parameters_from_context(&params,
                    pFormatCtx->streams[i]->codec);
            if (iReturn < 0) {
                iError = -7;
                break;
            }

            iReturn = avcodec_parameters_to_context(pCodecCtx, &params);
            if (iReturn < 0) {
                iError = -7;
                break;
            }

            //pCodecCtx = pFormatCtx->streams[i]->codec;
            iVideoStreamIndex = i;
            break;
        }
    }
}

if (!iError) {
    if (iVideoStreamIndex == -1) {
        iError = -3; // Didn't find a video stream
    }
}

if (!iError) {
    // Find the decoder for the video stream
    pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
    if (pCodec == NULL) {
        iError = -4;
    }
}

if (!iError) {
    // Open codec
    if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
        iError = -5;
}

if (!iError) {
    iNumBytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, pCodecCtx->width,
            pCodecCtx->height, 1);

    // initialize SWS context for software scaling
    sws_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
            pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height,
            AV_PIX_FMT_RGB24,
            SWS_BILINEAR,
            NULL,
            NULL,
            NULL);
    if (!sws_ctx) {
        iError = -7;
    }
}

clock_gettime(CLOCK_MONOTONIC_RAW, &end);
delta_us = (end.tv_sec - start.tv_sec) * 1000000
        + (end.tv_nsec - start.tv_nsec) / 1000;
start = end;
//LOGI("Starting_Frame_Extraction: %lld", delta_us);

if (!iError) {
    while (av_read_frame(pFormatCtx, &packet) == 0) {
        // Is this a packet from the video stream?
        if (packet.stream_index == iVideoStreamIndex) {
            pFrame = av_frame_alloc();
            if (NULL == pFrame) {
                iError = -8;
                break;
            }

            // Decode video frame
            avcodec_decode_video2(pCodecCtx, pFrame, &iFrameFinished,
                    &packet);
            if (iFrameFinished) {
                //OUR CODE
            }
            av_frame_free(&pFrame);
            pFrame = NULL;
        }
        av_packet_unref(&packet);
    }
}
You need some structures and functions from libavfilter.
The vf option means "video filter". The command line ffmpeg -i input -vf fps=30 out%d.png will output video_length_in_seconds * 30 frames regardless of the original video's fps. That means if the video is 25 fps, you'll get some duplicate frames, while if the video is at more than 30 fps, you'll lose some frames.
To achieve this, you have to initialize some filter contexts. See the filtering_video.c example from the ffmpeg source.
AVFilter* buffersrc = avfilter_get_by_name("buffer");
AVFilter* buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut* outputs = avfilter_inout_alloc();
AVFilterInOut* inputs = avfilter_inout_alloc();
AVRational time_base = p_format_ctx->streams[video_stream]->time_base;
enum AVPixelFormat pix_fmts[] = { p_codec_ctx->pix_fmt, AV_PIX_FMT_NONE };

filter_graph = avfilter_graph_alloc();
if (!outputs || !inputs || !filter_graph) {
    // failed, goto cleanup
}

// Describe the input of the graph: the decoded frames from the video stream
char args[512];
snprintf(args, sizeof(args),
         "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
         p_codec_ctx->width, p_codec_ctx->height, p_codec_ctx->pix_fmt,
         time_base.num, time_base.den,
         p_codec_ctx->sample_aspect_ratio.num, p_codec_ctx->sample_aspect_ratio.den);

int ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
                                       args, NULL, filter_graph);
if (ret < 0) {
    LOG(ERROR) << "Cannot create buffer source";
    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    return false;
}

ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
                                   NULL, NULL, filter_graph);
if (ret < 0) {
    // failed... blabla
}

ret = av_opt_set_int_list(buffersink_ctx, "pix_fmts", pix_fmts,
                          AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
    // failed... blabla
}

outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrc_ctx;
outputs->pad_idx = 0;
outputs->next = NULL;

inputs->name = av_strdup("out");
inputs->filter_ctx = buffersink_ctx;
inputs->pad_idx = 0;
inputs->next = NULL;

const char* filter_description = "fps=fps=30";
if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_description,
                                    &inputs, &outputs, NULL)) < 0) {
    // failed...
}
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
    // failed...
}
OK, this is all the initialization needed.
Now add some code to the decoding part:
avcodec_decode_video2(p_codec_ctx, p_frame, &got_frame, &packet);
if (got_frame) {
    p_frame->pts = av_frame_get_best_effort_timestamp(p_frame);
    // Push the decoded frame into the filter graph
    if (av_buffersrc_add_frame_flags(buffersrc_ctx, p_frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
        // failed... blabla
    }
    // Pull the filtered frames back out
    while (1) {
        // p_frame_stage is an AVFrame, same size as p_frame;
        // it needs to be allocated beforehand
        int ret = av_buffersink_get_frame(buffersink_ctx, p_frame_stage);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        if (ret < 0) {
            // something wrong. filter failed.
        }
        // Do something with p_frame_stage here.
    }
}
Please take a look at https://gitter.im/mobile-ffmpeg/Lobby?at=5c5bb384f04ef00644f1bb4e. A few lines below that, they mention options to accelerate the process, such as ... -preset ultrafast, -threads 10, -tune zerolatency, -x264-params sliced-threads=1.
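For reference, assembled into a single (hypothetical) command line, those options would look something like the following; note that -tune zerolatency and sliced-threads are x264 encoder options, so they apply when producing H.264 output rather than when dumping PNG frames:
ffmpeg -i input.flv -threads 10 -preset ultrafast -tune zerolatency -x264-params sliced-threads=1 output.mp4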
I have found an open-source video player for Android which uses ffmpeg to decode video.
I have some problems with audio that sometimes plays with jerks, though the video picture is shown well. The basic idea of the player is that audio and video are decoded in two different threads, and then in a third thread they are passed back: the video picture is shown on a SurfaceView, and the sound is passed as a byte array to an AudioTrack, which then plays it. But sometimes the sound is lost or plays with jerks. Can anyone give me a starting point (some basic concepts)? Maybe I should change the buffer size for the AudioTrack, or add some flags to it. Here is the piece of code where the AudioTrack is created.
private AudioTrack prepareAudioTrack(int sampleRateInHz,
        int numberOfChannels) {

    for (;;) {
        int channelConfig;
        if (numberOfChannels == 1) {
            channelConfig = AudioFormat.CHANNEL_OUT_MONO;
        } else if (numberOfChannels == 2) {
            channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
        } else if (numberOfChannels == 3) {
            channelConfig = AudioFormat.CHANNEL_OUT_FRONT_CENTER
                    | AudioFormat.CHANNEL_OUT_FRONT_RIGHT
                    | AudioFormat.CHANNEL_OUT_FRONT_LEFT;
        } else if (numberOfChannels == 4) {
            channelConfig = AudioFormat.CHANNEL_OUT_QUAD;
        } else if (numberOfChannels == 5) {
            channelConfig = AudioFormat.CHANNEL_OUT_QUAD
                    | AudioFormat.CHANNEL_OUT_LOW_FREQUENCY;
        } else if (numberOfChannels == 6) {
            channelConfig = AudioFormat.CHANNEL_OUT_5POINT1;
        } else if (numberOfChannels == 8) {
            channelConfig = AudioFormat.CHANNEL_OUT_7POINT1;
        } else {
            channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
        }
        try {
            Log.d("MyLog", "Creating Audio player");
            int minBufferSize = AudioTrack.getMinBufferSize(sampleRateInHz,
                    channelConfig, AudioFormat.ENCODING_PCM_16BIT);
            AudioTrack audioTrack = new AudioTrack(
                    AudioManager.STREAM_MUSIC, sampleRateInHz,
                    channelConfig, AudioFormat.ENCODING_PCM_16BIT,
                    minBufferSize, AudioTrack.MODE_STREAM);
            return audioTrack;
        } catch (IllegalArgumentException e) {
            // Fall back to fewer channels if this configuration is rejected
            if (numberOfChannels > 2) {
                numberOfChannels = 2;
            } else if (numberOfChannels > 1) {
                numberOfChannels = 1;
            } else {
                throw e;
            }
        }
    }
}
And this is the piece of native code where the sound bytes are written to the AudioTrack:
int player_write_audio(struct DecoderData *decoder_data, JNIEnv *env,
        int64_t pts, uint8_t *data, int data_size, int original_data_size) {
    struct Player *player = decoder_data->player;
    int stream_no = decoder_data->stream_no;
    int err = ERROR_NO_ERROR;
    int ret;
    AVCodecContext *c = player->input_codec_ctxs[stream_no];
    AVStream *stream = player->input_streams[stream_no];
    LOGI(10, "player_write_audio Writing audio frame")

    jbyteArray samples_byte_array = (*env)->NewByteArray(env, data_size);
    if (samples_byte_array == NULL) {
        err = -ERROR_NOT_CREATED_AUDIO_SAMPLE_BYTE_ARRAY;
        goto end;
    }

    if (pts != AV_NOPTS_VALUE) {
        player->audio_clock = av_rescale_q(pts, stream->time_base, AV_TIME_BASE_Q);
        LOGI(9, "player_write_audio - read from pts")
    } else {
        // No pts available: advance the audio clock by this chunk's duration
        int64_t sample_time = original_data_size;
        sample_time *= 1000000ll;
        sample_time /= c->channels;
        sample_time /= c->sample_rate;
        sample_time /= av_get_bytes_per_sample(c->sample_fmt);
        player->audio_clock += sample_time;
        LOGI(9, "player_write_audio - added")
    }
    enum WaitFuncRet wait_ret = player_wait_for_frame(player,
            player->audio_clock + AUDIO_TIME_ADJUST_US, stream_no);
    if (wait_ret == WAIT_FUNC_RET_SKIP) {
        goto end;
    }

    LOGI(10, "player_write_audio Writing sample data")
    jbyte *jni_samples = (*env)->GetByteArrayElements(env, samples_byte_array,
            NULL);
    memcpy(jni_samples, data, data_size);
    (*env)->ReleaseByteArrayElements(env, samples_byte_array, jni_samples, 0);

    LOGI(10, "player_write_audio playing audio track");
    ret = (*env)->CallIntMethod(env, player->audio_track,
            player->audio_track_write_method, samples_byte_array, 0, data_size);
    jthrowable exc = (*env)->ExceptionOccurred(env);
    if (exc) {
        err = -ERROR_PLAYING_AUDIO;
        LOGE(3, "Could not write audio track: reason in exception");
        // TODO maybe release exc
        goto free_local_ref;
    }
    if (ret < 0) {
        err = -ERROR_PLAYING_AUDIO;
        LOGE(3,
                "Could not write audio track: reason: %d look in AudioTrack.write()", ret);
        goto free_local_ref;
    }

free_local_ref:
    LOGI(10, "player_write_audio releasing local ref");
    (*env)->DeleteLocalRef(env, samples_byte_array);

end:
    return err;
}
I would appreciate any help! Thank you very much!
I had the same problem. The issue is the starting point of the audio data written to the audio player. In 16-bit PCM data, each pair of bytes forms one audio sample (little-endian). For correct playback, every sample must be assembled starting at the right byte before being written to the audio player. If the first byte of the read buffer is not the first byte of a sample, the samples cannot be assembled correctly and the sound is destroyed. In my case I was reading samples from a file, and sometimes the read started at the second byte of a sample, so all the data read from that point on was decoded incorrectly. I solved the problem by checking the starting offset: if it is an odd number, I increase it by one to make it even.
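A sketch of that fix (a hypothetical helper; it assumes 16-bit little-endian PCM, where every sample starts at an even byte offset):
/* Align a byte offset to a 16-bit sample boundary before reading PCM
   data, so playback never starts in the middle of a sample. */
long align_to_sample(long offset)
{
    return (offset % 2 != 0) ? offset + 1 : offset;
}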
I'm trying to play an audio stream with ffmpeg and OpenSL ES on Android. The problem seems to be in passing the decoded and resampled frames from ffmpeg to OpenSL ES, as the sound I can hear is robotic and has scratching.
The decoded frames from ffmpeg:
PCM
48000 Hz
S16p
OpenSL ES needs in this case:
PCM
48000 Hz
S16
OpenSL ES setup:
SLDataLocator_AndroidSimpleBufferQueue loc_bufq = {SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 255};
SLDataFormat_PCM format_pcm = { SL_DATAFORMAT_PCM, 2 , SL_SAMPLINGRATE_48, SL_PCMSAMPLEFORMAT_FIXED_16, SL_PCMSAMPLEFORMAT_FIXED_16,
SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT, SL_BYTEORDER_LITTLEENDIAN};
SLDataSource audioSrc = {&loc_bufq, &format_pcm};
This is the pseudocode for resampling and enqueueing to opensles:
#define OPENSLES_BUFLEN 10
#define MAX_AUDIO_FRAME_SIZE 192000

DECLARE_ALIGNED(16, uint8_t, audio_buffer)[MAX_AUDIO_FRAME_SIZE * OPENSLES_BUFLEN];

int decode_audio(AVCodecContext *ctx, SwrContext *swr_context, AVPacket *packet, AVFrame *frame) {
    int got_frame_ptr;
    int len = avcodec_decode_audio4(ctx, frame, &got_frame_ptr, packet);
    if (!got_frame_ptr)
        return -ERROR;

    int original_data_size = av_samples_get_buffer_size(NULL, ctx->channels,
            frame->nb_samples, ctx->sample_fmt, 1);
    uint8_t *audio_buf;
    int data_size;

    if (swr_context != NULL) {
        uint8_t *out[] = { audio_buffer };
        // Bytes per output frame: 2 channels of packed S16
        int sample_per_buffer_divider = 2 * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
        int len2 = swr_convert(swr_context, out,
                sizeof(audio_buffer) / sample_per_buffer_divider,
                frame->extended_data, frame->nb_samples);
        if (len2 < 0) {
            return -ERROR;
        }
        if (len2 == sizeof(audio_buffer) / sample_per_buffer_divider) {
            swr_init(swr_context);
        }
        audio_buf = audio_buffer;
        data_size = len2 * sample_per_buffer_divider;
    } else {
        audio_buf = frame->data[0];
        data_size = original_data_size;
    }

    (*opengSLESData->bqPlayerBufferQueue)->Enqueue(opengSLESData->bqPlayerBufferQueue, audio_buf, data_size);
}
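For reference, since the pseudocode above doesn't show it: a sketch (not the poster's code) of how a SwrContext for this planar-S16-to-packed-S16 conversion might be set up, assuming stereo 48 kHz on both sides:
// Convert planar S16 at 48 kHz stereo into the packed S16 that
// the OpenSL ES buffer queue expects.
SwrContext *swr_context = swr_alloc_set_opts(NULL,
        AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, 48000,   // output: packed
        AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16P, 48000,  // input: planar
        0, NULL);
if (swr_context == NULL || swr_init(swr_context) < 0) {
    // handle the error
}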
I would appreciate any help, thanks.
This example may help:
#include "stdafx.h"
#include <iostream>
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
//#include "swscale.h"
#include "libswresample/swresample.h"
};
FILE *fin, *fout;
int ffmpeg_audio_decode( const char * inFile, const char * outFile)
{
// Initialize FFmpeg
av_register_all();
AVFrame* frame = avcodec_alloc_frame();
if (!frame)
{
std::cout << "Error allocating the frame" << std::endl;
return 1;
}
// you can change the file name "01 Push Me to the Floor.wav" to whatever the file is you're reading, like "myFile.ogg" or
// "someFile.webm" and this should still work
AVFormatContext* formatContext = NULL;
//if (avformat_open_input(&formatContext, "01 Push Me to the Floor.wav", NULL, NULL) != 0)
if (avformat_open_input(&formatContext, inFile, NULL, NULL) != 0)
{
av_free(frame);
std::cout << "Error opening the file" << std::endl;
return 1;
}
if (avformat_find_stream_info(formatContext, NULL) < 0)
{
av_free(frame);
av_close_input_file(formatContext);
std::cout << "Error finding the stream info" << std::endl;
return 1;
}
AVStream* audioStream = NULL;
// Find the audio stream (some container files can have multiple streams in them)
for (unsigned int i = 0; i < formatContext->nb_streams; ++i)
{
if (formatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
audioStream = formatContext->streams[i];
break;
}
}
if (audioStream == NULL)
{
av_free(frame);
av_close_input_file(formatContext);
std::cout << "Could not find any audio stream in the file" << std::endl;
return 1;
}
AVCodecContext* codecContext = audioStream->codec;
codecContext->codec = avcodec_find_decoder(codecContext->codec_id);
if (codecContext->codec == NULL)
{
av_free(frame);
av_close_input_file(formatContext);
std::cout << "Couldn't find a proper decoder" << std::endl;
return 1;
}
else if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
{
av_free(frame);
av_close_input_file(formatContext);
std::cout << "Couldn't open the context with the decoder" << std::endl;
return 1;
}
std::cout << "This stream has " << codecContext->channels << " channels and a sample rate of " << codecContext->sample_rate << "Hz" << std::endl;
std::cout << "The data is in the format " << av_get_sample_fmt_name(codecContext->sample_fmt) << std::endl;
//codecContext->sample_fmt = AV_SAMPLE_FMT_S16;
int64_t outChannelLayout = AV_CH_LAYOUT_MONO; //AV_CH_LAYOUT_STEREO;
AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_S16; // Packed audio, non-planar (this is the most common format, and probably what you want; also, WAV needs it)
int outSampleRate = 8000;//44100;
// Note that AVCodecContext::channel_layout may or may not be set by libavcodec. Because of this,
// we won't use it, and will instead try to guess the layout from the number of channels.
SwrContext* swrContext = swr_alloc_set_opts(NULL,
outChannelLayout,
outSampleFormat,
outSampleRate,
av_get_default_channel_layout(codecContext->channels),
codecContext->sample_fmt,
codecContext->sample_rate,
0,
NULL);
if (swrContext == NULL)
{
av_free(frame);
avcodec_close(codecContext);
avformat_close_input(&formatContext);
std::cout << "Couldn't create the SwrContext" << std::endl;
return 1;
}
if (swr_init(swrContext) != 0)
{
av_free(frame);
avcodec_close(codecContext);
avformat_close_input(&formatContext);
swr_free(&swrContext);
std::cout << "Couldn't initialize the SwrContext" << std::endl;
return 1;
}
fout = fopen(outFile, "wb+");
AVPacket packet;
av_init_packet(&packet);
// Read the packets in a loop
while (av_read_frame(formatContext, &packet) == 0)
{
if (packet.stream_index == audioStream->index)
{
AVPacket decodingPacket = packet;
while (decodingPacket.size > 0)
{
// Try to decode the packet into a frame
int frameFinished = 0;
int result = avcodec_decode_audio4(
codecContext,
frame,
&frameFinished,
&decodingPacket);
if (result < 0 || frameFinished == 0)
{
break;
}
unsigned char buffer[100000] = {NULL};
unsigned char* pointers[SWR_CH_MAX] = {NULL};
pointers[0] = &buffer[0];
int numSamplesOut = swr_convert(
swrContext,
pointers,
outSampleRate,
(const unsigned char**)frame->extended_data,
frame->nb_samples);
fwrite(
(short *)buffer,
sizeof(short),
(size_t)numSamplesOut,
fout);
decodingPacket.size -= result;
decodingPacket.data += result;
}
}
// You *must* call av_free_packet() after each call to av_read_frame() or else you'll leak memory
av_free_packet(&packet);
}
// Some codecs will cause frames to be buffered up in the decoding process. If the CODEC_CAP_DELAY flag
// is set, there can be buffered up frames that need to be flushed, so we'll do that
if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
{
av_init_packet(&packet);
// Decode all the remaining frames in the buffer, until the end is reached
int frameFinished = 0;
while (avcodec_decode_audio4(codecContext, frame, &frameFinished, &packet) >= 0 && frameFinished)
{
}
}
// Clean up!
av_free(frame);
avcodec_close(codecContext);
av_close_input_file(formatContext);
fclose(fout);
}
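A hypothetical invocation (file names are placeholders): the output is headerless mono 8 kHz S16 PCM, so it can be auditioned with something like ffplay -f s16le -ar 8000 -ac 1 out.pcm.
int main()
{
    // Decode any input FFmpeg can read into raw 8 kHz mono S16 PCM
    return ffmpeg_audio_decode("input.mp3", "out.pcm");
}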