We are working on a project that consumes FFMPEG library for video frame extraction on Android platform.
On Windows, we have observed:
Using CLI, ffmpeg is capable of extracting frames at 30 fps using command ffmpeg -i input.flv -vf fps=1 out%d.png.
Using Xuggler, we are able to extract frames at 30 fps.
Using FFMPEG APIs directly in code, we are getting frames at 30 fps.
But when we use FFMPEG APIs directly on Android (See Hardware Details), we are getting following results:
720p video (1280 x 720) - 16 fps (approx. 60 ms/frame)
1080p video (1920 x 1080) - 7 fps (approx. 140 ms/frame)
We haven't tested Xuggler/CLI on Android yet.
Ideally, we should be able to get the data in constant time (approx. 30 ms/frame).
How can we get 30 fps on Android?
Code being used on Android:
/*
 * Open the input file, probe its streams and prepare a decoder context for
 * the first video stream.
 *
 * iError codes set here: -1 open failed, -2 stream probing failed,
 * -3 no video stream, -6 context allocation failed, -7 parameter copy failed.
 */
if (avformat_open_input(&pFormatCtx, pcVideoFile, NULL, NULL)) {
    iError = -1; /* Couldn't open file */
}
if (!iError) {
    /* Retrieve stream information */
    if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
        iError = -2; /* Couldn't find stream information */
    }
}
/* Find the first video stream */
if (!iError) {
    for (i = 0; i < pFormatCtx->nb_streams; i++) {
        AVStream *pStream = pFormatCtx->streams[i];

        if (pStream->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
            continue;
        }
        iFramesInVideo = pStream->nb_index_entries;
        duration = pStream->duration;
        begin = pStream->start_time;
        time_base = (pStream->time_base.num * 1.0f) / pStream->time_base.den;

        pCodecCtx = avcodec_alloc_context3(NULL);
        if (!pCodecCtx) {
            iError = -6;
            break;
        }
        /*
         * Copy the demuxer-provided parameters straight into our own context.
         * Going through a stack-allocated AVCodecParameters filled from the
         * deprecated stream->codec leaked its extradata and relied on the
         * struct size, which is not part of the public ABI.
         */
        iReturn = avcodec_parameters_to_context(pCodecCtx, pStream->codecpar);
        if (iReturn < 0) {
            avcodec_free_context(&pCodecCtx); /* don't leak the context on failure */
            iError = -7;
            break;
        }
        iVideoStreamIndex = i;
        break;
    }
}
if (!iError) {
    if (iVideoStreamIndex == -1) {
        iError = -3; /* Didn't find a video stream */
    }
}
/* Look up a decoder for the codec id stored in the codec context. */
if (!iError) {
    pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
    if (!pCodec) {
        iError = -4;
    }
}
/* Open the codec. */
if (!iError && avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
    iError = -5;
}
/*
 * Work out the byte size of one RGB24 picture at the decoder's dimensions and
 * create the swscale context that converts decoded frames to RGB24 at the
 * same width and height.
 */
if (!iError) {
    const int iSrcWidth = pCodecCtx->width;
    const int iSrcHeight = pCodecCtx->height;

    iNumBytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, iSrcWidth, iSrcHeight, 1);
    sws_ctx = sws_getContext(iSrcWidth, iSrcHeight, pCodecCtx->pix_fmt,
                             iSrcWidth, iSrcHeight, AV_PIX_FMT_RGB24,
                             SWS_BILINEAR, NULL, NULL, NULL);
    if (sws_ctx == NULL) {
        iError = -7;
    }
}
/*
 * Measure the time elapsed since `start` in microseconds and restart the
 * interval. The seconds difference is widened to long long before scaling so
 * the multiplication cannot overflow where time_t/long are 32 bits wide.
 */
clock_gettime(CLOCK_MONOTONIC_RAW, &end);
delta_us = (long long) (end.tv_sec - start.tv_sec) * 1000000LL
        + (end.tv_nsec - start.tv_nsec) / 1000;
start = end;
//LOGI("Starting_Frame_Extraction: %lld", delta_us);
/*
 * Read packets until av_read_frame() stops returning 0 and decode those that
 * belong to the selected video stream.
 *
 * One AVFrame is allocated up front and reused for every packet instead of
 * being allocated and freed per packet. Allocation failure is reported as
 * iError = -8 before any packet is read, so no packet can be leaked on that
 * path (the previous `break` skipped av_packet_unref()).
 */
if (!iError) {
    pFrame = av_frame_alloc();
    if (NULL == pFrame) {
        iError = -8;
    }
}
if (!iError) {
    while (av_read_frame(pFormatCtx, &packet) == 0) {
        /* Is this a packet from the video stream? */
        if (packet.stream_index == iVideoStreamIndex) {
            iFrameFinished = 0;
            /* Decode video frame; a failed decode is treated as "no frame". */
            if (avcodec_decode_video2(pCodecCtx, pFrame, &iFrameFinished,
                                      &packet) >= 0
                    && iFrameFinished) {
                //OUR CODE
            }
        }
        /* Always release the packet, whichever stream it came from. */
        av_packet_unref(&packet);
    }
    /* av_frame_free() also resets pFrame to NULL. */
    av_frame_free(&pFrame);
}
You need some structures and functions from libavfilter.
The -vf option means "video filter". The command line ffmpeg -i input -vf fps=30 out%d.png will output video_length_in_seconds * 30 frames regardless of the original video's frame rate. That means that if the video is 25 fps, you'll get some duplicate frames, while if the video is more than 30 fps, you'll lose some frames.
To achieve this, you have to init some filter context. See filtering_video.c example from ffmpeg source.
// Build the filter graph  buffer ("in") -> fps=30 -> buffersink ("out").
// filter_graph, buffersrc_ctx and buffersink_ctx are declared elsewhere.
const AVFilter* buffersrc = avfilter_get_by_name("buffer");
const AVFilter* buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut* outputs = avfilter_inout_alloc();
AVFilterInOut* inputs = avfilter_inout_alloc();
AVRational time_base = p_format_ctx->streams[video_stream]->time_base;
enum AVPixelFormat pix_fmts[] = { p_codec_ctx->pix_fmt, AV_PIX_FMT_NONE };
filter_graph = avfilter_graph_alloc();
if (!outputs || !inputs || !filter_graph) {
    // failed, goto cleanup
}

// The buffer source must be told the geometry, pixel format and time base
// of the frames that will be pushed into it.
char args[512];
snprintf(args, sizeof(args),
         "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
         p_codec_ctx->width, p_codec_ctx->height, p_codec_ctx->pix_fmt,
         time_base.num, time_base.den,
         p_codec_ctx->sample_aspect_ratio.num, p_codec_ctx->sample_aspect_ratio.den);
int ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
                                       args, NULL, filter_graph);
if (ret < 0) {
    LOG(ERROR) << "Cannot create buffer source";
    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    return false;
}
ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
                                   NULL, NULL, filter_graph);
if (ret < 0) {
    // failed... blabla
}
// Restrict the sink to the decoder's pixel format.
ret = av_opt_set_int_list(buffersink_ctx, "pix_fmts", pix_fmts,
                          AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
    // failed... blabla
}

// "outputs" describes the open output pad of our source (labelled "in"),
// "inputs" the open input pad of our sink (labelled "out").
outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrc_ctx;
outputs->pad_idx = 0;
outputs->next = NULL;
inputs->name = av_strdup("out");
inputs->filter_ctx = buffersink_ctx;
inputs->pad_idx = 0;
inputs->next = NULL;

// A plain C string: the previous declaration was an ill-formed array of
// pointers, and the parse call referred to a different, undeclared variable.
const char* filter_description = "fps=fps=30";
if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_description,
                                    &inputs, &outputs, NULL)) < 0) {
    // failed...
}
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
    // failed...
}
// The in/out lists are only needed while parsing; release whatever is left.
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
Ok, this is all initialization needed.
And adding some codes to decoding part:
// Decode one packet and, when a picture comes out, push it through the
// filter graph and drain every frame the graph produces for it.
avcodec_decode_video2(p_codec_ctx, p_frame, &got_frame, &packet);
// got_frame is passed by address above, so it is tested as a plain int here.
if (got_frame) {
    p_frame->pts = av_frame_get_best_effort_timestamp(p_frame);
    if (av_buffersrc_add_frame_flags(buffersrc_ctx, p_frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
        // failed... blabla
    }
    while (1) {
        // p_frame_stage is an AVFrame allocated beforehand; the sink fills it in.
        int ret = av_buffersink_get_frame(buffersink_ctx, p_frame_stage);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        if (ret < 0) {
            // something wrong. filter failed.
            break;
        }
        // Do something with p_frame_stage here.

        // Drop the reference so the next av_buffersink_get_frame() call
        // starts from a clean frame instead of leaking this one's buffers.
        av_frame_unref(p_frame_stage);
    }
    // KEEP_REF left our reference to the decoded frame in place; release it.
    av_frame_unref(p_frame);
}
Please take a look at https://gitter.im/mobile-ffmpeg/Lobby?at=5c5bb384f04ef00644f1bb4e A few lines below, they mention options to accelerate the process, such as ... -preset ultrafast, -threads 10, -tune zerolatency, -x264-params sliced-threads=1
Related
I am encoding raw data on Android using ffmpeg libraries. The native code reads the audio data from an external device and encodes it into AAC format in an mp4 container. I am finding that the audio data is successfully encoded (I can play it with Groove Music, my default Windows audio player). But the metadata, as reported by ffprobe, has an incorrect duration of 0.05 secs - it's actually several seconds long. Also the bitrate is reported wrongly as around 65kbps even though I specified 192kbps.
I've tried recordings of various durations but the result is always similar - the (very small) duration and bitrate. I've tried various other audio players such as Quicktime but they play only the first 0.05 secs or so of the audio.
I've removed error-checking from the following. The actual code checks every call and no problems are reported.
Initialisation:
void AudioWriter::initialise( const char *filePath )
{
AVCodecID avCodecID = AVCodecID::AV_CODEC_ID_AAC;
int bitRate = 192000;
char *containerFormat = "mp4";
int sampleRate = 48000;
int nChannels = 2;
mAvCodec = avcodec_find_encoder(avCodecID);
mAvCodecContext = avcodec_alloc_context3(mAvCodec);
mAvCodecContext->codec_id = avCodecID;
mAvCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
mAvCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
mAvCodecContext->bit_rate = bitRate;
mAvCodecContext->sample_rate = sampleRate;
mAvCodecContext->channels = nChannels;
mAvCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
avcodec_open2( mAvCodecContext, mAvCodec, nullptr );
mAvFormatContext = avformat_alloc_context();
avformat_alloc_output_context2(&mAvFormatContext, nullptr, containerFormat, nullptr);
mAvFormatContext->audio_codec = mAvCodec;
mAvFormatContext->audio_codec_id = avCodecID;
mAvOutputStream = avformat_new_stream(mAvFormatContext, mAvCodec);
avcodec_parameters_from_context(mAvOutputStream->codecpar, mAvCodecContext);
if (!(mAvFormatContext->oformat->flags & AVFMT_NOFILE))
{
avio_open(&mAvFormatContext->pb, filePath, AVIO_FLAG_WRITE);
}
if ( mAvFormatContext->oformat->flags & AVFMT_GLOBALHEADER )
{
mAvCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
avformat_write_header(mAvFormatContext, NULL);
mAvAudioFrame = av_frame_alloc();
mAvAudioFrame->nb_samples = mAvCodecContext->frame_size;
mAvAudioFrame->format = mAvCodecContext->sample_fmt;
mAvAudioFrame->channel_layout = mAvCodecContext->channel_layout;
av_samples_get_buffer_size(NULL, mAvCodecContext->channels, mAvCodecContext->frame_size,
mAvCodecContext->sample_fmt, 0);
av_frame_get_buffer(mAvAudioFrame, 0);
av_frame_make_writable(mAvAudioFrame);
mAvPacket = av_packet_alloc();
}
Encoding:
// SoundRecording is a custom class with the raw samples to be encoded
/**
 * Read interleaved float samples from soundRecording, convert them to planar
 * layout one encoder frame at a time, encode each frame and finally flush
 * the encoder and write the trailer via finish().
 *
 * @param soundRecording source of the raw interleaved samples
 * @return true when every frame was handed to the encoder successfully
 */
bool AudioWriter::encodeToContainer( SoundRecording *soundRecording )
{
    const int frameCount = mAvCodecContext->frame_size;
    const int nChannels = mAvCodecContext->channels;
    float *buf = new float[frameCount*nChannels];
    bool ok = true;

    while ( ok && soundRecording->hasReadableData() )
    {
        // Populate the frame
        int samplesRead = soundRecording->read( buf, frameCount*nChannels );
        if ( samplesRead <= 0 )
        {
            // Nothing was delivered; treat it as the end of the recording
            // rather than encoding a frame of pure silence forever.
            break;
        }

        // The encoder may still hold a reference to the previous contents.
        av_frame_make_writable(mAvAudioFrame);

        // Planar data.
        // NOTE(review): `samples` is declared outside this function; presumably
        // it holds the per-channel plane pointers of mAvAudioFrame - confirm.
        const int nFrames = samplesRead/nChannels;
        for ( int i = 0; i < nFrames; ++i )
        {
            for ( int c = 0; c < nChannels; ++c )
            {
                samples[c][i] = buf[nChannels*i + c];
            }
        }

        // Fill a gap at the end with silence. Each plane holds frameCount
        // samples, so the fill runs over per-channel indices only.
        for ( int i = nFrames; i < frameCount; ++i )
        {
            for ( int c = 0; c < nChannels; ++c )
            {
                samples[c][i] = 0.0;
            }
        }

        ok = encodeFrame( mAvAudioFrame );
    }

    delete[] buf;
    finish();
    return ok;
}
/**
 * Send one frame to the encoder and write every packet it produces to the
 * output container.
 *
 * @param frame frame to encode, or nullptr to flush the encoder
 * @return false if the encoder or the muxer reported an error, true otherwise
 */
bool AudioWriter::encodeFrame( AVFrame *frame )
{
    if ( frame != nullptr )
    {
        // Timestamps advance by the number of samples in each frame, not by
        // one per frame; otherwise the muxer sees a stream that is only a
        // handful of ticks long.
        // NOTE(review): this assumes the codec time_base is 1/sample_rate.
        frame->pts = mAudFrameCounter;
        mAudFrameCounter += frame->nb_samples;
    }

    // send the frame for encoding
    int ret = avcodec_send_frame(mAvCodecContext, frame );
    if ( ret < 0 )
    {
        return false;
    }

    while (ret >= 0)
    {
        ret = avcodec_receive_packet(mAvCodecContext, mAvPacket);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF )
        {
            // The encoder needs more input, or has been fully drained.
            break;
        }
        else if (ret < 0)
        {
            return false;
        }

        // Convert packet timestamps from encoder units to the stream's units
        // and tag the packet with the stream it belongs to.
        av_packet_rescale_ts(mAvPacket, mAvCodecContext->time_base, mAvOutputStream->time_base);
        mAvPacket->stream_index = mAvOutputStream->index;
        const int writeRet = av_interleaved_write_frame(mAvFormatContext, mAvPacket);
        av_packet_unref(mAvPacket);
        if ( writeRet < 0 )
        {
            return false;
        }
    }
    return true;
}
void AudioWriter::finish()
{
// Flush by sending a null frame
encodeFrame( nullptr );
av_write_trailer(mAvFormatContext);
}
Since the resultant file contains the recorded music, the code to manipulate the audio data seems to be correct (unless I am overwriting other memory somehow).
The inaccurate duration and bitrate suggest that information concerning time is not being properly managed. I set the pts of the frames using a simple increasing integer. I'm unclear what the code that sets the timestamp and stream index achieves - and whether it's even necessary: I copied it from supposedly working code but I've seen other code without it.
Can anyone see what I'm doing wrong?
The timestamps need to be correct. Set the time_base to 1/sample_rate and increment the timestamp by 1024 each frame. Note: 1024 is AAC-specific. If you change codecs, you need to change the frame size.
Using FFmpeg 4.0.2 and calling the main function of its ffmpeg.c twice causes an Android app crash (using FFmpeg shared libs and JNI):
A/libc: Fatal signal 11 (SIGSEGV), code 1, fault addr 0x0 in tid 20153
Though it works ok for FFmpeg 3.2.5
FFmpeg 4.0.2 main
/*
 * Entry point of the ffmpeg command-line tool (FFmpeg 4.0.2 variant shown
 * here): registers the cleanup handler, parses the command line, runs
 * transcode() and returns main_return_code.
 *
 * NOTE(review): nothing in this function resets the nb_* counters or the
 * file/stream tables it reads after ffmpeg_cleanup() has run; whether
 * ffmpeg_cleanup() resets them is not visible here. That matters if main()
 * is invoked more than once in the same process.
 */
int main(int argc, char **argv) {
int i, ret;
int64_t ti;
init_dynload();
/* ffmpeg_cleanup is registered as the exit hook. */
register_exit(ffmpeg_cleanup);
setvbuf(stderr,NULL,_IONBF,0); /* win32 runtime needs this */
av_log_set_flags(AV_LOG_SKIP_REPEATED);
parse_loglevel(argc, argv, options);
/* A leading "-d" argument selects daemon mode, silences logging via the
   null callback and is then removed from the argument list. */
if(argc>1 && !strcmp(argv[1], "-d")){
run_as_daemon=1;
av_log_set_callback(log_callback_null);
argc--;
argv++;
}
#if CONFIG_AVDEVICE
avdevice_register_all();
#endif
avformat_network_init();
show_banner(argc, argv, options);
/* parse options and open all input/output files */
ret = ffmpeg_parse_options(argc, argv);
if (ret < 0)
exit_program(1);
/* Neither inputs nor outputs were given: print usage and exit. */
if (nb_output_files <= 0 && nb_input_files == 0) {
show_usage();
av_log(NULL, AV_LOG_WARNING, "Use -h to get full help or, even better, run 'man %s'\n", program_name);
exit_program(1);
}
/* file converter / grab */
if (nb_output_files <= 0) {
av_log(NULL, AV_LOG_FATAL, "At least one output file must be specified\n");
exit_program(1);
}
// if (nb_input_files == 0) {
// av_log(NULL, AV_LOG_FATAL, "At least one input file must be specified\n");
// exit_program(1);
// }
/* want_sdp is cleared as soon as any output format name differs from "rtp". */
for (i = 0; i < nb_output_files; i++) {
if (strcmp(output_files[i]->ctx->oformat->name, "rtp"))
want_sdp = 0;
}
/* Time the transcode; ti ends up holding the elapsed getutime() difference. */
current_time = ti = getutime();
if (transcode() < 0)
exit_program(1);
ti = getutime() - ti;
if (do_benchmark) {
av_log(NULL, AV_LOG_INFO, "bench: utime=%0.3fs\n", ti / 1000000.0);
}
av_log(NULL, AV_LOG_DEBUG, "%"PRIu64" frames successfully decoded, %"PRIu64" decoding errors\n",
decode_error_stat[0], decode_error_stat[1]);
/* Exit with code 69 when decode_error_stat[1] exceeds max_error_rate times
   the sum of both counters. */
if ((decode_error_stat[0] + decode_error_stat[1]) * max_error_rate < decode_error_stat[1])
exit_program(69);
/* Cleanup is called directly here instead of going through exit_program(). */
ffmpeg_cleanup(received_nb_signals ? 255 : main_return_code);
return main_return_code;
}
FFmpeg 3.2.5 main
/*
 * Entry point of the ffmpeg command-line tool (FFmpeg 3.2.5 variant taken
 * from an Android project): registers codecs, filters and formats, parses
 * the command line, runs transcode() and finally resets the global
 * file/stream bookkeeping before returning main_return_code.
 */
int main(int argc, char **argv) {
av_log(NULL, AV_LOG_WARNING, " Command start");
int i, ret;
int64_t ti;
init_dynload();
/* ffmpeg_cleanup is registered as the exit hook. */
register_exit(ffmpeg_cleanup);
setvbuf(stderr, NULL, _IONBF, 0); /* win32 runtime needs this */
av_log_set_flags(AV_LOG_SKIP_REPEATED);
parse_loglevel(argc, argv, options);
/* A leading "-d" argument selects daemon mode, silences logging via the
   null callback and is then removed from the argument list. */
if (argc > 1 && !strcmp(argv[1], "-d")) {
run_as_daemon = 1;
av_log_set_callback(log_callback_null);
argc--;
argv++;
}
avcodec_register_all();
#if CONFIG_AVDEVICE
avdevice_register_all();
#endif
avfilter_register_all();
av_register_all();
avformat_network_init();
av_log(NULL, AV_LOG_WARNING, " Register to complete the codec");
show_banner(argc, argv, options);
/* parse options and open all input/output files */
ret = ffmpeg_parse_options(argc, argv);
if (ret < 0)
exit_program(1);
/* Neither inputs nor outputs were given: print usage and exit. */
if (nb_output_files <= 0 && nb_input_files == 0) {
show_usage();
av_log(NULL, AV_LOG_WARNING, "Use -h to get full help or, even better, run 'man %s'\n",
program_name);
exit_program(1);
}
/* file converter / grab */
if (nb_output_files <= 0) {
av_log(NULL, AV_LOG_FATAL, "At least one output file must be specified\n");
exit_program(1);
}
// if (nb_input_files == 0) {
// av_log(NULL, AV_LOG_FATAL, "At least one input file must be specified\n");
// exit_program(1);
// }
/* want_sdp is cleared as soon as any output format name differs from "rtp". */
for (i = 0; i < nb_output_files; i++) {
if (strcmp(output_files[i]->ctx->oformat->name, "rtp"))
want_sdp = 0;
}
/* Time the transcode; ti ends up holding the elapsed getutime() difference. */
current_time = ti = getutime();
if (transcode() < 0)
exit_program(1);
ti = getutime() - ti;
if (do_benchmark) {
av_log(NULL, AV_LOG_INFO, "bench: utime=%0.3fs\n", ti / 1000000.0);
}
av_log(NULL, AV_LOG_DEBUG, "%"PRIu64" frames successfully decoded, %"PRIu64" decoding errors\n",
decode_error_stat[0], decode_error_stat[1]);
/* Exit with code 69 when decode_error_stat[1] exceeds max_error_rate times
   the sum of both counters. */
if ((decode_error_stat[0] + decode_error_stat[1]) * max_error_rate < decode_error_stat[1])
exit_program(69);
/* NOTE(review): the statements below only run if this build's
   exit_program() returns to the caller - TODO confirm. */
exit_program(received_nb_signals ? 255 : main_return_code);
/* Reset the global counters and tables so they start from zero/NULL on the
   next call. nb_input_streams and nb_input_files are each assigned twice. */
nb_filtergraphs = 0;
nb_input_streams = 0;
nb_input_files = 0;
progress_avio = NULL;
input_streams = NULL;
nb_input_streams = 0;
input_files = NULL;
nb_input_files = 0;
output_streams = NULL;
nb_output_streams = 0;
output_files = NULL;
nb_output_files = 0;
return main_return_code;
}
So what could be the issue? It seems FFmpeg 4.0.2 doesn't release something (resources), or doesn't reset its static variables to their initial values, after the first command.
Adding the following lines from FFmpeg 3.2.5 to the end of the main function in FFmpeg 4.0.2 solved the problem (I had downloaded FFmpeg 3.2.5 as part of someone's Android project, so that user had added those lines):
/* Return the global bookkeeping to its start-up state: every table pointer
   is cleared together with its element count. */
nb_filtergraphs = 0;
progress_avio = NULL;

input_streams = NULL;
nb_input_streams = 0;
input_files = NULL;
nb_input_files = 0;

output_streams = NULL;
nb_output_streams = 0;
output_files = NULL;
nb_output_files = 0;
I'm doing a transcoder using MediaCodec.
I created two mediacodec instance, one is for decoding and another is for encoding. I'm trying to send decoders outputBuffer directly into encoders inputBuffer.
It seems to compile and execute without problems, and it runs quickly.
But something is wrong with the output video file. I checked the metadata of the output video and it is all correct: bitrate, frame rate, resolution... Only the images in the video are wrong, like this: screen shot
I think something is wrong, but I cannot figure it out...
I searched libraries and documents, and I found some sample code that uses a texture surface to render the decoder output data and transfer the data into the encoder. But I thought that should not be necessary for me, because I don't need to edit the images of the video. All I need to do is change the bitrate and resolution to make the file smaller.
here is the core code in my project:
/**
 * Decoder thread body: feeds samples from mExtractor into mDecoder and
 * copies every decoded output buffer into a FrameData taken from
 * mFrameDataQueue, which is then pushed to the queue.
 *
 * Runs until an output buffer carrying BUFFER_FLAG_END_OF_STREAM has been
 * forwarded, then stops and releases the decoder.
 */
private void decodeCore() {
    MediaCodec.BufferInfo bufferInfo = new MediaCodec.BufferInfo();
    int frameCount = 0;
    // Once end-of-stream has been queued no further input may be submitted,
    // so the input side is switched off instead of queuing EOS again on
    // every loop iteration.
    boolean inputDone = false;
    while (mDecodeRunning) {
        if (!inputDone) {
            int inputBufferId = mDecoder.dequeueInputBuffer(50);
            if (inputBufferId >= 0) {
                // fill inputBuffers[inputBufferId] with valid data
                int sampleSize = mExtractor.readSampleData(mDecodeInputBuffers[inputBufferId], 0);
                if (sampleSize >= 0) {
                    long time = mExtractor.getSampleTime();
                    mDecoder.queueInputBuffer(inputBufferId, 0, sampleSize, time, 0);
                    // Only move on when a sample was actually consumed.
                    mExtractor.advance();
                } else {
                    mDecoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
                    inputDone = true;
                }
            }
        }
        int outputBufferId = mDecoder.dequeueOutputBuffer(bufferInfo, 50);
        if (outputBufferId >= 0) {
            FrameData data = mFrameDataQueue.obtain();
            // wait until the queue has a free FrameData to fill
            while (data == null) {
                try {
                    Thread.sleep(20);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
                data = mFrameDataQueue.obtain();
            }
            data.data.clear();
            data.size = 0;
            data.offset = 0;
            data.flag = 0;
            data.frameTimeInUs = bufferInfo.presentationTimeUs;
            // outputBuffers[outputBufferId] is ready to be processed or rendered.
            if (bufferInfo.size > 0) {
                ByteBuffer buffer = mDecodeOutputBuffers[outputBufferId];
                // Copy exactly the valid region described by bufferInfo.
                buffer.position(bufferInfo.offset);
                buffer.limit(bufferInfo.offset + bufferInfo.size);
                data.data.put(buffer);
                data.data.flip();
                data.size = bufferInfo.size;
                data.frameIndex = frameCount++;
            }
            data.flag = bufferInfo.flags;
            if ((bufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) == MediaCodec.BUFFER_FLAG_END_OF_STREAM) {
                Log.d("bingbing_transcode", "decode over! frames:" + (frameCount - 1));
                mDecodeRunning = false;
            }
            mFrameDataQueue.pushToQueue(data);
            mDecoder.releaseOutputBuffer(outputBufferId, false);
            Log.d("bingbing_transcode", "decode output:\n frame:" + (frameCount - 1) + "\n" + "size:" + bufferInfo.size);
        } else if (outputBufferId == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
            mDecodeOutputBuffers = mDecoder.getOutputBuffers();
        } else if (outputBufferId == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
            // Subsequent data will conform to new format.
            mDecodeOutputVideoFormat = mDecoder.getOutputFormat();
            configureAndStartEncoder();
        }
    }
    mDecoder.stop();
    mDecoder.release();
}
/**
 * Encoder thread body: takes decoded frames from mFrameDataQueue, feeds them
 * to mEncoder and writes the encoded output to mMuxer.
 *
 * Runs until the encoder emits an output buffer carrying
 * BUFFER_FLAG_END_OF_STREAM, then stops and releases the encoder and, if it
 * was started, the muxer.
 */
private void encodeCore() {
    int trackIndex = 0;
    boolean muxerStarted = false;
    // Set once the end-of-stream frame has been queued. From then on only the
    // output side is serviced; polling for more input would wait forever on
    // an empty queue and the final output would never be drained.
    boolean inputDone = false;
    MediaCodec.BufferInfo bufferInfo = new MediaCodec.BufferInfo();
    int frameCount = 0;
    while (mEncodeRunning) {
        if (!inputDone) {
            int inputBufferId = mEncoder.dequeueInputBuffer(50);
            if (inputBufferId >= 0) {
                FrameData data = mFrameDataQueue.pollFromQueue();
                // wait until the decoder has pushed a frame; the retry must
                // poll the queue again (obtain() hands out an unfilled frame)
                while (data == null) {
                    try {
                        Thread.sleep(20);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                    data = mFrameDataQueue.pollFromQueue();
                }
                if (data.size > 0) {
                    ByteBuffer inputBuffer = mEncodeInputBuffers[inputBufferId];
                    inputBuffer.clear();
                    inputBuffer.put(data.data);
                    inputBuffer.flip();
                }
                mEncoder.queueInputBuffer(inputBufferId, 0, data.size, data.frameTimeInUs, data.flag);
                if ((data.flag & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    inputDone = true;
                }
                mFrameDataQueue.recycle(data);
            }
        }
        int outputBufferId = mEncoder.dequeueOutputBuffer(bufferInfo, 50);
        if (outputBufferId >= 0) {
            // outputBuffers[outputBufferId] is ready to be processed or rendered.
            ByteBuffer encodedData = mEncodeOutputBuffers[outputBufferId];
            if ((bufferInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
                // Codec config data already reached the muxer through the
                // MediaFormat passed to addTrack(); it is not a sample.
                bufferInfo.size = 0;
            }
            if (bufferInfo.size > 0) {
                if (encodedData == null) {
                    throw new RuntimeException("encoderOutputBuffer " + outputBufferId + " was null");
                }
                if (!muxerStarted) {
                    throw new RuntimeException("muxer hasn't started");
                }
                frameCount++;
                // adjust the ByteBuffer values to match BufferInfo
                encodedData.position(bufferInfo.offset);
                encodedData.limit(bufferInfo.offset + bufferInfo.size);
                mMuxer.writeSampleData(trackIndex, encodedData, bufferInfo);
            }
            if ((bufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) == MediaCodec.BUFFER_FLAG_END_OF_STREAM) {
                Log.d("bingbing_transcode", "encode over! frames:" + (frameCount - 1));
                mEncodeRunning = false;
            }
            mEncoder.releaseOutputBuffer(outputBufferId, false);
            Log.d("bingbing_transcode", "encode output:\n frame:" + (frameCount - 1) + "\n" + "size:" + bufferInfo.size);
        } else if (outputBufferId == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
            mEncodeOutputBuffers = mEncoder.getOutputBuffers();
        } else if (outputBufferId == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
            // should happen before receiving buffers, and should only happen once
            if (muxerStarted) {
                throw new RuntimeException("format changed twice");
            }
            MediaFormat newFormat = mEncoder.getOutputFormat();
            Log.d("bingbing_transcode", "encoder output format changed: " + newFormat);
            // now that we have the Magic Goodies, start the muxer
            trackIndex = mMuxer.addTrack(newFormat);
            mMuxer.start();
            muxerStarted = true;
            mEncodeOutputVideoFormat = newFormat;
        }
    }
    mEncoder.stop();
    mEncoder.release();
    if (muxerStarted) {
        mMuxer.stop();
        mMuxer.release();
    }
}
These two functions run in two different threads.
FrameData is a simple container for the frame's ByteBuffer, the frame's presentation time and the few other fields that are needed.
When using bytebuffer input, there are a few details that are undefined about the input data layout. When the width isn't a multiple of 16, some encoders want to have the input data row length padded to a multiple of 16, while others will assume a line length equal to the width, with no extra padding.
The Android CTS tests (which define what behaviour one can expect across all devices) for encoding from bytebuffer inputs intentionally only test resolutions that are a multiple of 16, since they know different hardware vendors do this differently, and they didn't want to enforce any particular handling.
You can't generally assume that the decoder output would use a similar row size as what the encoder consumes either. The decoder is free to (and some actually do) return a significantly larger width than the actual content size, and use the crop_left/crop_right fields for indicating what parts of it actually are intended to be visible. So in case the decoder did that, you can't pass the data straight from the decoder to the encoder unless you copy it line by line, taking into account the actual line sizes used by the decoder and encoder.
Additionally, you can't even assume that the decoder uses a similar pixel format as the encoder. Many qualcomm devices use a special tiled pixel format for the decoder output, while the encoder input is normal planar data. In these cases, you'd have to implement a pretty complex logic for unshuffling the data before you can feed it into the encoder.
Using a texture surface as intermediate hides all of these details. It might not sound completely necessary for your use case, but it does hide all the variation in buffer formats between decoder and encoder.
I am decoding a h264 video stream with the following code (original guide):
/**
 * Creates, configures and starts the H.264 decoder that renders to the
 * given surface.
 *
 * @param surface target surface passed to the codec
 * @param width   video width used for the MediaFormat
 * @param height  video height used for the MediaFormat
 * @param csd0    codec-specific data stored under the "csd-0" key
 * @throws IllegalStateException if the decoder was configured before
 * @throws RuntimeException      if the codec cannot be created
 */
public void configure(Surface surface, int width, int height, ByteBuffer csd0) {
    final String mimeType = "video/avc";
    if (mConfigured) {
        throw new IllegalStateException("Decoder is already configured");
    }

    final MediaFormat videoFormat = MediaFormat.createVideoFormat(mimeType, width, height);
    // csd-0 is needed to configure the codec properly; it is basically the
    // first sample from the encoder, flagged BUFFER_FLAG_CODEC_CONFIG.
    videoFormat.setByteBuffer("csd-0", csd0);

    try {
        mCodec = MediaCodec.createDecoderByType(mimeType);
    } catch (IOException cause) {
        throw new RuntimeException("Failed to create codec", cause);
    }

    mCodec.configure(videoFormat, surface, null, 0);
    mCodec.start();
    mConfigured = true;
}
#SuppressWarnings("deprecation")
public void decodeSample(byte[] data, int offset, int size, long presentationTimeUs, int flags) {
if (mConfigured && mRunning) {
int index = mCodec.dequeueInputBuffer(mTimeoutUs);
if (index >= 0) {
ByteBuffer buffer;
// since API 21 we have new API to use
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.LOLLIPOP) {
buffer = mCodec.getInputBuffers()[index];
buffer.clear();
} else {
buffer = mCodec.getInputBuffer(index);
}
if (buffer != null) {
buffer.put(data, offset, size);
mCodec.queueInputBuffer(index, 0, size, presentationTimeUs, flags);
}
}
}
}
#Override
public void run() {
try {
MediaCodec.BufferInfo info = new MediaCodec.BufferInfo();
while (mRunning) {
if (mConfigured) {
int index = mCodec.dequeueOutputBuffer(info, mTimeoutUs);
if (index >= 0) {
// setting true is telling system to render frame onto Surface
mCodec.releaseOutputBuffer(index, true);
if ((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) == MediaCodec.BUFFER_FLAG_END_OF_STREAM) {
break;
}
}
} else {
// just waiting to be configured, then decode and render
try {
Thread.sleep(10);
} catch (InterruptedException ignore) {
}
}
}
} finally {
if (mConfigured) {
mCodec.stop();
mCodec.release();
}
}
}
I can run this on both my Nexus 6 (api 22) and Samsung galaxy core (api 16) on low and medium quality. However when I switch to high quality (720p) it crashes on the Samsung after about 30 frames (but nothing is rendered to the screen).
E/ACodec﹕ [OMX.qcom.video.decoder.avc] ERROR(0x8000100a)
E/MediaCodec﹕ Codec reported an error. (omx error 0x8000100a, internalError -2147483648)
[...]
W/System.err﹕ java.lang.IllegalStateException
W/System.err﹕ at android.media.MediaCodec.dequeueInputBuffer(Native Method)
W/System.err﹕ at com.test.stream.VideoDecoder$Worker.decodeSample(VideoDecoder.java:95)
W/System.err﹕ at com.test.stream.VideoDecoder.decodeSample(VideoDecoder.java:24)
W/System.err﹕ at com.test.stream.VideoThread.run(VideoThread.java:160)
The error above is the first error that appears, the IllegalStateException is afterwards thrown on each frame.
My question is, is this a device specific problem (because of: older api/device, less powerful, etc.) or is something actually wrong?
and how should I deal with this?
For my Android H.264 decoder I do it slightly differently from your setup. I think you're using a more modern API level. For me it looks more like this:
/**
 * Creates the "video/avc" decoder, configures it against targetSurface and
 * starts the thread that runs this object's decode/render loop. Nothing is
 * started when no codec instance could be obtained.
 */
public void startDecoder() {
    // Initialize codec, format and the shared BufferInfo
    mediaCodec = MediaCodec.createDecoderByType("video/avc");
    mediaFormat = MediaFormat.createVideoFormat("video/avc", 0, 0);
    bufferInfo = new MediaCodec.BufferInfo();

    // Mocked-out Android in unit tests yields no codec; bail out instead of crashing.
    if (mediaCodec == null) {
        return;
    }

    mediaCodec.configure(mediaFormat, targetSurface, null, 0);
    mediaCodec.start();
    decoderThread = new Thread(this);
    decoderThread.start();
}
// Decoder Thread refers to this class which does the decoder/render loop:
/**
 * Decoder/render loop: while running, pushes queued NALU segments into the
 * codec whenever an input buffer is free and hands every available output
 * buffer to codecOuputBufferAvailable().
 */
public void run() {
    // Timeouts handed to dequeueInputBuffer()/dequeueOutputBuffer(); the
    // MediaCodec API takes these values in microseconds.
    final long inputTimeoutUs = 50;
    final long outputTimeoutUs = 50;

    while (running && mediaCodec != null) {
        synchronized (mediaCodec) {
            // Re-check under the lock: stop if not running any more.
            if (!(running && mediaCodec != null)) {
                break;
            }

            // Only push in new data if there is data available in the queue
            if (naluSegmentQueue.size() > 0) {
                final int inIndex = mediaCodec.dequeueInputBuffer(inputTimeoutUs);
                if (inIndex >= 0) {
                    final NaluSegment next = naluSegmentQueue.poll();
                    codecInputBufferAvailable(next, mediaCodec, inIndex);
                }
            }

            // Always check whether output is available.
            final int outIndex = mediaCodec.dequeueOutputBuffer(bufferInfo, outputTimeoutUs);
            if (outIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
                // Subsequent data will conform to the new format.
                // Can be ignored if using getOutputFormat(outputBufferId).
                mediaFormat = mediaCodec.getOutputFormat();
            } else if (outIndex >= 0) {
                // Try and render first
                codecOuputBufferAvailable(mediaCodec, outIndex, bufferInfo);
            }
        }
    }
}
This is how I put data into the decoder, including the parameter sets. I don't bother with csd-0/csd-1: network streams can have changing format descriptions, and it's easier to just let the format be picked up dynamically.
/**
 * Copies one NALU segment into the codec input buffer with the given index
 * and queues it.
 *
 * SPS, PPS and SUPP_ENHANCEMENT segments are flagged BUFFER_FLAG_CODEC_CONFIG;
 * every other segment is flagged BUFFER_FLAG_SYNC_FRAME.
 *
 * @param segment segment to submit
 * @param codec   codec that owns the input buffer
 * @param index   input buffer index obtained from dequeueInputBuffer()
 */
private void codecInputBufferAvailable(NaluSegment segment, MediaCodec codec, int index) {
    int flags = (segment.getType() == NaluType.SPS
            || segment.getType() == NaluType.PPS
            || segment.getType() == NaluType.SUPP_ENHANCEMENT) ?
            MediaCodec.BUFFER_FLAG_CODEC_CONFIG : MediaCodec.BUFFER_FLAG_SYNC_FRAME;
    ByteBuffer[] buffers = codec.getInputBuffers();
    ByteBuffer buffer = buffers[index];
    // Can throw buffer overflow exception when buffer sizes are too small.
    try {
        // Buffers from the array API keep their position from the previous
        // use; reset it so the segment is written from the start.
        buffer.clear();
        buffer.put(segment.getBuffer());
        codec.queueInputBuffer(index, 0, segment.getBufferSize(), 0, flags);
    } catch(Exception e) {
        // Keep the cause in the log instead of discarding it.
        Log.e(TAG, "Failed to push buffer to decoder", e);
    }
}
IMPORTANT: buffer.put(segment.getBuffer());
getBuffer() here always returns an Annex B buffer with a 4-byte start code. The Android decoders do not understand NAL units with a 3-byte start code. So if you have a NAL unit with a 3-byte start code, turn it into one with a 4-byte start code: allocate length + 1 bytes, write 0x00, 0x00, 0x00, 0x01 as the start code, and copy the rest of the buffer from &buffer[headerLength].
Notice the try-catch here: omitting it doesn't give a compiler warning, but the call can throw a buffer overflow exception if you have a very big payload and the byte buffer is too small.
So long as you parse out your NAL units correctly, this should work for you. But in my case I noticed that the start code (magic header) of a NAL unit can be either 3 or 4 bytes long.
/**
* H264 is comprised of NALU segments.
*
* XXXX Y ZZZZZZZZ -> XXXX Y ZZZZZZZZ -> XXXX Y ZZZZZZZZ
*
* Each segment is comprised of:
*
* XXXX -> Magic byte header (0x00, 0x00, 0x00, 0x01) NOTE: this can be either 3 or 4 bytes
* Y -> The Nalu Type
* ZZZ... -> The Payload
*
* Notice there is no nalu length specified. To parse an nalu, you must
* read until the next magic-byte-sequence AKA the next segment to figure
* out the full nalu length
**/
public static List<NaluSegment> parseNaluSegments(byte[] buffer) throws NaluBufferException {
List<NaluSegment> segmentList = new ArrayList<>();
if (buffer.length < 6) {
return segmentList;
}
int lastStartingOffset = -1;
for (int i = 0; i < buffer.length - 10; ++i) {
**if (buffer[i] == 0x00 && buffer[i+1] == 0x00 && buffer[i+2] == 0x01)** {
int naluType = (buffer[i+3] & 0x1F);
NaluSegment segment = new NaluSegment(naluType, 3, i);
**if (i > 0 && buffer[i-1] == 0x00)** {
// This is actually a 4 byte segment
int currentSegmentOffset = segment.getOffset();
segment.setHeaderSize(4);
segment.setOffset(currentSegmentOffset - 1);
}
...
Create your own nalu-segment objects and don't forget the trailing NAL.
I hope this helps.
I've read everything I can get my hands on here and everywhere else about decoding a video stream using MediaCodec. I have met with some success and find myself stuck. I get video on the surface after a few buffers of input but it is garbled with green blobs randomly.
I first pass the SPS and PPS NALUs individually. After the PPS I get a response of Output Buffers Changed from dequeueOutputBuffer().
For the first handful of NALUs after that, the dequeueOutputBuffer() calls return -1 (which I don't understand).
After a handful of this I get Output Format Changed. This is when I start to get video.
I dumped the format values and found that the only thing changing is the MIME to video/raw
I tried to use this to set the format up front but MediaCodec throws errors.
Here is my Code:
// Open the raw H.264 file from external storage.
File extStorage = Environment.getExternalStorageDirectory();
File media = new File(extStorage,"video.h264");
FileInputStream rawStream = new FileInputStream(media);
BufferedInputStream in = new BufferedInputStream(rawStream);

// Create an AVC decoder for a 480x384 stream that renders to videoSurface.
final int videoWidth = 480;
final int videoHeight = 384;
codec = MediaCodec.createDecoderByType("video/avc");
MediaFormat format = MediaFormat.createVideoFormat("video/avc", videoWidth, videoHeight);
format.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, videoWidth * videoHeight);
format.setString(MediaFormat.KEY_MIME, "video/avc");
codec.configure(format, videoSurface, null, 0);
codec.start();

// Get arrays of our codec buffers
ByteBuffer[] inputBuffers = codec.getInputBuffers();
ByteBuffer[] outputBuffers = codec.getOutputBuffers();

// Dequeue timeout, and the chunk buffer the file is read into.
long timeoutUs = 3000;
BYTES_READ = new byte[1500];
// Split the byte stream into NAL units on 4-byte start codes
// (00 00 00 01) and queue each completed unit into the decoder.
//
// Fixes relative to the earlier version of this loop:
//  - only the bytes actually returned by read() are processed (a short
//    final read no longer replays stale bytes from the previous chunk);
//  - bytes are compared directly instead of through String.format();
//  - slice NAL units are recognised by nal_unit_type (low 5 bits of the
//    header byte) rather than by the exact header bytes 0x21/0x25, so
//    streams using other nal_ref_idc values are not queued as config data;
//  - the input buffer is cleared before it is filled.
// NOTE(review): the last NAL unit of the file is never submitted because no
// start code follows it, and no end-of-stream buffer is queued.
int bytesRead;
while ((bytesRead = in.read(BYTES_READ)) != -1) {
    for (int k = 0; k < bytesRead; k++) {
        final byte b = BYTES_READ[k];
        nalUnit.write(b);
        if ( b == 0x00 && hdrIndex < 3 ) {
            NAL_HEADER[hdrIndex++]=b;
        } else if ( hdrIndex == 3 && b == 0x01 ) {
            NAL_HEADER[hdrIndex++]=b;
        } else if ( hdrIndex == 4 ) {
            // b is the header byte of the NEXT unit: everything collected so
            // far, minus the 5 bytes just matched, is one complete unit.
            NAL_HEADER[hdrIndex++]=b;
            if (nalUnitIndxS == -1) {
                nalUnitIndxS=0;
                nalUnitIndxE=nalUnit.size()-5;
            } else if (nalUnitIndxS >= 0){
                nalUnitIndxE=nalUnit.size()-5;
            }
            if (nalUnitIndxE > 0 ) {
                Log.d(TAG,"Attempting to write NAL unit to codec buffer... SIZE:"+nalUnit.size()+" IndxStart: "+nalUnitIndxS+" IndxEnd: "+nalUnitIndxE);
                Log.d(TAG,"NAL Unit Type: "+String.format("%02X", nalUnit.toByteArray()[4]));
                /*
                 * Get an input buffer
                 */
                int inputBufferIndex=-1;
                for ( int x = 0; x < 4; x++ ) {
                    inputBufferIndex = codec.dequeueInputBuffer(timeoutUs);
                    if ( inputBufferIndex >= 0 ) {
                        break;
                    } else {
                        Thread.sleep(250);
                    }
                }
                if (inputBufferIndex >= 0) {
                    // fill inputBuffers[inputBufferIndex] with valid data
                    // NOTE(review): this value is wall-clock milliseconds although
                    // the codec expects a presentation time in microseconds.
                    long presentationTimeUs = Calendar.getInstance().getTimeInMillis();
                    int nalUnitLen=nalUnitIndxE-nalUnitIndxS;
                    inputBuffers[inputBufferIndex].clear();
                    inputBuffers[inputBufferIndex].put(nalUnit.toByteArray(), nalUnitIndxS, nalUnitLen);
                    if ( configPacket ) {
                        Log.d(TAG,"Writing payload as configuration to codec...");
                        codec.queueInputBuffer(inputBufferIndex,0,nalUnitLen,presentationTimeUs,MediaCodec.BUFFER_FLAG_CODEC_CONFIG);
                    } else {
                        codec.queueInputBuffer(inputBufferIndex,0,nalUnitLen,presentationTimeUs,0);
                        //deQueue the Output Buffer
                        MediaCodec.BufferInfo bufInfo = new MediaCodec.BufferInfo();
                        int outputBufferIndex = codec.dequeueOutputBuffer(bufInfo, timeoutUs);
                        if (outputBufferIndex >= 0) {
                            Log.d(TAG,"OutputBuffer is ready to be processed or rendered.");
                            codec.releaseOutputBuffer(outputBufferIndex,true);
                        } else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
                            Log.d(TAG,"Output Buffers Changed!");
                            outputBuffers = codec.getOutputBuffers();
                        } else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
                            // Subsequent data will conform to new format.
                            Log.d(TAG,"Output Format Changed! Updating format...");
                            format = codec.getOutputFormat();
                        } else {
                            Log.w(TAG,"Did not understand OutputBuffer Index Response: "+outputBufferIndex);
                        }
                    }
                    // Start collecting the next unit, seeded with its header.
                    nalUnit.reset();
                    nalUnit.write(NAL_HEADER,0,5);
                    nalUnitIndxS=0;
                    nalUnitIndxE=0;
                } else {
                    Log.w(TAG, "We did not get a buffer!");
                }
            }
        } else {
            hdrIndex=0;
        }
        if ( hdrIndex == 5 ) {
            // nal_unit_type 1 (non-IDR slice) and 5 (IDR slice) carry picture
            // data; every other type is queued as configuration, as before.
            final int nalType = NAL_HEADER[4] & 0x1F;
            configPacket = !(nalType == 1 || nalType == 5);
            hdrIndex=0;
        }
    }
}
Log.d(TAG,"Cleaning up Codec and Socket....");
codec.stop();
codec.release();