In the previous installments, ffplay.c source analysis [1] and ffplay.c source analysis [2], we covered how ffplay decodes packets into frames and stores them in the corresponding queues. This article covers audio output, video output, and audio/video synchronization.
Audio Output
ffplay's audio output is implemented with SDL, an open-source cross-platform multimedia library. Once the SDL audio device is opened, SDL pulls data through a callback that tells the application how many bytes it needs. Here is the problem: after ffmpeg decodes an audio AVPacket into an AVFrame, the size of that AVFrame does not necessarily match the size requested by the callback; and if variable-speed playback is implemented, the per-frame data size after the speed change will differ from the callback size as well. An extra level of buffering solves this: a Frame read from the FrameQueue is first cached in a buffer, and the SDL callback is then fed from that buffer.
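To make the size mismatch concrete, a quick illustrative calculation (the numbers here are assumptions for the example, not fixed by ffplay): an AAC frame decodes to 1024 samples, which at stereo S16 is 1024 * 2 * 2 = 4096 bytes; if the SDL device was opened with samples = 2048, each callback asks for 2048 * 2 * 2 = 8192 bytes, i.e. exactly two decoded frames, and any speed change or resampling breaks even this alignment. Hence the intermediate buffer.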
SDL asks ffplay for audio data through sdl_audio_callback. ffplay pulls frames from sampq via audio_decode_frame, stores them in is->audio_buf, and hands that to SDL; on subsequent callbacks it first drains audio_buf and only calls audio_decode_frame to refill audio_buf when it runs out. The audio device is opened in audio_open:
static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
{
SDL_AudioSpec wanted_spec, spec;
const char *env;
static const int next_nb_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
static const int next_sample_rates[] = {0, 44100, 48000, 96000, 192000};
int next_sample_rate_idx = FF_ARRAY_ELEMS(next_sample_rates) - 1;
env = SDL_getenv("SDL_AUDIO_CHANNELS");
if (env) {//if the environment variable is set, take the channel count and channel layout from it first
wanted_nb_channels = atoi(env);
wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
}
if (!wanted_channel_layout || wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
}
//derive nb_channels from channel_layout; if the wanted_nb_channels argument does not match, it is corrected here
wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);
wanted_spec.channels = wanted_nb_channels;
wanted_spec.freq = wanted_sample_rate;
if (wanted_spec.freq <= 0 || wanted_spec.channels <= 0) {
av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
return -1;
}
while (next_sample_rate_idx && next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq)
next_sample_rate_idx--;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.silence = 0;
//SDL_AUDIO_MAX_CALLBACKS_PER_SEC caps the number of callbacks per second, avoiding overly frequent callbacks
wanted_spec.samples = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC));
wanted_spec.callback = sdl_audio_callback;//set the callback function
wanted_spec.userdata = opaque;
while (!(audio_dev = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, &spec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE))) {
av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
wanted_spec.channels, wanted_spec.freq, SDL_GetError());
wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)];
if (!wanted_spec.channels) {
wanted_spec.freq = next_sample_rates[next_sample_rate_idx--];
wanted_spec.channels = wanted_nb_channels;
if (!wanted_spec.freq) {
av_log(NULL, AV_LOG_ERROR,
"No more combinations to try, audio open failed\n");
return -1;
}
}
wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels);
}
//check the actual parameters of the opened audio device: sample format
if (spec.format != AUDIO_S16SYS) {
av_log(NULL, AV_LOG_ERROR,
"SDL advised audio format %d is not supported!\n", spec.format);
return -1;
}
//channel count: spec.channels is the actual value, wanted_spec.channels the requested one
if (spec.channels != wanted_spec.channels) {
wanted_channel_layout = av_get_default_channel_layout(spec.channels);
if (!wanted_channel_layout) {
av_log(NULL, AV_LOG_ERROR,
"SDL advised channel count %d is not supported!\n", spec.channels);
return -1;
}
}
audio_hw_params->fmt = AV_SAMPLE_FMT_S16;
audio_hw_params->freq = spec.freq;
audio_hw_params->channel_layout = wanted_channel_layout;
audio_hw_params->channels = spec.channels;
//audio_hw_params->frame_size: the number of bytes one sample (across all channels) occupies
audio_hw_params->frame_size = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
audio_hw_params->bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
return -1;
}
return spec.size;//bytes buffered inside the hardware: samples * channels * bytes_per_sample
}
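It is worth unpacking the wanted_spec.samples formula above. With ffplay's constants SDL_AUDIO_MIN_BUFFER_SIZE = 512 and SDL_AUDIO_MAX_CALLBACKS_PER_SEC = 30, at 48000 Hz we get 48000 / 30 = 1600, av_log2(1600) = 10, and 2 << 10 = 2048 samples per callback, i.e. about 48000 / 2048 ≈ 23 callbacks per second, safely under the 30-per-second cap.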
Let's look at the implementation of sdl_audio_callback.
static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
VideoState *is = opaque;
int audio_size, len1;
audio_callback_time = av_gettime_relative();
//loop until enough data has been read
while (len > 0) {
if (is->audio_buf_index >= is->audio_buf_size) {//buffer exhausted, use audio_decode_frame to fetch more data
audio_size = audio_decode_frame(is);//read a frame and resample it
if (audio_size < 0) {
/* if error, just output silence */
is->audio_buf = NULL;
is->audio_buf_size = SDL_AUDIO_MIN_BUFFER_SIZE / is->audio_tgt.frame_size * is->audio_tgt.frame_size;
} else {
if (is->show_mode != SHOW_MODE_VIDEO)
update_sample_display(is, (int16_t *)is->audio_buf, audio_size);
is->audio_buf_size = audio_size;
}
is->audio_buf_index = 0;
}
//copy no more than what remains in the buffer
len1 = is->audio_buf_size - is->audio_buf_index;
if (len1 > len)
len1 = len;
//decide how to output audio_buf according to audio_volume
//check the mute state and the current volume; if the volume is at maximum, copy the data directly
if (!is->muted && is->audio_buf && is->audio_volume == SDL_MIX_MAXVOLUME)
memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
else {
memset(stream, 0, len1);//when muted, just fill stream with zeros
if (!is->muted && is->audio_buf)//not muted, volume somewhere in (0, SDL_MIX_MAXVOLUME): use SDL_MixAudioFormat to adjust the volume and mix
SDL_MixAudioFormat(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, AUDIO_S16SYS, len1, is->audio_volume);
}
len -= len1;
stream += len1;
//advance is->audio_buf_index to the start of the data in audio_buf not yet copied to stream
is->audio_buf_index += len1;
}
is->audio_write_buf_size = is->audio_buf_size - is->audio_buf_index;
/* Let's assume the audio driver that is used by SDL has two periods. */
if (!isnan(is->audio_clock)) {
set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
sync_clock_to_slave(&is->extclk, &is->audclk);
}
}
audio_buf is output to stream: if audio_volume is at maximum volume, a plain memcpy into stream suffices; otherwise SDL_MixAudioFormat is called to adjust the volume (e.g. after the user changed it). Whenever audio_buf is exhausted, audio_decode_frame is called to refill it. Let's analyze audio_decode_frame next.
static int audio_decode_frame(VideoState *is)
{
int data_size, resampled_data_size;
int64_t dec_channel_layout;
av_unused double audio_clock0;
int wanted_nb_samples;
Frame *af;
if (is->paused)//paused: return, take no frame from the queue, play no audio
return -1;
do {//1. take a frame from sampq; after a seek the serial becomes discontinuous and such frames must be dropped
#if defined(_WIN32)
while (frame_queue_nb_remaining(&is->sampq) == 0) {
if ((av_gettime_relative() - audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
return -1;
av_usleep (1000);
}
#endif
if (!(af = frame_queue_peek_readable(&is->sampq)))//check readability and peek a frame at the queue head
return -1;
frame_queue_next(&is->sampq);//advance the read index; only now does the Frame really leave the queue (frame dropping)
} while (af->serial != is->audioq.serial);//check serial continuity
//compute the number of bytes this frame occupies
data_size = av_samples_get_buffer_size(NULL, af->frame->channels,
af->frame->nb_samples,
af->frame->format, 1);
dec_channel_layout =
(af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
//check whether the resampler needs to be (re)initialized
if (af->frame->format != is->audio_src.fmt ||
dec_channel_layout != is->audio_src.channel_layout ||
af->frame->sample_rate != is->audio_src.freq ||
(wanted_nb_samples != af->frame->nb_samples && !is->swr_ctx)) {
swr_free(&is->swr_ctx);
//set the output and input target parameters
is->swr_ctx = swr_alloc_set_opts(NULL,
is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
dec_channel_layout, af->frame->format, af->frame->sample_rate,
0, NULL);
if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
av_log(NULL, AV_LOG_ERROR,
"Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
swr_free(&is->swr_ctx);
return -1;
}
is->audio_src.channel_layout = dec_channel_layout;
is->audio_src.channels = af->frame->channels;
is->audio_src.freq = af->frame->sample_rate;
is->audio_src.fmt = af->frame->format;
}
//if the resampler was initialized
if (is->swr_ctx) {
const uint8_t **in = (const uint8_t **)af->frame->extended_data;
uint8_t **out = &is->audio_buf1;
int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
int out_size = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
int len2;
if (out_size < 0) {
av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
return -1;
}
if (wanted_nb_samples != af->frame->nb_samples) {
if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
return -1;
}
}
av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
if (!is->audio_buf1)
return AVERROR(ENOMEM);
//audio resampling: the return value is the number of samples per channel in the resampled output
len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
if (len2 < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
return -1;
}
if (len2 == out_count) {
av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
if (swr_init(is->swr_ctx) < 0)
swr_free(&is->swr_ctx);
}
is->audio_buf = is->audio_buf1;
resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
} else {
//no resampling needed: point at the audio data inside the frame
is->audio_buf = af->frame->data[0];
resampled_data_size = data_size;
}
audio_clock0 = is->audio_clock;
/* update the audio clock with the pts */
if (!isnan(af->pts))
is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
else
is->audio_clock = NAN;
is->audio_clock_serial = af->serial;
#ifdef DEBUG
{
static double last_clock;
printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
is->audio_clock - last_clock,
is->audio_clock, audio_clock0);
last_clock = is->audio_clock;
}
#endif
return resampled_data_size;//return the size of the data in audio_buf
}
First a frame is read from sampq and its serial is checked for continuity; if it is discontinuous (for example after a seek), the frame is dropped. Then the frame's byte size is computed with av_samples_get_buffer_size. Finally comes resampling: the format of a decoded audio frame is not necessarily supported by SDL, and in that case the frame must be resampled, i.e. converted to an audio format SDL does support.
Video Output
ffplay also displays video with SDL; the display is set up in the main function.
int main(int argc, char **argv)
{
..............................
flags = SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER;
if (audio_disable)
flags &= ~SDL_INIT_AUDIO;
else {
/* Try to work around an occasional ALSA buffer underflow issue when the
* period size is NPOT due to ALSA resampling by forcing the buffer size. */
if (!SDL_getenv("SDL_AUDIO_ALSA_SET_BUFFER_SIZE"))
SDL_setenv("SDL_AUDIO_ALSA_SET_BUFFER_SIZE","1", 1);
}
if (display_disable)
flags &= ~SDL_INIT_VIDEO;
//1. SDL initialization
if (SDL_Init (flags)) {
av_log(NULL, AV_LOG_FATAL, "Could not initialize SDL - %s\n", SDL_GetError());
av_log(NULL, AV_LOG_FATAL, "(Did you set the DISPLAY variable?)\n");
exit(1);
}
SDL_EventState(SDL_SYSWMEVENT, SDL_IGNORE);
SDL_EventState(SDL_USEREVENT, SDL_IGNORE);
av_init_packet(&flush_pkt);
flush_pkt.data = (uint8_t *)&flush_pkt;
if (!display_disable) {
int flags = SDL_WINDOW_HIDDEN;
if (alwaysontop)
#if SDL_VERSION_ATLEAST(2,0,5)
flags |= SDL_WINDOW_ALWAYS_ON_TOP;
#else
av_log(NULL, AV_LOG_WARNING, "Your SDL version doesn't support SDL_WINDOW_ALWAYS_ON_TOP. Feature will be inactive.\n");
#endif
if (borderless)
flags |= SDL_WINDOW_BORDERLESS;
else
flags |= SDL_WINDOW_RESIZABLE;
//2. create the window
window = SDL_CreateWindow(program_name, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, default_width, default_height, flags);
SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "linear");
if (window) {
renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
if (!renderer) {
av_log(NULL, AV_LOG_WARNING, "Failed to initialize a hardware accelerated renderer: %s\n", SDL_GetError());
renderer = SDL_CreateRenderer(window, -1, 0);
}
if (renderer) {
if (!SDL_GetRendererInfo(renderer, &renderer_info))
av_log(NULL, AV_LOG_VERBOSE, "Initialized %s renderer.\n", renderer_info.name);
}
}
if (!window || !renderer || !renderer_info.num_texture_formats) {
av_log(NULL, AV_LOG_FATAL, "Failed to create window or renderer: %s", SDL_GetError());
do_exit(NULL);
}
}
//3. initialize the queues and create the data-reading thread read_thread
is = stream_open(input_filename, file_iformat);
if (!is) {
av_log(NULL, AV_LOG_FATAL, "Failed to initialize VideoState!\n");
do_exit(NULL);
}
//4. event handling
event_loop(is);
/* never returns */
return 0;
}
Essentially all window handling happens in main: SDL creates the main window, then a renderer is created for rendering output, and event_loop polls for playback-control events, which also drives the video display output.
With a breakpoint set in the code, the video output call stack is event_loop -> refresh_loop_wait_event -> video_refresh -> video_display.
event_loop starts by processing SDL events:
static void event_loop(VideoState *cur_stream)
{
SDL_Event event;
double incr, pos, frac;
for (;;) {
double x;
refresh_loop_wait_event(cur_stream, &event);//video display happens here
switch (event.type) {
//key events
............................
}
}
}
The video display work happens mainly in refresh_loop_wait_event:
static void refresh_loop_wait_event(VideoState *is, SDL_Event *event) {
double remaining_time = 0.0;
SDL_PumpEvents();
while (!SDL_PeepEvents(event, 1, SDL_GETEVENT, SDL_FIRSTEVENT, SDL_LASTEVENT)) {
if (!cursor_hidden && av_gettime_relative() - cursor_last_shown > CURSOR_HIDE_DELAY) {
SDL_ShowCursor(0);
cursor_hidden = 1;
}
if (remaining_time > 0.0)
av_usleep((int64_t)(remaining_time * 1000000.0));
remaining_time = REFRESH_RATE;
if (is->show_mode != SHOW_MODE_NONE && (!is->paused || is->force_refresh))
video_refresh(is, &remaining_time);//render the picture; remaining_time tells the next loop iteration how long to sleep to keep the output cadence steady
SDL_PumpEvents();
}
}
video_refresh is implemented as follows:
static void video_refresh(void *opaque, double *remaining_time)
{
VideoState *is = opaque;
double time;
Frame *sp, *sp2;
if (!is->paused && get_master_sync_type(is) == AV_SYNC_EXTERNAL_CLOCK && is->realtime)
check_external_clock_speed(is);
if (!display_disable && is->show_mode != SHOW_MODE_VIDEO && is->audio_st) {
time = av_gettime_relative() / 1000000.0;
if (is->force_refresh || is->last_vis_time + rdftspeed < time) {
video_display(is);
is->last_vis_time = time;
}
*remaining_time = FFMIN(*remaining_time, is->last_vis_time + rdftspeed - time);
}
if (is->video_st) {
retry:
if (frame_queue_nb_remaining(&is->pictq) == 0) {//check whether the queue is empty
// nothing to do, no picture to display in the queue
} else {
double last_duration, duration, delay;
Frame *vp, *lastvp;
/* dequeue the picture */
lastvp = frame_queue_peek_last(&is->pictq);//the previous frame, currently on screen
vp = frame_queue_peek(&is->pictq);//the frame waiting to be displayed
if (vp->serial != is->videoq.serial) {//if the serial is discontinuous (e.g. after a seek), dequeue it so the newest serial is reached as soon as possible
frame_queue_next(&is->pictq);//drop the frame
goto retry;
}
if (lastvp->serial != vp->serial)//a new playback serial resets the current time
is->frame_timer = av_gettime_relative() / 1000000.0;//frame_timer runs on its own independent timeline
if (is->paused)//paused: keep displaying the previous frame
goto display;
/* compute nominal last_duration */
last_duration = vp_duration(is, lastvp, vp);//interval between the two adjacent frames
delay = compute_target_delay(last_duration, is);//how much longer the previous frame lastvp still needs to be displayed
time= av_gettime_relative()/1000000.0;//current time
if (time < is->frame_timer + delay) {//not yet time to play vp, so the currently displayed frame must stay up
*remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time);//sleep for the remaining display time
goto display;
}
//time to play vp
is->frame_timer += delay;
if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
is->frame_timer = time;
SDL_LockMutex(is->pictq.mutex);
if (!isnan(vp->pts))
update_video_pts(is, vp->pts, vp->pos, vp->serial);
SDL_UnlockMutex(is->pictq.mutex);
//frame-dropping check
if (frame_queue_nb_remaining(&is->pictq) > 1) {//the queue holds at least 2 frames
Frame *nextvp = frame_queue_peek_next(&is->pictq);//peek the next frame
duration = vp_duration(is, vp, nextvp);
if(!is->step && //only check for dropping when not in frame-step mode
(framedrop>0 //framedrop = 0 disables frame dropping
|| (framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) && //no dropping when the video clock is the sync master; drop only when a non-video clock is the master
time > is->frame_timer + duration){//we have fallen behind, so drop
is->frame_drops_late++;//count the dropped frame
frame_queue_next(&is->pictq);
goto retry;
}
}
//subtitle display
if (is->subtitle_st) {
........
}
frame_queue_next(&is->pictq);
is->force_refresh = 1;
if (is->step && !is->paused)
stream_toggle_pause(is);
}
display:
/* display picture */
if (!display_disable && is->force_refresh && is->show_mode == SHOW_MODE_VIDEO && is->pictq.rindex_shown)
video_display(is);//draw the picture
}
is->force_refresh = 0;
if (show_status) {
AVBPrint buf;
static int64_t last_time;
int64_t cur_time;
int aqsize, vqsize, sqsize;
double av_diff;
cur_time = av_gettime_relative();
if (!last_time || (cur_time - last_time) >= 30000) {
aqsize = 0;
vqsize = 0;
sqsize = 0;
if (is->audio_st)
aqsize = is->audioq.size;
if (is->video_st)
vqsize = is->videoq.size;
if (is->subtitle_st)
sqsize = is->subtitleq.size;
av_diff = 0;
if (is->audio_st && is->video_st)
av_diff = get_clock(&is->audclk) - get_clock(&is->vidclk);
else if (is->video_st)
av_diff = get_master_clock(is) - get_clock(&is->vidclk);
else if (is->audio_st)
av_diff = get_master_clock(is) - get_clock(&is->audclk);
av_bprint_init(&buf, 0, AV_BPRINT_SIZE_AUTOMATIC);
av_bprintf(&buf,
"%7.2f %s:%7.3f fd=%4d aq=%5dKB vq=%5dKB sq=%5dB f=%"PRId64"/%"PRId64" \r",
get_master_clock(is),
(is->audio_st && is->video_st) ? "A-V" : (is->video_st ? "M-V" : (is->audio_st ? "M-A" : " ")),
av_diff,
is->frame_drops_early + is->frame_drops_late,
aqsize / 1024,
vqsize / 1024,
sqsize,
is->video_st ? is->viddec.avctx->pts_correction_num_faulty_dts : 0,
is->video_st ? is->viddec.avctx->pts_correction_num_faulty_pts : 0);
if (show_status == 1 && AV_LOG_INFO > av_log_get_level())
fprintf(stderr, "%s", buf.str);
else
av_log(NULL, AV_LOG_INFO, "%s", buf.str);
fflush(stderr);
av_bprint_finalize(&buf, NULL);
last_time = cur_time;
}
}
}
The display flow then goes through video_display:
static void video_display(VideoState *is)
{
if (!is->width)
video_open(is);
SDL_SetRenderDrawColor(renderer, 0, 0, 0, 255);
SDL_RenderClear(renderer);//clear the renderer
if (is->audio_st && is->show_mode != SHOW_MODE_VIDEO)
video_audio_display(is);//audio visualization (the style depends on the show mode)
else if (is->video_st)
video_image_display(is);//display the video picture
SDL_RenderPresent(renderer);
}
The logic of video_image_display: frame_queue_peek_last fetches the frame to display, upload_texture uploads it to an SDL_Texture, and finally SDL_RenderCopyEx copies the texture to the renderer for display.
static void video_image_display(VideoState *is)
{
Frame *vp;
Frame *sp = NULL;
SDL_Rect rect;
vp = frame_queue_peek_last(&is->pictq);//fetch the video frame to display
if (is->subtitle_st) {//subtitle display
if (frame_queue_nb_remaining(&is->subpq) > 0) {
sp = frame_queue_peek(&is->subpq);
if (vp->pts >= sp->pts + ((float) sp->sub.start_display_time / 1000)) {
if (!sp->uploaded) {
uint8_t* pixels[4];
int pitch[4];
int i;
if (!sp->width || !sp->height) {
sp->width = vp->width;
sp->height = vp->height;
}
if (realloc_texture(&is->sub_texture, SDL_PIXELFORMAT_ARGB8888, sp->width, sp->height, SDL_BLENDMODE_BLEND, 1) < 0)
return;
for (i = 0; i < sp->sub.num_rects; i++) {
AVSubtitleRect *sub_rect = sp->sub.rects[i];
sub_rect->x = av_clip(sub_rect->x, 0, sp->width );
sub_rect->y = av_clip(sub_rect->y, 0, sp->height);
sub_rect->w = av_clip(sub_rect->w, 0, sp->width - sub_rect->x);
sub_rect->h = av_clip(sub_rect->h, 0, sp->height - sub_rect->y);
is->sub_convert_ctx = sws_getCachedContext(is->sub_convert_ctx,
sub_rect->w, sub_rect->h, AV_PIX_FMT_PAL8,
sub_rect->w, sub_rect->h, AV_PIX_FMT_BGRA,
0, NULL, NULL, NULL);
if (!is->sub_convert_ctx) {
av_log(NULL, AV_LOG_FATAL, "Cannot initialize the conversion context\n");
return;
}
if (!SDL_LockTexture(is->sub_texture, (SDL_Rect *)sub_rect, (void **)pixels, pitch)) {
sws_scale(is->sub_convert_ctx, (const uint8_t * const *)sub_rect->data, sub_rect->linesize,
0, sub_rect->h, pixels, pitch);
SDL_UnlockTexture(is->sub_texture);
}
}
sp->uploaded = 1;
}
} else
sp = NULL;
}
}
//the display rectangle is recomputed on every refresh
calculate_display_rect(&rect, is->xleft, is->ytop, is->width, is->height, vp->width, vp->height, vp->sar);
if (!vp->uploaded) {//check whether frame vp has already been uploaded for display
if (upload_texture(&is->vid_texture, vp->frame, &is->img_convert_ctx) < 0)
return;
vp->uploaded = 1;
vp->flip_v = vp->frame->linesize[0] < 0;
}
set_sdl_yuv_conversion_mode(vp->frame);
SDL_RenderCopyEx(renderer, is->vid_texture, NULL, &rect, 0, NULL, vp->flip_v ? SDL_FLIP_VERTICAL : 0);//copy the texture to the renderer
set_sdl_yuv_conversion_mode(NULL);
if (sp) {
//subtitle display logic
#if USE_ONEPASS_SUBTITLE_RENDER
SDL_RenderCopy(renderer, is->sub_texture, NULL, &rect);
#else
int i;
double xratio = (double)rect.w / (double)sp->width;
double yratio = (double)rect.h / (double)sp->height;
for (i = 0; i < sp->sub.num_rects; i++) {
SDL_Rect *sub_rect = (SDL_Rect*)sp->sub.rects[i];
SDL_Rect target = {.x = rect.x + sub_rect->x * xratio,
.y = rect.y + sub_rect->y * yratio,
.w = sub_rect->w * xratio,
.h = sub_rect->h * yratio};
SDL_RenderCopy(renderer, is->sub_texture, sub_rect, &target);
}
#endif
}
}
upload_texture uploads an AVFrame's image data into the SDL texture.
static int upload_texture(SDL_Texture **tex, AVFrame *frame, struct SwsContext **img_convert_ctx) {
int ret = 0;
Uint32 sdl_pix_fmt;
SDL_BlendMode sdl_blendmode;
//derive the matching SDL pixel format and blend mode from the frame's image format
get_sdl_pix_fmt_and_blendmode(frame->format, &sdl_pix_fmt, &sdl_blendmode);
//realloc_texture reallocates &is->vid_texture according to the newly derived SDL pixel format
if (realloc_texture(tex, sdl_pix_fmt == SDL_PIXELFORMAT_UNKNOWN ? SDL_PIXELFORMAT_ARGB8888 : sdl_pix_fmt, frame->width, frame->height, sdl_blendmode, 0) < 0)
return -1;
switch (sdl_pix_fmt) {
case SDL_PIXELFORMAT_UNKNOWN://the frame's format is not supported by SDL, so the image must be converted to the target format AV_PIX_FMT_BGRA, which corresponds to SDL_PIXELFORMAT_BGRA32
/* This should only happen if we are not using avfilter... */
*img_convert_ctx = sws_getCachedContext(*img_convert_ctx,
frame->width, frame->height, frame->format, frame->width, frame->height,
AV_PIX_FMT_BGRA, sws_flags, NULL, NULL, NULL);
if (*img_convert_ctx != NULL) {
uint8_t *pixels[4];
int pitch[4];
if (!SDL_LockTexture(*tex, NULL, (void **)pixels, pitch)) {
//pixel format and resolution conversion (e.g. scaling); libyuv or a shader would actually be more efficient
sws_scale(*img_convert_ctx, (const uint8_t * const *)frame->data, frame->linesize,
0, frame->height, pixels, pitch);
SDL_UnlockTexture(*tex);
}
} else {
av_log(NULL, AV_LOG_FATAL, "Cannot initialize the conversion context\n");
ret = -1;
}
break;
case SDL_PIXELFORMAT_IYUV://the frame format maps to SDL_PIXELFORMAT_IYUV; no image conversion is needed, just call SDL_UpdateYUVTexture to update the texture
if (frame->linesize[0] > 0 && frame->linesize[1] > 0 && frame->linesize[2] > 0) {
ret = SDL_UpdateYUVTexture(*tex, NULL, frame->data[0], frame->linesize[0],
frame->data[1], frame->linesize[1],
frame->data[2], frame->linesize[2]);
} else if (frame->linesize[0] < 0 && frame->linesize[1] < 0 && frame->linesize[2] < 0) {
ret = SDL_UpdateYUVTexture(*tex, NULL, frame->data[0] + frame->linesize[0] * (frame->height - 1), -frame->linesize[0],
frame->data[1] + frame->linesize[1] * (AV_CEIL_RSHIFT(frame->height, 1) - 1), -frame->linesize[1],
frame->data[2] + frame->linesize[2] * (AV_CEIL_RSHIFT(frame->height, 1) - 1), -frame->linesize[2]);
} else {
av_log(NULL, AV_LOG_ERROR, "Mixed negative and positive linesizes are not supported.\n");
return -1;
}
break;
default://the frame format maps to some other SDL format; no image conversion is needed, just call SDL_UpdateTexture to update the texture
if (frame->linesize[0] < 0) {
ret = SDL_UpdateTexture(*tex, NULL, frame->data[0] + frame->linesize[0] * (frame->height - 1), -frame->linesize[0]);
} else {
ret = SDL_UpdateTexture(*tex, NULL, frame->data[0], frame->linesize[0]);
}
break;
}
return ret;
}
Audio/Video Synchronization
First, why is audio/video synchronization needed at all? As the audio and video output paths above show, they run in different threads, so audio and video frames with the same pts are not necessarily decoded at the same moment, and pts values may even be discontinuous due to encoding or muxing. Playing without any correction would put sound and picture out of sync, so the playback speed and presentation time of audio and video have to be controlled to keep them synchronized.
ffplay has three sync strategies: audio master, video master, and external-clock master. Since the ear is more sensitive to audio glitches than the eye is to video ones, the usual strategy is audio master: video is synchronized to audio by dropping or repeating pictures as needed to catch up with or wait for the audio. ffplay also defaults to syncing against the audio clock.
Synchronization requires the notion of a "clock". The audio, video, and external clocks are all independent: each sets its own clock, and whichever is the master, the others may only get it and synchronize against it. The clock structure is defined as follows:
typedef struct Clock {
double pts;//clock base: pts of the current (to-be-displayed) frame; once played, the current frame becomes the previous frame
double pts_drift;//difference between the current pts and the current system time; audio and video keep this value independently
double last_updated;//system time of the last update
double speed;//clock speed, used to control the playback rate
int serial;//playback serial: a run of continuous playback; a seek starts a new serial
int paused;//= 1 means paused
int *queue_serial; //points to packet_serial
} Clock;
The clock works by being "synced" repeatedly: set_clock_at(Clock *c, double pts, int serial, double time) updates it with a pts, a serial, and the system time. Reading the clock returns an estimate, extrapolated from the pts_drift recorded at the last sync.
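For reference, this is how set_clock_at and get_clock realize that in ffplay.c (comments added here):
static void set_clock_at(Clock *c, double pts, int serial, double time)
{
c->pts = pts;
c->last_updated = time;
c->pts_drift = c->pts - time;//remember the offset between media time and system time
c->serial = serial;
}
static double get_clock(Clock *c)
{
if (*c->queue_serial != c->serial)
return NAN;//a new playback serial invalidates the old clock value
if (c->paused) {
return c->pts;
} else {
double time = av_gettime_relative() / 1000000.0;
//extrapolate from the last sync point, scaled by the playback speed
return c->pts_drift + time - (time - c->last_updated) * (1.0 - c->speed);
}
}
At speed == 1.0 the expression reduces to pts_drift + time, i.e. the pts the stream should have reached right now.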
Syncing to Audio
ffplay defaults to syncing against the audio clock, which is set in sdl_audio_callback:
static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
audio_callback_time = av_gettime_relative();
..........................................
/* Let's assume the audio driver that is used by SDL has two periods. */
if (!isnan(is->audio_clock)) {
set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
sync_clock_to_slave(&is->extclk, &is->audclk);
}
}
is->audio_clock is assigned in audio_decode_frame:
static int audio_decode_frame(VideoState *is)
{
........................................................
/* update the audio clock with the pts */
if (!isnan(af->pts))
is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
else
is->audio_clock = NAN;
is->audio_clock_serial = af->serial;
................................................
}
Notice that this timestamp is the pts of the end of audio_buf, not of its start. So when audio_buf still holds leftover data (its length is recorded in audio_write_buf_size), the pts of the data actually played becomes is->audio_clock - (double)(is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec.
Beyond that, assuming the SDL audio driver uses two periods, another 2 * audio_hw_buf_size bytes have not actually been played out yet either, which gives:
is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec
set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
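A quick illustrative calculation (the buffer sizes here are assumptions): at 48 kHz stereo S16, bytes_per_sec = 48000 * 2 * 2 = 192000; with audio_hw_buf_size = 8192 and audio_write_buf_size = 4096, the clock is set to audio_clock - (2 * 8192 + 4096) / 192000 ≈ audio_clock - 0.107, i.e. about 107 ms earlier than the frame-end pts.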
Next, video must be synchronized against the audio clock: if video is running ahead, the previous frame is repeated to wait for the audio; if it is running behind, frames are dropped to catch up. This logic lives in video_refresh:
static void video_refresh(void *opaque, double *remaining_time)
{
...............................................................
/* compute nominal last_duration */
last_duration = vp_duration(is, lastvp, vp);//interval between the two adjacent frames
delay = compute_target_delay(last_duration, is);//how much longer the previous frame lastvp still needs to be displayed
time= av_gettime_relative()/1000000.0;//current time
if (time < is->frame_timer + delay) {//not yet time to play vp, so the currently displayed frame must stay up
*remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time);//sleep for the remaining display time
goto display;
}
//time to play vp
is->frame_timer += delay;
if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
is->frame_timer = time;
SDL_LockMutex(is->pictq.mutex);
if (!isnan(vp->pts))
update_video_pts(is, vp->pts, vp->pos, vp->serial);
SDL_UnlockMutex(is->pictq.mutex);
if (frame_queue_nb_remaining(&is->pictq) > 1) {//the queue holds at least 2 frames
Frame *nextvp = frame_queue_peek_next(&is->pictq);//peek the next frame
duration = vp_duration(is, vp, nextvp);
if(!is->step && //only check for dropping when not in frame-step mode
(framedrop>0 //framedrop = 0 disables frame dropping
|| (framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) && //no dropping when the video clock is the sync master; drop only when a non-video clock is the master
time > is->frame_timer + duration){//we have fallen behind, so drop
is->frame_drops_late++;//count the dropped frame
frame_queue_next(&is->pictq);
goto retry;
}
}
.............................................................
}
This introduces another concept, frame_timer, which can be understood as the frame display moment: before the update it is the display moment of the previous frame lastvp; after the update (is->frame_timer += delay) it is the display moment of the current frame vp. The previous frame's display moment plus delay (how much longer it should be shown, including its own duration) is the moment its display should end.
time1: the system time is earlier than lastvp's scheduled end of display (frame_timer + delay); lastvp should keep being displayed;
time2: the system time is past lastvp's end of display but before vp's end of display; lastvp is neither repeated nor is vp dropped: vp should be displayed;
time3: the system time is past vp's end of display (which is also nextvp's expected start); vp should be dropped.
How is the delay for lastvp computed? That happens in compute_target_delay: the larger its return value, the slower the picture advances, i.e. the longer the previous frame stays on screen.
static double compute_target_delay(double delay, VideoState *is)
{
double sync_threshold, diff = 0;
/* update delay to follow master synchronisation source */
if (get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER) {
/* if video is slave, we try to correct big delays by
duplicating or deleting a frame */
diff = get_clock(&is->vidclk) - get_master_clock(is);
/* skip or repeat frame. We take into account the
delay to compute the threshold. I still don't know
if it is the best guess */
sync_threshold = FFMAX(AV_SYNC_THRESHOLD_MIN, FFMIN(AV_SYNC_THRESHOLD_MAX, delay));
if (!isnan(diff) && fabs(diff) < is->max_frame_duration) {
if (diff <= -sync_threshold)
delay = FFMAX(0, delay + diff);
else if (diff >= sync_threshold && delay > AV_SYNC_FRAMEDUP_THRESHOLD)
delay = delay + diff;
else if (diff >= sync_threshold)
delay = 2 * delay;
}
}
av_log(NULL, AV_LOG_TRACE, "video: delay=%0.3f A-V=%f\n",delay, -diff);
return delay;
}
(1) diff <= -sync_threshold: video is playing too slowly and frames should be dropped as appropriate;
(2) diff >= sync_threshold && delay > AV_SYNC_FRAMEDUP_THRESHOLD: return delay + diff, letting the actual diff decide how much longer to display;
(3) diff >= sync_threshold: video is playing too fast, so lastvp should be repeated; returning 2 * delay, i.e. twice lastvp's duration, shows lastvp for one extra frame;
(4) -sync_threshold < diff < sync_threshold: within tolerance, display at the nominal frame duration and return delay unchanged.
As the above shows, synchronization is not enforced at every instant: there is a "near-sync" tolerance band around the master clock.
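A worked example with ffplay's constants (AV_SYNC_THRESHOLD_MIN = 0.04, AV_SYNC_THRESHOLD_MAX = 0.1, AV_SYNC_FRAMEDUP_THRESHOLD = 0.1): for 25 fps video, delay = last_duration = 0.04 s, so sync_threshold = FFMAX(0.04, FFMIN(0.1, 0.04)) = 0.04. If diff = -0.08 (video 80 ms behind audio), case (1) yields delay = FFMAX(0, 0.04 - 0.08) = 0 and the next frame is shown immediately; if diff = +0.08, case (3) yields delay = 2 * 0.04 = 0.08 and lastvp is held for one extra frame; if |diff| < 0.04, delay stays at 0.04 and video simply follows its own frame rate.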
Syncing to Video
When audio is synchronized to video (video master), applying the same drop/repeat scheme to audio is not viable: audio is far more continuous than video, and repeating or discarding a few hundred samples to resync produces clearly audible discontinuities. ffplay instead compensates through resampling: if audio has fallen behind, resampling produces fewer samples than normal so the next frame plays sooner; if audio is running ahead, resampling produces more samples than normal so playback slows slightly.
For the video path, video_refresh() -> update_video_pts() just plays at the nominal frame interval and keeps re-correcting the video clock in real time; the real work is on the audio side, mainly in audio_decode_frame:
static int audio_decode_frame(VideoState *is)
{
.......................................
//1. compute the number of samples to output from the difference against the video clock
wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
//2. check whether the resampler needs to be (re)initialized
if (af->frame->format != is->audio_src.fmt ||
dec_channel_layout != is->audio_src.channel_layout ||
af->frame->sample_rate != is->audio_src.freq ||
(wanted_nb_samples != af->frame->nb_samples && !is->swr_ctx)) {
swr_free(&is->swr_ctx);
//set the output and input target parameters
is->swr_ctx = swr_alloc_set_opts(NULL,
is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
dec_channel_layout, af->frame->format, af->frame->sample_rate,
0, NULL);
if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
av_log(NULL, AV_LOG_ERROR,
"Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
swr_free(&is->swr_ctx);
return -1;
}
//save the latest source parameters
is->audio_src.channel_layout = dec_channel_layout;
is->audio_src.channels = af->frame->channels;
is->audio_src.freq = af->frame->sample_rate;
is->audio_src.fmt = af->frame->format;
}
//3. resample: use the resampler to add or remove samples
if (is->swr_ctx) {
const uint8_t **in = (const uint8_t **)af->frame->extended_data;
uint8_t **out = &is->audio_buf1;
int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
int out_size = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
int len2;
if (out_size < 0) {
av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
return -1;
}
if (wanted_nb_samples != af->frame->nb_samples) {
if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
return -1;
}
}
av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
if (!is->audio_buf1)
return AVERROR(ENOMEM);
//audio resampling: the return value is the number of samples per channel in the resampled output
len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
if (len2 < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
return -1;
}
if (len2 == out_count) {
av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
if (swr_init(is->swr_ctx) < 0)
swr_free(&is->swr_ctx);
}
is->audio_buf = is->audio_buf1;
resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
} else {
//no resampling needed: point at the audio data inside the frame
is->audio_buf = af->frame->data[0];
resampled_data_size = data_size;
}
...............................................
return resampled_data_size;//return the size of the data in audio_buf
}
(1) Compute the number of samples to output from the difference against the video clock. This is done by synchronize_audio (sketched below): fewer samples when audio is running behind, more when it is running ahead;
(2) Decide whether resampling is needed: if the wanted sample count differs from the frame's sample count, samples must be added or removed;
(3) Use the resampling library to add or remove samples smoothly: once the target wanted_nb_samples is known, the combination of swr_set_compensation and swr_convert performs the adjustment.
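synchronize_audio has been referenced twice but not shown; here is its core logic, abridged from ffplay.c (logging removed, comments added). It low-pass filters the A-V clock difference and clips the correction to SAMPLE_CORRECTION_PERCENT_MAX (10%) of the frame's sample count:
static int synchronize_audio(VideoState *is, int nb_samples)
{
int wanted_nb_samples = nb_samples;
/* only correct when audio is NOT the master clock */
if (get_master_sync_type(is) != AV_SYNC_AUDIO_MASTER) {
double diff, avg_diff;
int min_nb_samples, max_nb_samples;
diff = get_clock(&is->audclk) - get_master_clock(is);
if (!isnan(diff) && fabs(diff) < AV_NOSYNC_THRESHOLD) {
//exponential moving average of the A-V difference
is->audio_diff_cum = diff + is->audio_diff_avg_coef * is->audio_diff_cum;
if (is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
is->audio_diff_avg_count++;//not enough measurements for a reliable estimate yet
} else {
avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);
if (fabs(avg_diff) >= is->audio_diff_threshold) {
//convert the time difference into a sample-count correction...
wanted_nb_samples = nb_samples + (int)(diff * is->audio_src.freq);
//...but clip it to +/- SAMPLE_CORRECTION_PERCENT_MAX percent of the frame
min_nb_samples = nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100;
max_nb_samples = nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100;
wanted_nb_samples = av_clip(wanted_nb_samples, min_nb_samples, max_nb_samples);
}
}
} else {
/* difference too large: probably initial PTS errors, so reset the filter */
is->audio_diff_avg_count = 0;
is->audio_diff_cum = 0;
}
}
return wanted_nb_samples;
}
For example, with a 1024-sample frame at 48 kHz and the audio clock 5 ms ahead of the master, the raw correction is 1024 + (int)(0.005 * 48000) = 1264 samples, clipped to 1126 (+10%); swr_set_compensation then spreads the adjustment smoothly across the output.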