Compute PTS and DTS correctly to sync audio and video ffmpeg C++


Question


I am trying to mux H264 encoded data and G711 PCM data into a mov multimedia container. I am creating an AVPacket from the encoded data, and initially the PTS and DTS values of the video/audio frames are equal to AV_NOPTS_VALUE. So I calculated the DTS using the current time information. My code -

bool AudioVideoRecorder::WriteVideo(const unsigned char *pData, size_t iDataSize, bool const bIFrame) {
    .....................................
    .....................................
    .....................................
    AVPacket pkt = {0};
    av_init_packet(&pkt);
    int64_t dts = av_gettime();
    dts = av_rescale_q(dts, (AVRational){1, 1000000}, m_pVideoStream->time_base);
    int duration = 90000 / VIDEO_FRAME_RATE;
    if(m_prevVideoDts > 0LL) {
        duration = dts - m_prevVideoDts;
    }
    m_prevVideoDts = dts;

    pkt.pts = AV_NOPTS_VALUE;
    pkt.dts = m_currVideoDts;
    m_currVideoDts += duration;
    pkt.duration = duration;
    if(bIFrame) {
        pkt.flags |= AV_PKT_FLAG_KEY;
    }
    pkt.stream_index = m_pVideoStream->index;
    pkt.data = (uint8_t*) pData;
    pkt.size = iDataSize;

    int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);

    if(ret < 0) {
        LogErr("Writing video frame failed.");
        return false;
    }

    Log("Writing video frame done.");

    av_free_packet(&pkt);
    return true;
}

bool AudioVideoRecorder::WriteAudio(const unsigned char *pEncodedData, size_t iDataSize) {
    .................................
    .................................
    .................................
    AVPacket pkt = {0};
    av_init_packet(&pkt);

    int64_t dts = av_gettime();
    dts = av_rescale_q(dts, (AVRational){1, 1000000}, (AVRational){1, 90000});
    int duration = AUDIO_STREAM_DURATION; // 20
    if(m_prevAudioDts > 0LL) {
        duration = dts - m_prevAudioDts;
    }
    m_prevAudioDts = dts;
    pkt.pts = AV_NOPTS_VALUE;
    pkt.dts = m_currAudioDts;
    m_currAudioDts += duration;
    pkt.duration = duration;

    pkt.stream_index = m_pAudioStream->index;
    pkt.flags |= AV_PKT_FLAG_KEY;
    pkt.data = (uint8_t*) pEncodedData;
    pkt.size = iDataSize;

    int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);
    if(ret < 0) {
        LogErr("Writing audio frame failed: %d", ret);
        return false;
    }

    Log("Writing audio frame done.");

    av_free_packet(&pkt);
    return true;
}

And I added the streams like this -

AVStream* AudioVideoRecorder::AddMediaStream(enum AVCodecID codecID) {
    ................................
    .................................   
    pStream = avformat_new_stream(m_pFormatCtx, codec);
    if (!pStream) {
        LogErr("Could not allocate stream.");
        return NULL;
    }
    pStream->id = m_pFormatCtx->nb_streams - 1;
    pCodecCtx = pStream->codec;
    pCodecCtx->codec_id = codecID;

    switch(codec->type) {
    case AVMEDIA_TYPE_VIDEO:
        pCodecCtx->bit_rate = VIDEO_BIT_RATE;
        pCodecCtx->width = PICTURE_WIDTH;
        pCodecCtx->height = PICTURE_HEIGHT;
        pStream->time_base = (AVRational){1, 90000};
        pStream->avg_frame_rate = (AVRational){90000, 1};
        pStream->r_frame_rate = (AVRational){90000, 1}; // though the frame rate is variable and around 15 fps
        pCodecCtx->pix_fmt = STREAM_PIX_FMT;
        m_pVideoStream = pStream;
        break;

    case AVMEDIA_TYPE_AUDIO:
        pCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
        pCodecCtx->bit_rate = AUDIO_BIT_RATE;
        pCodecCtx->sample_rate = AUDIO_SAMPLE_RATE;
        pCodecCtx->channels = 1;
        m_pAudioStream = pStream;
        break;

    default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if (m_pOutputFmt->flags & AVFMT_GLOBALHEADER)
        m_pFormatCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;

    return pStream;
}

There are several problems with this calculation:

  1. The video is laggy and falls further behind the audio as time goes on.

  2. Suppose an audio frame arrives (WriteAudio(..)) a little late, say by 3 seconds. Then the late frame should start playing with a 3-second delay, but it doesn't; the delayed frame is played back-to-back with the previous frame.

  3. Sometimes I recorded for ~40 seconds, but the file duration is more like 2 minutes; only about 40 seconds of audio/video actually play, the rest of the file contains nothing, and the seek bar jumps to the end immediately after 40 seconds (tested in VLC).

EDIT:

According to Ronald S. Bultje's suggestion, here is what I've understood:

m_pAudioStream->time_base = (AVRational){1, 9000}; // actually no need to set this; 9000 is already the default for audio, as you said
m_pVideoStream->time_base = (AVRational){1, 9000};

should be set, so that both the audio and video streams are in the same time-base units.

And for video:

...................
...................

int64_t dts = av_gettime(); // get current time in microseconds
dts *= 9000; 
dts /= 1000000; // 1 second = 10^6 microseconds
pkt.pts = AV_NOPTS_VALUE; // is it okay?
pkt.dts = dts;
// and no need to set pkt.duration, right?

And for audio (exactly the same as video, right?):

...................
...................

int64_t dts = av_gettime(); // get current time in microseconds
dts *= 9000; 
dts /= 1000000; // 1 second = 10^6 microseconds
pkt.pts = AV_NOPTS_VALUE; // is it okay?
pkt.dts = dts;
// and no need to set pkt.duration, right?

And I think they now effectively share the same currDts, right? Please correct me if I am wrong anywhere or missing anything.
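One note on the arithmetic above: av_gettime() is epoch-based on typical platforms, so the intermediate dts *= 9000 can overflow int64_t. av_rescale_q does the same conversion with overflow-safe intermediate math; a minimal equivalent:

int64_t dts = av_rescale_q(av_gettime(), (AVRational){1, 1000000},
                           (AVRational){1, 9000});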

Also, if I want to use a video stream time base of (AVRational){1, frameRate} and an audio stream time base of (AVRational){1, sampleRate}, what should the correct code look like?
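For reference, a hedged sketch of what that could look like: the same wall-clock value rescaled into each stream's own units (VIDEO_FRAME_RATE and AUDIO_SAMPLE_RATE stand in for the actual rates):

// video stream with time_base = {1, frameRate}
pkt.dts = av_rescale_q(av_gettime(), (AVRational){1, 1000000},
                       (AVRational){1, VIDEO_FRAME_RATE});

// audio stream with time_base = {1, sampleRate}
pkt.dts = av_rescale_q(av_gettime(), (AVRational){1, 1000000},
                       (AVRational){1, AUDIO_SAMPLE_RATE});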

EDIT 2.0:

m_pAudioStream->time_base = (AVRational){1, VIDEO_FRAME_RATE};
m_pVideoStream->time_base = (AVRational){1, VIDEO_FRAME_RATE};

And

bool AudioVideoRecorder::WriteAudio(const unsigned char *pEncodedData, size_t iDataSize) {
    ...........................
    ......................
    AVPacket pkt = {0};
    av_init_packet(&pkt);

    int64_t dts = av_gettime() / 1000; // convert into milliseconds
    dts = dts * VIDEO_FRAME_RATE;
    if(m_dtsOffset < 0) {
        m_dtsOffset = dts;
    }

    pkt.pts = AV_NOPTS_VALUE;
    pkt.dts = (dts - m_dtsOffset);

    pkt.stream_index = m_pAudioStream->index;
    pkt.flags |= AV_PKT_FLAG_KEY;
    pkt.data = (uint8_t*) pEncodedData;
    pkt.size = iDataSize;

    int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);
    if(ret < 0) {
        LogErr("Writing audio frame failed: %d", ret);
        return false;
    }

    Log("Writing audio frame done.");

    av_free_packet(&pkt);
    return true;
}

bool AudioVideoRecorder::WriteVideo(const unsigned char *pData, size_t iDataSize, bool const bIFrame) {
    ........................................
    .................................
    AVPacket pkt = {0};
    av_init_packet(&pkt);
    int64_t dts = av_gettime() / 1000;
    dts = dts * VIDEO_FRAME_RATE;
    if(m_dtsOffset < 0) {
        m_dtsOffset = dts;
    }
    pkt.pts = AV_NOPTS_VALUE;
    pkt.dts = (dts - m_dtsOffset);

    if(bIFrame) {
        pkt.flags |= AV_PKT_FLAG_KEY;
    }
    pkt.stream_index = m_pVideoStream->index;
    pkt.data = (uint8_t*) pData;
    pkt.size = iDataSize;

    int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);

    if(ret < 0) {
        LogErr("Writing video frame failed.");
        return false;
    }

    Log("Writing video frame done.");

    av_free_packet(&pkt);
    return true;
}
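One unit check on this version: av_gettime() / 1000 is milliseconds, so dts * VIDEO_FRAME_RATE yields 1000x the tick count that a {1, VIDEO_FRAME_RATE} time base expects. A direct rescale (hedged: one possible fix, not from the original thread) keeps the units straight:

int64_t dts = av_rescale_q(av_gettime(), (AVRational){1, 1000000},
                           (AVRational){1, VIDEO_FRAME_RATE});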

Is the last change okay? The video and audio seem synced. The only problem is that the audio plays without any delay, regardless of how late the packet arrives. Like -

packet arrival: 1 2 3 4... (then next frame arrived after 3 sec) .. 5

audio played: 1 2 3 4 (no delay) 5

EDIT 3.0:

zeroed audio sample data:

AVFrame* pSilentData;
pSilentData = av_frame_alloc();
memset(&pSilentData->data[0], 0, iDataSize);

pkt.data = (uint8_t*) pSilentData;
pkt.size = iDataSize;

av_freep(&pSilentData->data[0]);
av_frame_free(&pSilentData);

Is this okay? But after writing this into the file container, there is a "dot dot" noise while playing the media. What's the problem?

EDIT 4.0:

Well, for µ-law audio the zero value is represented as 0xff. So -

memset(&pSilentData->data[0], 0xff, iDataSize);

solves my problem.
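For what it's worth, a self-contained sketch of the silent packet without the AVFrame detour (the EDIT 3.0 snippet memsets the frame's pointer table and points pkt.data at the AVFrame struct itself; a plain buffer of encoded µ-law bytes is all the packet needs):

uint8_t* pSilence = (uint8_t*) av_malloc(iDataSize);
memset(pSilence, 0xff, iDataSize); // 0xff is the µ-law zero level

pkt.data = pSilence;
pkt.size = iDataSize;
// fill pts/dts/stream_index as usual, then write and free:
av_interleaved_write_frame(m_pFormatCtx, &pkt);
av_free(pSilence);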

Solution

Timestamps (such as dts) should be in AVStream.time_base units. You're requesting a video timebase of 1/90000 and a default audio timebase (1/9000), but you're using a timebase of 1/100000 to write dts values. I'm also not sure that the requested timebases are guaranteed to be maintained during header writing; your muxer may change the values and expect you to deal with the new values.

So code like this:

int64_t dts = av_gettime();
dts = av_rescale_q(dts, (AVRational){1, 1000000}, (AVRational){1, 90000});
int duration = AUDIO_STREAM_DURATION; // 20
if(m_prevAudioDts > 0LL) {
    duration = dts - m_prevAudioDts;
}

Won't work. Change that to something that uses the audio stream's time base, and don't set the duration unless you know what you're doing. (Same for video.)
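For example, a minimal sketch (reusing the member names from the question) that stays in the stream's own units and re-reads time_base after the header is written:

int64_t now = av_gettime(); // wall clock, microseconds
// m_pAudioStream->time_base is whatever the muxer settled on during
// avformat_write_header(), not necessarily what was requested:
pkt.dts = av_rescale_q(now, (AVRational){1, 1000000},
                       m_pAudioStream->time_base);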

m_prevAudioDts = dts;
pkt.pts = AV_NOPTS_VALUE;
pkt.dts = m_currAudioDts;
m_currAudioDts += duration;
pkt.duration = duration;

This looks creepy, especially combined with the similar code for video. The problem here is that the first packet of both streams will have a timestamp of zero, regardless of the inter-packet delay between the streams. You need one parent currDts shared between all streams, otherwise your streams will be perpetually out of sync.
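A minimal sketch of such a parent clock (hedged: m_startTime and ElapsedUs are hypothetical names, not part of the original code; m_startTime starts as AV_NOPTS_VALUE):

// Shared by WriteAudio() and WriteVideo(); whichever stream writes
// first sets the zero point, so a stream that starts late gets a
// correspondingly late first timestamp.
int64_t AudioVideoRecorder::ElapsedUs() {
    int64_t now = av_gettime(); // microseconds
    if (m_startTime == AV_NOPTS_VALUE)
        m_startTime = now;
    return now - m_startTime;
}

// Then, per stream:
pkt.dts = av_rescale_q(ElapsedUs(), (AVRational){1, 1000000},
                       m_pAudioStream->time_base); // or m_pVideoStream->time_base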

[edit]

So, regarding your edit, if you have audio gaps, I think you need to insert silence (zeroed audio sample data) for the duration of the gap.
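A sketch of that gap filling for this G.711 setup (hedged: assumes 20 ms mono frames at 8000 Hz, i.e. 160 bytes each; frameDuration and WriteSilentPacket are illustrative names, and 0xff is the µ-law zero from EDIT 4.0):

// frameDuration: one 20 ms frame expressed in the audio stream's time base.
// If the new packet lands more than one frame beyond where the previous
// packet ended, pad the gap with silent frames first.
uint8_t silence[160];
memset(silence, 0xff, sizeof(silence)); // µ-law digital silence

while (newDts - m_lastAudioEnd >= frameDuration) {
    WriteSilentPacket(silence, sizeof(silence), m_lastAudioEnd);
    m_lastAudioEnd += frameDuration;
}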
