使用 FFmpeg 库(C++)转码为 Vorbis [英] Transcoding to vorbis using FFmpeg libraries, C++

查看:220
本文介绍了使用FFmpeg库,C ++转码为vorbis的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述



我编写了一个测试程序,用于转码为 Vorbis 格式(WebM 容器)。到目前为止,基于 FFmpeg 的示例,程序基本可以工作,输出文件也能正常播放,但右声道没有声音。我尝试排查了各种可能的原因,但至今没有找到答案。



以下是我使用的代码,供参考:

  #includestdafx.h
#define MAX_AUDIO_PACKET_SIZE(128 * 1024)

#include< iostream>
#include< fstream>

#include< string>
#include< vector>
#include< map>

#include< deque>
#include< queue>

#include< math.h>
#include< stdlib.h>
#include< stdio.h>
#include< conio.h>

externC
{
#includelibavcodec / avcodec.h
#includelibavformat / avformat.h
#include libavdevice / avdevice.h
#includelibswscale / swscale.h
#includelibavutil / dict.h
#includelibavutil / error.h
#包括libavutil / opt.h
#include #include #include< libavutil / samplefmt.h>
#include< libswresample / swresample.h>
}
AVCodecID outputAudioFormat = AV_CODEC_ID_VORBIS;


static int sws_flags = SWS_BICUBIC;
#define STREAM_DURATION 50.0
#define STREAM_FRAME_RATE 25 / * 25 images / s * /
#define STREAM_NB_FRAMES((int)(STREAM_DURATION * STREAM_FRAME_RATE))
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P / * default pix_fmt * /


AVFormatContext * fmt_ctx = NULL;
int audio_stream_index = -1;
AVCodecContext * codec_ctx_audio = NULL;
AVCodec * codec_audio = NULL;
AVFrame * decoded_frame = NULL;
uint8_t ** audio_dst_data = NULL;
int got_frame = 0;
int audiobufsize = 0;
AVPacket input_packet;
int audio_dst_linesize = 0;
int audio_dst_bufsize = 0;
SwrContext * swrContext = NULL;

AVOutputFormat * output_format = NULL;
AVFormatContext * output_fmt_ctx = NULL;
AVStream * audio_st = NULL;
AVStream * video_st = NULL;
AVCodec * audio_codec = NULL;
AVCodec * video_codec = NULL;
double audio_pts = 0.0;
AVFrame * out_frame = avcodec_alloc_frame();

int audio_input_frame_size = 64;

uint8_t * audio_data_buf = NULL;
uint8_t * audio_out = NULL;
int audio_bit_rate;
int audio_sample_rate;
int audio_channels;
int sourceSampleRate = 0;
int destSampleRate = 0;

int dst_nb_samples = 0;
int pivotIndex = 0;
int max_dst_nb_samples = 0;
int samples_count = 0;


int decode_packet();
int open_audio_input(char * src_filename);
int decode_frame();

int open_encoder(char * output_filename);
AVStream * add_audio_stream(AVFormatContext * oc,AVCodec ** codec,
enum AVCodecID codec_id);
int open_audio(AVFormatContext * oc,AVCodec * codec,AVStream * st);
void close_audio(AVFormatContext * oc,AVStream * st);
void write_audio_frame(uint8_t ** audio_src_data,int audio_src_bufsize);

静态AVFrame *框架;
static AVPicture src_picture,dst_picture;
static int frame_count;
/ *添加输出流。 * /
static AVStream * add_stream(AVFormatContext * oc,AVCodec ** codec,
enum AVCodecID codec_id)
{
AVCodecContext * c;
AVStream * st;

/ *找到编码器* /
* codec = avcodec_find_encoder(codec_id);
如果(!(*编解码器)){
fprintf(stderr,找不到'%s'\\\
的编码器,
avcodec_get_name(codec_id));
exit(1);
}

st = avformat_new_stream(oc,* codec);
if(!st){
fprintf(stderr,Could not allocate stream\\\
);
exit(1);
}
st-> id = oc-> nb_streams-1;
c = st-> codec;

switch((* codec) - > type){
case AVMEDIA_TYPE_AUDIO:
c-> sample_fmt =(* codec) - > sample_fmts?
(* codec) - > sample_fmts [0]:AV_SAMPLE_FMT_FLTP;
c-> bit_rate = 64000;
c-> sample_rate = 44100;
c-> channels = 2;
break;

案例AVMEDIA_TYPE_VIDEO:
c-> codec_id = codec_id;

c-> bit_rate = 400000;
/ *分辨率必须是两倍的倍数。 * /
c-> width = 352;
c-> height = 288;
/ *时基:这是代表帧时间戳的基本单位时间(以秒为单位),以
*为单位。对于固定fps内容,
*时基应为1 /帧速率,时间戳增量应为
*与1相同。* /
c-> time_base.den = STREAM_FRAME_RATE;
c-> time_base.num = 1;
c-> gop_size = 12; / *最多每十二个帧发出一个帧内* /
c-> pix_fmt = STREAM_PIX_FMT;
if(c-> codec_id == AV_CODEC_ID_MPEG2VIDEO){
/ *只是为了测试,我们还添加了B帧* /
c-> max_b_frames = 2;
}
if(c-> codec_id == AV_CODEC_ID_MPEG1VIDEO){
/ *需要避免使用某些coeffs溢出的宏块。
*这不会发生在正常的视频,它只是发生在这里作为
*色度平面的运动不符合亮度平面。 * /
c-> mb_decision = 2;
}
break;

默认值:
break;
}

/ *某些格式要将流标题分开。 * /
if(oc-> oformat-> flags& AVFMT_GLOBALHEADER)
c-> flags | = CODEC_FLAG_GLOBAL_HEADER;

return st;
}


static void open_video(AVFormatContext * oc,AVCodec * codec,AVStream * st)
{
int ret;
AVCodecContext * c = st-> codec;

/ *打开编解码器* /
ret = avcodec_open2(c,codec,NULL);
if(ret< 0){
// fprintf(stderr,无法打开视频编解码器:%s\\\
,av_err2str(ret));
exit(1);
}

/ *分配和初始化一个可重用的框架* /
frame = av_frame_alloc();
if(!frame){
fprintf(stderr,Could not assigned video frame\\\
);
exit(1);
}
frame-> format = c-> pix_fmt;
frame-> width = c-> width;
frame-> height = c-> height;

/ *分配编码的原始图片。 * /
ret = avpicture_alloc(& dst_picture,c-> pix_fmt,c-> width,c-> height);
if(ret< 0){
// fprintf(stderr,无法分配图片:%s\\\
,av_err2str(ret));
exit(1);
}

/ *如果输出格式不是YUV420P,那么也需要一个临时的YUV420P
*图片。然后将其转换为所需的
*输出格式。 * /
if(c-> pix_fmt!= AV_PIX_FMT_YUV420P){
ret = avpicture_alloc(& src_picture,AV_PIX_FMT_YUV420P,c-> width,c-> height);
if(ret< 0){
// fprintf(stderr,无法分配临时图片:%s\\\

// av_err2str(ret));
exit(1);
}
}

/ *复制数据并将图像的指针排序到帧* /
*((AVPicture *)frame)= dst_picture;
}

int open_audio_input(char * src_filename)
{
int i = 0;
/ *打开输入文件,并分配格式上下文* /
if(avformat_open_input(& fmt_ctx,src_filename,NULL,NULL)< 0)
{
fprintf(stderr ,无法打开源文件%s\\\
,src_filename);
exit(1);
}

//检索流信息
if(avformat_find_stream_info(fmt_ctx,NULL)<0)
return -1; //找不到流信息

//将文件转储到标准错误
av_dump_format(fmt_ctx,0,src_filename,0);

//找到第一个视频流
(i = 0; i< fmt_ctx-> nb_streams; i ++)
{
if(fmt_ctx-> stream [i] - > codec-> codec_type == AVMEDIA_TYPE_AUDIO)
{
audio_stream_index = i;
break;
}
}
if(audio_stream_index!= -1)
{
//获取指向音频流的编解码器上下文的指针
codec_ctx_audio = fmt_ctx->流[audio_stream_index] - GT;编解码器;

//找到视频流的解码器
codec_audio = avcodec_find_decoder(codec_ctx_audio-> codec_id);
if(codec_audio == NULL){
fprintf(stderr,Unsupported audio codec!\\\
);
return -1; // Codec not found
}

//打开编解码器
AVDictionary * codecDictOptions = NULL;
if(avcodec_open2(codec_ctx_audio,codec_audio,& codecDictOptions)< 0)
return -1; //无法打开编解码器

//设置SWR上下文一旦你有编解码器信息
swrContext = swr_alloc();
av_opt_set_int(swrContext,in_channel_layout,codec_ctx_audio-> channel_layout,0);
av_opt_set_int(swrContext,out_channel_layout,codec_ctx_audio-> channel_layout,0);
av_opt_set_int(swrContext,in_sample_rate,codec_ctx_audio-> sample_rate,0);
av_opt_set_int(swrContext,out_sample_rate,codec_ctx_audio-> sample_rate,0);
av_opt_set_sample_fmt(swrContext,in_sample_fmt,codec_ctx_audio-> sample_fmt,0);
if(outputAudioFormat == AV_CODEC_ID_VORBIS)
{
av_opt_set_sample_fmt(swrContext,out_sample_fmt,AV_SAMPLE_FMT_FLTP,0);
}
else
{
av_opt_set_sample_fmt(swrContext,out_sample_fmt,AV_SAMPLE_FMT_S16,0);
}
int rv = swr_init(swrContext);

sourceSampleRate = destSampleRate = codec_ctx_audio-> sample_rate;

//分配音频帧
if(decoded_frame == NULL)decoded_frame = avcodec_alloc_frame();
int nb_planes = 0;
AVStream * audio_stream = fmt_ctx-> streams [audio_stream_index];
nb_planes = av_sample_fmt_is_planar(codec_ctx_audio-> sample_fmt)? codec_ctx_audio-> channels:1;
int tempSize = sizeof(uint8_t *)* nb_planes;
audio_dst_data =(uint8_t **)av_mallocz(tempSize);
如果(!audio_dst_data)
{
fprintf(stderr,无法分配音频数据缓冲区\);
}
else
{
for(int i = 0; i< nb_planes; i ++)
{
audio_dst_data [i] = NULL ;
}
}
}
}

int decode_frame()
{
int rv = 0;
got_frame = 0;
if(fmt_ctx == NULL)
{
return rv;
}
int ret = 0;
audiobufsize = 0;
rv = av_read_frame(fmt_ctx,& input_packet);
if(rv< 0)
{
return rv;
}
rv = decode_packet();
//释放由av_read_frame
av_free_packet(& input_packet)分配的input_packet;
return rv;
}

int decode_packet()
{
int rv = 0;
int ret = 0;

//音频流?
if(input_packet.stream_index == audio_stream_index)
{
avcodec_get_frame_defaults(decoded_frame);
while(input_packet.size> 0)
{
int result = avcodec_decode_audio4(codec_ctx_audio,decoded_frame,& got_frame,& input_packet);
if(result< 0)
{
fprintf(stderr,Error decoding audio frame\\\
);
// return ret;
}
else
{
if(got_frame)
{
dst_nb_samples =(int)av_rescale_rnd(swr_get_delay(swrContext,sourceSampleRate)+ decoded_frame-> nb_samples,sourceSampleRate,destSampleRate,AV_ROUND_UP);
if(dst_nb_samples> max_dst_nb_samples)
{
max_dst_nb_samples = dst_nb_samples;
if(audio_dst_data [0])
{
av_freep(& audio_dst_data [0]);
audio_dst_data [0] = NULL;
}
}
if(audio_dst_data [0] == NULL)
{
if(outputAudioFormat == AV_CODEC_ID_VORBIS)
{
ret = av_samples_alloc(audio_dst_data,& audio_dst_linesize,codec_ctx_audio-> channels,
decoded_frame-> nb_samples,(AVSampleFormat)AV_SAMPLE_FMT_FLTP,0);
}
else
{
ret = av_samples_alloc(audio_dst_data,& audio_dst_linesize,codec_ctx_audio-> channels,
decoded_frame-> nb_samples,(AVSampleFormat)AV_SAMPLE_FMT_S16, 0);
}
}
/ * TODO:扩展av_samples_ *函数的返回码,以便不需要此调用* /
int resampled = swr_convert(swrContext,audio_dst_data,out_frame- > nb_samples,
(const uint8_t **)(decoded_frame-> extended_data),decoded_frame-> nb_samples);
char str [900] =;
sprintf(str,out_frame-> nb_samples:\t%d; decoded_frame-> nb_samples:\t%d,out_frame-> nb_samples,decoded_frame-> nb_samples);
if(outputAudioFormat == AV_CODEC_ID_VORBIS)
{
audio_dst_bufsize = av_samples_get_buffer_size(& audio_dst_linesize,decoding_frame-> channels,resampled,(AVSampleFormat)AV_SAMPLE_FMT_FLTP,1);
}
else
{
audio_dst_bufsize = av_samples_get_buffer_size(& audio_dst_linesize,decoding_frame-> channels,resampled,(AVSampleFormat)AV_SAMPLE_FMT_S16,1);
}

input_packet.size - = result;
input_packet.data + = result;
}
else
{
input_packet.size = 0;
input_packet.data = NULL;
}
}
}
}
return rv;
}

int open_encoder(char * output_filename)
{
int rv = 0;

/ *分配输出媒体上下文* /
AVOutputFormat * opfmt = NULL;

avformat_alloc_output_context2(& output_fmt_ctx,opfmt,NULL,output_filename);
如果(!output_fmt_ctx){
printf(无法从文件扩展名推导出输出格式:使用MPEG.\\\
);
avformat_alloc_output_context2(& output_fmt_ctx,NULL,mpeg,output_filename);
}
if(!output_fmt_ctx){
rv = -1;
}
else
{
output_format = output_fmt_ctx-> oformat;
}

/ *使用默认格式编解码器
*添加音频流,并初始化编解码器。 * /
audio_st = NULL;

if(output_fmt_ctx)
{
if(output_format-> audio_codec!= AV_CODEC_ID_NONE)
{
audio_st = add_audio_stream(output_fmt_ctx,& audio_codec ,output_format-> audio_codec);
}

/ *现在所有参数都设置好了,我们可以打开音频和
*视频编解码器并分配必要的编码缓冲区。 * /

if(audio_st)
{
rv = open_audio(output_fmt_ctx,audio_codec,audio_st);
if(rv< 0)return rv;
}

av_dump_format(output_fmt_ctx,0,output_filename,1);
/ *如果需要,打开输出文件* /
if(!(output_format-> flags& AVFMT_NOFILE))
{
if(avio_open(& output_fmt_ctx- > pb,output_filename,AVIO_FLAG_WRITE)< 0){
fprintf(stderr,无法打开'%s'\\\
,output_filename);
rv = -1;
}
else
{
/ *编写流标题,如果有的话。 * /
if(avformat_write_header(output_fmt_ctx,NULL)< 0)
{
fprintf(stderr,打开输出文件时出错);
rv = -1;
}
}
}
}

return rv;
}

AVStream * add_audio_stream(AVFormatContext * oc,AVCodec ** codec,
enum AVCodecID codec_id)
{
AVCodecContext * c;
AVStream * st;

/ *找到音频编码器* /
* codec = avcodec_find_encoder(codec_id);
如果(!(*编解码器)){
fprintf(stderr,找不到codec\\\
);
exit(1);
}

st = avformat_new_stream(oc,* codec);
if(!st){
fprintf(stderr,Could not allocate stream\\\
);
exit(1);
}
st-> id = 1;

c = st-> codec;

/ * put sample parameters * /
if(outputAudioFormat == AV_CODEC_ID_VORBIS)
{
c-> sample_fmt = AV_SAMPLE_FMT_FLTP;
}
else
{
c-> sample_fmt = AV_SAMPLE_FMT_S16;
}

c-> bit_rate = audio_bit_rate;
c-> sample_rate = audio_sample_rate;
c-> channels = audio_channels;

//某些格式要流分头
if(oc-> oformat-> flags& AVFMT_GLOBALHEADER)
c-> flags | = CODEC_FLAG_GLOBAL_HEADER;

return st;
}

int open_audio(AVFormatContext * oc,AVCodec * codec,AVStream * st)
{
int ret = 0;
AVCodecContext * c;

st-> duration = fmt_ctx-> duration;
c = st-> codec;

/ *打开它* /
ret = avcodec_open2(c,codec,NULL);
if(ret< 0)
{
fprintf(stderr,can not open codec\\\
);
return -1;
// exit(1);
}

if(c-> codec->功能& CODEC_CAP_VARIABLE_FRAME_SIZE)
audio_input_frame_size = 10000;
else
audio_input_frame_size = c-> frame_size;
out_frame-> nb_samples = audio_input_frame_size;
int tempSize = audio_input_frame_size *
av_get_bytes_per_sample(c-> sample_fmt)*
c-> channels;
return ret;
}

void close_audio(AVFormatContext * oc,AVStream * st)
{
avcodec_close(st-> codec);
}

void write_audio_frame(uint8_t ** audio_dst_data,int audio_dst_bufsize)
{
AVFormatContext * oc = output_fmt_ctx;
AVStream * st = audio_st;
if(oc == NULL || st == NULL)return;
AVCodecContext * c;
AVPacket pkt = {0}; //数据和大小必须为0;
int got_packet = 0,ret = 0;

av_init_packet(& pkt);
c = st-> codec;

out_frame-> nb_samples = audio_input_frame_size;

AVRational r;
r.num = 1;
r.den = c-> sample_rate;
out_frame-> pts = av_rescale_q(samples_count,(AVRational)r,c-> time_base);
avcodec_fill_audio_frame(out_frame,c-> channels,c-> sample_fmt,
audio_dst_data [0],audio_dst_bufsize,0);
samples_count + = out_frame-> nb_samples;

ret = avcodec_encode_audio2(c,& pkt,out_frame,& got_packet);
if(ret< 0)
{
return;
}

if(!got_packet)
return;

/ *从编解码器到流时基重新缩放输出数据包时间戳值* /
pkt.pts = av_rescale_q_rnd(pkt.pts,c-> time_base,st-> time_base,(AVRounding )(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts,c-> time_base,st-> time_base,(AVROUNDing)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration,c-> time_base,st-> time_base);
pkt.stream_index = st-> index;

char str [999] =;
sprintf(str,out_frame-> nb_samples:\t%d,out_frame-> nb_samples);
/ *将压缩帧写入媒体文件。 * /
ret = av_interleaved_write_frame(oc,& pkt);
if(ret!= 0)
{
exit(1);
}
av_free_packet(& pkt);
}

void write_delayed_frames(AVFormatContext * oc,AVStream * st)
{
AVCodecContext * c = st-> codec;
int got_output = 0;
int ret = 0;
AVPacket pkt;
pkt.data = NULL;
pkt.size = 0;
av_init_packet(& pkt);
int i = 0;
for(got_output = 1; got_output; i ++)
{
ret = avcodec_encode_audio2(c,& pkt,NULL,& got_output);
if(ret< 0)
{
fprintf(stderr,error encoding frame\\\
);
exit(1);
}
static int64_t tempPts = 0;
static int64_t tempDts = 0;
/ *如果大小为零,则表示图像被缓冲。 * /
if(got_output)
{
pkt.pts = av_rescale_q_rnd(pkt.pts,c-> time_base,st-> time_base,(AVROUNDing)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX)) ;
pkt.dts = av_rescale_q_rnd(pkt.dts,c-> time_base,st-> time_base,(AVROUNDing)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration,c-> time_base,st-> time_base);
pkt.stream_index = st-> index;
if(c&& c-> coded_frame& c-> coded_frame-> key_frame)
pkt.flags | = AV_PKT_FLAG_KEY;
/ *将压缩帧写入媒体文件。 * /
ret = av_interleaved_write_frame(oc,& pkt);
}
else
{
ret = 0;
}
av_free_packet(& pkt);
}
}

int main(int argc,char ** argv)
{
/ *注册所有格式和编解码器* /
av_register_all();
avcodec_register_all();
avformat_network_init();
avdevice_register_all();
int i = 0;
int ret = 0;
char src_filename [90] =test.mp2;
char dst_filename [90] =output.webm;
outputAudioFormat = AV_CODEC_ID_VORBIS;
open_audio_input(src_filename);
if(codec_ctx_audio-> bit_rate == 0)codec_ctx_audio-> bit_rate = 112000;
audio_bit_rate = codec_ctx_audio-> bit_rate;
audio_sample_rate = codec_ctx_audio-> sample_rate;
audio_channels = codec_ctx_audio-> channels;
open_encoder(dst_filename);
int frames = 0;
while(1)
{
int rv = decode_frame();
if(rv< 0)
{
break;
}

if(audio_st)
{
audio_pts = audio_st-> pts.val * av_q2d(audio_st-> time_base);
}
else
{
audio_pts = 0.0;
}
if(codec_ctx_audio)
{
if(got_frame)
{
write_audio_frame(audio_dst_data,audio_dst_bufsize);
frames ++;
}
}
printf(\\\
audio_pts:%f,audio_pts);
}
while(1)
{
dst_nb_samples =(int)av_rescale_rnd(swr_get_delay(swrContext,sourceSampleRate)+ decoded_frame-> nb_samples,sourceSampleRate,destSampleRate,AV_ROUND_UP);
if(dst_nb_samples> max_dst_nb_samples)
{
max_dst_nb_samples = dst_nb_samples;
if(audio_dst_data [0])
{
av_freep(& audio_dst_data [0]);
audio_dst_data [0] = NULL;
}
}
if(audio_dst_data [0] == NULL)
{
if(outputAudioFormat == AV_CODEC_ID_VORBIS)
{
ret = av_samples_alloc(audio_dst_data,NULL,codec_ctx_audio-> channels,
decoded_frame-> nb_samples,(AVSampleFormat)AV_SAMPLE_FMT_FLTP,0);
}

{
ret = av_samples_alloc(audio_dst_data,NULL,codec_ctx_audio-> channels,
decoded_frame-> nb_samples,(AVSampleFormat)AV_SAMPLE_FMT_S16,0) ;
}
}
int resampled = swr_convert(swrContext,audio_dst_data,out_frame-> nb_samples,NULL,0);
if(outputAudioFormat == AV_CODEC_ID_VORBIS)
{
audio_dst_bufsize = av_samples_get_buffer_size(& audio_dst_linesize,decoding_frame-> channels,resampled,(AVSampleFormat)AV_SAMPLE_FMT_FLTP,1);
}
else
{
audio_dst_bufsize = av_samples_get_buffer_size(& audio_dst_linesize,decoding_frame-> channels,resampled,(AVSampleFormat)AV_SAMPLE_FMT_S16,1);
}
if(audio_dst_bufsize< = 0)break;
audio_pts = audio_st-> pts.val * av_q2d(audio_st-> time_base);
printf(\\\
audio_pts:%f,audio_pts);
write_audio_frame(audio_dst_data,audio_dst_bufsize);
}
write_delayed_frames(output_fmt_ctx,audio_st);
av_write_trailer(output_fmt_ctx);
close_audio(output_fmt_ctx,audio_st);
swr_free(& swrContext);
avcodec_free_frame(& out_frame);
getch();
return 0;
}

运行环境:Windows 7,使用 Zeranoe FFmpeg 32 位构建,各库版本如下:

  libavutil 52. 62.100 / 52. 62.100 
libavcodec 55. 47.101 / 55. 47.101
libavformat 55. 22.103 / 55. 22.103
libavdevice 55. 5.102 / 55. 5.102
libavfilter 4. 1.100 / 4. 1.100
libswscale 2. 5.101 / 2. 5.101
libswresample 0. 17.104 / 0. 17.104
libpostproc 52. 3.100 / 52. 3.100

有人能指出问题可能出在哪里吗?感谢任何指导!

解决方案

我想我终于找到了解决方案。FFmpeg 附带的重采样示例(至少其中一个)可能具有误导性,或许需要更正。按照 swr_convert 的文档,audio_dst_data 甚至可以是一个更大的缓冲区,以避免内部缓冲:

  * 如果提供的输入多于输出空间,那么多余的输入将被缓冲。
  * 您可以通过提供比输入更多的输出空间来避免这种缓冲。
  * 只要有可能,转换就会直接进行而无需复制。

这一说法可能并不准确(理论上成立,运行时也不会报出明显的错误,但正如我所发现的,它有时会导致奇怪的行为)。



我的解决方案:不要让 audio_dst_data 缓冲区的大小超过输出编解码器的帧大小——这样它就能完美工作。



也许有人能修正 swresample 库或重采样示例,或者至少把这一点在文档中写得更清楚。


I have made a test application to transcode to vorbis format (webm container).

So far, based on FFmpeg examples, things are somewhat working, and output file plays properly, but sound in right channel is missing. I tried looking at different possibilities, but so far could not find any answer.

For reference, this is the code I am using:

#include "stdafx.h"
#define MAX_AUDIO_PACKET_SIZE (128 * 1024)

#include <iostream>
#include <fstream>

#include <string>
#include <vector>
#include <map>

#include <deque>
#include <queue>

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <conio.h>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libavutil/dict.h"
#include "libavutil/error.h"
#include "libavutil/opt.h"
#include <libavutil/fifo.h>
#include <libavutil/imgutils.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
}
// Target audio codec. open_audio_input() and decode_packet() resample to
// planar float (FLTP) when this is Vorbis and to packed S16 otherwise.
AVCodecID           outputAudioFormat = AV_CODEC_ID_VORBIS;


// The scaler flag and STREAM_* macros below are only used by the video helpers
// (add_stream); sws_flags, STREAM_DURATION and STREAM_NB_FRAMES are not
// referenced by any code visible in this listing.
static int sws_flags = SWS_BICUBIC;
#define STREAM_DURATION   50.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_NB_FRAMES  ((int)(STREAM_DURATION * STREAM_FRAME_RATE))
#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */


// ---- Input (demux/decode) state, set up by open_audio_input() ----
// Demuxer context of the source file.
AVFormatContext*    fmt_ctx= NULL;
// Index of the first audio stream in fmt_ctx; stays -1 until one is found.
int                    audio_stream_index = -1;
// Decoder context and decoder of that audio stream.
AVCodecContext *    codec_ctx_audio = NULL;
AVCodec*            codec_audio = NULL;
// Frame that avcodec_decode_audio4() fills in decode_packet().
AVFrame*            decoded_frame = NULL;
// Array of plane pointers receiving the resampled audio (swr_convert output).
uint8_t**            audio_dst_data = NULL;
// Set by the decoder: non-zero when decoded_frame holds a new frame.
int                    got_frame = 0;
// Reset to 0 by decode_frame(); not otherwise used in the visible code.
int                    audiobufsize = 0;
// Packet most recently read by av_read_frame() in decode_frame().
AVPacket            input_packet;
// Line size and total byte size of the data in audio_dst_data, as reported by
// av_samples_alloc() / av_samples_get_buffer_size().
int                    audio_dst_linesize = 0;
int                    audio_dst_bufsize = 0;
// Resampler converting the decoder's sample format to the encoder's.
SwrContext *        swrContext = NULL;

// ---- Output (encode/mux) state, set up by open_encoder() ----
AVOutputFormat *    output_format = NULL ;
AVFormatContext *    output_fmt_ctx= NULL;
AVStream *            audio_st = NULL;
// video_st / video_codec are not referenced by any code visible in this listing.
AVStream*           video_st = NULL;
AVCodec *            audio_codec = NULL;
AVCodec*            video_codec = NULL;
double                audio_pts = 0.0;
// Frame handed to the encoder. Its nb_samples is also used as the output
// capacity passed to swr_convert() in decode_packet().
// NOTE(review): this initializer runs before main(), which is only legal in C++.
AVFrame *            out_frame = avcodec_alloc_frame();

// Samples per encoder frame; 64 is only the initial value.
int                    audio_input_frame_size = 64;

// audio_data_buf / audio_out are not referenced by any code visible in this listing.
uint8_t *            audio_data_buf = NULL;
uint8_t *            audio_out = NULL;
// Encoder parameters; presumably copied from the decoder by main() before
// open_encoder() is called -- verify against main().
int                    audio_bit_rate;
int                    audio_sample_rate;
int                    audio_channels;
// Both are set to the decoder's sample rate in open_audio_input(), so no
// sample-rate conversion takes place.
int                 sourceSampleRate=0;
int                 destSampleRate = 0;

// Output sample count estimated for the current decoded frame, and the largest
// such estimate seen so far (decode_packet() reallocates when it grows).
int                 dst_nb_samples = 0;
// Not referenced by any code visible in this listing.
int                 pivotIndex = 0;
int                 max_dst_nb_samples = 0;
// Running count of samples handed to the encoder; presumably the basis for
// frame timestamps -- verify against write_audio_frame().
int                 samples_count=0;


// Input side.
int decode_packet();
int open_audio_input(char* src_filename);
int decode_frame();

// Output side.
int open_encoder(char* output_filename);
AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,
    enum AVCodecID codec_id);
int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st);
void close_audio(AVFormatContext *oc, AVStream *st);
void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize);

// Video scratch state used by open_video(); frame_count is not referenced by
// any code visible in this listing.
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
static int frame_count;
/*
 * Create a new output stream on `oc` for the encoder identified by `codec_id`.
 *
 * The encoder that was found is stored in *codec. The stream's codec context
 * is pre-filled with fixed defaults chosen by the encoder's media type
 * (audio: 64 kbit/s, 44.1 kHz, stereo; video: 400 kbit/s, 352x288 at
 * STREAM_FRAME_RATE). Exits the process if the encoder cannot be found or the
 * stream cannot be allocated. Returns the new stream.
 */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec,
                            enum AVCodecID codec_id)
{
    /* Look up the encoder and publish it to the caller. */
    AVCodec *encoder = avcodec_find_encoder(codec_id);
    *codec = encoder;
    if (encoder == NULL) {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(codec_id));
        exit(1);
    }

    AVStream *stream = avformat_new_stream(oc, encoder);
    if (stream == NULL) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    /* The stream was just appended, so its id is the last index. */
    stream->id = oc->nb_streams-1;

    AVCodecContext *ctx = stream->codec;

    if (encoder->type == AVMEDIA_TYPE_AUDIO) {
        /* Use the encoder's first supported sample format when it lists any. */
        if (encoder->sample_fmts)
            ctx->sample_fmt = encoder->sample_fmts[0];
        else
            ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
        ctx->bit_rate    = 64000;
        ctx->sample_rate = 44100;
        ctx->channels    = 2;
    } else if (encoder->type == AVMEDIA_TYPE_VIDEO) {
        ctx->codec_id = codec_id;
        ctx->bit_rate = 400000;

        /* Resolution must be a multiple of two. */
        ctx->width  = 352;
        ctx->height = 288;

        /* The time base is the unit in which frame timestamps are expressed;
         * for fixed-fps content it is 1/framerate and timestamps advance by 1. */
        ctx->time_base.num = 1;
        ctx->time_base.den = STREAM_FRAME_RATE;

        /* Emit one intra frame every twelve frames at most. */
        ctx->gop_size = 12;
        ctx->pix_fmt  = STREAM_PIX_FMT;

        /* Just for testing, also add B frames. */
        if (ctx->codec_id == AV_CODEC_ID_MPEG2VIDEO)
            ctx->max_b_frames = 2;

        /* Avoids macroblocks in which some coefficients overflow. This does not
         * happen with normal video; it happens here because the motion of the
         * chroma plane does not match the luma plane. */
        if (ctx->codec_id == AV_CODEC_ID_MPEG1VIDEO)
            ctx->mb_decision = 2;
    }
    /* Any other media type keeps the context defaults. */

    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;

    return stream;
}


static void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    int ret;
    AVCodecContext *c = st->codec;

    /* open the codec */
    ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        //fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
        exit(1);
    }

    /* allocate and init a re-usable frame */
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    frame->format = c->pix_fmt;
    frame->width = c->width;
    frame->height = c->height;

    /* Allocate the encoded raw picture. */
    ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
    if (ret < 0) {
        //fprintf(stderr, "Could not allocate picture: %s\n", av_err2str(ret));
        exit(1);
    }

    /* If the output format is not YUV420P, then a temporary YUV420P
     * picture is needed too. It is then converted to the required
     * output format. */
    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
        ret = avpicture_alloc(&src_picture, AV_PIX_FMT_YUV420P, c->width, c->height);
        if (ret < 0) {
            //fprintf(stderr, "Could not allocate temporary picture: %s\n",
            //        av_err2str(ret));
            exit(1);
        }
    }

    /* copy data and linesize picture pointers to frame */
    *((AVPicture *)frame) = dst_picture;
}

int open_audio_input(char* src_filename)
{
    int i =0;
    /* open input file, and allocate format context */
    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0)
    {
        fprintf(stderr, "Could not open source file %s\n", src_filename);
        exit(1);
    }

    // Retrieve stream information
    if(avformat_find_stream_info(fmt_ctx, NULL)<0)
        return -1; // Couldn't find stream information

    // Dump information about file onto standard error
    av_dump_format(fmt_ctx, 0, src_filename, 0);

    // Find the first video stream
    for(i=0; i<fmt_ctx->nb_streams; i++)
    {
        if(fmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO)
        {
            audio_stream_index=i;
            break;
        }
    }
    if ( audio_stream_index != -1 )
    {
        // Get a pointer to the codec context for the audio stream
        codec_ctx_audio=fmt_ctx->streams[audio_stream_index]->codec;

        // Find the decoder for the video stream
        codec_audio=avcodec_find_decoder(codec_ctx_audio->codec_id);
        if(codec_audio==NULL) {
            fprintf(stderr, "Unsupported audio codec!\n");
            return -1; // Codec not found
        }

        // Open codec
        AVDictionary *codecDictOptions = NULL;
        if(avcodec_open2(codec_ctx_audio, codec_audio, &codecDictOptions)<0)
            return -1; // Could not open codec

        // Set up SWR context once you've got codec information
        swrContext = swr_alloc();
        av_opt_set_int(swrContext, "in_channel_layout",  codec_ctx_audio->channel_layout, 0);
        av_opt_set_int(swrContext, "out_channel_layout", codec_ctx_audio->channel_layout,  0);
        av_opt_set_int(swrContext, "in_sample_rate",     codec_ctx_audio->sample_rate, 0);
        av_opt_set_int(swrContext, "out_sample_rate",    codec_ctx_audio->sample_rate, 0);
        av_opt_set_sample_fmt(swrContext, "in_sample_fmt",  codec_ctx_audio->sample_fmt, 0);
        if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
        {
            av_opt_set_sample_fmt(swrContext, "out_sample_fmt", AV_SAMPLE_FMT_FLTP,  0);
        }
        else
        {
            av_opt_set_sample_fmt(swrContext, "out_sample_fmt", AV_SAMPLE_FMT_S16,  0);
        }
        int rv = swr_init(swrContext);

        sourceSampleRate    =   destSampleRate = codec_ctx_audio->sample_rate;

        // Allocate audio frame
        if ( decoded_frame == NULL ) decoded_frame = avcodec_alloc_frame();
        int nb_planes = 0;
        AVStream* audio_stream = fmt_ctx->streams[audio_stream_index];
        nb_planes = av_sample_fmt_is_planar(codec_ctx_audio->sample_fmt) ? codec_ctx_audio->channels : 1;
        int tempSize =  sizeof(uint8_t *) * nb_planes;
        audio_dst_data = (uint8_t**)av_mallocz(tempSize);
        if (!audio_dst_data)
        {
            fprintf(stderr, "Could not allocate audio data buffers\n");
        }
        else
        {
            for ( int i = 0 ; i < nb_planes ; i ++ )
            {
                audio_dst_data[i] = NULL;
            }
        }
    }
}

/*
 * Read the next packet from the input and run it through decode_packet().
 *
 * Clears got_frame first. Returns 0 without reading anything when no input is
 * open, the negative av_read_frame() result when reading fails (including end
 * of file), and otherwise whatever decode_packet() returns.
 */
int decode_frame()
{
    got_frame = 0;

    // No input has been opened: nothing to read.
    if (fmt_ctx == NULL)
        return 0;

    audiobufsize = 0;

    int status = av_read_frame(fmt_ctx, &input_packet);
    if (status >= 0)
    {
        status = decode_packet();
        // Release the packet that av_read_frame handed out.
        av_free_packet(&input_packet);
    }
    return status;
}

int decode_packet()
{
    int rv = 0;
    int ret = 0;

    //audio stream?
    if(input_packet.stream_index == audio_stream_index)
    {
        avcodec_get_frame_defaults(decoded_frame);
        while( input_packet.size > 0 )
        {
            int result = avcodec_decode_audio4(codec_ctx_audio, decoded_frame, &got_frame, &input_packet);
            if ( result < 0)
            {
                fprintf(stderr, "Error decoding audio frame\n");
                //return ret;
            }
            else
            {
                if ( got_frame )
                {
                    dst_nb_samples = (int)av_rescale_rnd(swr_get_delay(swrContext, sourceSampleRate) + decoded_frame->nb_samples, sourceSampleRate, destSampleRate, AV_ROUND_UP);
                    if ( dst_nb_samples > max_dst_nb_samples )
                    {
                        max_dst_nb_samples = dst_nb_samples;
                        if ( audio_dst_data[0] )
                        {
                            av_freep(&audio_dst_data[0]);
                            audio_dst_data[0] = NULL;
                        }
                    }
                    if ( audio_dst_data[0] == NULL )
                    {
                        if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
                        {
                            ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, codec_ctx_audio->channels,
                                decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 0);
                        }
                        else
                        {
                            ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, codec_ctx_audio->channels,
                                decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_S16, 0);
                        }
                    }
                    /* TODO: extend return code of the av_samples_* functions so that this call is not needed */
                    int resampled  = swr_convert(swrContext, audio_dst_data, out_frame->nb_samples,
                        (const uint8_t **)(decoded_frame->extended_data), decoded_frame->nb_samples);
                    char str[900]="";
                    sprintf(str,"out_frame->nb_samples:\t%d; decoded_frame->nb_samples:\t%d",out_frame->nb_samples,decoded_frame->nb_samples );
                    if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
                    {
                        audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 1);
                    }
                    else
                    {
                        audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_S16, 1);
                    }

                    input_packet.size -= result;
                    input_packet.data += result;
                }
                else
                {
                    input_packet.size   =   0;
                    input_packet.data   =   NULL;
                }
            }
        }
    }
    return rv;
}

/*
 * Set up the output side: allocate the muxer context for output_filename,
 * add and open the audio stream, open the output file when the muxer needs
 * one, and write the container header.
 *
 * Results are stored in the globals output_fmt_ctx, output_format, audio_st
 * and audio_codec.
 *
 * Returns 0 on success and a negative value on failure.
 */
int open_encoder(char* output_filename )
{
    int rv = 0;

    /* No audio stream exists until add_audio_stream() succeeds. */
    audio_st = NULL;

    /* allocate the output media context, guessing the format from the file name */
    avformat_alloc_output_context2(&output_fmt_ctx, NULL, NULL, output_filename);
    if (!output_fmt_ctx) {
        printf("Could not deduce output format from file extension: using MPEG.\n");
        avformat_alloc_output_context2(&output_fmt_ctx, NULL, "mpeg", output_filename);
    }
    if (!output_fmt_ctx) {
        return -1;
    }
    output_format = output_fmt_ctx->oformat;

    /* Add the audio stream using the default format codecs
    * and initialize the codecs. */
    if (output_format->audio_codec != AV_CODEC_ID_NONE)
    {
        audio_st = add_audio_stream(output_fmt_ctx, &audio_codec, output_format->audio_codec);
    }

    /* Now that all the parameters are set, we can open the audio
    * codec and allocate the necessary encode buffers. */
    if (audio_st)
    {
        rv = open_audio(output_fmt_ctx, audio_codec, audio_st);
        if ( rv < 0 ) return rv;
    }

    av_dump_format(output_fmt_ctx, 0, output_filename, 1);

    /* open the output file, if needed */
    if (!(output_format->flags & AVFMT_NOFILE))
    {
        if (avio_open(&output_fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE) < 0) {
            fprintf(stderr, "Could not open '%s'\n", output_filename);
            return -1;
        }
    }

    /* Write the stream header, if any. This is done for every muxer, not
    * only for the ones that needed avio_open() above. */
    if (avformat_write_header(output_fmt_ctx, NULL) < 0)
    {
        fprintf(stderr, "Error occurred when opening output file\n");
        rv = -1;
    }

    return rv;
}

/*
 * Create the audio stream of the output file.
 *
 * Looks up the encoder for codec_id (stored through *codec), adds a new
 * stream with id 1 to oc, and fills the stream's codec context from the
 * globals audio_bit_rate, audio_sample_rate and audio_channels. The sample
 * format is planar float when the global outputAudioFormat is Vorbis and
 * signed 16-bit otherwise.
 *
 * Terminates the process with exit(1) when the encoder cannot be found or
 * the stream cannot be allocated; otherwise returns the new stream.
 */
AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,
    enum AVCodecID codec_id)
{
    /* find the audio encoder */
    AVCodec *encoder = avcodec_find_encoder(codec_id);
    *codec = encoder;
    if (encoder == NULL) {
        fprintf(stderr, "Could not find codec\n");
        exit(1);
    }

    AVStream *stream = avformat_new_stream(oc, encoder);
    if (stream == NULL) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    stream->id = 1;

    /* put sample parameters */
    AVCodecContext *enc_ctx = stream->codec;
    enc_ctx->sample_fmt  = (outputAudioFormat == AV_CODEC_ID_VORBIS)
                               ? AV_SAMPLE_FMT_FLTP
                               : AV_SAMPLE_FMT_S16;
    enc_ctx->bit_rate    = audio_bit_rate;
    enc_ctx->sample_rate = audio_sample_rate;
    enc_ctx->channels    = audio_channels;

    // some formats want stream headers to be separate
    if ((oc->oformat->flags & AVFMT_GLOBALHEADER) != 0) {
        enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    return stream;
}

/*
 * Open the audio encoder of stream st and record the frame size the
 * encoder expects.
 *
 * oc    - output context (not used by this function)
 * codec - encoder to open
 * st    - output audio stream whose codec context is opened
 *
 * Side effects: sets st->duration from the input context, and sets the
 * globals audio_input_frame_size and out_frame->nb_samples.
 *
 * Returns the non-negative result of avcodec_open2() on success, -1 when
 * the encoder could not be opened.
 */
int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    int ret=0;
    AVCodecContext *c;

    /* NOTE(review): this copies the input container duration verbatim;
     * confirm both fields are expressed in the same time units. */
    st->duration = fmt_ctx->duration;
    c = st->codec;

    /* open it */
    ret = avcodec_open2(c, codec, NULL) ;
    if ( ret < 0)
    {
        fprintf(stderr, "could not open codec\n");
        return -1;
    }

    /* Encoders that accept any frame size get a fixed working size of
     * 10000 samples; all others dictate the size themselves. */
    if (c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)
        audio_input_frame_size = 10000;
    else
        audio_input_frame_size = c->frame_size;
    out_frame->nb_samples = audio_input_frame_size;
    return ret;
}

/*
 * Close the encoder attached to the audio stream st.
 * The output context oc is accepted but not used.
 */
void close_audio(AVFormatContext *oc, AVStream *st)
{
    AVCodecContext *enc_ctx = st->codec;

    avcodec_close(enc_ctx);
}

void write_audio_frame(uint8_t ** audio_dst_data, int audio_dst_bufsize)
{
    AVFormatContext *oc = output_fmt_ctx;
    AVStream *st = audio_st;
    if ( oc == NULL || st == NULL ) return;
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0;
    int got_packet=0, ret=0;

    av_init_packet(&pkt);
    c = st->codec;

    out_frame->nb_samples = audio_input_frame_size;

    AVRational r;
    r.num = 1;
    r.den = c->sample_rate;
    out_frame->pts = av_rescale_q(samples_count, (AVRational)r, c->time_base);
    avcodec_fill_audio_frame(out_frame, c->channels, c->sample_fmt,
                             audio_dst_data[0], audio_dst_bufsize, 0);
    samples_count += out_frame->nb_samples;

    ret = avcodec_encode_audio2(c, &pkt, out_frame, &got_packet);
    if (ret < 0) 
    {
        return;
    }

    if (!got_packet)
        return;

    /* rescale output packet timestamp values from codec to stream timebase */
    pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
    pkt.dts = av_rescale_q_rnd(pkt.dts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
    pkt.duration = av_rescale_q(pkt.duration, c->time_base, st->time_base);
    pkt.stream_index = st->index;

    char str[999]="";
    sprintf(str,"out_frame->nb_samples:\t%d",out_frame->nb_samples);
    /* Write the compressed frame to the media file. */
    ret = av_interleaved_write_frame(oc, &pkt);
    if (ret != 0) 
    {
        exit(1);
    }
    av_free_packet(&pkt);
}

/*
 * Drain the audio encoder of stream st: keep requesting packets with a NULL
 * input frame and write each one to oc until the encoder reports that it
 * has no more output.
 *
 * Terminates the process with exit(1) when the encoder reports an error.
 * A failure to write a packet is reported on stderr and draining continues.
 */
void write_delayed_frames(AVFormatContext *oc, AVStream *st)
{
    AVCodecContext *c = st->codec;
    int got_output = 0;
    AVPacket pkt;

    av_init_packet(&pkt);
    /* data and size must be 0 so the encoder allocates the payload itself */
    pkt.data = NULL;
    pkt.size = 0;

    do
    {
        int ret = avcodec_encode_audio2(c, &pkt, NULL, &got_output);
        if (ret < 0)
        {
            fprintf(stderr, "error encoding frame\n");
            exit(1);
        }

        /* got_output is set only when the encoder returned a packet. */
        if (got_output)
        {
            /* rescale packet timing from codec to stream timebase */
            pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
            pkt.dts = av_rescale_q_rnd(pkt.dts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
            pkt.duration = av_rescale_q(pkt.duration, c->time_base, st->time_base);
            pkt.stream_index = st->index;
            if (c->coded_frame && c->coded_frame->key_frame)
                pkt.flags |= AV_PKT_FLAG_KEY;

            /* Write the compressed frame to the media file. */
            ret = av_interleaved_write_frame(oc, &pkt);
            if (ret < 0)
            {
                fprintf(stderr, "error writing delayed frame\n");
            }
        }
        av_free_packet(&pkt);
    } while (got_output);
}

int main(int argc, char **argv)
{
    /* register all formats and codecs */
    av_register_all();
    avcodec_register_all();
    avformat_network_init();
    avdevice_register_all();
    int i =0;
    int ret=0;
    char src_filename[90] = "test.mp2";
    char dst_filename[90] = "output.webm";
    outputAudioFormat = AV_CODEC_ID_VORBIS;
    open_audio_input(src_filename);
    if ( codec_ctx_audio->bit_rate == 0 ) codec_ctx_audio->bit_rate = 112000;
    audio_bit_rate        = codec_ctx_audio->bit_rate;
    audio_sample_rate    = codec_ctx_audio->sample_rate;
    audio_channels        = codec_ctx_audio->channels;
    open_encoder( dst_filename );
    int frames= 0;
    while(1)
    {
        int rv = decode_frame();
        if ( rv < 0 )
        {
            break;
        }

        if (audio_st)
        {
            audio_pts = audio_st->pts.val * av_q2d(audio_st->time_base);
        }
        else
        {
            audio_pts = 0.0;
        }
        if ( codec_ctx_audio )
        {
            if ( got_frame )
            {
                write_audio_frame( audio_dst_data, audio_dst_bufsize );
                frames++;
            }
        }
        printf("\naudio_pts: %f", audio_pts);
    }
    while(1)
    {
        dst_nb_samples = (int)av_rescale_rnd(swr_get_delay(swrContext, sourceSampleRate) + decoded_frame->nb_samples, sourceSampleRate, destSampleRate, AV_ROUND_UP);
        if ( dst_nb_samples > max_dst_nb_samples )
        {
            max_dst_nb_samples = dst_nb_samples;
            if ( audio_dst_data[0] )
            {
                av_freep(&audio_dst_data[0]);
                audio_dst_data[0] = NULL;
            }
        }
        if ( audio_dst_data[0] == NULL )
        {
            if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
            {
                ret = av_samples_alloc(audio_dst_data, NULL, codec_ctx_audio->channels,
                    decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 0);
            }
            else
            {
                ret = av_samples_alloc(audio_dst_data, NULL, codec_ctx_audio->channels,
                    decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_S16, 0);
            }
        }
        int resampled = swr_convert(swrContext, audio_dst_data, out_frame->nb_samples,NULL, 0);
        if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
        {
            audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 1);
        }
        else
        {
            audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_S16, 1);
        }
        if ( audio_dst_bufsize <= 0 ) break;
        audio_pts = audio_st->pts.val * av_q2d(audio_st->time_base);
        printf("\naudio_pts: %f", audio_pts);
        write_audio_frame( audio_dst_data, audio_dst_bufsize );
    }
    write_delayed_frames( output_fmt_ctx, audio_st );
    av_write_trailer(output_fmt_ctx);
    close_audio( output_fmt_ctx, audio_st);
    swr_free(&swrContext);
    avcodec_free_frame(&out_frame);
    getch();
    return 0;
}

Working under Windows 7, Zeranoe FFmpeg 32 bit build:

libavutil      52. 62.100 / 52. 62.100
libavcodec     55. 47.101 / 55. 47.101
libavformat    55. 22.103 / 55. 22.103
libavdevice    55.  5.102 / 55.  5.102
libavfilter     4.  1.100 /  4.  1.100
libswscale      2.  5.101 /  2.  5.101
libswresample   0. 17.104 /  0. 17.104
libpostproc    52.  3.100 / 52.  3.100

Could anyone point to the place where I might be misunderstanding things?

Thanks for any guidance in advance!

解决方案

I think I finally found the solution. The resampling example that ships with FFmpeg (at least the version I have) can be misleading and probably needs to be corrected. According to the documentation of swr_convert, audio_dst_data may even be a large buffer, in order to avoid buffering:

 * If more input is provided than output space then the input will be buffered.
 * You can avoid this buffering by providing more output space than input.
 * Conversion will run directly without copying whenever possible.

This statement may be incorrect: in theory and in practice it causes no obvious errors, but, as I have discovered, it sometimes results in odd behavior.

My solution: do not let audio_dst_data buffer size exceed output codec's frame size - then it works perfectly.

Maybe someone could fix the swresample library or the resampling example, or at least document this more clearly.

这篇关于使用FFmpeg库,C ++转码为vorbis的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆