目标码率丢帧-1 · 音视频开发之路

## 前言本篇文章的丢帧是依据编码后的码率和目标码率来决定丢帧，而下一篇文章介绍的丢帧依据是目标帧率。 > [http://www.jianshu.com/p/fe303bdabc26](https://www.jianshu.com/p/fe303bdabc26) ##### 由此可对丢帧策略分类如下： * 编码后的码率和目标码率来决定丢帧 * 目标帧率决定丢帧 ## 整个帧率控制多次使用的算法---指数权重滤波（暂且如此命名）在exp\_filter.cc文件中： ~~~cpp #include "webrtc/base/exp_filter.h" #include <math.h> namespace rtc { const float ExpFilter::kValueUndefined = -1.0f; void ExpFilter::Reset(float alpha) { alpha_ = alpha; filtered_ = kValueUndefined; } float ExpFilter::Apply(float exp, float sample) { if (filtered_ == kValueUndefined) { // Initialize filtered value. filtered_ = sample; } else if (exp == 1.0) { filtered_ = alpha_ * filtered_ + (1 - alpha_) * sample; } else { float alpha = pow(alpha_, exp); filtered_ = alpha * filtered_ + (1 - alpha) * sample; } if (max_ != kValueUndefined && filtered_ > max_) { filtered_ = max_; } return filtered_; } void ExpFilter::UpdateBase(float alpha) { alpha_ = alpha; } } // namespace rtc ~~~ 这个文件的大概思想就是对历史值和当前值做指数加权求和。公式为： ~~~cpp f(x)=alpha*f(x-1)+(1-alpha)*sample; alpha=pow(alpha_, exp); ~~~ 其中alpha\_为设定常量，exp为幂次方，sample为最新样点值。后面还有： ~~~swift f(x)=min(f(x),max);即不要超过max。 ~~~ ## 调用丢帧 ~~~cpp bool MediaOptimization::DropFrame() { CriticalSectionScoped lock(crit_sect_.get()); UpdateIncomingFrameRate(); // Leak appropriate number of bytes. frame_dropper_->Leak((uint32_t)(InputFrameRateInternal() + 0.5f)); if (video_suspended_) { return true; // Drop all frames when muted. } return frame_dropper_->DropFrame(); } ~~~ 解释： * UpdateIncomingFrameRate();更新采集出来的帧率。 * frame\_dropper\_->Leak((uint32\_t)(InputFrameRateInternal() + 0.5f));这里主要利用采集帧率，去更新丢帧比率等关键丢帧信息。 * return frame\_dropper\_->DropFrame();这里就是根据前面计算的丢帧比率等去实现均匀丢帧。这些函数的具体实现后面会一一介绍。 ## 更新采集出来的帧率 ~~~cpp void MediaOptimization::UpdateIncomingFrameRate() { int64_t now = clock_->TimeInMilliseconds(); if (incoming_frame_times_[0] == 0) { // No shifting if this is the first time. } else { // Shift all times one step. 
for (int32_t i = (kFrameCountHistorySize - 2); i >= 0; i--) { incoming_frame_times_[i + 1] = incoming_frame_times_[i]; } } incoming_frame_times_[0] = now; ProcessIncomingFrameRate(now); } ~~~ ~~~cpp //framerate=n/t void MediaOptimization::ProcessIncomingFrameRate(int64_t now) { int32_t num = 0; int32_t nr_of_frames = 0; for (num = 1; num < (kFrameCountHistorySize - 1); ++num) { if (incoming_frame_times_[num] <= 0 || // don't use data older than 2 s now - incoming_frame_times_[num] > kFrameHistoryWinMs) { break; } else { nr_of_frames++; } } if (num > 1) { const int64_t diff = now - incoming_frame_times_[num - 1]; incoming_frame_rate_ = 1.0; if (diff > 0) { incoming_frame_rate_ = nr_of_frames * 1000.0f / static_cast<float>(diff); } } } ~~~ 解释：这一段比较好理解，就是根据每一帧到来的时间，最多2秒钟的统计，利用公式： `incoming_frame_rate_ = nr_of_frames * 1000.0f / static_cast<float>(diff);` 得到这一段时间的采集帧率。对于统计数据， ~~~cpp for (int32_t i = (kFrameCountHistorySize - 2); i >= 0; i--) { incoming_frame_times_[i + 1] = incoming_frame_times_[i]; } ~~~ 可见这是一个滑动窗口，即总是用最新的kFrameCountHistorySize 大小的数据。 ## 丢帧算法主要实现 丢帧算法全部在frame\_dropper.cc文件中，下面先通过代码解读，再细说算法实现。以下为frame\_dropper.cc文件内容及注释： ~~~cpp /* * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "webrtc/modules/video_coding/utility/include/frame_dropper.h" #include "webrtc/system_wrappers/interface/trace.h" namespace webrtc { const float kDefaultKeyFrameSizeAvgKBits = 0.9f; const float kDefaultKeyFrameRatio = 0.99f; const float kDefaultDropRatioAlpha = 0.9f; const float kDefaultDropRatioMax = 0.96f; const float kDefaultMaxTimeToDropFrames = 4.0f; // In seconds. 
FrameDropper::FrameDropper() : _keyFrameSizeAvgKbits(kDefaultKeyFrameSizeAvgKBits), _keyFrameRatio(kDefaultKeyFrameRatio), _dropRatio(kDefaultDropRatioAlpha, kDefaultDropRatioMax), _enabled(true), _max_time_drops(kDefaultMaxTimeToDropFrames) { Reset(); } FrameDropper::FrameDropper(float max_time_drops) : _keyFrameSizeAvgKbits(kDefaultKeyFrameSizeAvgKBits), _keyFrameRatio(kDefaultKeyFrameRatio), _dropRatio(kDefaultDropRatioAlpha, kDefaultDropRatioMax), _enabled(true), _max_time_drops(max_time_drops) { Reset(); } void FrameDropper::Reset() { _keyFrameRatio.Reset(0.99f); _keyFrameRatio.Apply(1.0f, 1.0f/300.0f); // 1 key frame every 10th second in 30 fps _keyFrameSizeAvgKbits.Reset(0.9f); _keyFrameCount = 0; _accumulator = 0.0f; _accumulatorMax = 150.0f; // assume 300 kb/s and 0.5 s window _targetBitRate = 300.0f; _incoming_frame_rate = 30; _keyFrameSpreadFrames = 0.5f * _incoming_frame_rate; _dropNext = false; _dropRatio.Reset(0.9f); _dropRatio.Apply(0.0f, 0.0f); // Initialize to 0 _dropCount = 0; _windowSize = 0.5f; _wasBelowMax = true; _fastMode = false; // start with normal (non-aggressive) mode // Cap for the encoder buffer level/accumulator, in secs. _cap_buffer_size = 3.0f; // Cap on maximum amount of dropped frames between kept frames, in secs. 
_max_time_drops = 4.0f; } void FrameDropper::Enable(bool enable) { _enabled = enable; } //deltaFrame : 0:key frame 1:P frame void FrameDropper::Fill(size_t frameSizeBytes, bool deltaFrame) { if (!_enabled) { return; } float frameSizeKbits = 8.0f * static_cast<float>(frameSizeBytes) / 1000.0f; if (!deltaFrame && !_fastMode) // fast mode does not treat key-frames any different//非fast_mode而且key_frame { //exp=1.0时，filtered_ = alpha_ * filtered_ + (1 - alpha_) * sample;当alpha_=0.8或0.9时，则更偏重于历史值，而非当前sample _keyFrameSizeAvgKbits.Apply(1, frameSizeKbits); _keyFrameRatio.Apply(1.0, 1.0);//_keyFrameRatio同样偏重于历史值，而当前值设置为1，因为当前为key frame ，所以值为1 if (frameSizeKbits > _keyFrameSizeAvgKbits.filtered())//当前值大于均值 { // Remove the average key frame size since we // compensate for key frames when adding delta // frames. frameSizeKbits -= _keyFrameSizeAvgKbits.filtered();//超出均值的部分 } else { // Shouldn't be negative, so zero is the lower bound. frameSizeKbits = 0; } if (_keyFrameRatio.filtered() > 1e-5 && 1 / _keyFrameRatio.filtered() < _keyFrameSpreadFrames) //_keyFrameSpreadFrames = 0.5f * inputFrameRate; { // We are sending key frames more often than our upper bound for // how much we allow the key frame compensation to be spread // out in time. Therefor we must use the key frame ratio rather // than keyFrameSpreadFrames. 
_keyFrameCount = static_cast<int32_t>(1 / _keyFrameRatio.filtered() + 0.5);//每一秒关键帧的数量？ } else { // Compensate for the key frame the following frames _keyFrameCount = static_cast<int32_t>(_keyFrameSpreadFrames + 0.5); } } else { // Decrease the keyFrameRatio _keyFrameRatio.Apply(1.0, 0.0);//因为这是P帧，降低_keyFrameRatio的fileter值，因为sample=0 } // Change the level of the accumulator (bucket) _accumulator += frameSizeKbits; //_accumulator是frameSizeKbits的累加器，表示超过均值的bit值累加 CapAccumulator();//max_accumulator = _targetBitRate * _cap_buffer_size;累加器最多为max_accumulator,3倍目标码率 } void FrameDropper::Leak(uint32_t inputFrameRate) { if (!_enabled) { return; } if (inputFrameRate < 1) { return; } if (_targetBitRate < 0.0f) { return; } _keyFrameSpreadFrames = 0.5f * inputFrameRate; // T is the expected bits per frame (target). If all frames were the same size, // we would get T bits per frame. Notice that T is also weighted to be able to // force a lower frame rate if wanted. float T = _targetBitRate / inputFrameRate;//T:每一帧期望的bit大小，从下面内容，明显这个T代表的是每个P帧期望的大小，K帧是另外补偿的 if (_keyFrameCount > 0) { // Perform the key frame compensation if (_keyFrameRatio.filtered() > 0 && 1 / _keyFrameRatio.filtered() < _keyFrameSpreadFrames) { T -= _keyFrameSizeAvgKbits.filtered() * _keyFrameRatio.filtered();//_keyFrameSizeAvgKbits.filtered() * _keyFrameRatio.filtered()为keyframe在每一帧均摊的占用的kbit } else { T -= _keyFrameSizeAvgKbits.filtered() / _keyFrameSpreadFrames;// } _keyFrameCount--;//补偿一个关键帧，则关键帧数-1. 
} _accumulator -= T;//累加器在编码后增加，在编码前减去当前帧占用的大小 if (_accumulator < 0.0f) { _accumulator = 0.0f; } UpdateRatio(); } void FrameDropper::UpdateNack(uint32_t nackBytes) { if (!_enabled) { return; } _accumulator += static_cast<float>(nackBytes) * 8.0f / 1000.0f; } void FrameDropper::FillBucket(float inKbits, float outKbits) { _accumulator += (inKbits - outKbits); } void FrameDropper::UpdateRatio() { if (_accumulator > 1.3f * _accumulatorMax)//_accumulatorMax = bitRate * _windowSize;累加器过大之后，减小alpha值，_dropRatio更偏重当前值 { // Too far above accumulator max, react faster _dropRatio.UpdateBase(0.8f); } else { // Go back to normal reaction _dropRatio.UpdateBase(0.9f); } if (_accumulator > _accumulatorMax) { // We are above accumulator max, and should ideally // drop a frame. Increase the dropRatio and drop // the frame later. if (_wasBelowMax)//_wasBelowMax = _accumulator < _accumulatorMax;上一次小于_accumulatorMax { _dropNext = true;//丢掉下一帧 } if (_fastMode) { // always drop in aggressive mode _dropNext = true; } _dropRatio.Apply(1.0f, 1.0f);//因为丢帧，所以sample为1 _dropRatio.UpdateBase(0.9f); } else { _dropRatio.Apply(1.0f, 0.0f);//不丢帧，sample为0 } _wasBelowMax = _accumulator < _accumulatorMax; } // This function signals when to drop frames to the caller. It makes use of the dropRatio // to smooth out the drops over time. bool FrameDropper::DropFrame() { if (!_enabled) { return false; } if (_dropNext) { _dropNext = false; _dropCount = 0; } if (_dropRatio.filtered() >= 0.5f) // Drops per keep//>=0.5表示当前帧不丢，下一帧一定丢，即2个至少丢一个 { // limit is the number of frames we should drop between each kept frame // to keep our drop ratio. limit is positive in this case. float denom = 1.0f - _dropRatio.filtered();//denom:分母，表示不丢的比率 if (denom < 1e-5) { denom = (float)1e-5; } int32_t limit = static_cast<int32_t>(1.0f / denom - 1.0f + 0.5f);//这里注释意思limit代表需要丢掉的帧数，即如果当前帧不丢，则后面有limit帧需要丢掉 // Put a bound on the max amount of dropped frames between each kept // frame, in terms of frame rate and window size (secs). 
int max_limit = static_cast<int>(_incoming_frame_rate * _max_time_drops);//4倍帧率，max_limit则表示连续丢掉4倍帧率的帧，明显太大了 if (limit > max_limit) { limit = max_limit; } if (_dropCount < 0)//_dropCount表示当前这一轮丢帧，已经丢掉的帧数 { // Reset the _dropCount since it was negative and should be positive. if (_dropRatio.filtered() > 0.4f) { _dropCount = -_dropCount; } else { _dropCount = 0; } } if (_dropCount < limit)//直到丢掉limit帧 { // As long we are below the limit we should drop frames. _dropCount++; return true; } else { // Only when we reset _dropCount a frame should be kept. _dropCount = 0; return false; } } else if (_dropRatio.filtered() > 0.0f && _dropRatio.filtered() < 0.5f) // Keeps per drop//表示当前帧不丢，下一帧可能丢，也可能不丢，即每隔若干帧丢一帧 { // limit is the number of frames we should keep between each drop // in order to keep the drop ratio. limit is negative in this case, // and the _dropCount is also negative. float denom = _dropRatio.filtered(); if (denom < 1e-5) { denom = (float)1e-5; } int32_t limit = -static_cast<int32_t>(1.0f / denom - 1.0f + 0.5f); if (_dropCount > 0) { // Reset the _dropCount since we have a positive // _dropCount, and it should be negative. if (_dropRatio.filtered() < 0.6f) { _dropCount = -_dropCount; } else { _dropCount = 0; } } if (_dropCount > limit) { if (_dropCount == 0) { // Drop frames when we reset _dropCount. _dropCount--; return true;//丢，明显每次只丢一帧 } else { // Keep frames as long as we haven't reached limit. _dropCount--; return false;//不丢，直到_dropCount > limit，则重新置_dropCount = 0;开始新一轮丢帧 } } else { _dropCount = 0; return false; } } _dropCount = 0; return false; // A simpler version, unfiltered and quicker //bool dropNext = _dropNext; //_dropNext = false; //return dropNext; } void FrameDropper::SetRates(float bitRate, float incoming_frame_rate) { // Bit rate of -1 means infinite bandwidth. 
_accumulatorMax = bitRate * _windowSize; // bitRate * windowSize (in seconds) if (_targetBitRate > 0.0f && bitRate < _targetBitRate && _accumulator > _accumulatorMax) { // Rescale the accumulator level if the accumulator max decreases _accumulator = bitRate / _targetBitRate * _accumulator; } _targetBitRate = bitRate; CapAccumulator(); _incoming_frame_rate = incoming_frame_rate; } float FrameDropper::ActualFrameRate(uint32_t inputFrameRate) const { if (!_enabled) { return static_cast<float>(inputFrameRate); } return inputFrameRate * (1.0f - _dropRatio.filtered());//实际编码帧率 } // Put a cap on the accumulator, i.e., don't let it grow beyond some level. // This is a temporary fix for screencasting where very large frames from // encoder will cause very slow response (too many frame drops). void FrameDropper::CapAccumulator() { float max_accumulator = _targetBitRate * _cap_buffer_size; if (_accumulator > max_accumulator) { _accumulator = max_accumulator; } } } ~~~ 1、丢帧的决定因素在\_dropRatio.Apply(1.0f, 1.0f);通过给\_dropRatio赋值，使得\_dropRatio不为0.而\_dropRatio.Apply(1.0f, 1.0f);调用的起因，还在 ~~~cpp int32_t VCMEncodedFrameCallback::Encoded ->int32_t MediaOptimization::UpdateWithEncodedData ->FrameDropper::Fill(size_t frameSizeBytes, bool deltaFrame) ~~~ 通过Fill函数中的\_accumulator（累加器），再通过 ~~~cpp FrameDropper::Leak(uint32_t inputFrameRate) ->FrameDropper::UpdateRatio() ~~~ 来最终调用\_dropRatio.Apply(1.0f, 1.0f)或\_dropRatio.Apply(1.0f, 0.0f) 2、丢帧的方法在FrameDropper::DropFrame()函数中，通过上面注释的代码也可以理解。 ![](//upload-images.jianshu.io/upload_images/2305000-b04236bcba856804.png?imageMogr2/auto-orient/strip|imageView2/2/w/499/format/webp) drop.png 就是当dropRatio>=0.5时，两个帧之间可能丢多个；当dropRatio<0.5时，两个帧之间最多丢一个。 3、调用丢帧的地方 * int32\_t VideoSender::AddVideoFrame()帧数据加入encoder之前 4、如何从\_accumulator控制帧率 * FrameDropper::Fill()中，每编码完一帧数据，就将数据的大小累加到\_accumulator，其中P帧全部累加，K帧只加超出均值的部分。 * 每个采集后，即将给到编码器的帧，利用\_targetBitRate / inputFrameRate;得到每一帧期望占用的bit大小，其中K帧单独计算： \_keyFrameSizeAvgKbits.filtered() \* \_keyFrameRatio.filtered(); 
~~~ 疑问：为什么_accumulator累加时，K帧只加超出均值的部分，而不是全部。 ~~~ 5、什么时候丢帧 \_accumulator > \_accumulatorMax；其中，\_accumulatorMax = bitRate \* \_windowSize;（\_windowSize=0.5f） ## 编码完后，更新\_accumulator 这一部分只是说明编码完后怎么去更新\_accumulator 的流程，比较容易看懂。 ~~~cpp int32_t VCMEncodedFrameCallback::Encoded( const EncodedImage& encodedImage, const CodecSpecificInfo* codecSpecificInfo, const RTPFragmentationHeader* fragmentationHeader) { post_encode_callback_->Encoded(encodedImage, NULL, NULL); if (_sendCallback == NULL) { return VCM_UNINITIALIZED; } RTPVideoHeader rtpVideoHeader; memset(&rtpVideoHeader, 0, sizeof(RTPVideoHeader)); RTPVideoHeader* rtpVideoHeaderPtr = &rtpVideoHeader; CopyCodecSpecific(codecSpecificInfo, &rtpVideoHeaderPtr); int32_t callbackReturn = _sendCallback->SendData( _payloadType, encodedImage, *fragmentationHeader, rtpVideoHeaderPtr); if (callbackReturn < 0) { return callbackReturn; } if (_mediaOpt != NULL) { //编码后的统计信息更新 _mediaOpt->UpdateWithEncodedData(encodedImage); if (_internalSource) return _mediaOpt->DropFrame(); // Signal to encoder to drop next frame. } return VCM_OK; } ~~~ ~~~cpp int32_t MediaOptimization::UpdateWithEncodedData( const EncodedImage& encoded_image) { size_t encoded_length = encoded_image._length; uint32_t timestamp = encoded_image._timeStamp; CriticalSectionScoped lock(crit_sect_.get()); const int64_t now_ms = clock_->TimeInMilliseconds(); PurgeOldFrameSamples(now_ms); if (encoded_frame_samples_.size() > 0 && encoded_frame_samples_.back().timestamp == timestamp) { // Frames having the same timestamp are generated from the same input // frame. We don't want to double count them, but only increment the // size_bytes. 
encoded_frame_samples_.back().size_bytes += encoded_length; encoded_frame_samples_.back().time_complete_ms = now_ms; } else { encoded_frame_samples_.push_back( EncodedFrameSample(encoded_length, timestamp, now_ms)); } UpdateSentBitrate(now_ms); UpdateSentFramerate(); if (encoded_length > 0) { const bool delta_frame = encoded_image._frameType != kKeyFrame;//0:key 1:P //这里将每次编码完的数据长度Fill到frame_dropper frame_dropper_->Fill(encoded_length, delta_frame); if (max_payload_size_ > 0 && encoded_length > 0) { const float min_packets_per_frame = encoded_length / static_cast<float>(max_payload_size_); if (delta_frame) { loss_prot_logic_->UpdatePacketsPerFrame(min_packets_per_frame, clock_->TimeInMilliseconds()); } else { loss_prot_logic_->UpdatePacketsPerFrameKey( min_packets_per_frame, clock_->TimeInMilliseconds()); } if (enable_qm_) { // Update quality select with encoded length. qm_resolution_->UpdateEncodedSize(encoded_length); } } if (!delta_frame && encoded_length > 0) { loss_prot_logic_->UpdateKeyFrameSize(static_cast<float>(encoded_length)); } // Updating counters. if (delta_frame) { delta_frame_cnt_++; } else { key_frame_cnt_++; } } return VCM_OK; } ~~~ 解释：编码完后的数据都是经过callback回调的， ~~~cpp int32_t VCMEncodedFrameCallback::Encoded ->int32_t MediaOptimization::UpdateWithEncodedData ->frame_dropper_->Fill(encoded_length, delta_frame); ~~~ 经过这个流程，每次编码后，送给发送的数据都要去更新frame\_dropper\_。 ## 后记：作者对于这一个算法的机制原理，也不是很明白，只能从代码中体会算法实现，不免有错误理解，如有更好理解或者不同见解的道友，敬请赐教，不胜感激！