Commit 7ebef41f authored by Dale Curtis, committed by Commit Bot

Fix calculations for frame and tile threads with dav1d (again :O).

1ad19fb1 continued the trend of
incorrect statements about dav1d thread counts :/ The actual total
is (t + 1) * f when configuring tilethreads=t and framethreads=f.

I've triple checked this, so this will hopefully be the last update
to the calculations and comments.

If a system has the cores for it, we'll end up using the following:
<300p: 2 tile threads, 2 frame threads = 2 * 2 + 2 = 6 total threads.
<700p: 3 tile threads, 2 frame threads = 3 * 2 + 2 = 8 total threads.

For higher resolutions we hit limits::kMaxVideoThreads (16):
>=700p: 4 tile threads, 3 frame threads = 4 * 3 + 3 = 15 total threads.

Because this corrected total exceeds limits::kMaxVideoThreads for 720p
content, we've had to reduce the tile thread count to 4 to ensure smooth
playback. Otherwise, 5 tile threads and 2 frame threads are insufficient
for smooth playback on my Z840. This is handled by a new combined
GetDecoderThreadCounts() which outputs both the tile and frame thread
counts together for clarity.

BUG=954659,957511
TEST=printf() on pthread_create, python test program to be sure:
def count_threads(f, t):
  """Count the threads created for f frame threads and t tile threads.

  Each frame thread contributes t tile threads (only when t > 1) plus one
  thread for itself (only when f > 1), so the result is (t + 1) * f when
  both counts exceed 1. A count of 1 contributes no threads of that kind.
  """
  threads = 0
  # range() replaces the Python 2-only xrange() so this runs on Python 3.
  for i in range(0, f):
    for j in range(0, t):
      if t > 1: threads += 1
    if f > 1: threads += 1
  return threads

R=chcunningham

Change-Id: Ifcf1069ee9b1a8cede82c102fddbc3c57c2036f5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1606192
Auto-Submit: Dale Curtis <dalecurtis@chromium.org>
Commit-Queue: Chrome Cunningham <chcunningham@chromium.org>
Reviewed-by: Chrome Cunningham <chcunningham@chromium.org>
Cr-Commit-Position: refs/heads/master@{#658815}
parent 3b009f61
...@@ -26,36 +26,27 @@ extern "C" { ...@@ -26,36 +26,27 @@ extern "C" {
namespace media { namespace media {
static int GetDecoderTileThreadCount(const VideoDecoderConfig& config) { static void GetDecoderThreadCounts(const int coded_height,
// Values based on currently available content. Recommended by YouTube. int* tile_threads,
int tile_threads; int* frame_threads) {
// Tile thread counts based on currently available content. Recommended by
const int height = config.coded_size().height(); // YouTube, while frame thread values fit within limits::kMaxVideoThreads.
if (height >= 1000) if (coded_height >= 700) {
tile_threads = 8; *tile_threads =
else if (height >= 700) 4; // Current 720p content is encoded in 5 tiles and 1080p content with
tile_threads = 5; // 8 tiles, but we'll exceed limits::kMaxVideoThreads with 5+ tile
else if (height >= 300) // threads with 3 frame threads (5 * 3 + 3 = 18 threads vs 16 max).
tile_threads = 3; //
else // Since 720p playback isn't smooth without 3 frame threads, we've
tile_threads = 2; // chosen a slightly lower tile thread count.
*frame_threads = 3;
return tile_threads; } else if (coded_height >= 300) {
} *tile_threads = 3;
*frame_threads = 2;
static int GetDecoderFrameThreadCount(const VideoDecoderConfig& config) { } else {
// Values based on currently available content. *tile_threads = 2;
int frame_threads; *frame_threads = 2;
}
const int height = config.coded_size().height();
if (height >= 1000)
frame_threads = 8;
else if (height >= 700)
frame_threads = 4;
else
frame_threads = 2;
return frame_threads;
} }
static VideoPixelFormat Dav1dImgFmtToVideoPixelFormat( static VideoPixelFormat Dav1dImgFmtToVideoPixelFormat(
...@@ -178,10 +169,11 @@ void Dav1dVideoDecoder::Initialize(const VideoDecoderConfig& config, ...@@ -178,10 +169,11 @@ void Dav1dVideoDecoder::Initialize(const VideoDecoderConfig& config,
// maximum number of recommended threads (using number of processors, etc). // maximum number of recommended threads (using number of processors, etc).
// //
// dav1d will spawn |n_tile_threads| per frame thread. // dav1d will spawn |n_tile_threads| per frame thread.
s.n_tile_threads = GetDecoderTileThreadCount(config); GetDecoderThreadCounts(config.coded_size().height(), &s.n_tile_threads,
s.n_frame_threads = GetDecoderFrameThreadCount(config); &s.n_frame_threads);
const int max_threads = VideoDecoder::GetRecommendedThreadCount( const int max_threads = VideoDecoder::GetRecommendedThreadCount(
s.n_frame_threads * s.n_tile_threads); s.n_frame_threads * (s.n_tile_threads + 1));
// First clamp tile threads to the allowed maximum. We prefer tile threads // First clamp tile threads to the allowed maximum. We prefer tile threads
// over frame threads since dav1d folk indicate they are more efficient. In an // over frame threads since dav1d folk indicate they are more efficient. In an
...@@ -199,21 +191,20 @@ void Dav1dVideoDecoder::Initialize(const VideoDecoderConfig& config, ...@@ -199,21 +191,20 @@ void Dav1dVideoDecoder::Initialize(const VideoDecoderConfig& config,
// require at least two buffers before the first frame can be output. // require at least two buffers before the first frame can be output.
// //
// If a system has the cores for it, we'll end up using the following: // If a system has the cores for it, we'll end up using the following:
// <300p: 2 tile threads, 2 frame threads = 4 total threads. // <300p: 2 tile threads, 2 frame threads = 2 * 2 + 2 = 6 total threads.
// <700p: 3 tile threads, 2 frame threads = 6 total threads. // <700p: 3 tile threads, 2 frame threads = 3 * 2 + 2 = 8 total threads.
// //
// For higher resolutions we hit limits::kMaxVideoThreads (16): // For higher resolutions we hit limits::kMaxVideoThreads (16):
// <1000p: 5 tile threads, 3 frame thread = 15 total threads. // >700p: 4 tile threads, 3 frame threads = 4 * 3 + 3 = 15 total threads.
// >1000p: 8 tile threads, 2 frame threads = 16 total threads.
// //
// Due to the (surprising) performance issues which occurred when setting // Due to the (surprising) performance issues which occurred when setting
// |n_frame_threads|=1 (https://crbug.com/957511) the minimum total number of // |n_frame_threads|=1 (https://crbug.com/957511) the minimum total number of
// threads is 4 (two tile and two frame) regardless of core count. The maximum // threads is 6 (two tile and two frame) regardless of core count. The maximum
// is min(2 * base::SysInfo::NumberOfProcessors(), limits::kMaxVideoThreads). // is min(2 * base::SysInfo::NumberOfProcessors(), limits::kMaxVideoThreads).
if (low_delay) if (low_delay)
s.n_frame_threads = 1; s.n_frame_threads = 1;
else if (s.n_frame_threads * s.n_tile_threads > max_threads) else if (s.n_frame_threads * (s.n_tile_threads + 1) > max_threads)
s.n_frame_threads = std::max(2, max_threads / s.n_tile_threads); s.n_frame_threads = std::max(2, max_threads / (s.n_tile_threads + 1));
// Route dav1d internal logs through Chrome's DLOG system. // Route dav1d internal logs through Chrome's DLOG system.
s.logger = {nullptr, &LogDav1dMessage}; s.logger = {nullptr, &LogDav1dMessage};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment