Update SIMD version of life demo.

- add benchmark option
- add multi-thread support
- convert from c to cpp (inc. ppapi)
BUG=NONE
TEST=this is a demo

R=binji@chromium.org

Review URL: https://codereview.chromium.org/451883002

Cr-Commit-Position: refs/heads/master@{#288473}
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@288473 0039d316-1c4b-4281-b951-d872f2087c98
parent 06209e69
...@@ -2,17 +2,20 @@ ...@@ -2,17 +2,20 @@
'TOOLS': ['pnacl'], 'TOOLS': ['pnacl'],
'TARGETS': [ 'TARGETS': [
{ {
'NAME' : 'life_simd', 'NAME' : 'life',
'TYPE' : 'main', 'TYPE' : 'main',
'SOURCES' : [ 'SOURCES' : [
'life.c', 'life.cc',
], ],
'DEPS': ['ppapi_simple', 'nacl_io'], 'DEPS': ['ppapi_simple', 'nacl_io'],
'LIBS': ['ppapi_simple', 'nacl_io', 'ppapi_cpp', 'ppapi', 'pthread'] 'LIBS': ['ppapi_simple', 'nacl_io', 'sdk_util', 'ppapi_cpp', 'ppapi', 'pthread']
} }
], ],
'DATA': [
'example.js'
],
'DEST': 'examples/demo', 'DEST': 'examples/demo',
'NAME': 'life_simd', 'NAME': 'life_simd',
'TITLE': "Conway's Life (SIMD version)", 'TITLE': "Conway's Life",
'GROUP': 'Demo' 'GROUP': 'Demo'
} }
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Module-loaded hook. Intentionally empty: this example performs no work
// here. Presumably invoked by common.js once the NaCl module has loaded
// (inferred from the name -- confirm in common.js).
function moduleDidLoad() {
}
// Wire up the page controls. Each control forwards its state to the NaCl
// module through postMessage(), except the "large" checkbox, which resizes
// the embed element directly.
function attachListeners() {
  // Registers |handler| for click events on the element with the given id.
  function onClick(id, handler) {
    document.getElementById(id).addEventListener('click', handler);
  }

  // Forwards the checked state of checkbox |id| to the module as
  // {message: messageName, value: <checked>} on every click.
  function forwardCheckbox(id, messageName) {
    onClick(id, function() {
      var box = document.getElementById(id);
      common.naclModule.postMessage({'message' : messageName,
                                     'value' : box.checked});
    });
  }

  onClick('benchmark', function() {
    common.naclModule.postMessage({'message' : 'run_benchmark'});
    common.updateStatus('BENCHMARKING... (please wait)');
  });

  forwardCheckbox('simd', 'set_simd');
  forwardCheckbox('multithread', 'set_threading');

  onClick('large', function() {
    var useLarge = document.getElementById('large').checked;
    var embed = document.getElementById('nacl_module');
    embed.setAttribute('width', useLarge ? 1280 : 640);
    embed.setAttribute('height', useLarge ? 1024 : 640);
  });
}
// Receives messages posted by the NaCl module. Only 'benchmark_result' is
// acted upon: its value (seconds) is logged, shown in the "result" element
// with three decimals, and the status line is set to SUCCESS.
function handleMessage(message_event) {
  var payload = message_event.data;
  if (payload.message != 'benchmark_result') {
    return;
  }

  var seconds = payload.value;
  console.log('Benchmark result:' + seconds);

  var formatted = (Math.round(seconds * 1000) / 1000).toFixed(3);
  var label = document.getElementById('result');
  label.textContent = 'Result: ' + formatted + ' seconds';
  common.updateStatus('SUCCESS');
}
...@@ -10,12 +10,24 @@ found in the LICENSE file. ...@@ -10,12 +10,24 @@ found in the LICENSE file.
<meta http-equiv="Expires" content="-1"> <meta http-equiv="Expires" content="-1">
<title>{{title}}</title> <title>{{title}}</title>
<script type="text/javascript" src="common.js"></script> <script type="text/javascript" src="common.js"></script>
<script type="text/javascript" src="example.js"></script>
</head> </head>
<body data-width="640" data-height="640" {{attrs}}> <body data-width="640" data-height="640" {{attrs}}>
<h1>{{title}}</h1> <h1>{{title}}</h1>
<h2>Status: <code id="statusField">NO-STATUS</code></h2> <h2>Status: <code id="statusField">NO-STATUS</code></h2>
<!-- The NaCl plugin will be embedded inside the element with id "listener". <!-- The NaCl plugin will be embedded inside the element with id "listener".
See common.js.--> See common.js.-->
<div>
Conway's game of life is a cellular automaton by British mathematician John
Horton Conway. Use the touch screen or mouse pointer to interact with the
simulation.
<br>
<input type="checkbox" id="simd" checked >Use SIMD<br>
<input type="checkbox" id="multithread" checked >Use multiple threads<br>
<input type="checkbox" id="large" >Use large field<br>
<input type="submit" id="benchmark" value="Run Benchmark">
<label id="result" name="result"> </label>
</div>
<div id="listener"></div> <div id="listener"></div>
</body> </body>
</html> </html>
/* Copyright 2014 The Chromium Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ppapi/c/pp_resource.h"
#include "ppapi/c/ppb_core.h"
#include "ppapi/c/ppb_fullscreen.h"
#include "ppapi/c/ppb_graphics_2d.h"
#include "ppapi/c/ppb_image_data.h"
#include "ppapi/c/ppb_input_event.h"
#include "ppapi/c/ppb_instance.h"
#include "ppapi/c/ppb_view.h"
#include "ppapi_simple/ps_event.h"
#include "ppapi_simple/ps_main.h"
/*
 * PPAPI C interface tables. Every pointer below is assigned in
 * example_main() from PSGetInterface() before the event loop starts.
 */
PPB_Core* g_pCore;
PPB_Fullscreen* g_pFullscreen;
PPB_Graphics2D* g_pGraphics2D;
PPB_ImageData* g_pImageData;
PPB_Instance* g_pInstance;
PPB_View* g_pView;
PPB_InputEvent* g_pInputEvent;
/* Assigned in example_main(); not referenced elsewhere in the visible code. */
PPB_KeyboardInputEvent* g_pKeyboardInput;
PPB_MouseInputEvent* g_pMouseInput;
PPB_TouchInputEvent* g_pTouchInput;
/* Global simulation and rendering state shared by the functions below. */
struct {
/* Graphics2D resource created (and re-created) by UpdateContext(). */
PP_Resource ctx;
/* Width and height passed to the most recent UpdateContext() call. */
struct PP_Size size;
/* Result of BindGraphics(); example_main() renders only while non-zero. */
int bound;
/* Cell buffer read by Render() for the current generation, 1 byte/cell. */
uint8_t* cell_in;
/* Cell buffer Render() writes the next generation into; the two buffers
 * are swapped at the end of every frame. */
uint8_t* cell_out;
/* Distance in bytes between the starts of consecutive cell rows. */
int32_t cell_stride;
} g_Context;
/*
 * Seed constant for the random number generator.
 * NOTE(review): nothing in the visible code passes this to srand() --
 * confirm whether it is still needed.
 */
const unsigned int kInitialRandSeed = 0xC0DE533D;

/* Alignment, in bytes, requested for the cell buffers (one 128-bit vector). */
const int kCellAlignment = 0x10;

#define INLINE inline __attribute__((always_inline))

/*
 * BGRA helper macro, for constructing a pixel for a BGRA buffer.
 * Each channel is converted to uint32_t before shifting: shifting a plain
 * int such as 0xFF left by 24 overflows the signed type, and the negative
 * result cannot initialize a uint32_t table without a narrowing conversion.
 */
#define MakeBGRA(b, g, r, a) \
  (((uint32_t)(a) << 24) | ((uint32_t)(r) << 16) | \
   ((uint32_t)(g) << 8) | (uint32_t)(b))

/* 128 bit vector type: sixteen unsigned 8-bit lanes. */
typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));

/* Helper function to broadcast x across all 16 lanes of a vector. */
INLINE u8x16_t broadcast(uint8_t x) {
  u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
  return r;
}
/*
 * Convert a count value into a live (green) or dead (black) color value.
 *
 * The table is indexed by the weighted count computed in Render():
 *   count = 2 * (sum of the eight neighbors) + (center cell)
 * which ranges from 0 to 17, hence 18 entries. Indices 5, 6 and 7 are
 * opaque green; every other index is opaque black.
 */
const uint32_t kNeighborColors[] = {
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
};
/*
 * These represent the new health value of a cell based on its neighboring
 * values. The health is binary: either alive or dead.
 *
 * Indexed by the same weighted count as kNeighborColors
 * (2 * neighbor sum + center cell, 0..17). Only indices 5, 6 and 7 yield a
 * live cell (1); all others yield a dead cell (0).
 */
const uint8_t kIsAlive[] = {
0, 0, 0, 0, 0, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0
};
/*
 * Rebuilds the simulation and graphics state for a view of the given size.
 *
 * When the size differs from the one currently stored in g_Context, the two
 * cell buffers are reallocated: cell_in is filled with random 0/1 cells and
 * cell_out is zeroed. The Graphics2D context is always released, re-created
 * at the new size and bound to the instance.
 *
 * If a buffer cannot be allocated, both cell pointers are left NULL; the
 * other functions in this file check for that and skip their work.
 */
void UpdateContext(uint32_t width, uint32_t height) {
  /*
   * Round the row pitch up to a multiple of kCellAlignment. The mask must be
   * ~(kCellAlignment - 1): masking with ~kCellAlignment only clears bit 4
   * and leaves the pitch unaligned (e.g. 655 for a width of 640).
   */
  int stride = (width + kCellAlignment - 1) & ~(kCellAlignment - 1);

  if (width != g_Context.size.width || height != g_Context.size.height) {
    size_t size = (size_t)stride * height;
    /*
     * The SIMD loop in Render() loads one 16-byte vector ahead of the cells
     * it is consuming. Pad the allocation so that look-ahead on the final
     * row stays inside the buffer.
     */
    size_t alloc_size = size + 2 * kCellAlignment;
    size_t index;
    void* in_buffer = NULL;
    void* out_buffer = NULL;
    int in_ok;
    int out_ok;

    free(g_Context.cell_in);
    free(g_Context.cell_out);
    g_Context.cell_in = NULL;
    g_Context.cell_out = NULL;

    /* Allocate both buffers aligned on kCellAlignment bytes. */
    in_ok = (posix_memalign(&in_buffer, kCellAlignment, alloc_size) == 0);
    out_ok = (posix_memalign(&out_buffer, kCellAlignment, alloc_size) == 0);

    if (in_ok && out_ok) {
      /* Zero everything first so the padding bytes are initialized. */
      memset(in_buffer, 0, alloc_size);
      memset(out_buffer, 0, alloc_size);
      g_Context.cell_in = (uint8_t*) in_buffer;
      g_Context.cell_out = (uint8_t*) out_buffer;
      for (index = 0; index < size; index++) {
        g_Context.cell_in[index] = rand() & 1;
      }
    } else {
      /* Only free what was actually allocated; leave the cell pointers NULL. */
      if (in_ok) free(in_buffer);
      if (out_ok) free(out_buffer);
    }
  }

  /* Recreate the graphics context on a view change. */
  g_pCore->ReleaseResource(g_Context.ctx);
  g_Context.size.width = width;
  g_Context.size.height = height;
  g_Context.cell_stride = stride;
  g_Context.ctx =
      g_pGraphics2D->Create(PSGetInstanceId(), &g_Context.size, PP_TRUE);
  g_Context.bound =
      g_pInstance->BindGraphics(PSGetInstanceId(), g_Context.ctx);
}
/*
 * Marks the four cells directly left, right, above and below (x, y) as alive
 * in the input buffer. Does nothing when no buffer exists or when (x, y)
 * lies on, or outside, the one-cell border of the field.
 */
void DrawCell(int32_t x, int32_t y) {
  const int32_t pitch = g_Context.cell_stride;
  uint8_t* center;

  if (g_Context.cell_in == NULL)
    return;
  if (x <= 0 || x >= g_Context.size.width - 1)
    return;
  if (y <= 0 || y >= g_Context.size.height - 1)
    return;

  center = g_Context.cell_in + x + y * pitch;
  center[-1] = 1;
  center[1] = 1;
  center[-pitch] = 1;
  center[pitch] = 1;
}
/*
 * Handles a touch event: for every active touch point, scatters random
 * cells inside the circle described by the touch radius. The number of
 * attempts per touch is 1/100th of the touch area.
 */
void ProcessTouchEvent(PSEvent* event) {
  const uint32_t touch_count = g_pTouchInput->GetTouchCount(
      event->as_resource, PP_TOUCHLIST_TYPE_TOUCHES);
  uint32_t t;

  for (t = 0; t < touch_count; t++) {
    struct PP_TouchPoint touch = g_pTouchInput->GetTouchByIndex(
        event->as_resource, PP_TOUCHLIST_TYPE_TOUCHES, t);
    const int radius = (int)touch.radius.x;
    const int cx = (int)touch.position.x;
    const int cy = (int)touch.position.y;
    /* attempts = 1/100th the area of touch point */
    const int attempts = (int)(M_PI * radius * radius / 100.0f);
    uint32_t n;

    for (n = 0; n < attempts; n++) {
      const int dx = rand() % (radius * 2) - radius;
      const int dy = rand() % (radius * 2) - radius;
      /* Discard offsets that fall outside the touch circle. */
      if (dx * dx + dy * dy > radius * radius)
        continue;
      DrawCell(cx + dx, cy + dy);
    }
  }
}
/*
 * Dispatches one Pepper Simple event:
 *   - view change: rebuilds the context for the new view rectangle;
 *   - mouse down/move with the left button held: draws at the pointer;
 *   - touch start/move: scatters cells around each touch point;
 *   - key down: toggles fullscreen.
 * All other events are ignored.
 */
void ProcessEvent(PSEvent* event) {
  /* If the view updates, build a new Graphics 2D Context. */
  if (event->type == PSE_INSTANCE_DIDCHANGEVIEW) {
    struct PP_Rect view_rect;
    g_pView->GetRect(event->as_resource, &view_rect);
    UpdateContext(view_rect.size.width, view_rect.size.height);
    return;
  }

  if (event->type != PSE_INSTANCE_HANDLEINPUT)
    return;

  PP_InputEvent_Type input_type = g_pInputEvent->GetType(event->as_resource);
  PP_InputEvent_Modifier held =
      g_pInputEvent->GetModifiers(event->as_resource);

  if (input_type == PP_INPUTEVENT_TYPE_MOUSEDOWN ||
      input_type == PP_INPUTEVENT_TYPE_MOUSEMOVE) {
    struct PP_Point where = g_pMouseInput->GetPosition(event->as_resource);
    /* Only draw while the left button is down. */
    if (held & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) {
      DrawCell(where.x, where.y);
    }
  } else if (input_type == PP_INPUTEVENT_TYPE_TOUCHSTART ||
             input_type == PP_INPUTEVENT_TYPE_TOUCHMOVE) {
    ProcessTouchEvent(event);
  } else if (input_type == PP_INPUTEVENT_TYPE_KEYDOWN) {
    PP_Bool is_full = g_pFullscreen->IsFullscreen(PSGetInstanceId());
    g_pFullscreen->SetFullscreen(PSGetInstanceId(),
                                 is_full ? PP_FALSE : PP_TRUE);
  }
}
/*
 * Re-randomizes the outermost ring of cells (top and bottom rows, then left
 * and right columns) of the input buffer. Does nothing until both cell
 * buffers exist.
 */
void Stir() {
  uint8_t* cells = g_Context.cell_in;
  const int32_t cols = g_Context.size.width;
  const int32_t rows = g_Context.size.height;
  const int32_t pitch = g_Context.cell_stride;
  const int32_t last_row = (rows - 1) * pitch;
  int32_t n;

  if (!cells || !g_Context.cell_out)
    return;

  /* Top and bottom rows. */
  for (n = 0; n < cols; ++n) {
    cells[n] = rand() & 1;
    cells[n + last_row] = rand() & 1;
  }
  /* Left and right columns. */
  for (n = 0; n < rows; ++n) {
    const int32_t row_start = n * pitch;
    cells[row_start] = rand() & 1;
    cells[row_start + (cols - 1)] = rand() & 1;
  }
}
void Render() {
struct PP_Size* psize = &g_Context.size;
PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL;
/*
* Create a buffer to draw into. Since we are waiting until the next flush
* chrome has an opportunity to cache this buffer see ppb_graphics_2d.h.
*/
PP_Resource image =
g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE);
uint8_t* pixels = g_pImageData->Map(image);
struct PP_ImageDataDesc desc;
uint8_t* cell_temp;
uint32_t x, y;
/* If we somehow have not allocated these pointers yet, skip this frame. */
if (!g_Context.cell_in || !g_Context.cell_out) return;
/* Get the pixel stride. */
g_pImageData->Describe(image, &desc);
/* Stir up the edges to prevent the simulation from reaching steady state. */
Stir();
/*
* Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
* wide perimeter is excluded from the simulation update; only cells from
* x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
*/
for (y = 1; y < g_Context.size.height - 1; ++y) {
uint8_t *src0 = (g_Context.cell_in + (y - 1) * g_Context.cell_stride);
uint8_t *src1 = src0 + g_Context.cell_stride;
uint8_t *src2 = src1 + g_Context.cell_stride;
uint8_t *dst = (g_Context.cell_out + y * g_Context.cell_stride) + 1;
uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride);
const u8x16_t kOne = broadcast(1);
const u8x16_t kFour = broadcast(4);
const u8x16_t kEight = broadcast(8);
const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/* Prime the src */
u8x16_t src00 = *(u8x16_t*)&src0[0];
u8x16_t src01 = *(u8x16_t*)&src0[16];
u8x16_t src10 = *(u8x16_t*)&src1[0];
u8x16_t src11 = *(u8x16_t*)&src1[16];
u8x16_t src20 = *(u8x16_t*)&src2[0];
u8x16_t src21 = *(u8x16_t*)&src2[16];
/* This inner loop is SIMD - each loop iteration will process 16 cells. */
for (x = 1; (x + 15) < (g_Context.size.width - 1); x += 16) {
/*
* Construct jittered source temps, using __builtin_shufflevector(..) to
* extract a shifted 16 element vector from the 32 element concatenation
* of two source vectors.
*/
u8x16_t src0j0 = src00;
u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
u8x16_t src1j0 = src10;
u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
u8x16_t src2j0 = src20;
u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
/* Sum the jittered sources to construct neighbor count. */
u8x16_t count = src0j0 + src0j1 + src0j2 +
src1j0 + + src1j2 +
src2j0 + src2j1 + src2j2;
/* Add the center cell. */
count = count + count + src1j1;
/* If count > 4 and < 8, center cell will be alive in the next frame. */
u8x16_t alive1 = count > kFour;
u8x16_t alive2 = count < kEight;
/* Intersect the two comparisons from above. */
u8x16_t alive = alive1 & alive2;
/*
* At this point, alive[x] will be one of two values:
* 0x00 for a dead cell
* 0xFF for an alive cell.
*
* Next, convert alive cells to green pixel color.
* Use __builtin_shufflevector(..) to construct output pixels from
* concantination of alive vector and kZero255 const vector.
* Indices 0..15 select the 16 cells from alive vector.
* Index 16 is zero constant from kZero255 constant vector.
* Index 17 is 255 constant from kZero255 constant vector.
* Output pixel color values are in BGRABGRABGRABGRA order.
* Since each pixel needs 4 bytes of color information, 16 cells will
* need to expand to 4 seperate 16 byte pixel splats.
*/
u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
/* Write 16 pixels to output pixel buffer. */
*(u8x16_t*)(pixel_line + 0) = pixel0_3;
*(u8x16_t*)(pixel_line + 4) = pixel4_7;
*(u8x16_t*)(pixel_line + 8) = pixel8_11;
*(u8x16_t*)(pixel_line + 12) = pixel12_15;
/* Convert alive mask to 1 or 0 and store in destination cell array. */
*(u8x16_t*)dst = alive & kOne;
/* Increment pointers. */
pixel_line += 16;
dst += 16;
src0 += 16;
src1 += 16;
src2 += 16;
/* Shift source over by 16 cells and read the next 16 cells. */
src00 = src01;
src01 = *(u8x16_t*)&src0[16];
src10 = src11;
src11 = *(u8x16_t*)&src1[16];
src20 = src21;
src21 = *(u8x16_t*)&src2[16];
}
/*
* The SIMD loop above does 16 cells at a time. The loop below is the
* regular version which processes one cell at a time. It is used to
* finish the remainder of the scanline not handled by the SIMD loop.
*/
for (; x < (g_Context.size.width - 1); ++x) {
/* Sum the jittered sources to construct neighbor count. */
int count = src0[0] + src0[1] + src0[2] +
src1[0] + + src1[2] +
src2[0] + src2[1] + src2[2];
/* Add the center cell. */
count = count + count + src1[1];
/* Use table lookup indexed by count to determine pixel & alive state. */
uint32_t color = kNeighborColors[count];
*pixel_line++ = color;
*dst++ = kIsAlive[count];
++src0;
++src1;
++src2;
}
}
cell_temp = g_Context.cell_in;
g_Context.cell_in = g_Context.cell_out;
g_Context.cell_out = cell_temp;
/* Unmap the range, we no longer need it. */
g_pImageData->Unmap(image);
/* Replace the contexts, and block until it's on the screen. */
g_pGraphics2D->ReplaceContents(g_Context.ctx, image);
g_pGraphics2D->Flush(g_Context.ctx, PP_BlockUntilComplete());
/* Release the image data, we no longer need it. */
g_pCore->ReleaseResource(image);
}
/*
 * Module entry point, registered through PPAPI_SIMPLE_REGISTER_MAIN. It is
 * not called main because that name would collide with main in
 * libppapi_cpp.
 *
 * Looks up the PPAPI interfaces used by this file, enables delivery of all
 * event types, then loops forever: drain pending events, render one frame.
 */
int example_main(int argc, char *argv[]) {
  PSEvent* pending;

  fprintf(stdout,"Started main.\n");

  g_pCore = (PPB_Core*)PSGetInterface(PPB_CORE_INTERFACE);
  g_pFullscreen = (PPB_Fullscreen*)PSGetInterface(PPB_FULLSCREEN_INTERFACE);
  g_pGraphics2D = (PPB_Graphics2D*)PSGetInterface(PPB_GRAPHICS_2D_INTERFACE);
  g_pInstance = (PPB_Instance*)PSGetInterface(PPB_INSTANCE_INTERFACE);
  g_pImageData = (PPB_ImageData*)PSGetInterface(PPB_IMAGEDATA_INTERFACE);
  g_pView = (PPB_View*)PSGetInterface(PPB_VIEW_INTERFACE);
  g_pInputEvent =
      (PPB_InputEvent*) PSGetInterface(PPB_INPUT_EVENT_INTERFACE);
  g_pKeyboardInput = (PPB_KeyboardInputEvent*)
      PSGetInterface(PPB_KEYBOARD_INPUT_EVENT_INTERFACE);
  g_pMouseInput =
      (PPB_MouseInputEvent*) PSGetInterface(PPB_MOUSE_INPUT_EVENT_INTERFACE);
  g_pTouchInput =
      (PPB_TouchInputEvent*) PSGetInterface(PPB_TOUCH_INPUT_EVENT_INTERFACE);

  PSEventSetFilter(PSE_ALL);

  for (;;) {
    /* Drain every queued event without blocking. */
    for (pending = PSEventTryAcquire(); pending != NULL;
         pending = PSEventTryAcquire()) {
      ProcessEvent(pending);
      PSEventRelease(pending);
    }

    /* Render a frame (blocks until presented) once graphics are bound. */
    if (g_Context.bound) {
      Render();
    }
  }
  return 0;
}
/*
 * Register the function to call once the Instance Object is initialized.
 * see: ppapi_simple/ps_main.h
 */
PPAPI_SIMPLE_REGISTER_MAIN(example_main);
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <ppapi/c/ppb_input_event.h>
#include <ppapi/cpp/fullscreen.h>
#include <ppapi/cpp/input_event.h>
#include <ppapi/cpp/var.h>
#include <ppapi/cpp/var_array.h>
#include <ppapi/cpp/var_array_buffer.h>
#include <ppapi/cpp/var_dictionary.h>
#include "ppapi_simple/ps.h"
#include "ppapi_simple/ps_context_2d.h"
#include "ppapi_simple/ps_event.h"
#include "ppapi_simple/ps_instance.h"
#include "ppapi_simple/ps_interface.h"
#include "ppapi_simple/ps_main.h"
#include "sdk_util/macros.h"
#include "sdk_util/thread_pool.h"
using namespace sdk_util; // For sdk_util::ThreadPool
namespace {
#define INLINE inline __attribute__((always_inline))

// BGRA helper macro, for constructing a pixel for a BGRA buffer.
// Each channel is converted to uint32_t before shifting: shifting a plain
// int such as 0xFF left by 24 produces a negative int, which cannot
// initialize a uint32_t table without a narrowing conversion.
#define MakeBGRA(b, g, r, a)                                        \
  ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(r) << 16) | \
   (static_cast<uint32_t>(g) << 8) | static_cast<uint32_t>(b))

// Number of simulation frames timed by one benchmark run.
const int kFramesToBenchmark = 100;

// Alignment, in bytes, requested for the cell buffers (one 128-bit vector).
const int kCellAlignment = 0x10;

// 128 bit vector type: sixteen unsigned 8-bit lanes.
typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));

// Helper function to broadcast x across all 16 lanes of a vector.
INLINE u8x16_t broadcast(uint8_t x) {
  u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
  return r;
}
// Convert a count value into a live (green) or dead (black) color value.
//
// The table is indexed by the weighted count computed in Life::wSimulate():
//   count = 2 * (sum of the eight neighbors) + (center cell)
// which ranges from 0 to 17, hence 18 entries. Indices 5, 6 and 7 are opaque
// green; every other index is opaque black.
const uint32_t kNeighborColors[] = {
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
MakeBGRA(0x00, 0x00, 0x00, 0xFF),
};
// These represent the new health value of a cell based on its neighboring
// values. The health is binary: either alive or dead.
//
// Indexed by the same weighted count as kNeighborColors
// (2 * neighbor sum + center cell, 0..17). Only indices 5, 6 and 7 yield a
// live cell (1); all others yield a dead cell (0).
const uint8_t kIsAlive[] = {
0, 0, 0, 0, 0, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0
};
// Timer helper for benchmarking.
//
// start_tv holds the wall-clock time sampled during static initialization;
// start_tv_retv is the gettimeofday() status of that sample (0 on success).
timeval start_tv;
int start_tv_retv = gettimeofday(&start_tv, NULL);

// Returns the seconds elapsed since start_tv was sampled, as a double, or
// 0.0 if either the initial or the current gettimeofday() call failed.
// Both the seconds and the microseconds of the start time are subtracted, so
// the result really is an elapsed time rather than being offset by the
// sub-second part of the start time.
inline double getseconds() {
  const double usec_to_sec = 0.000001;
  timeval tv;
  if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL)))
    return (tv.tv_sec - start_tv.tv_sec) +
           (tv.tv_usec - start_tv.tv_usec) * usec_to_sec;
  return 0.0;
}
} // namespace
// Conway's Game of Life demo. Owns the two cell buffers, the Pepper Simple
// 2D context and a worker thread pool; reacts to input events and to
// messages from JavaScript, and can time a fixed number of frames.
class Life {
 public:
  Life();
  virtual ~Life();

  // Runs a tick of the simulation (or a whole benchmark run) and updates
  // the 2D output.
  void Update();

  // Handle event from user, or message from JS.
  void HandleEvent(PSEvent* ps_event);

 private:
  // Copying is disabled (declared but never defined): the class owns raw
  // buffers, a 2D context and a thread pool, so a memberwise copy would
  // release each of them twice.
  Life(const Life&);
  Life& operator=(const Life&);

  // (Re)allocates the cell buffers when the 2D context size has changed.
  void UpdateContext();
  // Sets the four cells adjacent to (x, y) alive.
  void DrawCell(int32_t x, int32_t y);
  // Scatters live cells around every active touch point.
  void ProcessTouchEvent(const pp::TouchInputEvent& touches);
  // Posts {"message": message, "value": value} to JavaScript.
  void PostUpdateMessage(const char* message, double value);
  void StartBenchmark();
  void EndBenchmark();
  // Randomizes the outermost ring of cells.
  void Stir();
  // Simulates scanline y.
  void wSimulate(int y);
  // Static trampoline into wSimulate(); |data| is the Life instance.
  static void wSimulateEntry(int y, void* data);
  // Simulates one full generation and swaps the cell buffers.
  void Simulate();

  bool simd_;                    // Use the 16-cells-at-a-time code path.
  bool multithread_;             // Dispatch scanlines to workers_.
  bool benchmarking_;            // A benchmark run has been requested.
  int benchmark_frame_counter_;  // Frames left in the current benchmark.
  double bench_start_time_;
  double bench_end_time_;
  uint8_t* cell_in_;             // Current generation, one byte per cell.
  uint8_t* cell_out_;            // Next generation.
  int32_t cell_stride_;          // Bytes between consecutive cell rows.
  int32_t width_;                // Size the cell buffers were allocated for.
  int32_t height_;
  PSContext2D_t* ps_context_;
  ThreadPool* workers_;
};
// Starts with SIMD and multi-threading enabled, no benchmark running and no
// cell buffers. Allocates the BGRA 2D context, creates a worker pool with
// one thread per online processor (never fewer than two), and asks Pepper
// Simple to deliver every event type.
Life::Life()
    : simd_(true),
      multithread_(true),
      benchmarking_(false),
      benchmark_frame_counter_(0),
      bench_start_time_(0.0),
      bench_end_time_(0.0),
      cell_in_(NULL),
      cell_out_(NULL),
      cell_stride_(0),
      width_(0),
      height_(0) {
  ps_context_ = PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL);

  // sysconf() reports the processor count; clamp to a minimum of two.
  int pool_size = sysconf(_SC_NPROCESSORS_ONLN);
  pool_size = (pool_size < 2) ? 2 : pool_size;
  workers_ = new ThreadPool(pool_size);

  PSEventSetFilter(PSE_ALL);
}
// Releases everything the instance owns: the worker pool, the 2D context
// and both cell buffers. The pool is destroyed first so that the buffers are
// not freed before the pool object is gone. free(NULL) is a no-op, so this
// is safe when the buffers were never allocated.
Life::~Life() {
  delete workers_;
  PSContext2DFree(ps_context_);
  free(cell_in_);
  free(cell_out_);
}
// Brings the cell buffers in line with the current size of the 2D context.
//
// The row pitch is always recomputed (context width rounded up to a multiple
// of kCellAlignment). The buffers are reallocated only when the context size
// differs from the size they were last allocated for: cell_in_ is filled
// with random 0/1 cells and cell_out_ is zeroed.
//
// If an allocation fails, both buffers are left NULL and the remembered size
// is reset so that a later call tries again; Update(), Stir() and DrawCell()
// skip their work while the buffers are NULL.
void Life::UpdateContext() {
  cell_stride_ = (ps_context_->width + kCellAlignment - 1) &
                 ~(kCellAlignment - 1);
  if (ps_context_->width == width_ && ps_context_->height == height_)
    return;

  // Widen before multiplying so a large field cannot overflow int.
  const size_t size = static_cast<size_t>(cell_stride_) * ps_context_->height;
  // The SIMD path in wSimulate() loads one 16-byte vector ahead of the cells
  // it is consuming; pad the allocation so that look-ahead on the final row
  // stays inside the buffer.
  const size_t alloc_size = size + 2 * kCellAlignment;

  free(cell_in_);
  free(cell_out_);
  cell_in_ = NULL;
  cell_out_ = NULL;

  // Allocate both buffers aligned on kCellAlignment bytes.
  void* in_buffer = NULL;
  void* out_buffer = NULL;
  const bool in_ok =
      posix_memalign(&in_buffer, kCellAlignment, alloc_size) == 0;
  const bool out_ok =
      posix_memalign(&out_buffer, kCellAlignment, alloc_size) == 0;
  if (!in_ok || !out_ok) {
    // Only free what was actually allocated.
    if (in_ok) free(in_buffer);
    if (out_ok) free(out_buffer);
    width_ = 0;
    height_ = 0;
    return;
  }

  // Zero everything first so the padding bytes are initialized.
  memset(in_buffer, 0, alloc_size);
  memset(out_buffer, 0, alloc_size);
  cell_in_ = static_cast<uint8_t*>(in_buffer);
  cell_out_ = static_cast<uint8_t*>(out_buffer);
  for (size_t index = 0; index < size; index++) {
    cell_in_[index] = rand() & 1;
  }
  width_ = ps_context_->width;
  height_ = ps_context_->height;
}
// Marks the four cells directly left, right, above and below (x, y) as alive
// in the input buffer. Ignored when no buffer exists or when (x, y) lies on,
// or outside, the one-cell border of the field.
void Life::DrawCell(int32_t x, int32_t y) {
  if (cell_in_ == NULL)
    return;

  const bool x_inside = (x > 0) && (x < ps_context_->width - 1);
  const bool y_inside = (y > 0) && (y < ps_context_->height - 1);
  if (!x_inside || !y_inside)
    return;

  uint8_t* const center = cell_in_ + x + y * cell_stride_;
  center[-1] = 1;
  center[1] = 1;
  center[-cell_stride_] = 1;
  center[cell_stride_] = 1;
}
// For every active touch point, scatters random live cells inside the circle
// described by the touch radius. The number of attempts per touch point is
// 1/100th of the touch area.
void Life::ProcessTouchEvent(const pp::TouchInputEvent& touches) {
  const uint32_t touch_count =
      touches.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES);

  for (uint32_t t = 0; t < touch_count; t++) {
    pp::TouchPoint touch =
        touches.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES, t);
    const int radius = static_cast<int>(touch.radii().x());
    const int cx = static_cast<int>(touch.position().x());
    const int cy = static_cast<int>(touch.position().y());
    // attempts = 1/100th the area of touch point
    const uint32_t attempts =
        static_cast<uint32_t>(M_PI * radius * radius / 100.0f);

    for (uint32_t n = 0; n < attempts; n++) {
      const int dx = rand() % (radius * 2) - radius;
      const int dy = rand() % (radius * 2) - radius;
      // Discard offsets that fall outside the touch circle.
      if (dx * dx + dy * dy > radius * radius)
        continue;
      DrawCell(cx + dx, cy + dy);
    }
  }
}
void Life::PostUpdateMessage(const char* message_name, double value) {
pp::VarDictionary message;
message.Set("message", message_name);
message.Set("value", value);
PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message.pp_var());
}
// Arms a benchmark run: prints the active configuration, records the start
// time and schedules kFramesToBenchmark frames, which Update() then
// simulates.
void Life::StartBenchmark() {
  const char* const simd_state = simd_ ? "enabled" : "disabled";
  const char* const thread_state = multithread_ ? "enabled" : "disabled";
  printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n",
         simd_state,
         thread_state,
         ps_context_->width,
         ps_context_->height);

  benchmarking_ = true;
  bench_start_time_ = getseconds();
  benchmark_frame_counter_ = kFramesToBenchmark;
}
// Finishes a benchmark run: records the end time, clears the benchmarking
// flag, prints the elapsed time and reports it to JavaScript as a
// "benchmark_result" message.
void Life::EndBenchmark() {
  bench_end_time_ = getseconds();
  benchmarking_ = false;

  const double elapsed = bench_end_time_ - bench_start_time_;
  printf("Finished - benchmark took %f seconds\n", elapsed);

  // Send benchmark result to JS.
  PostUpdateMessage("benchmark_result", elapsed);
}
// Dispatches one Pepper Simple event.
//
// The 2D context sees the event first; if it consumes it, the cell buffers
// are re-synchronized with the context and nothing else happens. Otherwise:
//   - input events: left-button mouse down/move draws at the pointer, touch
//     start/move scatters cells, and key down toggles fullscreen;
//   - dictionary messages from JS: "run_benchmark" starts a benchmark unless
//     one is already running, "set_simd" and "set_threading" store the
//     boolean "value".
// Everything else is ignored.
void Life::HandleEvent(PSEvent* ps_event) {
  // Give the 2D context a chance to process the event.
  if (PSContext2DHandleEvent(ps_context_, ps_event) != 0) {
    UpdateContext();
    return;
  }

  if (ps_event->type == PSE_INSTANCE_HANDLEINPUT) {
    pp::InputEvent event(ps_event->as_resource);
    switch (event.GetType()) {
      case PP_INPUTEVENT_TYPE_MOUSEDOWN:
      case PP_INPUTEVENT_TYPE_MOUSEMOVE: {
        pp::MouseInputEvent mouse(event);
        // Only draw while the left button is down.
        if (mouse.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) {
          PP_Point location = mouse.GetPosition();
          DrawCell(location.x, location.y);
        }
        break;
      }
      case PP_INPUTEVENT_TYPE_TOUCHSTART:
      case PP_INPUTEVENT_TYPE_TOUCHMOVE: {
        pp::TouchInputEvent touches(event);
        ProcessTouchEvent(touches);
        break;
      }
      case PP_INPUTEVENT_TYPE_KEYDOWN: {
        pp::Fullscreen fullscreen(PSInstance::GetInstance());
        const bool was_fullscreen = fullscreen.IsFullscreen();
        fullscreen.SetFullscreen(!was_fullscreen);
        break;
      }
      default:
        break;
    }
    return;
  }

  if (ps_event->type != PSE_INSTANCE_HANDLEMESSAGE)
    return;

  // Convert Pepper Simple message to PPAPI C++ vars.
  pp::Var var(ps_event->as_var);
  if (!var.is_dictionary())
    return;

  pp::VarDictionary dictionary(var);
  const std::string command = dictionary.Get("message").AsString();
  if (command == "set_simd") {
    simd_ = dictionary.Get("value").AsBool();
  } else if (command == "set_threading") {
    multithread_ = dictionary.Get("value").AsBool();
  } else if (command == "run_benchmark" && !benchmarking_) {
    StartBenchmark();
  }
}
// Re-randomizes the outermost ring of cells (top and bottom rows, then left
// and right columns) of the input buffer. Does nothing until both cell
// buffers exist.
void Life::Stir() {
  const int32_t cols = ps_context_->width;
  const int32_t rows = ps_context_->height;
  const int32_t pitch = cell_stride_;
  const int32_t last_row = (rows - 1) * pitch;

  if (!cell_in_ || !cell_out_)
    return;

  // Top and bottom rows.
  for (int32_t c = 0; c < cols; ++c) {
    cell_in_[c] = rand() & 1;
    cell_in_[c + last_row] = rand() & 1;
  }
  // Left and right columns.
  for (int32_t r = 0; r < rows; ++r) {
    const int32_t row_start = r * pitch;
    cell_in_[row_start] = rand() & 1;
    cell_in_[row_start + (cols - 1)] = rand() & 1;
  }
}
// Simulates scanline |y|: reads rows y - 1, y and y + 1 of cell_in_, writes
// the next state of row y to cell_out_ and the matching colors to row y of
// the 2D context's pixel buffer. Rows on the top and bottom border are
// skipped. When simd_ is set, cells are processed 16 at a time and the
// scalar loop only finishes the remainder of the row; otherwise the scalar
// loop processes the whole row.
void Life::wSimulate(int y) {
// Don't run simulation on top and bottom borders
if (y < 1 || y >= ps_context_->height - 1)
return;
// Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
// wide perimeter is excluded from the simulation update; only cells from
// x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
// src0/src1/src2 start at column 0 of the rows above, at and below y.
uint8_t *src0 = (cell_in_ + (y - 1) * cell_stride_);
uint8_t *src1 = src0 + cell_stride_;
uint8_t *src2 = src1 + cell_stride_;
// dst starts at column 1, the first cell that is updated.
uint8_t *dst = (cell_out_ + y * cell_stride_) + 1;
uint32_t *pixels = static_cast<uint32_t *>(ps_context_->data);
// NOTE(review): pixel_line starts at column 0 while dst starts at column 1,
// so the color of cell x is written to pixel x - 1 -- confirm this one-pixel
// shift is intended.
uint32_t *pixel_line = // stride is divided by the pixel size (presumably bytes)
(pixels + y * ps_context_->stride / sizeof(uint32_t));
int32_t x = 1;
if (simd_) {
const u8x16_t kOne = broadcast(1);
const u8x16_t kFour = broadcast(4);
const u8x16_t kEight = broadcast(8);
const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Prime the src. Each row keeps two vectors in flight: the 16 cells being
// processed and the 16 cells after them (the look-ahead).
u8x16_t src00 = *reinterpret_cast<u8x16_t*>(&src0[0]);
u8x16_t src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
u8x16_t src10 = *reinterpret_cast<u8x16_t*>(&src1[0]);
u8x16_t src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
u8x16_t src20 = *reinterpret_cast<u8x16_t*>(&src2[0]);
u8x16_t src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
// This inner loop is SIMD - each loop iteration will process 16 cells.
for (; (x + 15) < (ps_context_->width - 1); x += 16) {
// Construct jittered source temps, using __builtin_shufflevector(..) to
// extract a shifted 16 element vector from the 32 element concatenation
// of two source vectors.
u8x16_t src0j0 = src00;
u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
u8x16_t src1j0 = src10;
u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
u8x16_t src2j0 = src20;
u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
// Sum the jittered sources to construct neighbor count.
u8x16_t count = src0j0 + src0j1 + src0j2 +
src1j0 + + src1j2 +
src2j0 + src2j1 + src2j2;
// Double the neighbor sum and add the center cell.
count = count + count + src1j1;
// If count > 4 and < 8, center cell will be alive in the next frame.
u8x16_t alive1 = count > kFour;
u8x16_t alive2 = count < kEight;
// Intersect the two comparisons from above.
u8x16_t alive = alive1 & alive2;
// At this point, alive[x] will be one of two values:
// 0x00 for a dead cell
// 0xFF for an alive cell.
//
// Next, convert alive cells to green pixel color.
// Use __builtin_shufflevector(..) to construct output pixels from
// concatenation of alive vector and kZero255 const vector.
// Indices 0..15 select the 16 cells from alive vector.
// Index 16 is zero constant from kZero255 constant vector.
// Index 17 is 255 constant from kZero255 constant vector.
// Output pixel color values are in BGRABGRABGRABGRA order.
// Since each pixel needs 4 bytes of color information, 16 cells will
// need to expand to 4 separate 16 byte pixel splats.
u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
// Write 16 pixels to output pixel buffer.
*reinterpret_cast<u8x16_t*>(pixel_line + 0) = pixel0_3;
*reinterpret_cast<u8x16_t*>(pixel_line + 4) = pixel4_7;
*reinterpret_cast<u8x16_t*>(pixel_line + 8) = pixel8_11;
*reinterpret_cast<u8x16_t*>(pixel_line + 12) = pixel12_15;
// Convert alive mask to 1 or 0 and store in destination cell array.
*reinterpret_cast<u8x16_t*>(dst) = alive & kOne;
// Increment pointers.
pixel_line += 16;
dst += 16;
src0 += 16;
src1 += 16;
src2 += 16;
// Shift source over by 16 cells and read the next 16 cells.
// NOTE(review): this look-ahead load also runs on the final iteration,
// reading 16 bytes beyond the cells this row consumes -- confirm the
// cell buffers are large enough for that on the last row.
src00 = src01;
src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
src10 = src11;
src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
src20 = src21;
src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
}
}
// The SIMD loop above does 16 cells at a time. The loop below is the
// regular version which processes one cell at a time. It is used to
// finish the remainder of the scanline not handled by the SIMD loop.
for (; x < (ps_context_->width - 1); ++x) {
// Sum the jittered sources to construct neighbor count.
int count = src0[0] + src0[1] + src0[2] +
src1[0] + + src1[2] +
src2[0] + src2[1] + src2[2];
// Double the neighbor sum and add the center cell.
count = count + count + src1[1];
// Use table lookup indexed by count to determine pixel & alive state.
uint32_t color = kNeighborColors[count];
*pixel_line++ = color;
*dst++ = kIsAlive[count];
++src0;
++src1;
++src2;
}
}
// Static entry point for worker thread.
void Life::wSimulateEntry(int slice, void* thiz) {
static_cast<Life*>(thiz)->wSimulate(slice);
}
// Advances the simulation by one generation: stirs the border, simulates
// every scanline (on the worker pool when multi-threading is enabled,
// otherwise inline on the calling thread) and finally swaps the input and
// output cell buffers.
void Life::Simulate() {
  // Stir up the edges to prevent the simulation from reaching steady state.
  Stir();

  if (!multithread_) {
    // Single-threaded: walk the scanlines on this thread.
    for (int row = 0; row < ps_context_->height; row++) {
      wSimulateEntry(row, this);
    }
  } else {
    // Multi-threaded: hand one task per scanline to the worker pool.
    workers_->Dispatch(ps_context_->height, wSimulateEntry, this);
  }

  // The generation just written becomes the input of the next one.
  uint8_t* const finished = cell_out_;
  cell_out_ = cell_in_;
  cell_in_ = finished;
}
// Produces one presented frame. Acquires the 2D buffer, and skips the frame
// if either the pixel data or the cell buffers are unavailable. Normally a
// single generation is simulated; while benchmarking, generations are
// simulated until the benchmark frame counter reaches zero and the benchmark
// is then closed. The buffer is swapped at the end.
void Life::Update() {
  PSContext2DGetBuffer(ps_context_);
  if (ps_context_->data == NULL)
    return;

  // If we somehow have not allocated these pointers yet, skip this frame.
  if (cell_in_ == NULL || cell_out_ == NULL)
    return;

  // Always simulate at least one generation.
  Simulate();
  if (benchmarking_) {
    // Run the remaining benchmark generations back to back.
    while (--benchmark_frame_counter_ > 0) {
      Simulate();
    }
    EndBenchmark();
  }

  PSContext2DSwapBuffer(ps_context_);
}
// Starting point for the module. We do not use main since it would
// collide with main in libppapi_cpp.
int example_main(int argc, char* argv[]) {
Life life;
while (true) {
PSEvent* ps_event;
// Consume all available events
while ((ps_event = PSEventTryAcquire()) != NULL) {
life.HandleEvent(ps_event);
PSEventRelease(ps_event);
}
// Do simulation, render and present.
life.Update();
}
return 0;
}
// Register the function to call once the Instance Object is initialized.
// see: ppapi_simple/ps_main.h
PPAPI_SIMPLE_REGISTER_MAIN(example_main);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment