比较实例与合并drawcall和本机之间的FPS [英] Comparing FPS between instance and merge drawcall and native

查看:213
本文介绍了比较实例与合并drawcall和本机之间的FPS的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用 Intel(R)Iris(R)Plus Graphics 655 卡在笔记本电脑上测试FPS.要使用实例渲染和merge-drawcall渲染测试threeJS示例.

I am testing the FPS with my laptop using the Intel(R) Iris(R) Plus Graphics 655 card. To test the threeJS example with Instance rendering and merge-drawcall rendering.

因此,我同时使用了QRCode_buffergeometry.json模型和suzanne_buffergeometry.json模型.其中QRCode_buffergeometry.json:顶点:12852,面:4284;suzanne_buffergeometry.json:顶点:1515,面:967

So I used both the QRCode_buffergeometry.json model and the suzanne_buffergeometry.json model. for the QRCode_buffergeometry.json: vertex:12852, face: 4284 and for the suzanne_buffergeometry.json: vertex:1515 face: 967

然后以 8000 计数的 suzanne_buffergeometry 的FPS:

Then the FPS for the suzanne_buffergeometry with 8000 count:

实例:36

INSTANCE: 36

合并:43

本机:旋转时为23到35

NATIVE: from 23 to 35 by rotation

对于具有 8000 个计数的 QRCode_buffergeometry 模型:

for the QRCode_buffergeometry model with 8000 count:

实例:9

合并:15-17

本机:17-19

我对这样的性能表现感到非常困惑.1.据我了解,无论我使用实例还是merge-drawcall,drawcall都固定为1,并且绘制的总面数相同,为什么merged-drawcall比实例好?由于面和顶点数都相同,所以我想在顶点着色器中进行顶点变换时也应该相同,那么为什么合并的速度更快?

I am very confused with this performance. 1. As far as my understanding, with no matter if i use instance or merge-drawcall, the drawcall is fixed to be 1 and the total face number to draw is same, why merged-drawcall is better than instance? Since the face and vertex number are both same, I suppose what happened in the vertex shader for transform the vertex should be same too, so why merged is faster?

  2. 对于QRCode_buffergeometry模型,本机与合并几乎相同,并且比实例更好,因此我猜CPU不是瓶颈,而GPU是瓶颈,但是最终绘制的数据应该是相同的,我的意思是最终绘制的面数应该相同,为什么本机要更快?实例不是应该是最好的方法吗?我很确定相机的远近裁剪面范围足够大,因此应该没有任何剔除问题.

  2. For the QRCode_buffergeometry model, native is almost the same as merged, and better than instance, so I guess the CPU is not the bottleneck but the GPU is. However, the final drawing data should be the same, I mean eventually the number of faces to be drawn should be the same, so why is native faster? Isn't instancing supposed to be the best way? I am pretty sure the camera's near-far range is big enough, so there should not be any culling issue.

当我尝试优化某个大型场景时,应该何时选择合并?何时选择实例?也许什么都不做会更好?

When I am trying to optimize some big scene, when should I pick merge? when to pick instance? and when maybe no doing anything is better?

有人能帮忙吗?

非常感谢~~~

示例代码附在这里

body { margin: 0; }

<div id="container"></div>
<script type="module">
import * as THREE from 'https://cdn.jsdelivr.net/npm/three@0.112.1/build/three.module.js';
import Stats from 'https://cdn.jsdelivr.net/npm/three@0.112.1/examples/jsm/libs/stats.module.js';
import {
  GUI
} from 'https://cdn.jsdelivr.net/npm/three@0.112.1/examples/jsm/libs/dat.gui.module.js';
import {
  OrbitControls
} from 'https://cdn.jsdelivr.net/npm/three@0.112.1/examples/jsm/controls/OrbitControls.js';
import {
  BufferGeometryUtils
} from 'https://cdn.jsdelivr.net/npm/three@0.112.1/examples/jsm/utils/BufferGeometryUtils.js';
// Shared module state. Everything except `material` is assigned in init();
// `material` is assigned when a model finishes loading in initMesh().
let container;
let stats;
let gui;
let guiStatsEl;
let camera;
let controls;
let scene;
let renderer;
let material;

// Rendering strategies offered in the GUI drop-down.
const Method = {
  INSTANCED: 'INSTANCED',
  MERGED: 'MERGED',
  NAIVE: 'NAIVE'
};

// Settings object bound to the GUI controllers.
const api = {
  method: Method.INSTANCED,
  mesh_number: 1,
  count_per_mesh: 1000
};

// Model under test, plus the per-model constants that depend on that choice:
// the upper bound for the random scale and the vertex/face counts shown in the GUI.
const modelName = 'suzanne_buffergeometry.json';
const isSuzanne = modelName === 'suzanne_buffergeometry.json';
const modelScale = isSuzanne ? 1 : 0.01;
const modelVertex = isSuzanne ? 1515 : 12852;
const modelFace = isSuzanne ? 967 : 4284;

// Bootstrap: set everything up, build the first batch of meshes, start the frame loop.
init();
initMesh();
animate();

//
/**
 * Empties the scene of meshes: every object flagged `isMesh` has its material
 * and geometry disposed and is then removed from the scene.
 */
function clean() {
  // Gather the meshes in a first pass so the removals below happen only after
  // the traversal has finished.
  const found = [];
  scene.traverse((node) => {
    if (node.isMesh) {
      found.push(node);
    }
  });

  found.forEach((mesh) => {
    mesh.material.dispose();
    mesh.geometry.dispose();
    scene.remove(mesh);
  });
}

/**
 * Writes a random transform into `matrix`: a position with each coordinate in
 * [-20, 20), a rotation with each Euler angle in [0, 2*PI), and a uniform scale
 * in [0, modelScale).
 */
var randomizeMatrix = (function() {
  // Scratch objects created once and reused by every call.
  const translation = new THREE.Vector3();
  const euler = new THREE.Euler();
  const orientation = new THREE.Quaternion();
  const scaling = new THREE.Vector3();

  const randomCoordinate = () => Math.random() * 40 - 20;
  const randomAngle = () => Math.random() * 2 * Math.PI;

  return function(matrix) {
    translation.set(randomCoordinate(), randomCoordinate(), randomCoordinate());
    euler.set(randomAngle(), randomAngle(), randomAngle());
    orientation.setFromEuler(euler);
    scaling.setScalar(Math.random() * modelScale);
    matrix.compose(translation, orientation, scaling);
  };
})();

/**
 * Rebuilds the benchmark content: clears the scene, requests the model once per
 * mesh (api.mesh_number loads) and, as each load completes, builds one mesh with
 * the strategy selected in api.method. Finally refreshes the GUI statistics
 * (draw calls, faces, vertices) for the current settings.
 *
 * The build itself happens inside the load callback, which runs later, so:
 *  - the "(build)" timer is started and stopped inside the callback, around the
 *    code that actually builds the mesh;
 *  - callbacks belonging to an earlier initMesh() call are ignored, otherwise
 *    changing the GUI while loads are in flight would add meshes for the old
 *    request after clean() and distort the measured FPS.
 */
function initMesh() {
  clean();

  // Stored on the function object (not in a module-level variable) so that it is
  // usable even when initMesh() is invoked before later top-level statements run.
  const generation = (initMesh.generation || 0) + 1;
  initMesh.generation = generation;

  for (let i = 0; i < api.mesh_number; i++) {
    new THREE.BufferGeometryLoader()
      .setPath('https://threejs.org/examples/models/json/')
      .load(modelName, function(geometry) {
        // A newer initMesh() call has superseded this request: nothing from it
        // may reach the scene.
        if (generation !== initMesh.generation) return;

        material = new THREE.MeshNormalMaterial();
        geometry.computeVertexNormals();

        const label = api.method + ' (build)';
        console.time(label);
        switch (api.method) {
          case Method.INSTANCED:
            makeInstanced(geometry);
            break;
          case Method.MERGED:
            makeMerged(geometry);
            break;
          case Method.NAIVE:
            makeNaive(geometry);
            break;
        }
        console.timeEnd(label);
      }, undefined, function(error) {
        console.error('Failed to load ' + modelName, error);
      });
  }

  // Instanced and merged rendering issue one draw call per mesh; the naive
  // strategy issues one per object.
  let drawCalls = 0;
  switch (api.method) {
    case Method.INSTANCED:
    case Method.MERGED:
      drawCalls = api.mesh_number;
      break;
    case Method.NAIVE:
      drawCalls = api.mesh_number * api.count_per_mesh;
      break;
  }
  guiStatsEl.innerHTML = [
    '<i>GPU draw calls</i>: ' + drawCalls,
    '<i>Face Number</i>: ' + (modelFace * api.mesh_number * api.count_per_mesh),
    '<i>Vertex Number</i>: ' + (modelVertex * api.mesh_number * api.count_per_mesh)
  ].join('<br/>');
}

/**
 * Adds a single THREE.InstancedMesh to the scene containing api.count_per_mesh
 * instances of `geometry`, each with its own random transform.
 *
 * @param geometry Geometry shared by all instances.
 * @param idx Not used by this function.
 */
function makeInstanced(geometry, idx) {
  const transform = new THREE.Matrix4();
  const instanced = new THREE.InstancedMesh(geometry, material, api.count_per_mesh);

  let slot = 0;
  while (slot < api.count_per_mesh) {
    // The same scratch matrix is refilled and handed to setMatrixAt for every slot.
    randomizeMatrix(transform);
    instanced.setMatrixAt(slot, transform);
    slot += 1;
  }

  scene.add(instanced);
}

/**
 * Adds a single THREE.Mesh to the scene whose geometry is the merge of
 * api.count_per_mesh clones of `geometry`, each clone having a random transform
 * applied to it before merging.
 *
 * @param geometry Source geometry; it is cloned, not modified.
 * @param idx Not used by this function.
 */
function makeMerged(geometry, idx) {
  const transform = new THREE.Matrix4();
  const transformedCopies = [];

  for (let n = 0; n < api.count_per_mesh; n += 1) {
    randomizeMatrix(transform);
    const copy = geometry.clone();
    copy.applyMatrix(transform);
    transformedCopies.push(copy);
  }

  const combined = BufferGeometryUtils.mergeBufferGeometries(transformedCopies);
  scene.add(new THREE.Mesh(combined, material));
}

/**
 * Adds api.count_per_mesh separate THREE.Mesh objects to the scene. They all
 * share `geometry` and the current material; each gets its own random transform.
 *
 * @param geometry Geometry shared by every mesh.
 * @param idx Not used by this function.
 */
function makeNaive(geometry, idx) {
  const transform = new THREE.Matrix4();

  let built = 0;
  while (built < api.count_per_mesh) {
    randomizeMatrix(transform);
    const single = new THREE.Mesh(geometry, material);
    single.applyMatrix(transform);
    scene.add(single);
    built += 1;
  }
}

/**
 * One-time setup: creates the scene, camera, renderer (attached to #container),
 * orbit controls, stats panel and GUI, registers the resize listener, and
 * exposes the scene on `window`.
 */
function init() {
  const viewportWidth = window.innerWidth;
  const viewportHeight = window.innerHeight;

  // scene
  scene = new THREE.Scene();
  scene.background = new THREE.Color(0xffffff);

  // camera
  camera = new THREE.PerspectiveCamera(70, viewportWidth / viewportHeight, 1, 100);
  camera.position.z = 30;

  // renderer
  renderer = new THREE.WebGLRenderer({ antialias: true });
  renderer.setPixelRatio(window.devicePixelRatio);
  renderer.setSize(viewportWidth, viewportHeight);
  renderer.outputEncoding = THREE.sRGBEncoding;
  container = document.getElementById('container');
  container.appendChild(renderer.domElement);

  // controls
  controls = new OrbitControls(camera, renderer.domElement);
  controls.autoRotate = true;

  // stats
  stats = new Stats();
  container.appendChild(stats.dom);

  // gui: every controller rebuilds the meshes when its value changes
  gui = new GUI();
  gui.add(api, 'method', Method).onChange(initMesh);
  [
    ['count_per_mesh', 20000],
    ['mesh_number', 200]
  ].forEach(([property, upperBound]) => {
    gui.add(api, property, 1, upperBound).step(1).onChange(initMesh);
  });

  // Extra list item inside the "Performance" folder; initMesh() writes the
  // draw-call / face / vertex summary into it.
  const statsFolder = gui.addFolder('Performance');
  guiStatsEl = document.createElement('li');
  guiStatsEl.classList.add('gui-stats');
  statsFolder.__ul.appendChild(guiStatsEl);
  statsFolder.open();

  // listeners
  window.addEventListener('resize', onWindowResize, false);

  // Make the scene reachable as window.scene.
  window.scene = scene;
}

//
/**
 * Resize handler: matches the camera's aspect ratio and the renderer's size to
 * the current window dimensions.
 */
function onWindowResize() {
  const { innerWidth, innerHeight } = window;

  camera.aspect = innerWidth / innerHeight;
  // The projection matrix is refreshed right after the aspect ratio changes.
  camera.updateProjectionMatrix();

  renderer.setSize(innerWidth, innerHeight);
}

/**
 * Frame loop: requests the next animation frame for itself, then updates the
 * controls and the stats panel and draws the scene through render().
 */
function animate() {
  requestAnimationFrame(animate);
  controls.update();
  stats.update();
  render();
}

function render() {
  renderer.render(scene, camera);
}

//
/**
 * Sums the byte length of a geometry's backing arrays: the index array (when
 * the geometry has an index) plus the array of every entry in
 * `geometry.attributes`.
 *
 * @param geometry Object with an optional `index` and an `attributes` map whose
 *   values expose an `array` with a `byteLength`.
 * @returns Total size in bytes.
 */
function getGeometryByteLength(geometry) {
  let byteCount = geometry.index ? geometry.index.array.byteLength : 0;

  for (const attributeName in geometry.attributes) {
    const attribute = geometry.attributes[attributeName];
    byteCount += attribute.array.byteLength;
  }

  return byteCount;
}
// Source: https://stackoverflow.com/a/18650828/1314762
/**
 * Formats a byte count as a short human-readable string using 1024-based units,
 * e.g. 1536 -> "1.5 KB" with one decimal.
 *
 * @param bytes Number of bytes.
 * @param decimals Maximum number of fraction digits. Missing or negative
 *   values are treated as 0. Trailing zeros are dropped.
 * @returns The formatted size, e.g. "0 bytes", "1 KB", "3 GB".
 */
function formatBytes(bytes, decimals) {
  if (bytes === 0) return '0 bytes';

  const k = 1024;
  const dm = decimals > 0 ? decimals : 0;
  const sizes = ['bytes', 'KB', 'MB', 'GB', 'TB'];

  // Pick the unit by repeated division rather than a logarithm: this can never
  // produce an index outside the table (values below 1 stay in "bytes", huge
  // values stay in the largest unit) and is not subject to rounding in Math.log.
  let value = Number(bytes);
  let unit = 0;
  while (Math.abs(value) >= k && unit < sizes.length - 1) {
    value /= k;
    unit += 1;
  }

  // parseFloat drops the trailing zeros that toFixed pads with.
  return parseFloat(value.toFixed(dm)) + ' ' + sizes[unit];
}
</script>

推荐答案

这只是猜测

  1. 默认情况下,Three.js会剔除位于视锥体之外的物体.

  1. Three.js by default culls if things are outside the frustum.

我们可以使用 mesh.frustumCulled = false 将其关闭.我没有注意到有什么区别,而且这一点应该会反映在绘制调用次数中.

We can turn this off with mesh.frustumCulled = false. I didn't notice a difference and this should show up in the draw count.

默认情况下,Three.js会将不透明对象按从前到后的顺序排序.

Three.js by default sorts opaque objects front to back.

这意味着在其他条件都相同的情况下,由于深度测试的存在,排序后的渲染会比未排序的更快.如果我把深度测试设置为始终通过

This means everything else being equal, sorted will run faster than unsorted because of the depth test. If I set the depth test to always

material.depthFunc = THREE.AlwaysDepth

这样一来,实例化渲染似乎比本机渲染稍快一些.当然,其他条件并不完全相同.

Then I seem to get slightly faster rendering with instanced vs native. Of course everything else is not equal.

Chrome中的问题.

An issue in Chrome.

如果我在Firefox或Safari中运行,我会得到预期的结果.合并>实例>本机

If I run in Firefox or Safari I get the expected results. Merged > Instanced > Native

这可能是一个bug,也可能是他们在规避某个驱动程序或安全性问题,而其他浏览器没有这样做.您得去问他们.

It could be a bug or it could be they're working around a driver or security issue that the other browsers are not. You'd have to ask.

这篇关于比较实例与合并drawcall和本机之间的FPS的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆