So based on the discussion above + some other poking around, I put together the following example:
import * as THREE from 'three';
import { TrackballControls } from 'three/addons/controls/TrackballControls.js';
// Basic scene setup: perspective camera pulled back along +Z
const scene = new THREE.Scene();
const camera = new THREE.PerspectiveCamera(75, window.innerWidth / window.innerHeight, 0.1, 1000);
camera.position.z = 5;

// Renderer fills the window; preserveDrawingBuffer keeps pixels readable after present
const renderer = new THREE.WebGLRenderer({ preserveDrawingBuffer: true });
renderer.setSize(window.innerWidth, window.innerHeight);
document.body.appendChild(renderer.domElement);

// Trackball controls drive the visible camera
const controls = new TrackballControls(camera, renderer.domElement);
Object.assign(controls, {
  rotateSpeed: 5.0,
  zoomSpeed: 1.2,
  panSpeed: 0.8,
  noZoom: false,
  noPan: false,
  staticMoving: true,
  dynamicDampingFactor: 0.3,
});
// Bookkeeping arrays so the render loop can recolor individual planes later
const planeGeometries = [];
const planeMaterials = [];
const planeMeshes = [];
const NUM_PLANES = 10; // Number of plane geometries

// One 1x1 white plane per slot, positioned along a diagonal from (5,5,5) down
for (let i = 0; i < NUM_PLANES; i++) {
  const offset = 5 - i;
  const geometry = new THREE.PlaneGeometry(1, 1);
  const material = new THREE.MeshBasicMaterial({ color: 0xffffff });
  const mesh = new THREE.Mesh(geometry, material);
  mesh.position.set(offset, offset, offset);
  scene.add(mesh);
  planeGeometries.push(geometry);
  planeMaterials.push(material);
  planeMeshes.push(mesh);
}
// GPU-picking setup: a 1xNUM_PLANES render target where fragment i decides,
// entirely on the GPU, whether plane i lies within a pick radius of the mouse.
const pickingScene = new THREE.Scene();
const pickingTexture = new THREE.WebGLRenderTarget(NUM_PLANES, 1);
const pickingMaterial = new THREE.ShaderMaterial({
// Vertex shader
vertexShader: `
void main() {
gl_Position = projectionMatrix * modelViewMatrix * vec4(position, 1.0);
}
`,
// Fragment shader. gl_FragCoord.x at pixel i is i + 0.5, so int() truncation
// maps each fragment to exactly one plane index. Output: red when the plane's
// projected screen position is within `radius` pixels of the mouse, else blue.
fragmentShader: `
uniform mat4 visibleCameraViewMatrix;
uniform mat4 visibleCameraProjectionMatrix;
uniform vec3 planes[${NUM_PLANES}];
uniform vec2 mouse;
uniform vec2 resolution;
// Function to convert world coordinates to screen coordinates
vec2 worldToScreen(vec3 worldPosition) {
vec4 clipSpacePosition = visibleCameraProjectionMatrix * visibleCameraViewMatrix * vec4(worldPosition, 1.0);
vec3 ndc = clipSpacePosition.xyz / clipSpacePosition.w; // Normalized Device Coordinates
vec2 screenPosition = vec2(ndc.x, -ndc.y) * 0.5 + 0.5;
return screenPosition * resolution;
}
void main() {
// Calculate the index of the plane
int planeIndex = int(gl_FragCoord.x);
vec3 planePosition = planes[planeIndex];
// Convert plane world position to screen position
vec2 screenPos = worldToScreen(planePosition);
// Check if the mouse is over the plane
float radius = 10.0; // Radius for picking
// Convert from webgl coordinates (center is 0, 0) to pixel
// coordinates (top left is 0, 0, scaled by resolution)
vec2 mousePos = vec2((mouse.x + 1.0) / 2.0 * resolution.x, (1.0 - (mouse.y + 1.0) / 2.0) * resolution.y);
if (distance(screenPos, mousePos) < radius) {
gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0); // Red if under mouse
} else {
gl_FragColor = vec4(0.0, 0.0, 1.0, 1.0); // Blue otherwise
}
}
`,
uniforms: {
// Passed by reference: these are the visible camera's own Matrix4 objects, so
// the uniforms track the camera — but only once its matrices are recomputed
// each frame (see the render loop).
visibleCameraViewMatrix: { value: camera.matrixWorldInverse },
visibleCameraProjectionMatrix: { value: camera.projectionMatrix },
planes: {
// Flat [x0, y0, z0, x1, ...] array; snapshot of positions taken once here —
// NOTE(review): will not follow planes if they move after startup.
value: planeMeshes.map((mesh) => [mesh.position.x, mesh.position.y, mesh.position.z]).flat(),
},
// Mouse is stored in NDC (-1..1); the shader converts back to pixels itself
mouse: { value: new THREE.Vector2() },
resolution: { value: new THREE.Vector2(window.innerWidth, window.innerHeight) },
},
});
// A NUM_PLANES x 1 quad that exactly fills the picking camera's frustum, so
// each of the target's NUM_PLANES pixels maps to one fragment of this quad.
const pickingPlane = new THREE.PlaneGeometry(NUM_PLANES, 1);
const pickingMesh = new THREE.Mesh(pickingPlane, pickingMaterial);
pickingScene.add(pickingMesh);

// Orthographic camera framing the quad edge-to-edge
const aspect = NUM_PLANES / 1; // Width divided by height
const frustumHeight = 1; // Or any other value that matches your scene setup
const halfWidth = (aspect * frustumHeight) / 2;
const halfHeight = frustumHeight / 2;
const pickingCamera = new THREE.OrthographicCamera(-halfWidth, halfWidth, halfHeight, -halfHeight, 0, 10);
pickingCamera.position.z = 5;
// Track the mouse in NDC: client pixels (top-left origin) mapped to -1..1
// with center at (0, 0) and +Y up, matching what the fragment shader expects.
document.addEventListener('mousemove', (event) => {
  const ndcX = (event.clientX / window.innerWidth) * 2 - 1;
  const ndcY = -(event.clientY / window.innerHeight) * 2 + 1;
  pickingMaterial.uniforms.mouse.value.x = ndcX;
  pickingMaterial.uniforms.mouse.value.y = ndcY;
  console.log(pickingMaterial.uniforms.mouse.value);
});
// Debug toggle: when the call below is uncommented, the on-screen view flips
// between the picking target and the main scene once per second.
let renderPicking = false;
const alternatePicking = function () {
  setTimeout(function () {
    renderPicking = !renderPicking;
    alternatePicking();
  }, 1000);
};
// alternatePicking();
// Reusable readback buffer — allocating a fresh Uint8Array every frame just
// churns the garbage collector for no benefit.
const pickingPixelBuffer = new Uint8Array(4 * NUM_PLANES);

/**
 * Per-frame loop: render the 1xNUM_PLANES picking target, read it back to
 * decide which planes are under the mouse, recolor them, then render the
 * visible scene (or the picking scene, when the debug toggle is on).
 */
function render() {
  requestAnimationFrame(render);
  controls.update();

  // Bug fix: controls.update() just moved the visible camera, but its
  // matrixWorld/matrixWorldInverse are only recomputed when it is used as a
  // render camera — which happens at the END of this frame. Without this, the
  // picking shader's visibleCameraViewMatrix uniform is one frame stale.
  // Camera.updateMatrixWorld() also refreshes matrixWorldInverse.
  camera.updateMatrixWorld();

  // Render picking scene
  renderer.setRenderTarget(pickingTexture);
  renderer.setClearColor(new THREE.Color('green'), 1);
  renderer.clear();
  renderer.render(pickingScene, pickingCamera);

  // Read pixel data: the red channel of pixel i is 255 iff plane i is hit
  renderer.readRenderTargetPixels(pickingTexture, 0, 0, NUM_PLANES, 1, pickingPixelBuffer);
  for (let i = 0; i < NUM_PLANES; i++) {
    if (pickingPixelBuffer[i * 4] === 255) {
      // Red channel
      planeMaterials[i].color.set(0xff0000); // Highlight if under mouse
    } else {
      planeMaterials[i].color.set(0xffffff); // Default color
    }
  }

  // Render main scene
  renderer.setRenderTarget(null);
  if (renderPicking) {
    renderer.render(pickingScene, pickingCamera);
  } else {
    renderer.render(scene, camera);
  }
}
render();
The example implements GPU picking in such a way that multiple geometries can register as being under the mouse cursor. It does this by evaluating a separate 1xN pixel array, where each pixel represents whether or not the given geometry is near the mouse on screen. The positions of the actual geometries are passed in as uniforms; the shader calculates in parallel whether each geometry is near the mouse, and the meshes change color based on the hit test accordingly. Right now it only gives a binary value indicating whether each geometry is near the mouse, but it could easily return a distance that could then be used on the CPU side to figure out the top-most hit.
All that said, this seems rather poor for hit testing. Since I’m using planar geometries, picking on corners is really unreliable/nonexistent without making the radius fairly large, which in turn can result in false positives.
Also seems somewhat computationally inefficient, and this is before we add more logic to do things like lasso selection.
Having implemented this now, it feels like a mistake to move away from doing a constant look up with a single pixel buffer that is directly under the mouse (always O(1)) to a lookup that requires evaluating every single instance in the instanced mesh (which is O(N)). Still, maybe I’m wrong and this is fine?
Curious to hear other thoughts