add browser hand tracking source

This commit is contained in:
Tom Boullay
2026-05-06 23:23:04 +01:00
parent d7dd76a853
commit 5824ae162a
6 changed files with 318 additions and 3 deletions
+10 -3
View File
@@ -8,6 +8,7 @@
"name": "la-fabrik", "name": "la-fabrik",
"version": "0.0.1", "version": "0.0.1",
"dependencies": { "dependencies": {
"@mediapipe/tasks-vision": "^0.10.35",
"@react-three/drei": "^10.7.7", "@react-three/drei": "^10.7.7",
"@react-three/fiber": "^9.6.1", "@react-three/fiber": "^9.6.1",
"@react-three/rapier": "^2.2.0", "@react-three/rapier": "^2.2.0",
@@ -601,9 +602,9 @@
} }
}, },
"node_modules/@mediapipe/tasks-vision": { "node_modules/@mediapipe/tasks-vision": {
"version": "0.10.17", "version": "0.10.35",
"resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.17.tgz", "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.35.tgz",
"integrity": "sha512-CZWV/q6TTe8ta61cZXjfnnHsfWIdFhms03M9T7Cnd5y2mdpylJM0rF1qRq+wsQVRMLz1OYPVEBU9ph2Bx8cxrg==", "integrity": "sha512-HOvadwVRE6JC+45nyYhmnywnr5h/J8KZvOeUNVOG9q/0875pZgItznFB9bRTvLc264YSJqiZ1NsIpCStJw/egg==",
"license": "Apache-2.0" "license": "Apache-2.0"
}, },
"node_modules/@monogrid/gainmap-js": { "node_modules/@monogrid/gainmap-js": {
@@ -709,6 +710,12 @@
} }
} }
}, },
"node_modules/@react-three/drei/node_modules/@mediapipe/tasks-vision": {
"version": "0.10.17",
"resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.17.tgz",
"integrity": "sha512-CZWV/q6TTe8ta61cZXjfnnHsfWIdFhms03M9T7Cnd5y2mdpylJM0rF1qRq+wsQVRMLz1OYPVEBU9ph2Bx8cxrg==",
"license": "Apache-2.0"
},
"node_modules/@react-three/fiber": { "node_modules/@react-three/fiber": {
"version": "9.6.1", "version": "9.6.1",
"resolved": "https://registry.npmjs.org/@react-three/fiber/-/fiber-9.6.1.tgz", "resolved": "https://registry.npmjs.org/@react-three/fiber/-/fiber-9.6.1.tgz",
+1
View File
@@ -17,6 +17,7 @@
"typecheck": "tsc -b" "typecheck": "tsc -b"
}, },
"dependencies": { "dependencies": {
"@mediapipe/tasks-vision": "^0.10.35",
"@react-three/drei": "^10.7.7", "@react-three/drei": "^10.7.7",
"@react-three/fiber": "^9.6.1", "@react-three/fiber": "^9.6.1",
"@react-three/rapier": "^2.2.0", "@react-three/rapier": "^2.2.0",
+4
View File
@@ -7,6 +7,10 @@ export const HAND_TRACKING_TARGET_FPS = 10;
export const HAND_TRACKING_JPEG_QUALITY = 0.55; export const HAND_TRACKING_JPEG_QUALITY = 0.55;
export const HAND_TRACKING_CAMERA_TIMEOUT_MS = 8_000; export const HAND_TRACKING_CAMERA_TIMEOUT_MS = 8_000;
export const HAND_TRACKING_RESPONSE_TIMEOUT_MS = 1_500; export const HAND_TRACKING_RESPONSE_TIMEOUT_MS = 1_500;
/**
 * Base URL of the MediaPipe Tasks Vision WASM fileset used by the in-browser
 * hand tracker. The version is pinned in the path (0.10.35).
 * NOTE(review): presumably this should stay in sync with the installed
 * `@mediapipe/tasks-vision` package version — confirm when upgrading.
 */
export const HAND_TRACKING_BROWSER_WASM_URL =
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.35/wasm";
/**
 * URL of the hand landmarker model bundle (`hand_landmarker.task`, float16
 * variant) that the in-browser hand tracker loads.
 */
export const HAND_TRACKING_BROWSER_MODEL_URL =
"https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task";
export function getHandTrackingWsUrl(): string { export function getHandTrackingWsUrl(): string {
const configuredUrl = import.meta.env.VITE_HAND_TRACKING_WS_URL; const configuredUrl = import.meta.env.VITE_HAND_TRACKING_WS_URL;
@@ -0,0 +1,184 @@
import { useEffect, useRef, useState } from "react";
import {
HAND_TRACKING_CAMERA_TIMEOUT_MS,
HAND_TRACKING_FRAME_HEIGHT,
HAND_TRACKING_FRAME_WIDTH,
HAND_TRACKING_TARGET_FPS,
} from "@/data/handTrackingConfig";
import {
convertBrowserHandResult,
getBrowserHandLandmarker,
} from "@/lib/handTracking/browserHandTracking";
import type { HandTrackingSnapshot } from "@/types/handTracking/handTracking";
/** Options accepted by the `useBrowserHandTracking` hook. */
interface UseBrowserHandTrackingOptions {
/** When false, the hook's effect returns early: no camera request and no detection loop. */
enabled: boolean;
}
/**
 * Snapshot used as the hook's initial state: no hands, "idle" status,
 * "inactive" usage, and neither a server status label nor an error.
 */
const INITIAL_SNAPSHOT: HandTrackingSnapshot = {
hands: [],
status: "idle",
usageStatus: "inactive",
serverStatus: null,
error: null,
};
/**
 * Requests a camera stream and rejects if the browser has not answered within
 * `HAND_TRACKING_CAMERA_TIMEOUT_MS`.
 *
 * If the stream arrives after the timeout already fired, its tracks are
 * stopped so the camera is not left running for a caller that gave up.
 *
 * @param constraints - Constraints forwarded unchanged to `getUserMedia`.
 * @returns The granted media stream.
 * @throws Error when the timeout elapses first; otherwise whatever
 *   `getUserMedia` rejects with (e.g. permission denied).
 */
function getCameraStreamWithTimeout(
  constraints: MediaStreamConstraints,
): Promise<MediaStream> {
  let didTimeout = false;
  let timeoutId: number | undefined;

  const streamPromise = navigator.mediaDevices.getUserMedia(constraints);

  const timeoutPromise = new Promise<never>((_, reject) => {
    timeoutId = window.setTimeout(() => {
      didTimeout = true;
      reject(
        new Error(
          "Camera request timed out. Restart Arc or check camera permissions for localhost:5173.",
        ),
      );
    }, HAND_TRACKING_CAMERA_TIMEOUT_MS);
  });

  streamPromise.then(
    (stream) => {
      // Late arrival: nobody is waiting for this stream any more, release it.
      if (didTimeout) {
        stream.getTracks().forEach((track) => track.stop());
      }
    },
    () => {
      // The rejection is delivered to the caller through Promise.race below.
      // Handling it here only prevents this side-chain from surfacing as an
      // unhandled promise rejection.
    },
  );

  return Promise.race([streamPromise, timeoutPromise]).finally(() => {
    // Do not leave the timer pending once the race has settled.
    window.clearTimeout(timeoutId);
  });
}
/**
 * React hook that runs hand tracking on the user's camera inside the browser
 * and exposes the latest result as a `HandTrackingSnapshot`.
 *
 * While `enabled` is true the hook requests the front camera, plays it into an
 * off-DOM `<video>` element, loads the shared hand landmarker and then runs a
 * detection every `1000 / HAND_TRACKING_TARGET_FPS` milliseconds. Usage is
 * reported as "active" whenever at least one detected hand is a fist.
 *
 * All resources (interval, camera tracks, video element) are released when
 * `enabled` changes, when the component unmounts, and when start-up or
 * detection fails.
 *
 * @param options.enabled - Starts tracking when true; when false nothing is started.
 * @returns The current tracking snapshot (status, hands, error message).
 */
export function useBrowserHandTracking({
  enabled,
}: UseBrowserHandTrackingOptions): HandTrackingSnapshot {
  const [snapshot, setSnapshot] =
    useState<HandTrackingSnapshot>(INITIAL_SNAPSHOT);
  const videoRef = useRef<HTMLVideoElement | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const intervalRef = useRef<number | null>(null);

  useEffect(() => {
    if (!enabled) {
      return undefined;
    }

    let cancelled = false;

    // Releases everything this effect run may have acquired. Safe to call
    // more than once.
    const cleanup = (): void => {
      if (intervalRef.current !== null) {
        window.clearInterval(intervalRef.current);
        intervalRef.current = null;
      }
      streamRef.current?.getTracks().forEach((track) => track.stop());
      streamRef.current = null;
      if (videoRef.current !== null) {
        // Detach the stream so the element does not keep it referenced.
        videoRef.current.pause();
        videoRef.current.srcObject = null;
        videoRef.current = null;
      }
    };

    // Releases resources and publishes an error snapshot. Ignored once the
    // effect has been cancelled, because cleanup has already run by then.
    const fail = (error: unknown): void => {
      if (cancelled) return;
      cleanup();
      setSnapshot({
        hands: [],
        status: "error",
        usageStatus: "inactive",
        serverStatus: "Browser JS",
        error:
          error instanceof Error
            ? error.message
            : "Browser hand tracking failed",
      });
    };

    const start = async (): Promise<void> => {
      setSnapshot({
        hands: [],
        status: "requesting_camera",
        usageStatus: "available",
        serverStatus: "Browser JS",
        error: null,
      });

      try {
        const stream = await getCameraStreamWithTimeout({
          video: {
            width: HAND_TRACKING_FRAME_WIDTH,
            height: HAND_TRACKING_FRAME_HEIGHT,
            facingMode: "user",
          },
          audio: false,
        });

        if (cancelled) {
          // Cleanup already ran before the stream existed; stop it here.
          stream.getTracks().forEach((track) => track.stop());
          return;
        }

        // Register the stream right away so that every later exit path
        // (cancellation or failure) releases the camera through cleanup().
        streamRef.current = stream;

        setSnapshot((current) => ({
          ...current,
          status: "starting_camera",
        }));

        const video = document.createElement("video");
        video.muted = true;
        video.playsInline = true;
        video.srcObject = stream;
        videoRef.current = video;
        await video.play();

        if (cancelled) return;

        setSnapshot((current) => ({
          ...current,
          status: "connecting",
          serverStatus: "Loading Browser JS model",
        }));

        const handLandmarker = await getBrowserHandLandmarker();

        if (cancelled) return;

        setSnapshot((current) => ({
          ...current,
          status: "connected",
          serverStatus: "Browser JS",
        }));

        intervalRef.current = window.setInterval(() => {
          // Skip ticks until the video has a frame to analyse.
          if (video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA) return;

          try {
            const result = handLandmarker.detectForVideo(
              video,
              performance.now(),
            );
            const hands = convertBrowserHandResult(result);

            setSnapshot((current) => ({
              ...current,
              hands,
              usageStatus: hands.some((hand) => hand.isFist)
                ? "active"
                : "available",
              error: null,
            }));
          } catch (error) {
            // Stop the loop instead of throwing again on every tick.
            fail(error);
          }
        }, 1_000 / HAND_TRACKING_TARGET_FPS);
      } catch (error) {
        fail(error);
      }
    };

    void start();

    return () => {
      cancelled = true;
      cleanup();
    };
  }, [enabled]);

  return snapshot;
}
+117
View File
@@ -0,0 +1,117 @@
import {
HAND_TRACKING_BROWSER_MODEL_URL,
HAND_TRACKING_BROWSER_WASM_URL,
} from "@/data/handTrackingConfig";
import type {
HandTrackingHand,
HandTrackingLandmark,
} from "@/types/handTracking/handTracking";
// Types are derived from the package's own declarations via `typeof import`,
// so no runtime import of "@mediapipe/tasks-vision" is needed at module load.
type HandLandmarkerModule = typeof import("@mediapipe/tasks-vision");
// Instance type resolved by `HandLandmarker.createFromOptions`.
type HandLandmarker = Awaited<
ReturnType<HandLandmarkerModule["HandLandmarker"]["createFromOptions"]>
>;
// Result type returned by `detectForVideo`.
type HandLandmarkerResult = ReturnType<HandLandmarker["detectForVideo"]>;
// Module-level cache of the landmarker creation promise; populated lazily by
// getBrowserHandLandmarker() and shared by all of its callers.
let handLandmarkerPromise: Promise<HandLandmarker> | null = null;
/**
 * Computes the mean position of the landmarks found at `indices`.
 *
 * Indices with no landmark are ignored, and the mean is taken over the
 * landmarks that were actually found (not over `indices.length`).
 *
 * @param landmarks - Landmark list to read from.
 * @param indices - Positions in `landmarks` to average.
 * @returns The mean point, or the origin `{ x: 0, y: 0, z: 0 }` when none of
 *   the indices resolves to a landmark.
 */
function averageLandmarks(
  landmarks: HandTrackingLandmark[],
  indices: number[],
): HandTrackingLandmark {
  let sumX = 0;
  let sumY = 0;
  let sumZ = 0;
  let found = 0;

  for (const index of indices) {
    const landmark = landmarks[index];
    if (!landmark) continue;
    sumX += landmark.x;
    sumY += landmark.y;
    sumZ += landmark.z;
    found += 1;
  }

  // Avoid 0 / 0 = NaN when nothing could be averaged.
  if (found === 0) {
    return { x: 0, y: 0, z: 0 };
  }

  return {
    x: sumX / found,
    y: sumY / found,
    z: sumZ / found,
  };
}
/**
 * Euclidean distance between two landmarks in 3D.
 *
 * @param pointA - First point.
 * @param pointB - Second point.
 * @returns The straight-line distance between the two points.
 */
function distance(
  pointA: HandTrackingLandmark,
  pointB: HandTrackingLandmark,
): number {
  const deltaX = pointA.x - pointB.x;
  const deltaY = pointA.y - pointB.y;
  const deltaZ = pointA.z - pointB.z;

  const squaredLength = deltaX ** 2 + deltaY ** 2 + deltaZ ** 2;
  return Math.sqrt(squaredLength);
}
/**
 * Heuristic fist detector.
 *
 * A hand counts as a fist when each of the landmarks 8, 12, 16 and 20 lies
 * closer to the palm centre than 1.05 times the palm size, where the palm size
 * is the distance between landmark 0 (wrist) and landmark 9 (middle MCP).
 * NOTE(review): 8/12/16/20 are presumably the four fingertip landmarks of the
 * MediaPipe hand model — confirm against the model documentation.
 *
 * @param landmarks - Landmarks of a single hand.
 * @returns True when all four checked landmarks are folded towards the palm;
 *   false when a required landmark is missing or the palm size is not positive.
 */
function isFist(landmarks: HandTrackingLandmark[]): boolean {
  const wristPoint = landmarks[0];
  const middleKnuckle = landmarks[9];
  if (!(wristPoint && middleKnuckle)) return false;

  const referenceLength = distance(wristPoint, middleKnuckle);
  if (referenceLength <= 0) return false;

  const centre = averageLandmarks(landmarks, [0, 5, 9, 13, 17]);

  // Every checked landmark must exist and sit within the folded radius.
  for (const tipIndex of [8, 12, 16, 20]) {
    const tip = landmarks[tipIndex];
    if (!tip) return false;

    const isFolded = distance(tip, centre) / referenceLength < 1.05;
    if (!isFolded) return false;
  }

  return true;
}
/**
 * Returns the shared hand landmarker, creating it on first use.
 *
 * The MediaPipe package is imported dynamically, the WASM fileset is resolved
 * from `HAND_TRACKING_BROWSER_WASM_URL`, and the landmarker is created from
 * `HAND_TRACKING_BROWSER_MODEL_URL` in VIDEO running mode, for up to two
 * hands, with the GPU delegate. Concurrent and later callers share the same
 * promise.
 *
 * A failed load is not cached: the stored promise is cleared on rejection so
 * the next call retries instead of failing until the page is reloaded.
 *
 * @returns The ready-to-use hand landmarker.
 * @throws Whatever the dynamic import, fileset resolution or landmarker
 *   creation rejects with.
 */
export async function getBrowserHandLandmarker(): Promise<HandLandmarker> {
  const cached = handLandmarkerPromise;
  if (cached !== null) {
    return cached;
  }

  const pending = (async (): Promise<HandLandmarker> => {
    const { FilesetResolver, HandLandmarker } = await import(
      "@mediapipe/tasks-vision"
    );
    const vision = await FilesetResolver.forVisionTasks(
      HAND_TRACKING_BROWSER_WASM_URL,
    );

    return HandLandmarker.createFromOptions(vision, {
      baseOptions: {
        modelAssetPath: HAND_TRACKING_BROWSER_MODEL_URL,
        delegate: "GPU",
      },
      numHands: 2,
      runningMode: "VIDEO",
    });
  })();

  handLandmarkerPromise = pending;

  pending.catch(() => {
    // Drop the failed attempt so a later call can retry. The identity check
    // avoids clearing a newer attempt that replaced this one.
    if (handLandmarkerPromise === pending) {
      handLandmarkerPromise = null;
    }
  });

  return pending;
}
/**
 * Maps a hand landmarker detection result to the app's `HandTrackingHand` list.
 *
 * For each detected hand the landmarks are copied as plain `{ x, y, z }`
 * points, the hand position is the average of landmarks 0, 5, 9, 13 and 17,
 * and the name and score come from the first handedness category for that
 * hand ("Unknown" / 0 when there is none).
 *
 * @param result - Result returned by the landmarker's `detectForVideo`.
 * @returns One entry per detected hand, in the order of `result.landmarks`.
 */
export function convertBrowserHandResult(
  result: HandLandmarkerResult,
): HandTrackingHand[] {
  const hands: HandTrackingHand[] = [];

  for (const [handIndex, rawLandmarks] of result.landmarks.entries()) {
    // Keep only the coordinates; any other landmark fields are dropped.
    const points = rawLandmarks.map(({ x, y, z }) => ({ x, y, z }));
    const centre = averageLandmarks(points, [0, 5, 9, 13, 17]);
    const topCategory = result.handedness[handIndex]?.[0];

    const label = topCategory?.categoryName ?? "Unknown";
    const confidence = topCategory?.score ?? 0;

    hands.push({
      x: centre.x,
      y: centre.y,
      z: centre.z,
      landmarks: points,
      handedness: label,
      isFist: isFist(points),
      score: confidence,
    });
  }

  return hands;
}
+2
View File
@@ -4,6 +4,8 @@ export interface HandTrackingLandmark {
z: number; z: number;
} }
/**
 * Identifies which implementation produces hand tracking data.
 * NOTE(review): presumably "backend" is the WebSocket tracking server and
 * "browser" is the in-browser MediaPipe tracker — confirm against callers.
 */
export type HandTrackingSource = "backend" | "browser";
export interface HandTrackingHand { export interface HandTrackingHand {
x: number; x: number;
y: number; y: number;