From 03dfef4aad0a3e8485765ee150dffdf12475543b Mon Sep 17 00:00:00 2001
From: Tom Boullay
Date: Wed, 6 May 2026 23:23:04 +0100
Subject: [PATCH] add browser hand tracking source

---
Review notes: line breaks and indentation were restored from a collapsed copy
of this patch, and the generic type arguments in the two new files were
reconstructed (confirm them against the original commit). Review fixes are
folded into the two new files, so the blob ids on their "index" lines predate
those fixes.

 package-lock.json                             |  13 +-
 package.json                                  |   1 +
 src/data/handTrackingConfig.ts                |   4 +
 .../handTracking/useBrowserHandTracking.ts    | 232 ++++++++++++++++++
 src/lib/handTracking/browserHandTracking.ts   | 157 ++++++++++++
 src/types/handTracking/handTracking.ts        |   2 +
 6 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 src/hooks/handTracking/useBrowserHandTracking.ts
 create mode 100644 src/lib/handTracking/browserHandTracking.ts

diff --git a/package-lock.json b/package-lock.json
index 4c37327..3a4a64a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,6 +8,7 @@
       "name": "la-fabrik",
       "version": "0.0.1",
       "dependencies": {
+        "@mediapipe/tasks-vision": "^0.10.35",
         "@react-three/drei": "^10.7.7",
         "@react-three/fiber": "^9.6.1",
         "@react-three/rapier": "^2.2.0",
@@ -601,9 +602,9 @@
       }
     },
     "node_modules/@mediapipe/tasks-vision": {
-      "version": "0.10.17",
-      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.17.tgz",
-      "integrity": "sha512-CZWV/q6TTe8ta61cZXjfnnHsfWIdFhms03M9T7Cnd5y2mdpylJM0rF1qRq+wsQVRMLz1OYPVEBU9ph2Bx8cxrg==",
+      "version": "0.10.35",
+      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.35.tgz",
+      "integrity": "sha512-HOvadwVRE6JC+45nyYhmnywnr5h/J8KZvOeUNVOG9q/0875pZgItznFB9bRTvLc264YSJqiZ1NsIpCStJw/egg==",
       "license": "Apache-2.0"
     },
     "node_modules/@monogrid/gainmap-js": {
@@ -709,6 +710,12 @@
         }
       }
     },
+    "node_modules/@react-three/drei/node_modules/@mediapipe/tasks-vision": {
+      "version": "0.10.17",
+      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.17.tgz",
+      "integrity": "sha512-CZWV/q6TTe8ta61cZXjfnnHsfWIdFhms03M9T7Cnd5y2mdpylJM0rF1qRq+wsQVRMLz1OYPVEBU9ph2Bx8cxrg==",
+      "license": "Apache-2.0"
+    },
     "node_modules/@react-three/fiber": {
       "version": "9.6.1",
       "resolved": "https://registry.npmjs.org/@react-three/fiber/-/fiber-9.6.1.tgz",
diff --git a/package.json b/package.json
index 4987af9..3a71c82 100644
--- a/package.json
+++ b/package.json
@@ -17,6 +17,7 @@
     "typecheck": "tsc -b"
   },
   "dependencies": {
+    "@mediapipe/tasks-vision": "^0.10.35",
     "@react-three/drei": "^10.7.7",
     "@react-three/fiber": "^9.6.1",
     "@react-three/rapier": "^2.2.0",
diff --git a/src/data/handTrackingConfig.ts b/src/data/handTrackingConfig.ts
index f07de54..e24bbe5 100644
--- a/src/data/handTrackingConfig.ts
+++ b/src/data/handTrackingConfig.ts
@@ -7,6 +7,10 @@ export const HAND_TRACKING_TARGET_FPS = 10;
 export const HAND_TRACKING_JPEG_QUALITY = 0.55;
 export const HAND_TRACKING_CAMERA_TIMEOUT_MS = 8_000;
 export const HAND_TRACKING_RESPONSE_TIMEOUT_MS = 1_500;
+export const HAND_TRACKING_BROWSER_WASM_URL =
+  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.35/wasm";
+export const HAND_TRACKING_BROWSER_MODEL_URL =
+  "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task";
 
 export function getHandTrackingWsUrl(): string {
   const configuredUrl = import.meta.env.VITE_HAND_TRACKING_WS_URL;
diff --git a/src/hooks/handTracking/useBrowserHandTracking.ts b/src/hooks/handTracking/useBrowserHandTracking.ts
new file mode 100644
index 0000000..b9ffd78
--- /dev/null
+++ b/src/hooks/handTracking/useBrowserHandTracking.ts
@@ -0,0 +1,232 @@
+import { useEffect, useRef, useState } from "react";
+import {
+  HAND_TRACKING_CAMERA_TIMEOUT_MS,
+  HAND_TRACKING_FRAME_HEIGHT,
+  HAND_TRACKING_FRAME_WIDTH,
+  HAND_TRACKING_TARGET_FPS,
+} from "@/data/handTrackingConfig";
+import {
+  convertBrowserHandResult,
+  getBrowserHandLandmarker,
+} from "@/lib/handTracking/browserHandTracking";
+import type { HandTrackingSnapshot } from "@/types/handTracking/handTracking";
+
+interface UseBrowserHandTrackingOptions {
+  enabled: boolean;
+}
+
+/** Snapshot reported before tracking starts and after it is disabled. */
+const INITIAL_SNAPSHOT: HandTrackingSnapshot = {
+  hands: [],
+  status: "idle",
+  usageStatus: "inactive",
+  serverStatus: null,
+  error: null,
+};
+
+/** Stops every track of `stream`; does nothing when there is no stream. */
+function stopStream(stream: MediaStream | null): void {
+  stream?.getTracks().forEach((track) => track.stop());
+}
+
+/**
+ * Requests a camera stream and rejects when the request is still pending
+ * after HAND_TRACKING_CAMERA_TIMEOUT_MS.
+ *
+ * A stream that only arrives after the timeout is stopped right away, because
+ * the caller has already been rejected and could never release it.
+ */
+function getCameraStreamWithTimeout(
+  constraints: MediaStreamConstraints,
+): Promise<MediaStream> {
+  let didTimeout = false;
+  let timeoutId: number | undefined;
+  const streamPromise = navigator.mediaDevices.getUserMedia(constraints);
+
+  const timeoutPromise = new Promise<never>((_, reject) => {
+    timeoutId = window.setTimeout(() => {
+      didTimeout = true;
+      reject(
+        new Error(
+          "Camera request timed out. Restart Arc or check camera permissions for localhost:5173.",
+        ),
+      );
+    }, HAND_TRACKING_CAMERA_TIMEOUT_MS);
+  });
+
+  streamPromise.then(
+    (stream) => {
+      if (didTimeout) stopStream(stream);
+    },
+    () => {
+      // The caller receives this rejection through Promise.race below; the
+      // handler only keeps this side chain from going unhandled.
+    },
+  );
+
+  // Clear the timer once the race settles so it does not outlive the request.
+  return Promise.race([streamPromise, timeoutPromise]).finally(() => {
+    window.clearTimeout(timeoutId);
+  });
+}
+
+/**
+ * Tracks hands from the camera entirely in the browser.
+ *
+ * While `enabled` is true the hook opens the user-facing camera, loads the
+ * hand landmarker and polls it HAND_TRACKING_TARGET_FPS times per second.
+ * Disabling the hook or unmounting releases the camera and resets the
+ * snapshot.
+ *
+ * @returns The latest hands together with the current status and error.
+ */
+export function useBrowserHandTracking({
+  enabled,
+}: UseBrowserHandTrackingOptions): HandTrackingSnapshot {
+  const [snapshot, setSnapshot] =
+    useState<HandTrackingSnapshot>(INITIAL_SNAPSHOT);
+  const videoRef = useRef<HTMLVideoElement | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+  const intervalRef = useRef<number | null>(null);
+
+  useEffect(() => {
+    if (!enabled) {
+      return undefined;
+    }
+
+    let cancelled = false;
+
+    const cleanup = (): void => {
+      if (intervalRef.current !== null) {
+        window.clearInterval(intervalRef.current);
+        intervalRef.current = null;
+      }
+
+      stopStream(streamRef.current);
+      streamRef.current = null;
+      videoRef.current = null;
+    };
+
+    const reportError = (error: unknown): void => {
+      setSnapshot({
+        hands: [],
+        status: "error",
+        usageStatus: "inactive",
+        serverStatus: "Browser JS",
+        error:
+          error instanceof Error
+            ? error.message
+            : "Browser hand tracking failed",
+      });
+    };
+
+    const start = async (): Promise<void> => {
+      setSnapshot({
+        hands: [],
+        status: "requesting_camera",
+        usageStatus: "available",
+        serverStatus: "Browser JS",
+        error: null,
+      });
+
+      // Declared outside `try` so the catch block can release the camera when
+      // playback or model loading fails after the stream was granted.
+      let stream: MediaStream | null = null;
+
+      try {
+        stream = await getCameraStreamWithTimeout({
+          video: {
+            width: HAND_TRACKING_FRAME_WIDTH,
+            height: HAND_TRACKING_FRAME_HEIGHT,
+            facingMode: "user",
+          },
+          audio: false,
+        });
+
+        if (cancelled) {
+          stopStream(stream);
+          return;
+        }
+
+        setSnapshot((current) => ({
+          ...current,
+          status: "starting_camera",
+        }));
+
+        const video = document.createElement("video");
+        video.muted = true;
+        video.playsInline = true;
+        video.srcObject = stream;
+        await video.play();
+
+        if (cancelled) {
+          stopStream(stream);
+          return;
+        }
+
+        setSnapshot((current) => ({
+          ...current,
+          status: "connecting",
+          serverStatus: "Loading Browser JS model",
+        }));
+
+        const handLandmarker = await getBrowserHandLandmarker();
+
+        if (cancelled) {
+          stopStream(stream);
+          return;
+        }
+
+        streamRef.current = stream;
+        videoRef.current = video;
+
+        setSnapshot((current) => ({
+          ...current,
+          status: "connected",
+          serverStatus: "Browser JS",
+        }));
+
+        intervalRef.current = window.setInterval(() => {
+          if (video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA) return;
+
+          try {
+            const result = handLandmarker.detectForVideo(
+              video,
+              performance.now(),
+            );
+            const hands = convertBrowserHandResult(result);
+
+            setSnapshot((current) => ({
+              ...current,
+              hands,
+              usageStatus: hands.some((hand) => hand.isFist)
+                ? "active"
+                : "available",
+              error: null,
+            }));
+          } catch (error) {
+            // Stop polling instead of throwing again on every tick.
+            cleanup();
+            reportError(error);
+          }
+        }, 1_000 / HAND_TRACKING_TARGET_FPS);
+      } catch (error) {
+        stopStream(stream);
+        if (cancelled) return;
+
+        reportError(error);
+      }
+    };
+
+    void start();
+
+    return () => {
+      cancelled = true;
+      cleanup();
+      // Drop the last hands and status so a disabled hook reports idle again.
+      setSnapshot(INITIAL_SNAPSHOT);
+    };
+  }, [enabled]);
+
+  return snapshot;
+}
diff --git a/src/lib/handTracking/browserHandTracking.ts b/src/lib/handTracking/browserHandTracking.ts
new file mode 100644
index 0000000..06b80b4
--- /dev/null
+++ b/src/lib/handTracking/browserHandTracking.ts
@@ -0,0 +1,157 @@
+import {
+  HAND_TRACKING_BROWSER_MODEL_URL,
+  HAND_TRACKING_BROWSER_WASM_URL,
+} from "@/data/handTrackingConfig";
+import type {
+  HandTrackingHand,
+  HandTrackingLandmark,
+} from "@/types/handTracking/handTracking";
+
+type HandLandmarkerModule = typeof import("@mediapipe/tasks-vision");
+type HandLandmarker = Awaited<
+  ReturnType<HandLandmarkerModule["HandLandmarker"]["createFromOptions"]>
+>;
+type HandLandmarkerResult = ReturnType<HandLandmarker["detectForVideo"]>;
+
+// MediaPipe hand landmark indices: the wrist (0) plus the four finger
+// knuckles, and the index-to-pinky fingertips.
+const PALM_LANDMARK_INDICES = [0, 5, 9, 13, 17];
+const FINGERTIP_LANDMARK_INDICES = [8, 12, 16, 20];
+
+// Shared by every caller so the module and model are requested only once.
+let handLandmarkerPromise: Promise<HandLandmarker> | null = null;
+
+/**
+ * Averages the landmarks found at `indices`.
+ *
+ * Indices with no landmark are skipped and do not count towards the average;
+ * the origin is returned when none of them resolves to a landmark.
+ */
+function averageLandmarks(
+  landmarks: HandTrackingLandmark[],
+  indices: number[],
+): HandTrackingLandmark {
+  const sum = { x: 0, y: 0, z: 0 };
+  let count = 0;
+
+  for (const index of indices) {
+    const landmark = landmarks[index];
+    if (!landmark) continue;
+
+    sum.x += landmark.x;
+    sum.y += landmark.y;
+    sum.z += landmark.z;
+    count += 1;
+  }
+
+  if (count === 0) return sum;
+
+  return { x: sum.x / count, y: sum.y / count, z: sum.z / count };
+}
+
+/** Euclidean distance between two landmarks. */
+function distance(
+  pointA: HandTrackingLandmark,
+  pointB: HandTrackingLandmark,
+): number {
+  return Math.sqrt(
+    (pointA.x - pointB.x) ** 2 +
+      (pointA.y - pointB.y) ** 2 +
+      (pointA.z - pointB.z) ** 2,
+  );
+}
+
+/**
+ * Reports whether the hand is closed into a fist.
+ *
+ * Distances are divided by the wrist-to-middle-knuckle length so the result
+ * does not depend on how large the hand appears. All four fingertips must lie
+ * within 1.05 of that length from the palm center.
+ */
+function isFist(landmarks: HandTrackingLandmark[]): boolean {
+  const wrist = landmarks[0];
+  const middleMcp = landmarks[9];
+
+  if (!wrist || !middleMcp) return false;
+
+  const palmSize = distance(wrist, middleMcp);
+  if (palmSize <= 0) return false;
+
+  const palmCenter = averageLandmarks(landmarks, PALM_LANDMARK_INDICES);
+  const foldedFingerCount = FINGERTIP_LANDMARK_INDICES.filter((index) => {
+    const landmark = landmarks[index];
+    if (!landmark) return false;
+
+    return distance(landmark, palmCenter) / palmSize < 1.05;
+  }).length;
+
+  return foldedFingerCount >= FINGERTIP_LANDMARK_INDICES.length;
+}
+
+/**
+ * Returns the shared MediaPipe hand landmarker, creating it on first use.
+ *
+ * Concurrent and later calls reuse the same promise. A failed attempt is not
+ * cached, so the next call tries again.
+ *
+ * @throws Whatever the dynamic import, the WASM fileset resolver or the
+ * landmarker factory rejects with.
+ */
+export async function getBrowserHandLandmarker(): Promise<HandLandmarker> {
+  handLandmarkerPromise ??= import("@mediapipe/tasks-vision")
+    .then(async ({ FilesetResolver, HandLandmarker }) => {
+      const vision = await FilesetResolver.forVisionTasks(
+        HAND_TRACKING_BROWSER_WASM_URL,
+      );
+
+      return HandLandmarker.createFromOptions(vision, {
+        baseOptions: {
+          modelAssetPath: HAND_TRACKING_BROWSER_MODEL_URL,
+          delegate: "GPU",
+        },
+        numHands: 2,
+        runningMode: "VIDEO",
+      });
+    })
+    .catch((error: unknown) => {
+      // Forget the failure; otherwise every later call would get the same
+      // rejected promise back and tracking could never recover.
+      handLandmarkerPromise = null;
+      throw error;
+    });
+
+  return handLandmarkerPromise;
+}
+
+/**
+ * Converts a MediaPipe detection result into the app's hand objects.
+ *
+ * Each hand is positioned at its palm center. Handedness falls back to
+ * "Unknown" with a score of 0 when the result has no category for that hand.
+ */
+export function convertBrowserHandResult(
+  result: HandLandmarkerResult,
+): HandTrackingHand[] {
+  return result.landmarks.map((landmarks, index) => {
+    const normalizedLandmarks = landmarks.map((landmark) => ({
+      x: landmark.x,
+      y: landmark.y,
+      z: landmark.z,
+    }));
+    const palmCenter = averageLandmarks(
+      normalizedLandmarks,
+      PALM_LANDMARK_INDICES,
+    );
+    const handedness = result.handedness[index]?.[0];
+
+    return {
+      x: palmCenter.x,
+      y: palmCenter.y,
+      z: palmCenter.z,
+      landmarks: normalizedLandmarks,
+      handedness: handedness?.categoryName ?? "Unknown",
+      isFist: isFist(normalizedLandmarks),
+      score: handedness?.score ?? 0,
+    };
+  });
+}
diff --git a/src/types/handTracking/handTracking.ts b/src/types/handTracking/handTracking.ts
index 8c6a94e..905a57d 100644
--- a/src/types/handTracking/handTracking.ts
+++ b/src/types/handTracking/handTracking.ts
@@ -4,6 +4,8 @@ export interface HandTrackingLandmark {
   z: number;
 }
 
+export type HandTrackingSource = "backend" | "browser";
+
 export interface HandTrackingHand {
   x: number;
   y: number;