add browser hand tracking source
This commit is contained in:
@@ -7,6 +7,10 @@ export const HAND_TRACKING_TARGET_FPS = 10;
|
||||
/** NOTE(review): presumably the JPEG encoder quality (0..1) for frames sent to the backend tracker — not used in the visible code; confirm. */
export const HAND_TRACKING_JPEG_QUALITY = 0.55;
|
||||
/** Milliseconds `getCameraStreamWithTimeout` waits for `getUserMedia` before rejecting with a timeout error. */
export const HAND_TRACKING_CAMERA_TIMEOUT_MS = 8_000;
|
||||
/** NOTE(review): presumably the maximum wait, in milliseconds, for a tracking response from the backend — not used in the visible code; confirm. */
export const HAND_TRACKING_RESPONSE_TIMEOUT_MS = 1_500;
|
||||
/** Base URL handed to `FilesetResolver.forVisionTasks`; pinned to @mediapipe/tasks-vision 0.10.35. */
export const HAND_TRACKING_BROWSER_WASM_URL =
|
||||
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.35/wasm";
|
||||
/** URL of the hand landmarker model, passed as `modelAssetPath` when the browser landmarker is created. */
export const HAND_TRACKING_BROWSER_MODEL_URL =
|
||||
"https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task";
|
||||
|
||||
export function getHandTrackingWsUrl(): string {
|
||||
const configuredUrl = import.meta.env.VITE_HAND_TRACKING_WS_URL;
|
||||
|
||||
@@ -0,0 +1,184 @@
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import {
|
||||
HAND_TRACKING_CAMERA_TIMEOUT_MS,
|
||||
HAND_TRACKING_FRAME_HEIGHT,
|
||||
HAND_TRACKING_FRAME_WIDTH,
|
||||
HAND_TRACKING_TARGET_FPS,
|
||||
} from "@/data/handTrackingConfig";
|
||||
import {
|
||||
convertBrowserHandResult,
|
||||
getBrowserHandLandmarker,
|
||||
} from "@/lib/handTracking/browserHandTracking";
|
||||
import type { HandTrackingSnapshot } from "@/types/handTracking/handTracking";
|
||||
|
||||
/** Options accepted by `useBrowserHandTracking`. */
interface UseBrowserHandTrackingOptions {
|
||||
/** When false, the hook's effect returns early without requesting the camera. */
enabled: boolean;
|
||||
}
|
||||
|
||||
/** Initial state of the hook's snapshot: no hands, idle, inactive, no server status and no error. */
const INITIAL_SNAPSHOT: HandTrackingSnapshot = {
|
||||
hands: [],
|
||||
status: "idle",
|
||||
usageStatus: "inactive",
|
||||
serverStatus: null,
|
||||
error: null,
|
||||
};
|
||||
|
||||
/**
 * Requests a camera stream and gives up if the browser has not answered
 * within `HAND_TRACKING_CAMERA_TIMEOUT_MS`.
 *
 * @param constraints - Passed unchanged to `navigator.mediaDevices.getUserMedia`.
 * @returns A promise resolving to the granted stream.
 * @throws Rejects with the `getUserMedia` error, or with a timeout `Error`
 *   when the request takes too long.
 */
function getCameraStreamWithTimeout(
  constraints: MediaStreamConstraints,
): Promise<MediaStream> {
  // Started outside the promise executor so a synchronous failure (for
  // example a missing `navigator.mediaDevices`) surfaces exactly as before.
  const streamPromise = navigator.mediaDevices.getUserMedia(constraints);

  return new Promise<MediaStream>((resolve, reject) => {
    let didTimeout = false;

    const timeoutId = window.setTimeout(() => {
      didTimeout = true;
      reject(
        new Error(
          "Camera request timed out. Restart Arc or check camera permissions for localhost:5173.",
        ),
      );
    }, HAND_TRACKING_CAMERA_TIMEOUT_MS);

    streamPromise.then(
      (stream) => {
        if (didTimeout) {
          // The caller has already been told the request failed, so release
          // the camera instead of leaking a stream nobody owns.
          stream.getTracks().forEach((track) => track.stop());
          return;
        }

        window.clearTimeout(timeoutId);
        resolve(stream);
      },
      (error: unknown) => {
        // Handling the rejection here (rather than on a detached `.then`)
        // avoids an unhandled promise rejection when access is denied.
        window.clearTimeout(timeoutId);
        reject(error);
      },
    );
  });
}
|
||||
|
||||
/**
 * React hook that tracks hands in the browser using the webcam.
 *
 * While `enabled` is true the hook requests the camera, plays the stream in a
 * detached `<video>` element, loads the browser hand landmarker and then runs
 * detection `HAND_TRACKING_TARGET_FPS` times per second, publishing each
 * result in the returned snapshot. The snapshot's `status` moves through
 * "requesting_camera" → "starting_camera" → "connecting" → "connected", or
 * becomes "error" with a message when any step fails.
 *
 * All resources (interval, camera tracks) are released when `enabled` turns
 * false, when the component unmounts, or when any step fails.
 *
 * @param options.enabled - Whether tracking should run.
 * @returns The latest tracking snapshot; `INITIAL_SNAPSHOT` while disabled.
 */
export function useBrowserHandTracking({
  enabled,
}: UseBrowserHandTrackingOptions): HandTrackingSnapshot {
  const [snapshot, setSnapshot] =
    useState<HandTrackingSnapshot>(INITIAL_SNAPSHOT);
  const videoRef = useRef<HTMLVideoElement | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const intervalRef = useRef<number | null>(null);

  useEffect(() => {
    if (!enabled) {
      return undefined;
    }

    // Set by the effect cleanup so in-flight async steps can bail out.
    let cancelled = false;

    const stopTracks = (stream: MediaStream | null): void => {
      stream?.getTracks().forEach((track) => track.stop());
    };

    const cleanup = (): void => {
      if (intervalRef.current !== null) {
        window.clearInterval(intervalRef.current);
        intervalRef.current = null;
      }

      stopTracks(streamRef.current);
      streamRef.current = null;
      videoRef.current = null;
    };

    const reportError = (error: unknown): void => {
      setSnapshot({
        hands: [],
        status: "error",
        usageStatus: "inactive",
        serverStatus: "Browser JS",
        error:
          error instanceof Error
            ? error.message
            : "Browser hand tracking failed",
      });
    };

    const start = async (): Promise<void> => {
      // Stream acquired by this run but not yet handed over to `streamRef`.
      // Whatever is still here when `start` exits is stopped in `finally`,
      // so neither a cancellation nor a thrown error leaves the camera on.
      let unownedStream: MediaStream | null = null;

      setSnapshot({
        hands: [],
        status: "requesting_camera",
        usageStatus: "available",
        serverStatus: "Browser JS",
        error: null,
      });

      try {
        const stream = await getCameraStreamWithTimeout({
          video: {
            width: HAND_TRACKING_FRAME_WIDTH,
            height: HAND_TRACKING_FRAME_HEIGHT,
            facingMode: "user",
          },
          audio: false,
        });
        unownedStream = stream;

        if (cancelled) return;

        setSnapshot((current) => ({
          ...current,
          status: "starting_camera",
        }));

        const video = document.createElement("video");
        video.muted = true;
        video.playsInline = true;
        video.srcObject = stream;
        await video.play();

        if (cancelled) return;

        setSnapshot((current) => ({
          ...current,
          status: "connecting",
          serverStatus: "Loading Browser JS model",
        }));

        const handLandmarker = await getBrowserHandLandmarker();

        if (cancelled) return;

        // From here on `cleanup` owns the stream.
        streamRef.current = stream;
        videoRef.current = video;
        unownedStream = null;

        setSnapshot((current) => ({
          ...current,
          status: "connected",
          serverStatus: "Browser JS",
        }));

        intervalRef.current = window.setInterval(() => {
          // Skip ticks until the video has a frame to analyse.
          if (video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA) return;

          try {
            const result = handLandmarker.detectForVideo(
              video,
              performance.now(),
            );
            const hands = convertBrowserHandResult(result);

            setSnapshot((current) => ({
              ...current,
              hands,
              usageStatus: hands.some((hand) => hand.isFist)
                ? "active"
                : "available",
              error: null,
            }));
          } catch (error) {
            // A failing detector would otherwise throw uncaught on every
            // tick; stop the loop, release the camera and surface the error.
            cleanup();
            reportError(error);
          }
        }, 1_000 / HAND_TRACKING_TARGET_FPS);
      } catch (error) {
        if (cancelled) return;

        reportError(error);
      } finally {
        stopTracks(unownedStream);
      }
    };

    void start();

    return () => {
      cancelled = true;
      cleanup();
      // Drop stale hands/status so a later consumer never sees results from
      // a camera that is no longer running.
      setSnapshot(INITIAL_SNAPSHOT);
    };
  }, [enabled]);

  // While disabled, never expose a snapshot left over from a previous run.
  return enabled ? snapshot : INITIAL_SNAPSHOT;
}
|
||||
@@ -0,0 +1,117 @@
|
||||
import {
|
||||
HAND_TRACKING_BROWSER_MODEL_URL,
|
||||
HAND_TRACKING_BROWSER_WASM_URL,
|
||||
} from "@/data/handTrackingConfig";
|
||||
import type {
|
||||
HandTrackingHand,
|
||||
HandTrackingLandmark,
|
||||
} from "@/types/handTracking/handTracking";
|
||||
|
||||
/** Type of the `@mediapipe/tasks-vision` module namespace, obtained without a runtime import. */
type HandLandmarkerModule = typeof import("@mediapipe/tasks-vision");
|
||||
/** Instance type that `HandLandmarker.createFromOptions` resolves to. */
type HandLandmarker = Awaited<
|
||||
ReturnType<HandLandmarkerModule["HandLandmarker"]["createFromOptions"]>
|
||||
>;
|
||||
/** Result type returned by `HandLandmarker.detectForVideo`. */
type HandLandmarkerResult = ReturnType<HandLandmarker["detectForVideo"]>;
|
||||
|
||||
/** Module-level cache of the landmarker load; assigned by `getBrowserHandLandmarker`, null until then. */
let handLandmarkerPromise: Promise<HandLandmarker> | null = null;
|
||||
|
||||
/**
 * Computes the mean position of the landmarks at the given indices.
 *
 * Indices that have no landmark are ignored, and the mean is taken over the
 * landmarks that were actually found, so a partially populated array does not
 * drag the result towards the origin.
 *
 * @param landmarks - Landmark list to read from.
 * @param indices - Positions in `landmarks` to average.
 * @returns The mean point, or `{ x: 0, y: 0, z: 0 }` when none of the indices
 *   resolves to a landmark (including an empty `indices`).
 */
function averageLandmarks(
  landmarks: readonly HandTrackingLandmark[],
  indices: readonly number[],
): HandTrackingLandmark {
  let sumX = 0;
  let sumY = 0;
  let sumZ = 0;
  let found = 0;

  for (const index of indices) {
    const landmark = landmarks[index];
    if (!landmark) continue;

    sumX += landmark.x;
    sumY += landmark.y;
    sumZ += landmark.z;
    found += 1;
  }

  // Avoid 0 / 0 = NaN leaking into downstream geometry.
  if (found === 0) {
    return { x: 0, y: 0, z: 0 };
  }

  return {
    x: sumX / found,
    y: sumY / found,
    z: sumZ / found,
  };
}
|
||||
|
||||
/**
 * Straight-line (Euclidean) distance between two 3D landmarks.
 *
 * @param pointA - First point.
 * @param pointB - Second point.
 * @returns The non-negative distance between the two points.
 */
function distance(
  pointA: HandTrackingLandmark,
  pointB: HandTrackingLandmark,
): number {
  const deltaX = pointA.x - pointB.x;
  const deltaY = pointA.y - pointB.y;
  const deltaZ = pointA.z - pointB.z;

  const squaredLength = deltaX ** 2 + deltaY ** 2 + deltaZ ** 2;
  return Math.sqrt(squaredLength);
}
|
||||
|
||||
/**
 * Decides whether a hand is closed into a fist.
 *
 * The distance from each of landmarks 8, 12, 16 and 20 (the fingertips in
 * MediaPipe's hand landmark numbering) to the palm center is divided by the
 * palm size (wrist to middle-finger MCP). The hand counts as a fist only when
 * all four ratios are below 1.05.
 *
 * @param landmarks - Landmarks of a single hand.
 * @returns `true` for a fist; `false` otherwise, including when the wrist or
 *   middle MCP landmark is missing or the palm size is not positive.
 */
function isFist(landmarks: HandTrackingLandmark[]): boolean {
  const wrist = landmarks[0];
  const middleMcp = landmarks[9];
  if (!wrist || !middleMcp) return false;

  // Normalising by palm size makes the check independent of how large the
  // hand appears in the frame.
  const palmSize = distance(wrist, middleMcp);
  if (palmSize <= 0) return false;

  const palmCenter = averageLandmarks(landmarks, [0, 5, 9, 13, 17]);
  const fingertipIndices = [8, 12, 16, 20];

  return fingertipIndices.every((tipIndex) => {
    const tip = landmarks[tipIndex];
    if (!tip) return false;

    const relativeReach = distance(tip, palmCenter) / palmSize;
    return relativeReach < 1.05;
  });
}
|
||||
|
||||
/**
 * Downloads the MediaPipe vision bundle and builds a hand landmarker
 * configured for video input, up to two hands, using the GPU delegate.
 */
async function loadBrowserHandLandmarker(): Promise<HandLandmarker> {
  const { FilesetResolver, HandLandmarker } = await import(
    "@mediapipe/tasks-vision"
  );
  const vision = await FilesetResolver.forVisionTasks(
    HAND_TRACKING_BROWSER_WASM_URL,
  );

  return HandLandmarker.createFromOptions(vision, {
    baseOptions: {
      modelAssetPath: HAND_TRACKING_BROWSER_MODEL_URL,
      delegate: "GPU",
    },
    numHands: 2,
    runningMode: "VIDEO",
  });
}

/**
 * Returns the shared browser hand landmarker, creating it on first use.
 *
 * The in-flight or completed load is cached at module level so concurrent
 * and later callers share a single instance. A failed load is not kept: the
 * cache is cleared on rejection so the next call can try again instead of
 * receiving the same stale error until the page is reloaded.
 *
 * @returns A promise for the ready-to-use landmarker.
 * @throws Rejects when the module, WASM files or model cannot be loaded.
 */
export async function getBrowserHandLandmarker(): Promise<HandLandmarker> {
  const cached = handLandmarkerPromise;
  if (cached !== null) {
    return cached;
  }

  const pending = loadBrowserHandLandmarker();
  handLandmarkerPromise = pending;

  pending.catch(() => {
    // Only clear the cache if it still holds this failed attempt.
    if (handLandmarkerPromise === pending) {
      handLandmarkerPromise = null;
    }
  });

  return pending;
}
|
||||
|
||||
/**
 * Translates a MediaPipe detection result into the app's hand model.
 *
 * For every detected hand this copies the landmark coordinates, uses the
 * average of landmarks 0, 5, 9, 13 and 17 as the hand position, evaluates the
 * fist gesture and attaches the first handedness category reported for that
 * hand.
 *
 * @param result - Value returned by `HandLandmarker.detectForVideo`.
 * @returns One entry per detected hand, in the order of `result.landmarks`.
 *   `handedness` is "Unknown" and `score` is 0 when no category is reported.
 */
export function convertBrowserHandResult(
  result: HandLandmarkerResult,
): HandTrackingHand[] {
  const hands: HandTrackingHand[] = [];

  result.landmarks.forEach((rawLandmarks, handIndex) => {
    // Keep only the coordinates so the output does not carry MediaPipe's
    // extra landmark fields.
    const points = rawLandmarks.map(({ x, y, z }) => ({ x, y, z }));
    const { x, y, z } = averageLandmarks(points, [0, 5, 9, 13, 17]);
    const topCategory = result.handedness[handIndex]?.[0];

    hands.push({
      x,
      y,
      z,
      landmarks: points,
      handedness: topCategory?.categoryName ?? "Unknown",
      isFist: isFist(points),
      score: topCategory?.score ?? 0,
    });
  });

  return hands;
}
|
||||
@@ -4,6 +4,8 @@ export interface HandTrackingLandmark {
|
||||
z: number;
|
||||
}
|
||||
|
||||
/** NOTE(review): presumably selects which implementation produces hand-tracking snapshots (backend service vs. in-browser) — not used in the visible code; confirm. */
export type HandTrackingSource = "backend" | "browser";
|
||||
|
||||
export interface HandTrackingHand {
|
||||
x: number;
|
||||
y: number;
|
||||
|
||||
Reference in New Issue
Block a user