feat add remote hand tracking backend

This commit is contained in:
Tom Boullay
2026-04-27 15:49:02 +02:00
parent 8abc69ebc3
commit 641d2f8871
10 changed files with 595 additions and 3 deletions
+3 -1
View File
@@ -1,5 +1,7 @@
# Dependencies
node_modules/
__pycache__/
*.pyc
# Build
dist/
@@ -37,4 +39,4 @@ Thumbs.db
# 3D Assets Cache (drei, GLTFJSX)
.drei/
.glitchdrei-cache/
.glitchdrei-cache/
+73
View File
@@ -0,0 +1,73 @@
# Hand Tracking Backend
Remote-compatible Python backend for La-Fabrik hand tracking.
The browser captures webcam frames, downsizes them, sends JPEG frames to this backend over WebSocket, and receives hand landmarks plus pinch state.
## Setup
```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -r backend/requirements.txt
python3 backend/download_model.py
```
## Run
```bash
python3 -m backend.main
```
The WebSocket endpoint is:
```txt
ws://localhost:8000/ws
```
## Health Check
```txt
http://localhost:8000/health
```
## Message Flow
Client sends a compressed frame:
```json
{
"type": "frame",
"timestamp": 1234567890,
"width": 320,
"height": 240,
"image": "base64-jpeg"
}
```
Server responds with detected hands:
```json
{
"type": "hands",
"timestamp": 1234567890,
"hands": [
{
"x": 0.5,
"y": 0.3,
"z": 0.1,
"handedness": "Right",
"isPinch": true,
"pinchDistance": 0.05,
"score": 0.92
}
]
}
```
## Notes
- The backend does not read `cv2.VideoCapture(0)`.
- This keeps local development and production behavior aligned.
- Each browser connection sends its own webcam frames.
- The backend rate-limits frames per connection and drops work when a client is already being processed.
View File
+37
View File
@@ -0,0 +1,37 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from uuid import uuid4
from fastapi import WebSocket
@dataclass
class ClientConnection:
id: str
websocket: WebSocket
is_processing: bool = False
last_frame_at: float = 0.0
metadata: dict[str, Any] = field(default_factory=dict)
class ConnectionManager:
def __init__(self) -> None:
self._connections: dict[str, ClientConnection] = {}
@property
def count(self) -> int:
return len(self._connections)
async def connect(self, websocket: WebSocket) -> ClientConnection:
await websocket.accept()
connection = ClientConnection(id=str(uuid4()), websocket=websocket)
self._connections[connection.id] = connection
return connection
def disconnect(self, connection: ClientConnection) -> None:
self._connections.pop(connection.id, None)
async def send(self, connection: ClientConnection, payload: dict[str, Any]) -> None:
await connection.websocket.send_json(payload)
+22
View File
@@ -0,0 +1,22 @@
from __future__ import annotations
from pathlib import Path
from urllib.request import urlretrieve
MODEL_URL = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
MODEL_PATH = Path(__file__).with_name("hand_landmarker.task")
def download_model() -> None:
if MODEL_PATH.exists():
print(f"Model already exists at {MODEL_PATH}")
return
print("Downloading MediaPipe Hand Landmarker model...")
urlretrieve(MODEL_URL, MODEL_PATH)
print(f"Model downloaded to {MODEL_PATH}")
if __name__ == "__main__":
download_model()
+107
View File
@@ -0,0 +1,107 @@
from __future__ import annotations
import base64
import math
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
@dataclass(frozen=True)
class HandData:
x: float
y: float
z: float
handedness: str
is_pinch: bool
pinch_distance: float
score: float
def to_payload(self) -> dict[str, float | str | bool]:
return {
"x": self.x,
"y": self.y,
"z": self.z,
"handedness": self.handedness,
"isPinch": self.is_pinch,
"pinchDistance": self.pinch_distance,
"score": self.score,
}
class HandTracker:
def __init__(self, max_hands: int = 2) -> None:
model_path = Path(__file__).with_name("hand_landmarker.task")
if not model_path.exists():
raise FileNotFoundError(
"Missing hand_landmarker.task. Run `python backend/download_model.py`.",
)
base_options = python.BaseOptions(model_asset_path=str(model_path))
options = vision.HandLandmarkerOptions(
base_options=base_options,
running_mode=vision.RunningMode.IMAGE,
num_hands=max_hands,
)
self._detector = vision.HandLandmarker.create_from_options(options)
def detect_from_base64_jpeg(self, image_base64: str) -> list[HandData]:
image_data = base64.b64decode(image_base64, validate=True)
image_buffer = np.frombuffer(image_data, dtype=np.uint8)
frame = cv2.imdecode(image_buffer, cv2.IMREAD_COLOR)
if frame is None:
raise ValueError("Invalid JPEG frame")
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
result = self._detector.detect(mp_image)
return self._to_hands(result)
def close(self) -> None:
self._detector.close()
def _to_hands(self, result: vision.HandLandmarkerResult) -> list[HandData]:
hands: list[HandData] = []
if not result.hand_landmarks or not result.handedness:
return hands
for landmarks, handedness_categories in zip(
result.hand_landmarks,
result.handedness,
):
index_tip = landmarks[8]
thumb_tip = landmarks[4]
pinch_distance = self._calculate_distance(index_tip, thumb_tip)
handedness = handedness_categories[0]
hands.append(
HandData(
x=index_tip.x,
y=index_tip.y,
z=index_tip.z,
handedness=handedness.category_name,
is_pinch=pinch_distance < 0.07,
pinch_distance=pinch_distance,
score=handedness.score,
),
)
return hands
def _calculate_distance(self, point_a: Any, point_b: Any) -> float:
return math.sqrt(
(point_a.x - point_b.x) ** 2
+ (point_a.y - point_b.y) ** 2
+ (point_a.z - point_b.z) ** 2,
)
def now_ms() -> int:
return time.monotonic_ns() // 1_000_000
+122
View File
@@ -0,0 +1,122 @@
from __future__ import annotations
import asyncio
from contextlib import asynccontextmanager
from typing import Any
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import JSONResponse
from backend.connection_manager import ClientConnection, ConnectionManager
from backend.hand_tracker import HandTracker, now_ms
MAX_FRAME_BYTES = 220_000
MIN_FRAME_INTERVAL_SECONDS = 0.08
manager = ConnectionManager()
tracker: HandTracker | None = None
detection_lock = asyncio.Lock()
@asynccontextmanager
async def lifespan(app: FastAPI):
global tracker
tracker = HandTracker(max_hands=2)
yield
if tracker:
tracker.close()
app = FastAPI(title="La-Fabrik Hand Tracking", lifespan=lifespan)
@app.get("/health")
async def health() -> JSONResponse:
return JSONResponse(
{
"status": "ok",
"connections": manager.count,
},
)
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket) -> None:
connection = await manager.connect(websocket)
await manager.send(connection, status_payload("connected"))
try:
while True:
message = await websocket.receive_json()
response = await handle_message(connection, message)
await manager.send(connection, response)
except WebSocketDisconnect:
manager.disconnect(connection)
except Exception as error:
await manager.send(connection, error_payload(str(error)))
manager.disconnect(connection)
async def handle_message(
connection: ClientConnection,
message: dict[str, Any],
) -> dict[str, Any]:
if message.get("type") != "frame":
return error_payload("Unsupported message type")
current_time = asyncio.get_running_loop().time()
if current_time - connection.last_frame_at < MIN_FRAME_INTERVAL_SECONDS:
return status_payload("rate_limited")
if connection.is_processing:
return status_payload("busy")
image = message.get("image")
if not isinstance(image, str):
return error_payload("Missing image payload")
if len(image) > MAX_FRAME_BYTES:
return error_payload("Frame payload too large")
if tracker is None:
return error_payload("Hand tracker is not ready")
if detection_lock.locked():
return status_payload("busy")
connection.last_frame_at = current_time
connection.is_processing = True
try:
async with detection_lock:
hands = await asyncio.to_thread(tracker.detect_from_base64_jpeg, image)
return {
"type": "hands",
"timestamp": now_ms(),
"hands": [hand.to_payload() for hand in hands],
}
finally:
connection.is_processing = False
def status_payload(status: str) -> dict[str, str | int]:
return {
"type": "status",
"timestamp": now_ms(),
"status": status,
}
def error_payload(message: str) -> dict[str, str | int | list[Any]]:
return {
"type": "error",
"timestamp": now_ms(),
"hands": [],
"message": message,
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
+5
View File
@@ -0,0 +1,5 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
opencv-python-headless==4.10.0.84
mediapipe==0.10.20
numpy==1.26.4
+223 -2
View File
@@ -25,6 +25,7 @@
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@vitejs/plugin-react": "^6.0.1",
"concurrently": "^9.2.1",
"eslint": "^9.39.4",
"eslint-config-prettier": "^10.1.8",
"eslint-plugin-prettier": "^5.5.5",
@@ -1609,6 +1610,16 @@
"url": "https://github.com/sponsors/epoberezkin"
}
},
"node_modules/ansi-regex": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/ansi-styles": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
@@ -1811,6 +1822,21 @@
"url": "https://github.com/chalk/chalk?sponsor=1"
}
},
"node_modules/cliui": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
"dev": true,
"license": "ISC",
"dependencies": {
"string-width": "^4.2.0",
"strip-ansi": "^6.0.1",
"wrap-ansi": "^7.0.0"
},
"engines": {
"node": ">=12"
}
},
"node_modules/color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@@ -1838,6 +1864,47 @@
"dev": true,
"license": "MIT"
},
"node_modules/concurrently": {
"version": "9.2.1",
"resolved": "https://registry.npmjs.org/concurrently/-/concurrently-9.2.1.tgz",
"integrity": "sha512-fsfrO0MxV64Znoy8/l1vVIjjHa29SZyyqPgQBwhiDcaW8wJc2W3XWVOGx4M3oJBnv/zdUZIIp1gDeS98GzP8Ng==",
"dev": true,
"license": "MIT",
"dependencies": {
"chalk": "4.1.2",
"rxjs": "7.8.2",
"shell-quote": "1.8.3",
"supports-color": "8.1.1",
"tree-kill": "1.2.2",
"yargs": "17.7.2"
},
"bin": {
"conc": "dist/bin/concurrently.js",
"concurrently": "dist/bin/concurrently.js"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/open-cli-tools/concurrently?sponsor=1"
}
},
"node_modules/concurrently/node_modules/supports-color": {
"version": "8.1.1",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz",
"integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"has-flag": "^4.0.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/supports-color?sponsor=1"
}
},
"node_modules/convert-source-map": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
@@ -1940,6 +2007,13 @@
"dev": true,
"license": "ISC"
},
"node_modules/emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
"dev": true,
"license": "MIT"
},
"node_modules/escalade": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
@@ -2328,6 +2402,16 @@
"node": ">=6.9.0"
}
},
"node_modules/get-caller-file": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
"dev": true,
"license": "ISC",
"engines": {
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/glob-parent": {
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
@@ -2472,6 +2556,16 @@
"node": ">=0.10.0"
}
},
"node_modules/is-fullwidth-code-point": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/is-glob": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
@@ -3507,6 +3601,16 @@
}
}
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/require-from-string": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
@@ -3567,6 +3671,16 @@
"dev": true,
"license": "MIT"
},
"node_modules/rxjs": {
"version": "7.8.2",
"resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.2.tgz",
"integrity": "sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"tslib": "^2.1.0"
}
},
"node_modules/scheduler": {
"version": "0.27.0",
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz",
@@ -3604,6 +3718,19 @@
"node": ">=8"
}
},
"node_modules/shell-quote": {
"version": "1.8.3",
"resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz",
"integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@@ -3640,6 +3767,34 @@
"integrity": "sha512-hNKz8phvYLPEcRkeG1rsGmV5ChMjKDAWU7/OJJdDErPBNChQXxCo3WZurGpnWc6gZhAzEPFad1aVgyOANH1sMw==",
"license": "MIT"
},
"node_modules/string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"dev": true,
"license": "MIT",
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/strip-ansi": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
"dev": true,
"license": "MIT",
"dependencies": {
"ansi-regex": "^5.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/strip-json-comments": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz",
@@ -3746,6 +3901,16 @@
"url": "https://github.com/sponsors/SuperchupuDev"
}
},
"node_modules/tree-kill": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz",
"integrity": "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==",
"dev": true,
"license": "MIT",
"bin": {
"tree-kill": "cli.js"
}
},
"node_modules/troika-three-text": {
"version": "0.52.4",
"resolved": "https://registry.npmjs.org/troika-three-text/-/troika-three-text-0.52.4.tgz",
@@ -3794,8 +3959,7 @@
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"dev": true,
"license": "0BSD",
"optional": true
"license": "0BSD"
},
"node_modules/tunnel-rat": {
"version": "0.1.2",
@@ -4065,6 +4229,34 @@
"node": ">=0.10.0"
}
},
"node_modules/wrap-ansi": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"ansi-styles": "^4.0.0",
"string-width": "^4.1.0",
"strip-ansi": "^6.0.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
"dev": true,
"license": "ISC",
"engines": {
"node": ">=10"
}
},
"node_modules/yallist": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz",
@@ -4072,6 +4264,35 @@
"dev": true,
"license": "ISC"
},
"node_modules/yargs": {
"version": "17.7.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
"dev": true,
"license": "MIT",
"dependencies": {
"cliui": "^8.0.1",
"escalade": "^3.1.1",
"get-caller-file": "^2.0.5",
"require-directory": "^2.1.1",
"string-width": "^4.2.3",
"y18n": "^5.0.5",
"yargs-parser": "^21.1.1"
},
"engines": {
"node": ">=12"
}
},
"node_modules/yargs-parser": {
"version": "21.1.1",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
"dev": true,
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/yocto-queue": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz",
+3
View File
@@ -5,6 +5,8 @@
"type": "module",
"scripts": {
"dev": "vite",
"dev:hand": "python3 -m backend.main",
"dev:full": "concurrently \"npm run dev\" \"npm run dev:hand\"",
"build": "tsc -b && vite build",
"lint": "eslint .",
"lint:fix": "eslint . --fix",
@@ -31,6 +33,7 @@
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@vitejs/plugin-react": "^6.0.1",
"concurrently": "^9.2.1",
"eslint": "^9.39.4",
"eslint-config-prettier": "^10.1.8",
"eslint-plugin-prettier": "^5.5.5",