Files
La-Fabrik/backend/hand_tracker.py
T
2026-04-29 09:52:46 +02:00

114 lines
3.5 KiB
Python

from __future__ import annotations
import base64
import math
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
@dataclass(frozen=True)
class HandData:
x: float
y: float
z: float
landmarks: list[dict[str, float]]
handedness: str
is_pinch: bool
pinch_distance: float
score: float
def to_payload(self) -> dict[str, float | str | bool | list[dict[str, float]]]:
return {
"x": self.x,
"y": self.y,
"z": self.z,
"landmarks": self.landmarks,
"handedness": self.handedness,
"isPinch": self.is_pinch,
"pinchDistance": self.pinch_distance,
"score": self.score,
}
class HandTracker:
def __init__(self, max_hands: int = 2) -> None:
model_path = Path(__file__).with_name("hand_landmarker.task")
if not model_path.exists():
raise FileNotFoundError(
"Missing hand_landmarker.task. Run `python backend/download_model.py`.",
)
base_options = python.BaseOptions(model_asset_path=str(model_path))
options = vision.HandLandmarkerOptions(
base_options=base_options,
running_mode=vision.RunningMode.IMAGE,
num_hands=max_hands,
)
self._detector = vision.HandLandmarker.create_from_options(options)
def detect_from_base64_jpeg(self, image_base64: str) -> list[HandData]:
image_data = base64.b64decode(image_base64, validate=True)
image_buffer = np.frombuffer(image_data, dtype=np.uint8)
frame = cv2.imdecode(image_buffer, cv2.IMREAD_COLOR)
if frame is None:
raise ValueError("Invalid JPEG frame")
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
result = self._detector.detect(mp_image)
return self._to_hands(result)
def close(self) -> None:
self._detector.close()
def _to_hands(self, result: vision.HandLandmarkerResult) -> list[HandData]:
hands: list[HandData] = []
if not result.hand_landmarks or not result.handedness:
return hands
for landmarks, handedness_categories in zip(
result.hand_landmarks,
result.handedness,
):
index_tip = landmarks[8]
thumb_tip = landmarks[4]
pinch_distance = self._calculate_distance(index_tip, thumb_tip)
handedness = handedness_categories[0]
hands.append(
HandData(
x=index_tip.x,
y=index_tip.y,
z=index_tip.z,
landmarks=[
{"x": point.x, "y": point.y, "z": point.z}
for point in landmarks
],
handedness=handedness.category_name,
is_pinch=pinch_distance < 0.07,
pinch_distance=pinch_distance,
score=handedness.score,
),
)
return hands
def _calculate_distance(self, point_a: Any, point_b: Any) -> float:
return math.sqrt(
(point_a.x - point_b.x) ** 2
+ (point_a.y - point_b.y) ** 2
+ (point_a.z - point_b.z) ** 2,
)
def now_ms() -> int:
return time.monotonic_ns() // 1_000_000