114 lines
3.5 KiB
Python
114 lines
3.5 KiB
Python
from __future__ import annotations
|
|
|
|
import base64
|
|
import math
|
|
import time
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import cv2
|
|
import mediapipe as mp
|
|
import numpy as np
|
|
from mediapipe.tasks import python
|
|
from mediapipe.tasks.python import vision
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class HandData:
|
|
x: float
|
|
y: float
|
|
z: float
|
|
landmarks: list[dict[str, float]]
|
|
handedness: str
|
|
is_pinch: bool
|
|
pinch_distance: float
|
|
score: float
|
|
|
|
def to_payload(self) -> dict[str, float | str | bool | list[dict[str, float]]]:
|
|
return {
|
|
"x": self.x,
|
|
"y": self.y,
|
|
"z": self.z,
|
|
"landmarks": self.landmarks,
|
|
"handedness": self.handedness,
|
|
"isPinch": self.is_pinch,
|
|
"pinchDistance": self.pinch_distance,
|
|
"score": self.score,
|
|
}
|
|
|
|
|
|
class HandTracker:
|
|
def __init__(self, max_hands: int = 2) -> None:
|
|
model_path = Path(__file__).with_name("hand_landmarker.task")
|
|
if not model_path.exists():
|
|
raise FileNotFoundError(
|
|
"Missing hand_landmarker.task. Run `python backend/download_model.py`.",
|
|
)
|
|
|
|
base_options = python.BaseOptions(model_asset_path=str(model_path))
|
|
options = vision.HandLandmarkerOptions(
|
|
base_options=base_options,
|
|
running_mode=vision.RunningMode.IMAGE,
|
|
num_hands=max_hands,
|
|
)
|
|
self._detector = vision.HandLandmarker.create_from_options(options)
|
|
|
|
def detect_from_base64_jpeg(self, image_base64: str) -> list[HandData]:
|
|
image_data = base64.b64decode(image_base64, validate=True)
|
|
image_buffer = np.frombuffer(image_data, dtype=np.uint8)
|
|
frame = cv2.imdecode(image_buffer, cv2.IMREAD_COLOR)
|
|
if frame is None:
|
|
raise ValueError("Invalid JPEG frame")
|
|
|
|
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
|
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
|
|
result = self._detector.detect(mp_image)
|
|
return self._to_hands(result)
|
|
|
|
def close(self) -> None:
|
|
self._detector.close()
|
|
|
|
def _to_hands(self, result: vision.HandLandmarkerResult) -> list[HandData]:
|
|
hands: list[HandData] = []
|
|
if not result.hand_landmarks or not result.handedness:
|
|
return hands
|
|
|
|
for landmarks, handedness_categories in zip(
|
|
result.hand_landmarks,
|
|
result.handedness,
|
|
):
|
|
index_tip = landmarks[8]
|
|
thumb_tip = landmarks[4]
|
|
pinch_distance = self._calculate_distance(index_tip, thumb_tip)
|
|
handedness = handedness_categories[0]
|
|
|
|
hands.append(
|
|
HandData(
|
|
x=index_tip.x,
|
|
y=index_tip.y,
|
|
z=index_tip.z,
|
|
landmarks=[
|
|
{"x": point.x, "y": point.y, "z": point.z}
|
|
for point in landmarks
|
|
],
|
|
handedness=handedness.category_name,
|
|
is_pinch=pinch_distance < 0.07,
|
|
pinch_distance=pinch_distance,
|
|
score=handedness.score,
|
|
),
|
|
)
|
|
|
|
return hands
|
|
|
|
def _calculate_distance(self, point_a: Any, point_b: Any) -> float:
|
|
return math.sqrt(
|
|
(point_a.x - point_b.x) ** 2
|
|
+ (point_a.y - point_b.y) ** 2
|
|
+ (point_a.z - point_b.z) ** 2,
|
|
)
|
|
|
|
|
|
def now_ms() -> int:
|
|
return time.monotonic_ns() // 1_000_000
|