feat add remote hand tracking backend
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import math
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import cv2
|
||||
import mediapipe as mp
|
||||
import numpy as np
|
||||
from mediapipe.tasks import python
|
||||
from mediapipe.tasks.python import vision
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class HandData:
|
||||
x: float
|
||||
y: float
|
||||
z: float
|
||||
handedness: str
|
||||
is_pinch: bool
|
||||
pinch_distance: float
|
||||
score: float
|
||||
|
||||
def to_payload(self) -> dict[str, float | str | bool]:
|
||||
return {
|
||||
"x": self.x,
|
||||
"y": self.y,
|
||||
"z": self.z,
|
||||
"handedness": self.handedness,
|
||||
"isPinch": self.is_pinch,
|
||||
"pinchDistance": self.pinch_distance,
|
||||
"score": self.score,
|
||||
}
|
||||
|
||||
|
||||
class HandTracker:
|
||||
def __init__(self, max_hands: int = 2) -> None:
|
||||
model_path = Path(__file__).with_name("hand_landmarker.task")
|
||||
if not model_path.exists():
|
||||
raise FileNotFoundError(
|
||||
"Missing hand_landmarker.task. Run `python backend/download_model.py`.",
|
||||
)
|
||||
|
||||
base_options = python.BaseOptions(model_asset_path=str(model_path))
|
||||
options = vision.HandLandmarkerOptions(
|
||||
base_options=base_options,
|
||||
running_mode=vision.RunningMode.IMAGE,
|
||||
num_hands=max_hands,
|
||||
)
|
||||
self._detector = vision.HandLandmarker.create_from_options(options)
|
||||
|
||||
def detect_from_base64_jpeg(self, image_base64: str) -> list[HandData]:
|
||||
image_data = base64.b64decode(image_base64, validate=True)
|
||||
image_buffer = np.frombuffer(image_data, dtype=np.uint8)
|
||||
frame = cv2.imdecode(image_buffer, cv2.IMREAD_COLOR)
|
||||
if frame is None:
|
||||
raise ValueError("Invalid JPEG frame")
|
||||
|
||||
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
|
||||
result = self._detector.detect(mp_image)
|
||||
return self._to_hands(result)
|
||||
|
||||
def close(self) -> None:
|
||||
self._detector.close()
|
||||
|
||||
def _to_hands(self, result: vision.HandLandmarkerResult) -> list[HandData]:
|
||||
hands: list[HandData] = []
|
||||
if not result.hand_landmarks or not result.handedness:
|
||||
return hands
|
||||
|
||||
for landmarks, handedness_categories in zip(
|
||||
result.hand_landmarks,
|
||||
result.handedness,
|
||||
):
|
||||
index_tip = landmarks[8]
|
||||
thumb_tip = landmarks[4]
|
||||
pinch_distance = self._calculate_distance(index_tip, thumb_tip)
|
||||
handedness = handedness_categories[0]
|
||||
|
||||
hands.append(
|
||||
HandData(
|
||||
x=index_tip.x,
|
||||
y=index_tip.y,
|
||||
z=index_tip.z,
|
||||
handedness=handedness.category_name,
|
||||
is_pinch=pinch_distance < 0.07,
|
||||
pinch_distance=pinch_distance,
|
||||
score=handedness.score,
|
||||
),
|
||||
)
|
||||
|
||||
return hands
|
||||
|
||||
def _calculate_distance(self, point_a: Any, point_b: Any) -> float:
|
||||
return math.sqrt(
|
||||
(point_a.x - point_b.x) ** 2
|
||||
+ (point_a.y - point_b.y) ** 2
|
||||
+ (point_a.z - point_b.z) ** 2,
|
||||
)
|
||||
|
||||
|
||||
def now_ms() -> int:
|
||||
return time.monotonic_ns() // 1_000_000
|
||||
Reference in New Issue
Block a user