I've been experimenting with using a web worker to detect poses frame by frame, and then displaying the results on the main thread. However, I'm encountering some delays and synchronization issues.
My setup involves Next.js 14.0.4 with @mediapipe/[email protected].
pose-landmarker-worker.ts
import { PoseLandmarker } from "@mediapipe/tasks-vision";
import { createPoseLandmarker } from "@/lib/create-pose-landmarker";
let poseLandmarker: PoseLandmarker;
self.onmessage = async function (event: MessageEvent) {
const data = event.data;
if (data.action === "init") {
if (!data.runningMode) {
throw new Error("Could not initialize. The runningMode prop is not specified");
}
if (!poseLandmarker) {
const poseLandmarkerInstance = await createPoseLandmarker(data.runningMode);
if (!poseLandmarkerInstance) {
throw new Error("Failed to create pose landmarker instance");
}
poseLandmarker = poseLandmarkerInstance;
console.log("Created new pose landmarker instance");
} else {
console.warn("pose landmarker already initialized");
}
} else if (data.action === "detectForVideo") {
const imageBitmap = data.frame as ImageBitmap;
const result = poseLandmarker.detectForVideo(imageBitmap, data.timestamp);
self.postMessage(result);
}
};
page.tsx
"use client";
import React, { useEffect, useRef } from "react";
import { PoseLandmarkerResult } from "@mediapipe/tasks-vision";
import { drawConnectors } from "@/lib/draw-utils";
export default function WebWorkers2() {
const videoRef = useRef<HTMLVideoElement>(null);
const offscreenCanvasRef = useRef<OffscreenCanvas | null>(null);
const offscreenCanvasCtxRef = useRef<OffscreenCanvasRenderingContext2D | null>(null);
const drawCanvasRef = useRef<HTMLCanvasElement>(null);
const workerRef = useRef<Worker | null>(null);
const drawVideoToCanvas = () => {
const video = videoRef.current;
const worker = workerRef.current;
if (!video || !worker) {
console.log("invalid data");
return;
}
const width = video.width;
const height = video.height;
if (!offscreenCanvasRef.current) {
const offscreenCanvasInstance = new OffscreenCanvas(width, height);
offscreenCanvasRef.current = offscreenCanvasInstance;
offscreenCanvasCtxRef.current = offscreenCanvasInstance.getContext("2d", { willReadFrequently: true });
}
const ctx = offscreenCanvasCtxRef.current;
const offscreenCanvas = offscreenCanvasRef.current;
if (!offscreenCanvas || !ctx) {
return;
}
ctx.drawImage(video, 0, 0, width, height);
const imageBitmap = offscreenCanvas.transferToImageBitmap();
const timestamp = performance.now();
worker.postMessage({ action: "detectForVideo", frame: imageBitmap, timestamp }, [imageBitmap]);
requestAnimationFrame(drawVideoToCanvas);
};
useEffect(() => {
workerRef.current = new Worker(new URL("@/lib/pose-landmarker-worker.ts", import.meta.url));
const ctx = drawCanvasRef.current?.getContext("2d")!;
workerRef.current.onmessage = function (event: MessageEvent) {
const result = event.data as PoseLandmarkerResult;
if (ctx) {
ctx.save();
ctx.clearRect(0, 0, 360, 640);
drawConnectors(result, ctx);
ctx.restore();
}
};
return () => {
const worker = workerRef.current;
if (worker) {
worker.terminate();
}
};
}, []);
return (
<div className="flex h-screen flex-col">
<div className="flex ">
<div className="relative">
<video
ref={videoRef}
onLoadedData={(event) => {
workerRef.current?.postMessage({
action: "init",
runningMode: "VIDEO",
});
setTimeout(drawVideoToCanvas, 3000);
}}
src={"/static/videos/body_scan.mp4"}
height={640}
width={360}
playsInline
autoPlay
muted
loop
/>
<canvas ref={drawCanvasRef} className="absolute left-0 top-0" height={640} width={360} />
</div>
</div>
</div>
);
}
The outcome: https://drive.google.com/file/d/1KBt_VRzuE9zc-sco9d3tHZHj059fTYnf/view?usp=drive_link