Skip to content

Commit 9064ef7

Browse files
committed
Add video object detection example
1 parent dabdfb9 commit 9064ef7

File tree

7 files changed

+2309
-0
lines changed

video-object-detection/.gitignore

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
*.local

# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

video-object-detection/index.html

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Transformers.js | Real-time object detection</title>
  </head>

  <body>
    <h1>
      Real-time object detection w/
      <a href="https://github.com/WongKinYiu/yolov9" target="_blank">YOLOv9</a>
    </h1>
    <h4>
      Runs locally in your browser, powered by
      <a href="https://github.com/xenova/transformers.js" target="_blank"
        >🤗 Transformers.js</a
      >
    </h4>
    <!-- The video feed, the canvas frames are sampled from, and the
         absolutely-positioned bounding-box overlay share one container. -->
    <div id="container">
      <video id="video" autoplay muted playsinline></video>
      <canvas id="canvas" width="360" height="240"></canvas>
      <div id="overlay"></div>
    </div>
    <!-- Sliders start disabled; main.js enables each one after the model loads. -->
    <div id="controls">
      <div>
        <label>Image size</label>
        (<label id="size-value">128</label>)
        <br />
        <input
          id="size"
          type="range"
          min="64"
          max="256"
          step="32"
          value="128"
          disabled
        />
      </div>
      <div>
        <label>Threshold</label>
        (<label id="threshold-value">0.25</label>)
        <br />
        <input
          id="threshold"
          type="range"
          min="0.01"
          max="1"
          step="0.01"
          value="0.25"
          disabled
        />
      </div>
      <div>
        <label>Scale</label>
        (<label id="scale-value">0.5</label>)
        <br />
        <input
          id="scale"
          type="range"
          min="0.01"
          max="1"
          step="0.01"
          value="0.5"
          disabled
        />
      </div>
    </div>
    <label id="status"></label>

    <script type="module" src="/main.js"></script>
  </body>
</html>

video-object-detection/main.js

+170
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
import "./style.css";

import { AutoModel, AutoProcessor, RawImage } from "@huggingface/transformers";

// Cache references to the DOM elements the app reads and updates.
const status = document.getElementById("status");
const container = document.getElementById("container");
const overlay = document.getElementById("overlay");
const canvas = document.getElementById("canvas");
const video = document.getElementById("video");
const thresholdSlider = document.getElementById("threshold");
const thresholdLabel = document.getElementById("threshold-value");
const sizeSlider = document.getElementById("size");
const sizeLabel = document.getElementById("size-value");
const scaleSlider = document.getElementById("scale");
const scaleLabel = document.getElementById("scale-value");
function setStreamSize(width, height) {
19+
video.width = canvas.width = Math.round(width);
20+
video.height = canvas.height = Math.round(height);
21+
}
22+
23+
status.textContent = "Loading model...";

// Load the detection model and its matching pre/post-processor.
const model_id = "Xenova/gelan-c_all";
const model = await AutoModel.from_pretrained(model_id);
const processor = await AutoProcessor.from_pretrained(model_id);

// Wire up the controls. Each slider is only enabled once its handler is
// attached, so the user cannot change a setting before the model is ready.
let scale = 0.5;
scaleSlider.addEventListener("input", () => {
  scale = Number(scaleSlider.value);
  // Rescale the video/canvas immediately so the next frame uses the new size.
  setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
  scaleLabel.textContent = scale;
});
scaleSlider.disabled = false;

let threshold = 0.25;
thresholdSlider.addEventListener("input", () => {
  threshold = Number(thresholdSlider.value);
  thresholdLabel.textContent = threshold.toFixed(2);
});
thresholdSlider.disabled = false;

// The model input size: setting feature_extractor.size controls how the
// processor resizes frames before inference (smaller = faster, less accurate).
let size = 128;
processor.feature_extractor.size = { shortest_edge: size };
sizeSlider.addEventListener("input", () => {
  size = Number(sizeSlider.value);
  processor.feature_extractor.size = { shortest_edge: size };
  sizeLabel.textContent = size;
});
sizeSlider.disabled = false;

status.textContent = "Ready";
// Fixed palette of 20 box colours; each detected class id is mapped onto the
// palette (modulo its length) so the same class always gets the same colour.
const COLOURS = [
  "#EF4444",
  "#4299E1",
  "#059669",
  "#FBBF24",
  "#4B52B1",
  "#7B3AC2",
  "#ED507A",
  "#1DD1A1",
  "#F3873A",
  "#4B5563",
  "#DC2626",
  "#1852B4",
  "#18A35D",
  "#F59E0B",
  "#4059BE",
  "#6027A5",
  "#D63D60",
  "#00AC9B",
  "#E64A19",
  "#272A34",
];
/**
 * Render one detection as a bounding box + label inside the overlay element.
 * Coordinates are converted to percentages of the model's input size so the
 * overlay scales with the container.
 *
 * @param {[number, number, number, number, number, number]} box -
 *   [xmin, ymin, xmax, ymax, score, class id] in model-input pixel space.
 * @param {[number, number]} dims - [width, height] of the model input.
 */
function renderBox([xmin, ymin, xmax, ymax, score, id], [w, h]) {
  if (score < threshold) return; // Skip boxes with low confidence

  // Deterministic colour per class id (not random): same class, same colour.
  const color = COLOURS[id % COLOURS.length];

  // Draw the box
  const boxElement = document.createElement("div");
  boxElement.className = "bounding-box";
  Object.assign(boxElement.style, {
    borderColor: color,
    left: (100 * xmin) / w + "%",
    top: (100 * ymin) / h + "%",
    width: (100 * (xmax - xmin)) / w + "%",
    height: (100 * (ymax - ymin)) / h + "%",
  });

  // Draw label
  const labelElement = document.createElement("span");
  labelElement.textContent = `${model.config.id2label[id]} (${(100 * score).toFixed(2)}%)`;
  labelElement.className = "bounding-box-label";
  labelElement.style.backgroundColor = color;

  boxElement.appendChild(labelElement);
  overlay.appendChild(boxElement);
}
let isProcessing = false; // true while a frame is being run through the model
let previousTime; // time the previous inference finished (for the FPS readout)
// willReadFrequently: we call getImageData on every processed frame.
const context = canvas.getContext("2d", { willReadFrequently: true });

/**
 * Animation-loop callback: draws the latest video frame to the canvas and,
 * unless an inference is already in flight, runs the detector on that frame
 * and rebuilds the bounding-box overlay. Re-schedules itself each frame.
 */
function updateCanvas() {
  const { width, height } = canvas;
  context.drawImage(video, 0, 0, width, height);

  if (!isProcessing) {
    isProcessing = true;
    (async function () {
      try {
        // Read the current frame from the video (RGBA, hence 4 channels).
        const pixelData = context.getImageData(0, 0, width, height).data;
        const image = new RawImage(pixelData, width, height, 4);

        // Process the image and run the model
        const inputs = await processor(image);
        const { outputs } = await model(inputs);

        // Clear the previous overlay and draw the new detections.
        overlay.innerHTML = "";

        // reshaped_input_sizes holds [height, width]; reverse to [width, height]
        // to match renderBox's [w, h] parameter.
        const sizes = inputs.reshaped_input_sizes[0].reverse();
        outputs.tolist().forEach((x) => renderBox(x, sizes));

        if (previousTime !== undefined) {
          const fps = 1000 / (performance.now() - previousTime);
          status.textContent = `FPS: ${fps.toFixed(2)}`;
        }
        previousTime = performance.now();
      } finally {
        // Always release the lock — without this, a single failed inference
        // would leave isProcessing stuck true and stall detection forever.
        isProcessing = false;
      }
    })();
  }

  window.requestAnimationFrame(updateCanvas);
}
// Start the webcam stream, size the UI to its aspect ratio, and kick off the
// detection loop. On any failure (permission denied, no camera) show the error.
navigator.mediaDevices
  .getUserMedia({ video: true }) // Ask for video only (audio is not needed)
  .then((stream) => {
    // Feed the camera stream into the (muted, autoplaying) video element.
    video.srcObject = stream;
    // play() returns a promise; surface a rejection (e.g. autoplay blocked)
    // instead of leaving it as an unhandled rejection.
    video.play().catch((error) => alert(error));

    // NOTE(review): per spec, getSettings() width/height are optional —
    // presumably always present for camera tracks, but verify on targets.
    const videoTrack = stream.getVideoTracks()[0];
    const { width, height } = videoTrack.getSettings();

    setStreamSize(width * scale, height * scale);

    // Set container width and height depending on the image aspect ratio,
    // fitting inside a 720x405 (16:9) box.
    const ar = width / height;
    const [cw, ch] = ar > 720 / 405 ? [720, 720 / ar] : [405 * ar, 405];
    container.style.width = `${cw}px`;
    container.style.height = `${ch}px`;

    // Start the animation loop
    window.requestAnimationFrame(updateCanvas);
  })
  .catch((error) => {
    alert(error);
  });

0 commit comments

Comments
 (0)