YOLO11n image preprocessing in Node

Tue Sep 16 2025

The YOLO11n model expects a letterboxed 640×640 RGB input, padded with gray 114 (the Ultralytics default letterbox color). The last part of the _preprocess method converts the raw Node Buffer into a normalized CHW float32 Tensor that ONNX Runtime accepts as input.

	/**
	 * Preprocess the image for the YOLO11n model.
	 * @param buf - The original image buffer.
	 * @param size - The size of the processed image (letterboxed) in pixels.
	 * @returns The preprocessed image and metadata.
	 */
	private async _preprocess(buf: Buffer, size: number): Promise<{ image: Tensor, params: LetterboxParams }> {
		// rotate according to EXIF orientation
		const img = sharp(buf).rotate()

		// calculate letterbox params
		const metadata = await img.metadata()
		// metadata() reports the stored (pre-rotation) dimensions; EXIF orientations
		// 5-8 rotate by 90 degrees, so swap width/height to match the .rotate() output
		const rotated = (metadata.orientation ?? 1) >= 5
		const origW = rotated ? metadata.height! : metadata.width!
		const origH = rotated ? metadata.width! : metadata.height!
		const params: LetterboxParams = computeLetterboxParams({ width: origW, height: origH }, size)

		// letterbox to (640, 640), pad with gray 114 (Ultralytics default letterbox color)
		const { width, height } = params.canvas
		const letterboxed: Buffer = await img
			.resize(width, height, { fit: 'contain', background: { r: 114, g: 114, b: 114 } })
			.toColorspace('srgb')
			.removeAlpha()
			.raw()
			.toBuffer()

		// HWC RGB uint8 -> CHW float32 normalized [0,1] (YOLOv8/11 expected input)
		const chw: Float32Array = hwcToChwYOLO(letterboxed, { width: size, height: size })
		const dims = [1, 3, size, size] as const
		const image = new Tensor('float32', chw, dims)

		return { image, params }
	}
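
The two helpers referenced above aren't shown here. Below is a minimal sketch of computeLetterboxParams, assuming LetterboxParams carries the target canvas size plus the scale ratio and padding offsets needed later to map detections back to the original image; the exact shape in the real code may differ.

	interface LetterboxParams {
		canvas: { width: number, height: number }
		ratio: number  // scale factor applied to the original image
		padX: number   // horizontal padding on the left edge, in pixels
		padY: number   // vertical padding on the top edge, in pixels
	}

	function computeLetterboxParams(
		orig: { width: number, height: number },
		size: number
	): LetterboxParams {
		// scale so the longer side fits the target square
		const ratio = Math.min(size / orig.width, size / orig.height)
		const scaledW = Math.round(orig.width * ratio)
		const scaledH = Math.round(orig.height * ratio)

		return {
			canvas: { width: size, height: size },
			ratio,
			padX: (size - scaledW) / 2,
			padY: (size - scaledH) / 2,
		}
	}

Note that canvas is the full 640×640 target here: sharp's fit: 'contain' does the actual scaling and centering, so the ratio and padding values are only needed to map boxes back during postprocessing.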
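
And a sketch of hwcToChwYOLO, assuming the raw sharp buffer is tightly packed RGB (three bytes per pixel, no alpha, no row padding), which is what .removeAlpha().raw() produces:

	function hwcToChwYOLO(
		hwc: Buffer,
		{ width, height }: { width: number, height: number }
	): Float32Array {
		const pixels = width * height
		const chw = new Float32Array(3 * pixels)
		for (let i = 0; i < pixels; i++) {
			// split interleaved RGB into channel planes and normalize uint8 -> [0, 1]
			chw[i] = hwc[i * 3] / 255
			chw[pixels + i] = hwc[i * 3 + 1] / 255
			chw[2 * pixels + i] = hwc[i * 3 + 2] / 255
		}
		return chw
	}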
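
For context, here is a hypothetical way to feed the resulting Tensor to onnxruntime-node. The model path is an assumption, and the input name is read from the session rather than hard-coded:

	import { InferenceSession, Tensor } from 'onnxruntime-node'

	async function runModel(image: Tensor) {
		// load the exported model and feed the preprocessed tensor as its only input
		const session = await InferenceSession.create('./yolo11n.onnx')
		return session.run({ [session.inputNames[0]]: image })
	}

In practice the session would be created once and reused, and the params returned by _preprocess would be used to map the raw detections back to original image coordinates.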