story-studio/remotion/node_modules/mediabunny/src/media-sink.ts

/*!
 * Copyright (c) 2026-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import { parsePcmCodec, PCM_AUDIO_CODECS, PcmAudioCodec, VideoCodec, AudioCodec } from './codec';
import {
	concatAvcNalUnits,
	deserializeAvcDecoderConfigurationRecord,
	determineVideoPacketType,
	extractNalUnitTypeForAvc,
	extractNalUnitTypeForHevc,
	HevcNalUnitType,
	iterateAvcNalUnits,
	iterateHevcNalUnits,
	parseAvcSps,
} from './codec-data';
import { CustomVideoDecoder, customVideoDecoders, CustomAudioDecoder, customAudioDecoders } from './custom-coder';
import { InputDisposedError } from './input';
import { InputAudioTrack, InputTrack, InputVideoTrack } from './input-track';
import {
	AnyIterable,
	assert,
	assertNever,
	CallSerializer,
	getInt24,
	getUint24,
	insertSorted,
	isChromium,
	isFirefox,
	isNumber,
	isWebKit,
	last,
	mapAsyncGenerator,
	promiseWithResolvers,
	Rotation,
	toAsyncIterator,
	toDataView,
	toUint8Array,
	validateAnyIterable,
} from './misc';
import { EncodedPacket } from './packet';
import { fromAlaw, fromUlaw } from './pcm';
import { AudioSample, clampCropRectangle, CropRectangle, validateCropRectangle, VideoSample } from './sample';

/**
 * Additional options for controlling packet retrieval.
 * @group Media sinks
 * @public
 */
export type PacketRetrievalOptions = {
	/**
	 * When set to `true`, only packet metadata (like timestamp) will be retrieved - the actual packet data will not
	 * be loaded.
	 */
	metadataOnly?: boolean;

	/**
	 * When set to true, key packets will be verified upon retrieval by looking into the packet's bitstream.
	 * If not enabled, the packet types will be determined solely by what's stored in the containing file and may be
	 * incorrect, potentially leading to decoder errors. Since determining a packet's actual type requires looking into
	 * its data, this option cannot be enabled together with `metadataOnly`.
	 */
	verifyKeyPackets?: boolean;
};

const validatePacketRetrievalOptions = (options: PacketRetrievalOptions) => {
	if (!options || typeof options !== 'object') {
		throw new TypeError('options must be an object.');
	}
	if (options.metadataOnly !== undefined && typeof options.metadataOnly !== 'boolean') {
		throw new TypeError('options.metadataOnly, when defined, must be a boolean.');
	}
	if (options.verifyKeyPackets !== undefined && typeof options.verifyKeyPackets !== 'boolean') {
		throw new TypeError('options.verifyKeyPackets, when defined, must be a boolean.');
	}
	if (options.verifyKeyPackets && options.metadataOnly) {
		throw new TypeError('options.verifyKeyPackets and options.metadataOnly cannot be enabled together.');
	}
};

const validateTimestamp = (timestamp: number) => {
	if (!isNumber(timestamp)) {
		throw new TypeError('timestamp must be a number.'); // It can be non-finite, that's fine
	}
};

const maybeFixPacketType = (
	track: InputTrack,
	promise: Promise<EncodedPacket | null>,
	options: PacketRetrievalOptions,
) => {
	if (options.verifyKeyPackets) {
		return promise.then(async (packet) => {
			if (!packet || packet.type === 'delta') {
				return packet;
			}

			const determinedType = await track.determinePacketType(packet);
			if (determinedType) {
				// @ts-expect-error Technically readonly
				packet.type = determinedType;
			}

			return packet;
		});
	} else {
		return promise;
	}
};

/**
 * Sink for retrieving encoded packets from an input track.
 * @group Media sinks
 * @public
 */
export class EncodedPacketSink {
	/** @internal */
	_track: InputTrack;

	/** Creates a new {@link EncodedPacketSink} for the given {@link InputTrack}. */
	constructor(track: InputTrack) {
		if (!(track instanceof InputTrack)) {
			throw new TypeError('track must be an InputTrack.');
		}

		this._track = track;
	}

	/**
	 * Retrieves the track's first packet (in decode order), or null if it has no packets. The first packet is very
	 * likely to be a key packet.
	 */
	getFirstPacket(options: PacketRetrievalOptions = {}) {
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		return maybeFixPacketType(this._track, this._track._backing.getFirstPacket(options), options);
	}

	/**
	 * Retrieves the packet corresponding to the given timestamp, in seconds. More specifically, returns the last packet
	 * (in presentation order) with a start timestamp less than or equal to the given timestamp. This method can be
	 * used to retrieve a track's last packet using `getPacket(Infinity)`. The method returns null if the timestamp
	 * is before the first packet in the track.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	getPacket(timestamp: number, options: PacketRetrievalOptions = {}) {
		validateTimestamp(timestamp);
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		return maybeFixPacketType(this._track, this._track._backing.getPacket(timestamp, options), options);
	}

	/**
	 * Retrieves the packet following the given packet (in decode order), or null if the given packet is the
	 * last packet.
	 */
	getNextPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}) {
		if (!(packet instanceof EncodedPacket)) {
			throw new TypeError('packet must be an EncodedPacket.');
		}
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		return maybeFixPacketType(this._track, this._track._backing.getNextPacket(packet, options), options);
	}

	/**
	 * Retrieves the key packet corresponding to the given timestamp, in seconds. More specifically, returns the last
	 * key packet (in presentation order) with a start timestamp less than or equal to the given timestamp. A key packet
	 * is a packet that doesn't require previous packets to be decoded. This method can be used to retrieve a track's
	 * last key packet using `getKeyPacket(Infinity)`. The method returns null if the timestamp is before the first
	 * key packet in the track.
	 *
	 * To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getKeyPacket(timestamp: number, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
		validateTimestamp(timestamp);
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		if (!options.verifyKeyPackets) {
			return this._track._backing.getKeyPacket(timestamp, options);
		}

		const packet = await this._track._backing.getKeyPacket(timestamp, options);
		if (!packet) {
			return packet;
		}
		assert(packet.type === 'key');

		const determinedType = await this._track.determinePacketType(packet);
		if (determinedType === 'delta') {
			// Try returning the previous key packet (in hopes that it's actually a key packet)
			return this.getKeyPacket(packet.timestamp - 1 / this._track.timeResolution, options);
		}

		return packet;
	}

	/**
	 * Retrieves the key packet following the given packet (in decode order), or null if the given packet is the last
	 * key packet.
	 *
	 * To ensure that the returned packet is guaranteed to be a real key frame, enable `options.verifyKeyPackets`.
	 */
	async getNextKeyPacket(packet: EncodedPacket, options: PacketRetrievalOptions = {}): Promise<EncodedPacket | null> {
		if (!(packet instanceof EncodedPacket)) {
			throw new TypeError('packet must be an EncodedPacket.');
		}
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		if (!options.verifyKeyPackets) {
			return this._track._backing.getNextKeyPacket(packet, options);
		}

		const nextPacket = await this._track._backing.getNextKeyPacket(packet, options);
		if (!nextPacket) {
			return nextPacket;
		}
		assert(nextPacket.type === 'key');

		const determinedType = await this._track.determinePacketType(nextPacket);
		if (determinedType === 'delta') {
			// Try returning the next key packet (in hopes that it's actually a key packet)
			return this.getNextKeyPacket(nextPacket, options);
		}

		return nextPacket;
	}

	/**
	 * Creates an async iterator that yields the packets in this track in decode order. To enable fast iteration, this
	 * method will intelligently preload packets based on the speed of the consumer.
	 *
	 * @param startPacket - (optional) The packet from which iteration should begin. This packet will also be yielded.
	 * @param endTimestamp - (optional) The timestamp at which iteration should end. This packet will _not_ be yielded.
	 */
	packets(
		startPacket?: EncodedPacket,
		endPacket?: EncodedPacket,
		options: PacketRetrievalOptions = {},
	): AsyncGenerator<EncodedPacket, void, unknown> {
		if (startPacket !== undefined && !(startPacket instanceof EncodedPacket)) {
			throw new TypeError('startPacket must be an EncodedPacket.');
		}
		if (startPacket !== undefined && startPacket.isMetadataOnly && !options?.metadataOnly) {
			throw new TypeError('startPacket can only be metadata-only if options.metadataOnly is enabled.');
		}
		if (endPacket !== undefined && !(endPacket instanceof EncodedPacket)) {
			throw new TypeError('endPacket must be an EncodedPacket.');
		}
		validatePacketRetrievalOptions(options);

		if (this._track.input._disposed) {
			throw new InputDisposedError();
		}

		const packetQueue: EncodedPacket[] = [];

		let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
		let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
		let ended = false;
		let terminated = false;

		// This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
		// method but instead in a different context. This error should not go unnoticed and must be bubbled up to
		// the consumer.
		let outOfBandError = null as Error | null;

		const timestamps: number[] = [];
		// The queue should always be big enough to hold 1 second worth of packets
		const maxQueueSize = () => Math.max(2, timestamps.length);

		// The following is the "pump" process that keeps pumping packets into the queue
		(async () => {
			let packet = startPacket ?? await this.getFirstPacket(options);

			while (packet && !terminated && !this._track.input._disposed) {
				if (endPacket && packet.sequenceNumber >= endPacket?.sequenceNumber) {
					break;
				}

				if (packetQueue.length > maxQueueSize()) {
					({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
					await queueDequeue;
					continue;
				}

				packetQueue.push(packet);

				onQueueNotEmpty();
				({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());

				packet = await this.getNextPacket(packet, options);
			}

			ended = true;
			onQueueNotEmpty();
		})().catch((error: Error) => {
			if (!outOfBandError) {
				outOfBandError = error;
				onQueueNotEmpty();
			}
		});

		const track = this._track;

		return {
			async next() {
				while (true) {
					if (track.input._disposed) {
						throw new InputDisposedError();
					} else if (terminated) {
						return { value: undefined, done: true };
					} else if (outOfBandError) {
						throw outOfBandError;
					} else if (packetQueue.length > 0) {
						const value = packetQueue.shift()!;
						const now = performance.now();
						timestamps.push(now);

						while (timestamps.length > 0 && now - timestamps[0]! >= 1000) {
							timestamps.shift();
						}

						onQueueDequeue();

						return { value, done: false };
					} else if (ended) {
						return { value: undefined, done: true };
					} else {
						await queueNotEmpty;
					}
				}
			},
			async return() {
				terminated = true;
				onQueueDequeue();
				onQueueNotEmpty();

				return { value: undefined, done: true };
			},
			async throw(error) {
				throw error;
			},
			[Symbol.asyncIterator]() {
				return this;
			},
		};
	}
}

abstract class DecoderWrapper<
	MediaSample extends VideoSample | AudioSample,
> {
	constructor(
		public onSample: (sample: MediaSample) => unknown,
		public onError: (error: Error) => unknown,
	) {}

	abstract getDecodeQueueSize(): number;
	abstract decode(packet: EncodedPacket): void;
	abstract flush(): Promise<void>;
	abstract close(): void;
}

/**
 * Base class for decoded media sample sinks.
 * @group Media sinks
 * @public
 */
export abstract class BaseMediaSampleSink<
	MediaSample extends VideoSample | AudioSample,
> {
	/** @internal */
	abstract _track: InputTrack;

	/** @internal */
	abstract _createDecoder(
		onSample: (sample: MediaSample) => unknown,
		onError: (error: Error) => unknown
	): Promise<DecoderWrapper<MediaSample>>;
	/** @internal */
	abstract _createPacketSink(): EncodedPacketSink;

	/** @internal */
	protected mediaSamplesInRange(
		startTimestamp = 0,
		endTimestamp = Infinity,
	): AsyncGenerator<MediaSample, void, unknown> {
		validateTimestamp(startTimestamp);
		validateTimestamp(endTimestamp);

		const sampleQueue: MediaSample[] = [];
		let firstSampleQueued = false;
		let lastSample: MediaSample | null = null;
		let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
		let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
		let decoderIsFlushed = false;
		let ended = false;
		let terminated = false;

		// This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
		// method but instead in a different context. This error should not go unnoticed and must be bubbled up to
		// the consumer.
		let outOfBandError = null as Error | null;

		// The following is the "pump" process that keeps pumping packets into the decoder
		(async () => {
			const decoder = await this._createDecoder((sample) => {
				onQueueDequeue();
				if (sample.timestamp >= endTimestamp) {
					ended = true;
				}

				if (ended) {
					sample.close();
					return;
				}

				if (lastSample) {
					if (sample.timestamp > startTimestamp) {
						// We don't know ahead of time what the first first is. This is because the first first is the
						// last first whose timestamp is less than or equal to the start timestamp. Therefore we need to
						// wait for the first first after the start timestamp, and then we'll know that the previous
						// first was the first first.
						sampleQueue.push(lastSample);
						firstSampleQueued = true;
					} else {
						lastSample.close();
					}
				}

				if (sample.timestamp >= startTimestamp) {
					sampleQueue.push(sample);
					firstSampleQueued = true;
				}

				lastSample = firstSampleQueued ? null : sample;

				if (sampleQueue.length > 0) {
					onQueueNotEmpty();
					({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());
				}
			}, (error) => {
				if (!outOfBandError) {
					outOfBandError = error;
					onQueueNotEmpty();
				}
			});

			const packetSink = this._createPacketSink();
			const keyPacket = await packetSink.getKeyPacket(startTimestamp, { verifyKeyPackets: true })
				?? await packetSink.getFirstPacket();

			let currentPacket: EncodedPacket | null = keyPacket;

			// B-frames make it exceedingly difficult to properly define an upper bound for packet iteration if an end
			// timestamp is set, so we just don't do it. The case that makes it especially tricky is when the frames
			// following a key frame have a lower timestamp than the keyframe; something that quite frequently happens
			// in HEVC streams. The price to pay for not upper-bounding the packet iterator is a slight increase in
			// decoder work at the end of the range, but the added correctness and reliability makes this tradeoff worth
			// it.
			const endPacket = undefined;

			const packets = packetSink.packets(keyPacket ?? undefined, endPacket);
			await packets.next(); // Skip the start packet as we already have it

			while (currentPacket && !ended && !this._track.input._disposed) {
				const maxQueueSize = computeMaxQueueSize(sampleQueue.length);
				if (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize) {
					({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
					await queueDequeue;
					continue;
				}

				decoder.decode(currentPacket);

				const packetResult = await packets.next();
				if (packetResult.done) {
					break;
				}

				currentPacket = packetResult.value;
			}

			await packets.return();

			if (!terminated && !this._track.input._disposed) {
				await decoder.flush();
			}
			decoder.close();

			if (!firstSampleQueued && lastSample) {
				sampleQueue.push(lastSample);
			}

			decoderIsFlushed = true;
			onQueueNotEmpty(); // To unstuck the generator
		})().catch((error: Error) => {
			if (!outOfBandError) {
				outOfBandError = error;
				onQueueNotEmpty();
			}
		});

		const track = this._track;
		const closeSamples = () => {
			lastSample?.close();
			for (const sample of sampleQueue) {
				sample.close();
			}
		};

		return {
			async next() {
				while (true) {
					if (track.input._disposed) {
						closeSamples();
						throw new InputDisposedError();
					} else if (terminated) {
						return { value: undefined, done: true };
					} else if (outOfBandError) {
						closeSamples();
						throw outOfBandError;
					} else if (sampleQueue.length > 0) {
						const value = sampleQueue.shift()!;
						onQueueDequeue();
						return { value, done: false };
					} else if (!decoderIsFlushed) {
						await queueNotEmpty;
					} else {
						return { value: undefined, done: true };
					}
				}
			},
			async return() {
				terminated = true;
				ended = true;
				onQueueDequeue();
				onQueueNotEmpty();
				closeSamples();

				return { value: undefined, done: true };
			},
			async throw(error) {
				throw error;
			},
			[Symbol.asyncIterator]() {
				return this;
			},
		};
	}

	/** @internal */
	protected mediaSamplesAtTimestamps(
		timestamps: AnyIterable<number>,
	): AsyncGenerator<MediaSample | null, void, unknown> {
		validateAnyIterable(timestamps);
		const timestampIterator = toAsyncIterator(timestamps);
		const timestampsOfInterest: number[] = [];

		const sampleQueue: (MediaSample | null)[] = [];
		let { promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers();
		let { promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers();
		let decoderIsFlushed = false;
		let terminated = false;

		// This stores errors that are "out of band" in the sense that they didn't occur in the normal flow of this
		// method but instead in a different context. This error should not go unnoticed and must be bubbled up to
		// the consumer.
		let outOfBandError = null as Error | null;

		const pushToQueue = (sample: MediaSample | null) => {
			sampleQueue.push(sample);
			onQueueNotEmpty();
			({ promise: queueNotEmpty, resolve: onQueueNotEmpty } = promiseWithResolvers());
		};

		// The following is the "pump" process that keeps pumping packets into the decoder
		(async () => {
			const decoder = await this._createDecoder((sample) => {
				onQueueDequeue();

				if (terminated) {
					sample.close();
					return;
				}

				let sampleUses = 0;
				while (
					timestampsOfInterest.length > 0
					&& sample.timestamp - timestampsOfInterest[0]! > -1e-10 // Give it a little epsilon
				) {
					sampleUses++;
					timestampsOfInterest.shift();
				}

				if (sampleUses > 0) {
					for (let i = 0; i < sampleUses; i++) {
						// Clone the sample if we need to emit it multiple times
						pushToQueue((i < sampleUses - 1 ? sample.clone() : sample) as MediaSample);
					}
				} else {
					sample.close();
				}
			}, (error) => {
				if (!outOfBandError) {
					outOfBandError = error;
					onQueueNotEmpty();
				}
			});

			const packetSink = this._createPacketSink();
			let lastPacket: EncodedPacket | null = null;
			let lastKeyPacket: EncodedPacket | null = null;

			// The end sequence number (inclusive) in the next batch of packets that will be decoded. The batch starts
			// at the last key frame and goes until this sequence number.
			let maxSequenceNumber = -1;

			const decodePackets = async () => {
				assert(lastKeyPacket);

				// Start at the current key packet
				let currentPacket = lastKeyPacket;
				decoder.decode(currentPacket);

				while (currentPacket.sequenceNumber < maxSequenceNumber) {
					const maxQueueSize = computeMaxQueueSize(sampleQueue.length);
					while (sampleQueue.length + decoder.getDecodeQueueSize() > maxQueueSize && !terminated) {
						({ promise: queueDequeue, resolve: onQueueDequeue } = promiseWithResolvers());
						await queueDequeue;
					}

					if (terminated) {
						break;
					}

					const nextPacket = await packetSink.getNextPacket(currentPacket);
					assert(nextPacket);

					decoder.decode(nextPacket);
					currentPacket = nextPacket;
				}

				maxSequenceNumber = -1;
			};

			const flushDecoder = async () => {
				await decoder.flush();

				// We don't expect this list to have any elements in it anymore, but in case it does, let's emit
				// nulls for every remaining element, then clear it.
				for (let i = 0; i < timestampsOfInterest.length; i++) {
					pushToQueue(null);
				}
				timestampsOfInterest.length = 0;
			};

			for await (const timestamp of timestampIterator) {
				validateTimestamp(timestamp);

				if (terminated || this._track.input._disposed) {
					break;
				}

				const targetPacket = await packetSink.getPacket(timestamp);
				const keyPacket = targetPacket && await packetSink.getKeyPacket(timestamp, { verifyKeyPackets: true });

				if (!keyPacket) {
					if (maxSequenceNumber !== -1) {
						await decodePackets();
						await flushDecoder();
					}

					pushToQueue(null);
					lastPacket = null;
					continue;
				}

				// Check if the key packet has changed or if we're going back in time
				if (
					lastPacket
					&& (
						keyPacket.sequenceNumber !== lastKeyPacket!.sequenceNumber
						|| targetPacket.timestamp < lastPacket.timestamp
					)
				) {
					await decodePackets();
					await flushDecoder(); // Always flush here, improves decoder compatibility
				}

				timestampsOfInterest.push(targetPacket.timestamp);
				maxSequenceNumber = Math.max(targetPacket.sequenceNumber, maxSequenceNumber);

				lastPacket = targetPacket;
				lastKeyPacket = keyPacket;
			}

			if (!terminated && !this._track.input._disposed) {
				if (maxSequenceNumber !== -1) {
					// We still need to decode packets
					await decodePackets();
				}

				await flushDecoder();
			}
			decoder.close();

			decoderIsFlushed = true;
			onQueueNotEmpty(); // To unstuck the generator
		})().catch((error: Error) => {
			if (!outOfBandError) {
				outOfBandError = error;
				onQueueNotEmpty();
			}
		});

		const track = this._track;
		const closeSamples = () => {
			for (const sample of sampleQueue) {
				sample?.close();
			}
		};

		return {
			async next() {
				while (true) {
					if (track.input._disposed) {
						closeSamples();
						throw new InputDisposedError();
					} else if (terminated) {
						return { value: undefined, done: true };
					} else if (outOfBandError) {
						closeSamples();
						throw outOfBandError;
					} else if (sampleQueue.length > 0) {
						const value = sampleQueue.shift();
						assert(value !== undefined);
						onQueueDequeue();
						return { value, done: false };
					} else if (!decoderIsFlushed) {
						await queueNotEmpty;
					} else {
						return { value: undefined, done: true };
					}
				}
			},
			async return() {
				terminated = true;
				onQueueDequeue();
				onQueueNotEmpty();
				closeSamples();

				return { value: undefined, done: true };
			},
			async throw(error) {
				throw error;
			},
			[Symbol.asyncIterator]() {
				return this;
			},
		};
	}
}

const computeMaxQueueSize = (decodedSampleQueueSize: number) => {
	// If we have decoded samples lying around, limit the total queue size to a small value (decoded samples can use up
	// a lot of memory). If not, we're fine with a much bigger queue of encoded packets waiting to be decoded. In fact,
	// some decoders only start flushing out decoded chunks when the packet queue is large enough.
	return decodedSampleQueueSize === 0 ? 40 : 8;
};

class VideoDecoderWrapper extends DecoderWrapper<VideoSample> {
	decoder: VideoDecoder | null = null;

	customDecoder: CustomVideoDecoder | null = null;
	customDecoderCallSerializer = new CallSerializer();
	customDecoderQueueSize = 0;

	inputTimestamps: number[] = []; // Timestamps input into the decoder, sorted.
	sampleQueue: VideoSample[] = []; // Safari-specific thing, check usage.
	currentPacketIndex = 0;
	raslSkipped = false; // For HEVC stuff

	// Alpha stuff
	alphaDecoder: VideoDecoder | null = null;
	alphaHadKeyframe = false;
	colorQueue: VideoFrame[] = [];
	alphaQueue: (VideoFrame | null)[] = [];
	merger: ColorAlphaMerger | null = null;
	mergerCreationFailed = false;
	decodedAlphaChunkCount = 0;
	alphaDecoderQueueSize = 0;
	/** Each value is the number of decoded alpha chunks at which a null alpha frame should be added. */
	nullAlphaFrameQueue: number[] = [];
	currentAlphaPacketIndex = 0;
	alphaRaslSkipped = false; // For HEVC stuff

	constructor(
		onSample: (sample: VideoSample) => unknown,
		onError: (error: Error) => unknown,
		public codec: VideoCodec,
		public decoderConfig: VideoDecoderConfig,
		public rotation: Rotation,
		public timeResolution: number,
	) {
		super(onSample, onError);

		const MatchingCustomDecoder = customVideoDecoders.find(x => x.supports(codec, decoderConfig));
		if (MatchingCustomDecoder) {
			// @ts-expect-error "Can't create instance of abstract class 🤓"
			this.customDecoder = new MatchingCustomDecoder() as CustomVideoDecoder;
			// @ts-expect-error It's technically readonly
			this.customDecoder.codec = codec;
			// @ts-expect-error It's technically readonly
			this.customDecoder.config = decoderConfig;
			// @ts-expect-error It's technically readonly
			this.customDecoder.onSample = (sample) => {
				if (!(sample instanceof VideoSample)) {
					throw new TypeError('The argument passed to onSample must be a VideoSample.');
				}

				this.finalizeAndEmitSample(sample);
			};

			void this.customDecoderCallSerializer.call(() => this.customDecoder!.init());
		} else {
			const colorHandler = (frame: VideoFrame) => {
				if (this.alphaQueue.length > 0) {
					// Even when no alpha data is present (most of the time), there will be nulls in this queue
					const alphaFrame = this.alphaQueue.shift();
					assert(alphaFrame !== undefined);

					this.mergeAlpha(frame, alphaFrame);
				} else {
					this.colorQueue.push(frame);
				}
			};

			if (codec === 'avc' && this.decoderConfig.description && isChromium()) {
				// Chromium has/had a bug with playing interlaced AVC (https://issues.chromium.org/issues/456919096)
				// which can be worked around by requesting that software decoding be used. So, here we peek into the
				// AVC description, if present, and switch to software decoding if we find interlaced content.
				const record = deserializeAvcDecoderConfigurationRecord(toUint8Array(this.decoderConfig.description));
				if (record && record.sequenceParameterSets.length > 0) {
					const sps = parseAvcSps(record.sequenceParameterSets[0]!);
					if (sps && sps.frameMbsOnlyFlag === 0) {
						this.decoderConfig = {
							...this.decoderConfig,
							hardwareAcceleration: 'prefer-software',
						};
					}
				}
			}

			const stack = new Error('Decoding error').stack;

			this.decoder = new VideoDecoder({
				output: (frame) => {
					try {
						colorHandler(frame);
					} catch (error) {
						this.onError(error as Error);
					}
				},
				error: (error) => {
					error.stack = stack; // Provide a more useful stack trace, the default one sucks
					this.onError(error);
				},
			});
			this.decoder.configure(this.decoderConfig);
		}
	}

	getDecodeQueueSize() {
		if (this.customDecoder) {
			return this.customDecoderQueueSize;
		} else {
			assert(this.decoder);

			return Math.max(
				this.decoder.decodeQueueSize,
				this.alphaDecoder?.decodeQueueSize ?? 0,
			);
		}
	}

	decode(packet: EncodedPacket) {
		if (this.codec === 'hevc' && this.currentPacketIndex > 0 && !this.raslSkipped) {
			if (this.hasHevcRaslPicture(packet.data)) {
				return; // Drop
			}

			this.raslSkipped = true;
		}

		if (this.customDecoder) {
			this.customDecoderQueueSize++;
			void this.customDecoderCallSerializer
				.call(() => this.customDecoder!.decode(packet))
				.then(() => this.customDecoderQueueSize--);
		} else {
			assert(this.decoder);

			if (!isWebKit()) {
				insertSorted(this.inputTimestamps, packet.timestamp, x => x);
			}

			// Workaround for https://issues.chromium.org/issues/470109459
			if (isChromium() && this.currentPacketIndex === 0 && this.codec === 'avc') {
				const filteredNalUnits: Uint8Array[] = [];

				for (const loc of iterateAvcNalUnits(packet.data, this.decoderConfig)) {
					const type = extractNalUnitTypeForAvc(packet.data[loc.offset]!);
					// These trip up Chromium's key frame detection, so let's strip them
					if (!(type >= 20 && type <= 31)) {
						filteredNalUnits.push(packet.data.subarray(loc.offset, loc.offset + loc.length));
					}
				}

				const newData = concatAvcNalUnits(filteredNalUnits, this.decoderConfig);
				packet = new EncodedPacket(newData, packet.type, packet.timestamp, packet.duration);
			}

			this.decoder.decode(packet.toEncodedVideoChunk());
			this.decodeAlphaData(packet);
		}

		this.currentPacketIndex++;
	}

	decodeAlphaData(packet: EncodedPacket) {
		if (!packet.sideData.alpha || this.mergerCreationFailed) {
			// No alpha side data in the packet, most common case
			this.pushNullAlphaFrame();
			return;
		}

		if (!this.merger) {
			try {
				this.merger = new ColorAlphaMerger();
			} catch (error) {
				console.error('Due to an error, only color data will be decoded.', error);

				this.mergerCreationFailed = true;
				this.decodeAlphaData(packet); // Go again

				return;
			}
		}

		// Check if we need to set up the alpha decoder
		if (!this.alphaDecoder) {
			const alphaHandler = (frame: VideoFrame) => {
				this.alphaDecoderQueueSize--;

				if (this.colorQueue.length > 0) {
					const colorFrame = this.colorQueue.shift();
					assert(colorFrame !== undefined);

					this.mergeAlpha(colorFrame, frame);
				} else {
					this.alphaQueue.push(frame);
				}

				// Check if any null frames have been queued for this point
				this.decodedAlphaChunkCount++;
				while (
					this.nullAlphaFrameQueue.length > 0
					&& this.nullAlphaFrameQueue[0] === this.decodedAlphaChunkCount
				) {
					this.nullAlphaFrameQueue.shift();

					if (this.colorQueue.length > 0) {
						const colorFrame = this.colorQueue.shift();
						assert(colorFrame !== undefined);

						this.mergeAlpha(colorFrame, null);
					} else {
						this.alphaQueue.push(null);
					}
				}
			};

			const stack = new Error('Decoding error').stack;

			this.alphaDecoder = new VideoDecoder({
				output: (frame) => {
					try {
						alphaHandler(frame);
					} catch (error) {
						this.onError(error as Error);
					}
				},
				error: (error) => {
					error.stack = stack; // Provide a more useful stack trace, the default one sucks
					this.onError(error);
				},
			});
			this.alphaDecoder.configure(this.decoderConfig);
		}

		const type = determineVideoPacketType(this.codec, this.decoderConfig, packet.sideData.alpha);

		// Alpha packets might follow a different key frame rhythm than the main packets. Therefore, before we start
		// decoding, we must first find a packet that's actually a key frame. Until then, we treat the image as opaque.
		if (!this.alphaHadKeyframe) {
			this.alphaHadKeyframe = type === 'key';
		}

		if (this.alphaHadKeyframe) {
			// Same RASL skipping logic as for color, unlikely to be hit (since who uses HEVC with separate alpha??) but
			// here for symmetry.
			if (this.codec === 'hevc' && this.currentAlphaPacketIndex > 0 && !this.alphaRaslSkipped) {
				if (this.hasHevcRaslPicture(packet.sideData.alpha)) {
					this.pushNullAlphaFrame();
					return;
				}

				this.alphaRaslSkipped = true;
			}

			this.currentAlphaPacketIndex++;
			this.alphaDecoder.decode(packet.alphaToEncodedVideoChunk(type ?? packet.type));
			this.alphaDecoderQueueSize++;
		} else {
			this.pushNullAlphaFrame();
		}
	}

	pushNullAlphaFrame() {
		if (this.alphaDecoderQueueSize === 0) {
			// Easy
			this.alphaQueue.push(null);
		} else {
			// There are still alpha chunks being decoded, so pushing `null` immediately would result in out-of-order
			// data and be incorrect. Instead, we need to enqueue a "null frame" for when the current decoder workload
			// has finished.
			this.nullAlphaFrameQueue.push(this.decodedAlphaChunkCount + this.alphaDecoderQueueSize);
		}
	}

	/**
	 * If we're using HEVC, we need to make sure to skip any RASL slices that follow a non-IDR key frame such as
	 * CRA_NUT. This is because RASL slices cannot be decoded without data before the CRA_NUT. Browsers behave
	 * differently here: Chromium drops the packets, Safari throws a decoder error. Either way, it's not good
	 * and causes bugs upstream. So, let's take the dropping into our own hands.
	 */
	hasHevcRaslPicture(packetData: Uint8Array) {
		for (const loc of iterateHevcNalUnits(packetData, this.decoderConfig)) {
			const type = extractNalUnitTypeForHevc(packetData[loc.offset]!);
			if (type === HevcNalUnitType.RASL_N || type === HevcNalUnitType.RASL_R) {
				return true;
			}
		}

		return false;
	}

	/** Handler for the WebCodecs VideoDecoder for ironing out browser differences. */
	sampleHandler(sample: VideoSample) {
		if (isWebKit()) {
			// For correct B-frame handling, we don't just hand over the frames directly but instead add them to
			// a queue, because we want to ensure frames are emitted in presentation order. We flush the queue
			// each time we receive a frame with a timestamp larger than the highest we've seen so far, as we
			// can sure that is not a B-frame. Typically, WebCodecs automatically guarantees that frames are
			// emitted in presentation order, but Safari doesn't always follow this rule.
			if (this.sampleQueue.length > 0 && (sample.timestamp >= last(this.sampleQueue)!.timestamp)) {
				for (const sample of this.sampleQueue) {
					this.finalizeAndEmitSample(sample);
				}

				this.sampleQueue.length = 0;
			}

			insertSorted(this.sampleQueue, sample, x => x.timestamp);
		} else {
			// Assign it the next earliest timestamp from the input. We do this because browsers, by spec, are
			// required to emit decoded frames in presentation order *while* retaining the timestamp of their
			// originating EncodedVideoChunk. For files with B-frames but no out-of-order timestamps (like a
			// missing ctts box, for example), this causes a mismatch. We therefore fix the timestamps and
			// ensure they are sorted by doing this.
			const timestamp = this.inputTimestamps.shift();

			// There's no way we'd have more decoded frames than encoded packets we passed in. Actually, the
			// correspondence should be 1:1.
			assert(timestamp !== undefined);

			sample.setTimestamp(timestamp);
			this.finalizeAndEmitSample(sample);
		}
	}

	finalizeAndEmitSample(sample: VideoSample) {
		// Round the timestamps to the time resolution
		sample.setTimestamp(Math.round(sample.timestamp * this.timeResolution) / this.timeResolution);
		sample.setDuration(Math.round(sample.duration * this.timeResolution) / this.timeResolution);
		sample.setRotation(this.rotation);

		this.onSample(sample);
	}

	mergeAlpha(color: VideoFrame, alpha: VideoFrame | null) {
		if (!alpha) {
			// Nothing needs to be merged
			const finalSample = new VideoSample(color);
			this.sampleHandler(finalSample);

			return;
		}

		assert(this.merger);

		this.merger.update(color, alpha);
		color.close();
		alpha.close();

		const finalFrame = new VideoFrame(this.merger.canvas, {
			timestamp: color.timestamp,
			duration: color.duration ?? undefined,
		});

		const finalSample = new VideoSample(finalFrame);
		this.sampleHandler(finalSample);
	}

	async flush() {
		if (this.customDecoder) {
			await this.customDecoderCallSerializer.call(() => this.customDecoder!.flush());
		} else {
			assert(this.decoder);
			await Promise.all([
				this.decoder.flush(),
				this.alphaDecoder?.flush(),
			]);

			this.colorQueue.forEach(x => x.close());
			this.colorQueue.length = 0;
			this.alphaQueue.forEach(x => x?.close());
			this.alphaQueue.length = 0;

			this.alphaHadKeyframe = false;
			this.decodedAlphaChunkCount = 0;
			this.alphaDecoderQueueSize = 0;
			this.nullAlphaFrameQueue.length = 0;
			this.currentAlphaPacketIndex = 0;
			this.alphaRaslSkipped = false;
		}

		if (isWebKit()) {
			for (const sample of this.sampleQueue) {
				this.finalizeAndEmitSample(sample);
			}

			this.sampleQueue.length = 0;
		}

		this.currentPacketIndex = 0;
		this.raslSkipped = false;
	}

	close() {
		if (this.customDecoder) {
			void this.customDecoderCallSerializer.call(() => this.customDecoder!.close());
		} else {
			assert(this.decoder);
			this.decoder.close();
			this.alphaDecoder?.close();

			this.colorQueue.forEach(x => x.close());
			this.colorQueue.length = 0;
			this.alphaQueue.forEach(x => x?.close());
			this.alphaQueue.length = 0;

			this.merger?.close();
		}

		for (const sample of this.sampleQueue) {
			sample.close();
		}
		this.sampleQueue.length = 0;
	}
}

/** Utility class that merges together color and alpha information using simple WebGL 2 shaders. */
class ColorAlphaMerger {
	canvas: OffscreenCanvas | HTMLCanvasElement;
	private gl: WebGL2RenderingContext;
	private program: WebGLProgram;
	private vao: WebGLVertexArrayObject;
	private colorTexture: WebGLTexture;
	private alphaTexture: WebGLTexture;

	constructor() {
		// Canvas will be resized later
		if (typeof OffscreenCanvas !== 'undefined') {
			// Prefer OffscreenCanvas for Worker environments
			this.canvas = new OffscreenCanvas(300, 150);
		} else {
			this.canvas = document.createElement('canvas');
		}

		const gl = this.canvas.getContext('webgl2', {
			premultipliedAlpha: false,
		}) as unknown as WebGL2RenderingContext | null; // Casting because of some TypeScript weirdness
		if (!gl) {
			throw new Error('Couldn\'t acquire WebGL 2 context.');
		}

		this.gl = gl;
		this.program = this.createProgram();
		this.vao = this.createVAO();
		this.colorTexture = this.createTexture();
		this.alphaTexture = this.createTexture();

		this.gl.useProgram(this.program);
		this.gl.uniform1i(this.gl.getUniformLocation(this.program, 'u_colorTexture'), 0);
		this.gl.uniform1i(this.gl.getUniformLocation(this.program, 'u_alphaTexture'), 1);
	}

	private createProgram(): WebGLProgram {
		const vertexShader = this.createShader(this.gl.VERTEX_SHADER, `#version 300 es
			in vec2 a_position;
			in vec2 a_texCoord;
			out vec2 v_texCoord;

			void main() {
				gl_Position = vec4(a_position, 0.0, 1.0);
				v_texCoord = a_texCoord;
			}
		`);

		const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es
			precision highp float;

			uniform sampler2D u_colorTexture;
			uniform sampler2D u_alphaTexture;
			in vec2 v_texCoord;
			out vec4 fragColor;

			void main() {
				vec3 color = texture(u_colorTexture, v_texCoord).rgb;
				float alpha = texture(u_alphaTexture, v_texCoord).r;
				fragColor = vec4(color, alpha);
			}
		`);

		const program = this.gl.createProgram();
		this.gl.attachShader(program, vertexShader);
		this.gl.attachShader(program, fragmentShader);
		this.gl.linkProgram(program);

		return program;
	}

	private createShader(type: number, source: string): WebGLShader {
		const shader = this.gl.createShader(type)!;
		this.gl.shaderSource(shader, source);
		this.gl.compileShader(shader);
		return shader;
	}

	private createVAO(): WebGLVertexArrayObject {
		const vao = this.gl.createVertexArray();
		this.gl.bindVertexArray(vao);

		const vertices = new Float32Array([
			-1, -1, 0, 1,
			1, -1, 1, 1,
			-1, 1, 0, 0,
			1, 1, 1, 0,
		]);

		const buffer = this.gl.createBuffer();
		this.gl.bindBuffer(this.gl.ARRAY_BUFFER, buffer);
		this.gl.bufferData(this.gl.ARRAY_BUFFER, vertices, this.gl.STATIC_DRAW);

		const positionLocation = this.gl.getAttribLocation(this.program, 'a_position');
		const texCoordLocation = this.gl.getAttribLocation(this.program, 'a_texCoord');

		this.gl.enableVertexAttribArray(positionLocation);
		this.gl.vertexAttribPointer(positionLocation, 2, this.gl.FLOAT, false, 16, 0);

		this.gl.enableVertexAttribArray(texCoordLocation);
		this.gl.vertexAttribPointer(texCoordLocation, 2, this.gl.FLOAT, false, 16, 8);

		return vao;
	}

	private createTexture(): WebGLTexture {
		const texture = this.gl.createTexture();

		this.gl.bindTexture(this.gl.TEXTURE_2D, texture);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_S, this.gl.CLAMP_TO_EDGE);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_T, this.gl.CLAMP_TO_EDGE);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MIN_FILTER, this.gl.LINEAR);
		this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MAG_FILTER, this.gl.LINEAR);

		return texture;
	}

	update(color: VideoFrame, alpha: VideoFrame): void {
		if (color.displayWidth !== this.canvas.width || color.displayHeight !== this.canvas.height) {
			this.canvas.width = color.displayWidth;
			this.canvas.height = color.displayHeight;
		}

		this.gl.activeTexture(this.gl.TEXTURE0);
		this.gl.bindTexture(this.gl.TEXTURE_2D, this.colorTexture);
		this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, color);

		this.gl.activeTexture(this.gl.TEXTURE1);
		this.gl.bindTexture(this.gl.TEXTURE_2D, this.alphaTexture);
		this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, alpha);

		this.gl.viewport(0, 0, this.canvas.width, this.canvas.height);
		this.gl.clear(this.gl.COLOR_BUFFER_BIT);

		this.gl.bindVertexArray(this.vao);
		this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4);
	}

	close() {
		this.gl.getExtension('WEBGL_lose_context')?.loseContext();
		this.gl = null as unknown as WebGL2RenderingContext;
	}
}

/**
 * A sink that retrieves decoded video samples (video frames) from a video track.
 * @group Media sinks
 * @public
 */
export class VideoSampleSink extends BaseMediaSampleSink<VideoSample> {
	/** @internal */
	_track: InputVideoTrack;

	/** Creates a new {@link VideoSampleSink} for the given {@link InputVideoTrack}. */
	constructor(videoTrack: InputVideoTrack) {
		if (!(videoTrack instanceof InputVideoTrack)) {
			throw new TypeError('videoTrack must be an InputVideoTrack.');
		}

		super();

		this._track = videoTrack;
	}

	/** @internal */
	async _createDecoder(
		onSample: (sample: VideoSample) => unknown,
		onError: (error: Error) => unknown,
	) {
		if (!(await this._track.canDecode())) {
			throw new Error(
				'This video track cannot be decoded by this browser. Make sure to check decodability before using'
				+ ' a track.',
			);
		}

		const codec = this._track.codec;
		const rotation = this._track.rotation;
		const decoderConfig = await this._track.getDecoderConfig();
		const timeResolution = this._track.timeResolution;
		assert(codec && decoderConfig);

		return new VideoDecoderWrapper(onSample, onError, codec, decoderConfig, rotation, timeResolution);
	}

	/** @internal */
	_createPacketSink() {
		return new EncodedPacketSink(this._track);
	}

	/**
	 * Retrieves the video sample (frame) corresponding to the given timestamp, in seconds. More specifically, returns
	 * the last video sample (in presentation order) with a start timestamp less than or equal to the given timestamp.
	 * Returns null if the timestamp is before the track's first timestamp.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getSample(timestamp: number) {
		validateTimestamp(timestamp);

		for await (const sample of this.mediaSamplesAtTimestamps([timestamp])) {
			return sample;
		}
		throw new Error('Internal error: Iterator returned nothing.');
	}

	/**
	 * Creates an async iterator that yields the video samples (frames) of this track in presentation order. This method
	 * will intelligently pre-decode a few frames ahead to enable fast iteration.
	 *
	 * @param startTimestamp - The timestamp in seconds at which to start yielding samples (inclusive).
	 * @param endTimestamp - The timestamp in seconds at which to stop yielding samples (exclusive).
	 */
	samples(startTimestamp = 0, endTimestamp = Infinity) {
		return this.mediaSamplesInRange(startTimestamp, endTimestamp);
	}

	/**
	 * Creates an async iterator that yields a video sample (frame) for each timestamp in the argument. This method
	 * uses an optimized decoding pipeline if these timestamps are monotonically sorted, decoding each packet at most
	 * once, and is therefore more efficient than manually getting the sample for every timestamp. The iterator may
	 * yield null if no frame is available for a given timestamp.
	 *
	 * @param timestamps - An iterable or async iterable of timestamps in seconds.
	 */
	samplesAtTimestamps(timestamps: AnyIterable<number>) {
		return this.mediaSamplesAtTimestamps(timestamps);
	}
}

/**
 * A canvas with additional timing information (timestamp & duration).
 * @group Media sinks
 * @public
 */
export type WrappedCanvas = {
	/** A canvas element or offscreen canvas. */
	canvas: HTMLCanvasElement | OffscreenCanvas;
	/** The timestamp of the corresponding video sample, in seconds. */
	timestamp: number;
	/** The duration of the corresponding video sample, in seconds. */
	duration: number;
};

/**
 * Options for constructing a CanvasSink.
 * @group Media sinks
 * @public
 */
export type CanvasSinkOptions = {
	/**
	 * Whether the output canvases should have transparency instead of a black background. Defaults to `false`. Set
	 * this to `true` when using this sink to read transparent videos.
	 */
	alpha?: boolean;
	/**
	 * The width of the output canvas in pixels, defaulting to the display width of the video track. If height is not
	 * set, it will be deduced automatically based on aspect ratio.
	 */
	width?: number;
	/**
	 * The height of the output canvas in pixels, defaulting to the display height of the video track. If width is not
	 * set, it will be deduced automatically based on aspect ratio.
	 */
	height?: number;
	/**
	 * The fitting algorithm in case both width and height are set.
	 *
	 * - `'fill'` will stretch the image to fill the entire box, potentially altering aspect ratio.
	 * - `'contain'` will contain the entire image within the box while preserving aspect ratio. This may lead to
	 * letterboxing.
	 * - `'cover'` will scale the image until the entire box is filled, while preserving aspect ratio.
	 */
	fit?: 'fill' | 'contain' | 'cover';
	/**
	 * The clockwise rotation by which to rotate the raw video frame. Defaults to the rotation set in the file metadata.
	 * Rotation is applied before resizing.
	 */
	rotation?: Rotation;
	/**
	 * Specifies the rectangular region of the input video to crop to. The crop region will automatically be clamped to
	 * the dimensions of the input video track. Cropping is performed after rotation but before resizing.
	 */
	crop?: CropRectangle;
	/**
	 * When set, specifies the number of canvases in the pool. These canvases will be reused in a ring buffer /
	 * round-robin type fashion. This keeps the amount of allocated VRAM constant and relieves the browser from
	 * constantly allocating/deallocating canvases. A pool size of 0 or `undefined` disables the pool and means a new
	 * canvas is created each time.
	 */
	poolSize?: number;
};

/**
 * A sink that renders video samples (frames) of the given video track to canvases. This is often more useful than
 * directly retrieving frames, as it comes with common preprocessing steps such as resizing or applying rotation
 * metadata.
 *
 * This sink will yield `HTMLCanvasElement`s when in a DOM context, and `OffscreenCanvas`es otherwise.
 *
 * @group Media sinks
 * @public
 */
export class CanvasSink {
	/** @internal */
	_videoTrack: InputVideoTrack;
	/** @internal */
	_alpha: boolean;
	/** @internal */
	_width: number;
	/** @internal */
	_height: number;
	/** @internal */
	_fit: 'fill' | 'contain' | 'cover';
	/** @internal */
	_rotation: Rotation;
	/** @internal */
	_crop?: { left: number; top: number; width: number; height: number };
	/** @internal */
	_videoSampleSink: VideoSampleSink;
	/** @internal */
	_canvasPool: (HTMLCanvasElement | OffscreenCanvas | null)[];
	/** @internal */
	_nextCanvasIndex = 0;

	/** Creates a new {@link CanvasSink} for the given {@link InputVideoTrack}. */
	constructor(videoTrack: InputVideoTrack, options: CanvasSinkOptions = {}) {
		if (!(videoTrack instanceof InputVideoTrack)) {
			throw new TypeError('videoTrack must be an InputVideoTrack.');
		}
		if (options && typeof options !== 'object') {
			throw new TypeError('options must be an object.');
		}
		if (options.alpha !== undefined && typeof options.alpha !== 'boolean') {
			throw new TypeError('options.alpha, when provided, must be a boolean.');
		}
		if (options.width !== undefined && (!Number.isInteger(options.width) || options.width <= 0)) {
			throw new TypeError('options.width, when defined, must be a positive integer.');
		}
		if (options.height !== undefined && (!Number.isInteger(options.height) || options.height <= 0)) {
			throw new TypeError('options.height, when defined, must be a positive integer.');
		}
		if (options.fit !== undefined && !['fill', 'contain', 'cover'].includes(options.fit)) {
			throw new TypeError('options.fit, when provided, must be one of "fill", "contain", or "cover".');
		}
		if (
			options.width !== undefined
			&& options.height !== undefined
			&& options.fit === undefined
		) {
			throw new TypeError(
				'When both options.width and options.height are provided, options.fit must also be provided.',
			);
		}
		if (options.rotation !== undefined && ![0, 90, 180, 270].includes(options.rotation)) {
			throw new TypeError('options.rotation, when provided, must be 0, 90, 180 or 270.');
		}
		if (options.crop !== undefined) {
			validateCropRectangle(options.crop, 'options.');
		}
		if (
			options.poolSize !== undefined
			&& (typeof options.poolSize !== 'number' || !Number.isInteger(options.poolSize) || options.poolSize < 0)
		) {
			throw new TypeError('poolSize must be a non-negative integer.');
		}

		const rotation = options.rotation ?? videoTrack.rotation;

		const [rotatedWidth, rotatedHeight] = rotation % 180 === 0
			? [videoTrack.codedWidth, videoTrack.codedHeight]
			: [videoTrack.codedHeight, videoTrack.codedWidth];

		const crop = options.crop;
		if (crop) {
			clampCropRectangle(crop, rotatedWidth, rotatedHeight);
		}

		let [width, height] = crop
			? [crop.width, crop.height]
			: [rotatedWidth, rotatedHeight];
		const originalAspectRatio = width / height;

		// If width and height aren't defined together, deduce the missing value using the aspect ratio
		if (options.width !== undefined && options.height === undefined) {
			width = options.width;
			height = Math.round(width / originalAspectRatio);
		} else if (options.width === undefined && options.height !== undefined) {
			height = options.height;
			width = Math.round(height * originalAspectRatio);
		} else if (options.width !== undefined && options.height !== undefined) {
			width = options.width;
			height = options.height;
		}

		this._videoTrack = videoTrack;
		this._alpha = options.alpha ?? false;
		this._width = width;
		this._height = height;
		this._rotation = rotation;
		this._crop = crop;
		this._fit = options.fit ?? 'fill';
		this._videoSampleSink = new VideoSampleSink(videoTrack);
		this._canvasPool = Array.from({ length: options.poolSize ?? 0 }, () => null);
	}

	/** @internal */
	_videoSampleToWrappedCanvas(sample: VideoSample): WrappedCanvas {
		let canvas = this._canvasPool[this._nextCanvasIndex];
		let canvasIsNew = false;

		if (!canvas) {
			if (typeof document !== 'undefined') {
				// Prefer an HTMLCanvasElement
				canvas = document.createElement('canvas');
				canvas.width = this._width;
				canvas.height = this._height;
			} else {
				canvas = new OffscreenCanvas(this._width, this._height);
			}

			if (this._canvasPool.length > 0) {
				this._canvasPool[this._nextCanvasIndex] = canvas;
			}

			canvasIsNew = true;
		}

		if (this._canvasPool.length > 0) {
			this._nextCanvasIndex = (this._nextCanvasIndex + 1) % this._canvasPool.length;
		}

		const context = canvas.getContext('2d', {
			alpha: this._alpha || isFirefox(), // Firefox has VideoFrame glitches with opaque canvases
		}) as CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D;
		assert(context);

		context.resetTransform();

		if (!canvasIsNew) {
			if (!this._alpha && isFirefox()) {
				context.fillStyle = 'black';
				context.fillRect(0, 0, this._width, this._height);
			} else {
				context.clearRect(0, 0, this._width, this._height);
			}
		}

		sample.drawWithFit(context, {
			fit: this._fit,
			rotation: this._rotation,
			crop: this._crop,
		});

		const result = {
			canvas,
			timestamp: sample.timestamp,
			duration: sample.duration,
		};

		sample.close();
		return result;
	}

	/**
	 * Retrieves a canvas with the video frame corresponding to the given timestamp, in seconds. More specifically,
	 * returns the last video frame (in presentation order) with a start timestamp less than or equal to the given
	 * timestamp. Returns null if the timestamp is before the track's first timestamp.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getCanvas(timestamp: number) {
		validateTimestamp(timestamp);

		const sample = await this._videoSampleSink.getSample(timestamp);
		return sample && this._videoSampleToWrappedCanvas(sample);
	}

	/**
	 * Creates an async iterator that yields canvases with the video frames of this track in presentation order. This
	 * method will intelligently pre-decode a few frames ahead to enable fast iteration.
	 *
	 * @param startTimestamp - The timestamp in seconds at which to start yielding canvases (inclusive).
	 * @param endTimestamp - The timestamp in seconds at which to stop yielding canvases (exclusive).
	 */
	canvases(startTimestamp = 0, endTimestamp = Infinity) {
		return mapAsyncGenerator(
			this._videoSampleSink.samples(startTimestamp, endTimestamp),
			sample => this._videoSampleToWrappedCanvas(sample),
		);
	}

	/**
	 * Creates an async iterator that yields a canvas for each timestamp in the argument. This method uses an optimized
	 * decoding pipeline if these timestamps are monotonically sorted, decoding each packet at most once, and is
	 * therefore more efficient than manually getting the canvas for every timestamp. The iterator may yield null if
	 * no frame is available for a given timestamp.
	 *
	 * @param timestamps - An iterable or async iterable of timestamps in seconds.
	 */
	canvasesAtTimestamps(timestamps: AnyIterable<number>) {
		return mapAsyncGenerator(
			this._videoSampleSink.samplesAtTimestamps(timestamps),
			sample => sample && this._videoSampleToWrappedCanvas(sample),
		);
	}
}

class AudioDecoderWrapper extends DecoderWrapper<AudioSample> {
	decoder: AudioDecoder | null = null;

	customDecoder: CustomAudioDecoder | null = null;
	customDecoderCallSerializer = new CallSerializer();
	customDecoderQueueSize = 0;

	// Internal state to accumulate a precise current timestamp based on audio durations, not the (potentially
	// inaccurate) packet timestamps.
	currentTimestamp: number | null = null;

	constructor(
		onSample: (sample: AudioSample) => unknown,
		onError: (error: Error) => unknown,
		codec: AudioCodec,
		decoderConfig: AudioDecoderConfig,
	) {
		super(onSample, onError);

		const sampleHandler = (sample: AudioSample) => {
			if (
				this.currentTimestamp === null
				|| Math.abs(sample.timestamp - this.currentTimestamp) >= sample.duration
			) {
				// We need to sync with the sample timestamp again
				this.currentTimestamp = sample.timestamp;
			}

			const preciseTimestamp = this.currentTimestamp;
			this.currentTimestamp += sample.duration;

			if (sample.numberOfFrames === 0) {
				// We skip zero-data (empty) AudioSamples. These are sometimes emitted, for example, by Firefox when it
				// decodes Vorbis (at the start).
				sample.close();
				return;
			}

			// Round the timestamp to the sample rate
			const sampleRate = decoderConfig.sampleRate;
			sample.setTimestamp(Math.round(preciseTimestamp * sampleRate) / sampleRate);

			onSample(sample);
		};

		const MatchingCustomDecoder = customAudioDecoders.find(x => x.supports(codec, decoderConfig));
		if (MatchingCustomDecoder) {
			// @ts-expect-error "Can't create instance of abstract class 🤓"
			this.customDecoder = new MatchingCustomDecoder() as CustomAudioDecoder;
			// @ts-expect-error It's technically readonly
			this.customDecoder.codec = codec;
			// @ts-expect-error It's technically readonly
			this.customDecoder.config = decoderConfig;
			// @ts-expect-error It's technically readonly
			this.customDecoder.onSample = (sample) => {
				if (!(sample instanceof AudioSample)) {
					throw new TypeError('The argument passed to onSample must be an AudioSample.');
				}

				sampleHandler(sample);
			};

			void this.customDecoderCallSerializer.call(() => this.customDecoder!.init());
		} else {
			const stack = new Error('Decoding error').stack;

			this.decoder = new AudioDecoder({
				output: (data) => {
					try {
						sampleHandler(new AudioSample(data));
					} catch (error) {
						this.onError(error as Error);
					}
				},
				error: (error) => {
					error.stack = stack; // Provide a more useful stack trace, the default one sucks
					this.onError(error);
				},
			});
			this.decoder.configure(decoderConfig);
		}
	}

	getDecodeQueueSize() {
		if (this.customDecoder) {
			return this.customDecoderQueueSize;
		} else {
			assert(this.decoder);
			return this.decoder.decodeQueueSize;
		}
	}

	decode(packet: EncodedPacket) {
		if (this.customDecoder) {
			this.customDecoderQueueSize++;
			void this.customDecoderCallSerializer
				.call(() => this.customDecoder!.decode(packet))
				.then(() => this.customDecoderQueueSize--);
		} else {
			assert(this.decoder);
			this.decoder.decode(packet.toEncodedAudioChunk());
		}
	}

	flush() {
		if (this.customDecoder) {
			return this.customDecoderCallSerializer.call(() => this.customDecoder!.flush());
		} else {
			assert(this.decoder);
			return this.decoder.flush();
		}
	}

	close() {
		if (this.customDecoder) {
			void this.customDecoderCallSerializer.call(() => this.customDecoder!.close());
		} else {
			assert(this.decoder);
			this.decoder.close();
		}
	}
}

// There are a lot of PCM variants not natively supported by the browser and by AudioData. Therefore we need a simple
// decoder that maps any input PCM format into a PCM format supported by the browser.
class PcmAudioDecoderWrapper extends DecoderWrapper<AudioSample> {
	codec: PcmAudioCodec;

	inputSampleSize: 1 | 2 | 3 | 4 | 8;
	readInputValue: (view: DataView, byteOffset: number) => number;

	outputSampleSize: 1 | 2 | 4;
	outputFormat: 'u8' | 's16' | 's32' | 'f32';
	writeOutputValue: (view: DataView, byteOffset: number, value: number) => void;

	// Internal state to accumulate a precise current timestamp based on audio durations, not the (potentially
	// inaccurate) packet timestamps.
	currentTimestamp: number | null = null;

	constructor(
		onSample: (sample: AudioSample) => unknown,
		onError: (error: Error) => unknown,
		public decoderConfig: AudioDecoderConfig,
	) {
		super(onSample, onError);

		assert((PCM_AUDIO_CODECS as readonly string[]).includes(decoderConfig.codec));
		this.codec = decoderConfig.codec as PcmAudioCodec;

		const { dataType, sampleSize, littleEndian } = parsePcmCodec(this.codec);
		this.inputSampleSize = sampleSize;

		switch (sampleSize) {
			case 1: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => view.getUint8(byteOffset) - 2 ** 7;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => view.getInt8(byteOffset);
				} else if (dataType === 'ulaw') {
					this.readInputValue = (view, byteOffset) => fromUlaw(view.getUint8(byteOffset));
				} else if (dataType === 'alaw') {
					this.readInputValue = (view, byteOffset) => fromAlaw(view.getUint8(byteOffset));
				} else {
					assert(false);
				}
			}; break;
			case 2: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => view.getUint16(byteOffset, littleEndian) - 2 ** 15;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => view.getInt16(byteOffset, littleEndian);
				} else {
					assert(false);
				}
			}; break;
			case 3: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => getUint24(view, byteOffset, littleEndian) - 2 ** 23;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => getInt24(view, byteOffset, littleEndian);
				} else {
					assert(false);
				}
			}; break;
			case 4: {
				if (dataType === 'unsigned') {
					this.readInputValue = (view, byteOffset) => view.getUint32(byteOffset, littleEndian) - 2 ** 31;
				} else if (dataType === 'signed') {
					this.readInputValue = (view, byteOffset) => view.getInt32(byteOffset, littleEndian);
				} else if (dataType === 'float') {
					this.readInputValue = (view, byteOffset) => view.getFloat32(byteOffset, littleEndian);
				} else {
					assert(false);
				}
			}; break;
			case 8: {
				if (dataType === 'float') {
					this.readInputValue = (view, byteOffset) => view.getFloat64(byteOffset, littleEndian);
				} else {
					assert(false);
				}
			}; break;
			default: {
				assertNever(sampleSize);
				assert(false);
			};
		}

		switch (sampleSize) {
			case 1: {
				if (dataType === 'ulaw' || dataType === 'alaw') {
					this.outputSampleSize = 2;
					this.outputFormat = 's16';
					this.writeOutputValue = (view, byteOffset, value) => view.setInt16(byteOffset, value, true);
				} else {
					this.outputSampleSize = 1;
					this.outputFormat = 'u8';
					this.writeOutputValue = (view, byteOffset, value) => view.setUint8(byteOffset, value + 2 ** 7);
				}
			}; break;
			case 2: {
				this.outputSampleSize = 2;
				this.outputFormat = 's16';
				this.writeOutputValue = (view, byteOffset, value) => view.setInt16(byteOffset, value, true);
			}; break;
			case 3: {
				this.outputSampleSize = 4;
				this.outputFormat = 's32';
				// From https://www.w3.org/TR/webcodecs:
				// AudioData containing 24-bit samples SHOULD store those samples in s32 or f32. When samples are
				// stored in s32, each sample MUST be left-shifted by 8 bits.
				this.writeOutputValue = (view, byteOffset, value) => view.setInt32(byteOffset, value << 8, true);
			}; break;
			case 4: {
				this.outputSampleSize = 4;

				if (dataType === 'float') {
					this.outputFormat = 'f32';
					this.writeOutputValue = (view, byteOffset, value) => view.setFloat32(byteOffset, value, true);
				} else {
					this.outputFormat = 's32';
					this.writeOutputValue = (view, byteOffset, value) => view.setInt32(byteOffset, value, true);
				}
			}; break;
			case 8: {
				this.outputSampleSize = 4;

				this.outputFormat = 'f32';
				this.writeOutputValue = (view, byteOffset, value) => view.setFloat32(byteOffset, value, true);
			}; break;
			default: {
				assertNever(sampleSize);
				assert(false);
			};
		};
	}

	getDecodeQueueSize() {
		return 0;
	}

	decode(packet: EncodedPacket) {
		const inputView = toDataView(packet.data);

		const numberOfFrames = packet.byteLength / this.decoderConfig.numberOfChannels / this.inputSampleSize;

		const outputBufferSize = numberOfFrames * this.decoderConfig.numberOfChannels * this.outputSampleSize;
		const outputBuffer = new ArrayBuffer(outputBufferSize);
		const outputView = new DataView(outputBuffer);

		for (let i = 0; i < numberOfFrames * this.decoderConfig.numberOfChannels; i++) {
			const inputIndex = i * this.inputSampleSize;
			const outputIndex = i * this.outputSampleSize;

			const value = this.readInputValue(inputView, inputIndex);
			this.writeOutputValue(outputView, outputIndex, value);
		}

		const preciseDuration = numberOfFrames / this.decoderConfig.sampleRate;
		if (this.currentTimestamp === null || Math.abs(packet.timestamp - this.currentTimestamp) >= preciseDuration) {
			// We need to sync with the packet timestamp again
			this.currentTimestamp = packet.timestamp;
		}

		const preciseTimestamp = this.currentTimestamp;
		this.currentTimestamp += preciseDuration;

		const audioSample = new AudioSample({
			format: this.outputFormat,
			data: outputBuffer,
			numberOfChannels: this.decoderConfig.numberOfChannels,
			sampleRate: this.decoderConfig.sampleRate,
			numberOfFrames,
			timestamp: preciseTimestamp,
		});

		this.onSample(audioSample);
	}

	async flush() {
		// Do nothing
	}

	close() {
		// Do nothing
	}
}

/**
 * Sink for retrieving decoded audio samples from an audio track.
 * @group Media sinks
 * @public
 */
export class AudioSampleSink extends BaseMediaSampleSink<AudioSample> {
	/** @internal */
	_track: InputAudioTrack;

	/** Creates a new {@link AudioSampleSink} for the given {@link InputAudioTrack}. */
	constructor(audioTrack: InputAudioTrack) {
		if (!(audioTrack instanceof InputAudioTrack)) {
			throw new TypeError('audioTrack must be an InputAudioTrack.');
		}

		super();

		this._track = audioTrack;
	}

	/** @internal */
	async _createDecoder(
		onSample: (sample: AudioSample) => unknown,
		onError: (error: Error) => unknown,
	) {
		if (!(await this._track.canDecode())) {
			throw new Error(
				'This audio track cannot be decoded by this browser. Make sure to check decodability before using'
				+ ' a track.',
			);
		}

		const codec = this._track.codec;
		const decoderConfig = await this._track.getDecoderConfig();
		assert(codec && decoderConfig);

		if ((PCM_AUDIO_CODECS as readonly string[]).includes(decoderConfig.codec)) {
			return new PcmAudioDecoderWrapper(onSample, onError, decoderConfig);
		} else {
			return new AudioDecoderWrapper(onSample, onError, codec, decoderConfig);
		}
	}

	/** @internal */
	_createPacketSink() {
		return new EncodedPacketSink(this._track);
	}

	/**
	 * Retrieves the audio sample corresponding to the given timestamp, in seconds. More specifically, returns
	 * the last audio sample (in presentation order) with a start timestamp less than or equal to the given timestamp.
	 * Returns null if the timestamp is before the track's first timestamp.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getSample(timestamp: number) {
		validateTimestamp(timestamp);

		for await (const sample of this.mediaSamplesAtTimestamps([timestamp])) {
			return sample;
		}
		throw new Error('Internal error: Iterator returned nothing.');
	}

	/**
	 * Creates an async iterator that yields the audio samples of this track in presentation order. This method
	 * will intelligently pre-decode a few samples ahead to enable fast iteration.
	 *
	 * @param startTimestamp - The timestamp in seconds at which to start yielding samples (inclusive).
	 * @param endTimestamp - The timestamp in seconds at which to stop yielding samples (exclusive).
	 */
	samples(startTimestamp = 0, endTimestamp = Infinity) {
		return this.mediaSamplesInRange(startTimestamp, endTimestamp);
	}

	/**
	 * Creates an async iterator that yields an audio sample for each timestamp in the argument. This method
	 * uses an optimized decoding pipeline if these timestamps are monotonically sorted, decoding each packet at most
	 * once, and is therefore more efficient than manually getting the sample for every timestamp. The iterator may
	 * yield null if no sample is available for a given timestamp.
	 *
	 * @param timestamps - An iterable or async iterable of timestamps in seconds.
	 */
	samplesAtTimestamps(timestamps: AnyIterable<number>) {
		return this.mediaSamplesAtTimestamps(timestamps);
	}
}

/**
 * An AudioBuffer with additional timing information (timestamp & duration).
 * @group Media sinks
 * @public
 */
export type WrappedAudioBuffer = {
	/** An AudioBuffer. */
	buffer: AudioBuffer;
	/** The timestamp of the corresponding audio sample, in seconds. */
	timestamp: number;
	/** The duration of the corresponding audio sample, in seconds. */
	duration: number;
};

/**
 * A sink that retrieves decoded audio samples from an audio track and converts them to `AudioBuffer` instances. This is
 * often more useful than directly retrieving audio samples, as audio buffers can be directly used with the
 * Web Audio API.
 * @group Media sinks
 * @public
 */
export class AudioBufferSink {
	/** @internal */
	_audioSampleSink: AudioSampleSink;

	/** Creates a new {@link AudioBufferSink} for the given {@link InputAudioTrack}. */
	constructor(audioTrack: InputAudioTrack) {
		if (!(audioTrack instanceof InputAudioTrack)) {
			throw new TypeError('audioTrack must be an InputAudioTrack.');
		}

		this._audioSampleSink = new AudioSampleSink(audioTrack);
	}

	/** @internal */
	_audioSampleToWrappedArrayBuffer(sample: AudioSample): WrappedAudioBuffer {
		const result: WrappedAudioBuffer = {
			buffer: sample.toAudioBuffer(),
			timestamp: sample.timestamp,
			duration: sample.duration,
		};

		sample.close();
		return result;
	}

	/**
	 * Retrieves the audio buffer corresponding to the given timestamp, in seconds. More specifically, returns
	 * the last audio buffer (in presentation order) with a start timestamp less than or equal to the given timestamp.
	 * Returns null if the timestamp is before the track's first timestamp.
	 *
	 * @param timestamp - The timestamp used for retrieval, in seconds.
	 */
	async getBuffer(timestamp: number) {
		validateTimestamp(timestamp);

		const data = await this._audioSampleSink.getSample(timestamp);
		return data && this._audioSampleToWrappedArrayBuffer(data);
	}

	/**
	 * Creates an async iterator that yields audio buffers of this track in presentation order. This method
	 * will intelligently pre-decode a few buffers ahead to enable fast iteration.
	 *
	 * @param startTimestamp - The timestamp in seconds at which to start yielding buffers (inclusive).
	 * @param endTimestamp - The timestamp in seconds at which to stop yielding buffers (exclusive).
	 */
	buffers(startTimestamp = 0, endTimestamp = Infinity) {
		return mapAsyncGenerator(
			this._audioSampleSink.samples(startTimestamp, endTimestamp),
			data => this._audioSampleToWrappedArrayBuffer(data),
		);
	}

	/**
	 * Creates an async iterator that yields an audio buffer for each timestamp in the argument. This method
	 * uses an optimized decoding pipeline if these timestamps are monotonically sorted, decoding each packet at most
	 * once, and is therefore more efficient than manually getting the buffer for every timestamp. The iterator may
	 * yield null if no buffer is available for a given timestamp.
	 *
	 * @param timestamps - An iterable or async iterable of timestamps in seconds.
	 */
	buffersAtTimestamps(timestamps: AnyIterable<number>) {
		return mapAsyncGenerator(
			this._audioSampleSink.samplesAtTimestamps(timestamps),
			data => data && this._audioSampleToWrappedArrayBuffer(data),
		);
	}
}