/*! * Copyright (c) 2026-present, Vanilagy and contributors * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ import { AUDIO_CODECS, AudioCodec, buildAacAudioSpecificConfig, parseAacAudioSpecificConfig, parsePcmCodec, PCM_AUDIO_CODECS, PcmAudioCodec, SUBTITLE_CODECS, SubtitleCodec, VIDEO_CODECS, VideoCodec, } from './codec'; import { OutputAudioTrack, OutputSubtitleTrack, OutputTrack, OutputVideoTrack } from './output'; import { assert, assertNever, CallSerializer, clamp, isFirefox, last, promiseWithResolvers, setInt24, setUint24, toUint8Array, } from './misc'; import { Muxer } from './muxer'; import { SubtitleParser } from './subtitles'; import { toAlaw, toUlaw } from './pcm'; import { CustomVideoEncoder, CustomAudioEncoder, customVideoEncoders, customAudioEncoders, } from './custom-coder'; import { EncodedPacket, EncodedPacketSideData } from './packet'; import { AudioSample, VideoSample } from './sample'; import { AudioEncodingConfig, buildAudioEncoderConfig, buildVideoEncoderConfig, validateAudioEncodingConfig, validateVideoEncodingConfig, VideoEncodingConfig, } from './encode'; /** * Base class for media sources. Media sources are used to add media samples to an output file. * @group Media sources * @public */ export abstract class MediaSource { /** @internal */ _connectedTrack: OutputTrack | null = null; /** @internal */ _closingPromise: Promise | null = null; /** @internal */ _closed = false; /** * @internal * A time offset in seconds that is added to all timestamps generated by this source. */ _timestampOffset = 0; /** @internal */ _ensureValidAdd() { if (!this._connectedTrack) { throw new Error('Source is not connected to an output track.'); } if (this._connectedTrack.output.state === 'canceled') { throw new Error('Output has been canceled.'); } if (this._connectedTrack.output.state === 'finalizing' || this._connectedTrack.output.state === 'finalized') { throw new Error('Output has been finalized.'); } if (this._connectedTrack.output.state === 'pending') { throw new Error('Output has not started.'); } if (this._closed) { throw new Error('Source is closed.'); } } /** @internal */ async _start() {} /** @internal */ // eslint-disable-next-line @typescript-eslint/no-unused-vars async _flushAndClose(forceClose: boolean) {} /** * Closes this source. This prevents future samples from being added and signals to the output file that no further * samples will come in for this track. Calling `.close()` is optional but recommended after adding the * last sample - for improved performance and reduced memory usage. */ close() { if (this._closingPromise) { return; } const connectedTrack = this._connectedTrack; if (!connectedTrack) { throw new Error('Cannot call close without connecting the source to an output track.'); } if (connectedTrack.output.state === 'pending') { throw new Error('Cannot call close before output has been started.'); } this._closingPromise = (async () => { await this._flushAndClose(false); this._closed = true; if (connectedTrack.output.state === 'finalizing' || connectedTrack.output.state === 'finalized') { return; } connectedTrack.output._muxer.onTrackClose(connectedTrack); })(); } /** @internal */ async _flushOrWaitForOngoingClose(forceClose: boolean) { return this._closingPromise ??= (async () => { await this._flushAndClose(forceClose); this._closed = true; })(); } } /** * Base class for video sources - sources for video tracks. * @group Media sources * @public */ export abstract class VideoSource extends MediaSource { /** @internal */ override _connectedTrack: OutputVideoTrack | null = null; /** @internal */ _codec: VideoCodec; /** Internal constructor. */ constructor(codec: VideoCodec) { super(); if (!VIDEO_CODECS.includes(codec)) { throw new TypeError(`Invalid video codec '${codec}'. Must be one of: ${VIDEO_CODECS.join(', ')}.`); } this._codec = codec; } } /** * The most basic video source; can be used to directly pipe encoded packets into the output file. * @group Media sources * @public */ export class EncodedVideoPacketSource extends VideoSource { /** Creates a new {@link EncodedVideoPacketSource} whose packets are encoded using `codec`. */ constructor(codec: VideoCodec) { super(codec); } /** * Adds an encoded packet to the output video track. Packets must be added in *decode order*, while a packet's * timestamp must be its *presentation timestamp*. B-frames are handled automatically. * * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid * decoder config. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(packet: EncodedPacket, meta?: EncodedVideoChunkMetadata) { if (!(packet instanceof EncodedPacket)) { throw new TypeError('packet must be an EncodedPacket.'); } if (packet.isMetadataOnly) { throw new TypeError('Metadata-only packets cannot be added.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('meta, when provided, must be an object.'); } this._ensureValidAdd(); return this._connectedTrack!.output._muxer.addEncodedVideoPacket(this._connectedTrack!, packet, meta); } } class VideoEncoderWrapper { private ensureEncoderPromise: Promise | null = null; private encoderInitialized = false; private encoder: VideoEncoder | null = null; private muxer: Muxer | null = null; private lastMultipleOfKeyFrameInterval = -1; private codedWidth: number | null = null; private codedHeight: number | null = null; private resizeCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; private customEncoder: CustomVideoEncoder | null = null; private customEncoderCallSerializer = new CallSerializer(); private customEncoderQueueSize = 0; // Alpha stuff private alphaEncoder: VideoEncoder | null = null; private splitter: ColorAlphaSplitter | null = null; private splitterCreationFailed = false; private alphaFrameQueue: (VideoFrame | null)[] = []; /** * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context. * However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught. * So, we keep track of the encoder error and throw it as soon as we get the chance. */ private error: Error | null = null; private errorNeedsNewStack = true; constructor(private source: VideoSource, private encodingConfig: VideoEncodingConfig) {} async add(videoSample: VideoSample, shouldClose: boolean, encodeOptions?: VideoEncoderEncodeOptions) { try { this.checkForEncoderError(); this.source._ensureValidAdd(); // Ensure video sample size remains constant if (this.codedWidth !== null && this.codedHeight !== null) { if (videoSample.codedWidth !== this.codedWidth || videoSample.codedHeight !== this.codedHeight) { const sizeChangeBehavior = this.encodingConfig.sizeChangeBehavior ?? 'deny'; if (sizeChangeBehavior === 'passThrough') { // Do nada } else if (sizeChangeBehavior === 'deny') { throw new Error( `Video sample size must remain constant. Expected ${this.codedWidth}x${this.codedHeight},` + ` got ${videoSample.codedWidth}x${videoSample.codedHeight}. To allow the sample size to` + ` change over time, set \`sizeChangeBehavior\` to a value other than 'strict' in the` + ` encoding options.`, ); } else { let canvasIsNew = false; if (!this.resizeCanvas) { if (typeof document !== 'undefined') { // Prefer an HTMLCanvasElement this.resizeCanvas = document.createElement('canvas'); this.resizeCanvas.width = this.codedWidth; this.resizeCanvas.height = this.codedHeight; } else { this.resizeCanvas = new OffscreenCanvas(this.codedWidth, this.codedHeight); } canvasIsNew = true; } const context = this.resizeCanvas.getContext('2d', { alpha: isFirefox(), // Firefox has VideoFrame glitches with opaque canvases }) as CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D; assert(context); if (!canvasIsNew) { if (isFirefox()) { context.fillStyle = 'black'; context.fillRect(0, 0, this.codedWidth, this.codedHeight); } else { context.clearRect(0, 0, this.codedWidth, this.codedHeight); } } videoSample.drawWithFit(context, { fit: sizeChangeBehavior }); if (shouldClose) { videoSample.close(); } videoSample = new VideoSample(this.resizeCanvas, { timestamp: videoSample.timestamp, duration: videoSample.duration, rotation: videoSample.rotation, }); shouldClose = true; } } } else { this.codedWidth = videoSample.codedWidth; this.codedHeight = videoSample.codedHeight; } if (!this.encoderInitialized) { if (!this.ensureEncoderPromise) { this.ensureEncoder(videoSample); } // No, this "if" statement is not useless. Sometimes, the above call to `ensureEncoder` might have // synchronously completed and the encoder is already initialized. In this case, we don't need to await // the promise anymore. This also fixes nasty async race condition bugs when multiple code paths are // calling this method: It's important that the call that initialized the encoder go through this // code first. if (!this.encoderInitialized) { await this.ensureEncoderPromise; } } assert(this.encoderInitialized); const keyFrameInterval = this.encodingConfig.keyFrameInterval ?? 5; const multipleOfKeyFrameInterval = Math.floor(videoSample.timestamp / keyFrameInterval); // Ensure a key frame every keyFrameInterval seconds. It is important that all video tracks follow the same // "key frame" rhythm, because aligned key frames are required to start new fragments in ISOBMFF or clusters // in Matroska (or at least desirable). const finalEncodeOptions = { ...encodeOptions, keyFrame: encodeOptions?.keyFrame || keyFrameInterval === 0 || multipleOfKeyFrameInterval !== this.lastMultipleOfKeyFrameInterval, }; this.lastMultipleOfKeyFrameInterval = multipleOfKeyFrameInterval; if (this.customEncoder) { this.customEncoderQueueSize++; // We clone the sample so it cannot be closed on us from the outside before it reaches the encoder const clonedSample = videoSample.clone(); const promise = this.customEncoderCallSerializer .call(() => this.customEncoder!.encode(clonedSample, finalEncodeOptions)) .then(() => this.customEncoderQueueSize--) .catch((error: Error) => this.error ??= error) .finally(() => { clonedSample.close(); // `videoSample` gets closed in the finally block at the end of the method }); if (this.customEncoderQueueSize >= 4) { await promise; } } else { assert(this.encoder); const videoFrame = videoSample.toVideoFrame(); if (!this.alphaEncoder) { // No alpha encoder, simple case this.encoder.encode(videoFrame, finalEncodeOptions); videoFrame.close(); } else { // We're expected to encode alpha as well const frameDefinitelyHasNoAlpha = !!videoFrame.format && !videoFrame.format.includes('A'); if (frameDefinitelyHasNoAlpha || this.splitterCreationFailed) { this.alphaFrameQueue.push(null); this.encoder.encode(videoFrame, finalEncodeOptions); videoFrame.close(); } else { const width = videoFrame.displayWidth; const height = videoFrame.displayHeight; if (!this.splitter) { try { this.splitter = new ColorAlphaSplitter(width, height); } catch (error) { console.error('Due to an error, only color data will be encoded.', error); this.splitterCreationFailed = true; this.alphaFrameQueue.push(null); this.encoder.encode(videoFrame, finalEncodeOptions); videoFrame.close(); } } if (this.splitter) { const colorFrame = this.splitter.extractColor(videoFrame); const alphaFrame = this.splitter.extractAlpha(videoFrame); this.alphaFrameQueue.push(alphaFrame); this.encoder.encode(colorFrame, finalEncodeOptions); colorFrame.close(); videoFrame.close(); } } } if (shouldClose) { videoSample.close(); } // We need to do this after sending the frame to the encoder as the frame otherwise might be closed if (this.encoder.encodeQueueSize >= 4) { await new Promise(resolve => this.encoder!.addEventListener('dequeue', resolve, { once: true })); } } await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure } finally { if (shouldClose) { // Make sure it's always closed, even if there was an error videoSample.close(); } } } private ensureEncoder(videoSample: VideoSample) { const encoderError = new Error(); this.ensureEncoderPromise = (async () => { const encoderConfig = buildVideoEncoderConfig({ width: videoSample.codedWidth, height: videoSample.codedHeight, ...this.encodingConfig, framerate: this.source._connectedTrack?.metadata.frameRate, }); this.encodingConfig.onEncoderConfig?.(encoderConfig); const MatchingCustomEncoder = customVideoEncoders.find(x => x.supports( this.encodingConfig.codec, encoderConfig, )); if (MatchingCustomEncoder) { // @ts-expect-error "Can't create instance of abstract class 🤓" this.customEncoder = new MatchingCustomEncoder() as CustomVideoEncoder; // @ts-expect-error It's technically readonly this.customEncoder.codec = this.encodingConfig.codec; // @ts-expect-error It's technically readonly this.customEncoder.config = encoderConfig; // @ts-expect-error It's technically readonly this.customEncoder.onPacket = (packet, meta) => { if (!(packet instanceof EncodedPacket)) { throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('The second argument passed to onPacket must be an object or undefined.'); } this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta) .catch((error) => { this.error ??= error; this.errorNeedsNewStack = false; }); }; await this.customEncoder.init(); } else { if (typeof VideoEncoder === 'undefined') { throw new Error('VideoEncoder is not supported by this browser.'); } encoderConfig.alpha = 'discard'; // Since we handle alpha ourselves if (this.encodingConfig.alpha === 'keep') { // Encoding alpha requires using two parallel encoders, so we need to make sure they stay in sync // and that neither of them drops frames. Setting latencyMode to 'quality' achieves this, because // "User Agents MUST not drop frames to achieve the target bitrate and/or framerate." encoderConfig.latencyMode = 'quality'; } const hasOddDimension = encoderConfig.width % 2 === 1 || encoderConfig.height % 2 === 1; if ( hasOddDimension && (this.encodingConfig.codec === 'avc' || this.encodingConfig.codec === 'hevc') ) { // Throw a special error for this case as it gets hit often throw new Error( `The dimensions ${encoderConfig.width}x${encoderConfig.height} are not supported for codec` + ` '${this.encodingConfig.codec}'; both width and height must be even numbers. Make sure to` + ` round your dimensions to the nearest even number.`, ); } const support = await VideoEncoder.isConfigSupported(encoderConfig); if (!support.supported) { throw new Error( `This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,` + ` ${encoderConfig.width}x${encoderConfig.height}, hardware acceleration:` + ` ${encoderConfig.hardwareAcceleration ?? 'no-preference'}) is not supported by this browser.` + ` Consider using another codec or changing your video parameters.`, ); } /** Queue of color chunks waiting for their alpha counterpart. */ const colorChunkQueue: { chunk: EncodedVideoChunk; meta: EncodedVideoChunkMetadata | undefined; }[] = []; /** Each value is the number of encoded alpha chunks at which a null alpha chunk should be added. */ const nullAlphaChunkQueue: number[] = []; let encodedAlphaChunkCount = 0; let alphaEncoderQueue = 0; const addPacket = ( colorChunk: EncodedVideoChunk, alphaChunk: EncodedVideoChunk | null, meta: EncodedVideoChunkMetadata | undefined, ) => { const sideData: EncodedPacketSideData = {}; if (alphaChunk) { const alphaData = new Uint8Array(alphaChunk.byteLength); alphaChunk.copyTo(alphaData); sideData.alpha = alphaData; } const packet = EncodedPacket.fromEncodedChunk(colorChunk, sideData); this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta) .catch((error) => { this.error ??= error; this.errorNeedsNewStack = false; }); }; this.encoder = new VideoEncoder({ output: (chunk, meta) => { if (!this.alphaEncoder) { // We're done addPacket(chunk, null, meta); return; } const alphaFrame = this.alphaFrameQueue.shift(); assert(alphaFrame !== undefined); if (alphaFrame) { this.alphaEncoder.encode(alphaFrame, { // Crucial: The alpha frame is forced to be a key frame whenever the color frame // also is. Without this, playback can glitch and even crash in some browsers. // This is the reason why the two encoders are wired in series and not in parallel. keyFrame: chunk.type === 'key', }); alphaEncoderQueue++; alphaFrame.close(); colorChunkQueue.push({ chunk, meta }); } else { // There was no alpha component for this frame if (alphaEncoderQueue === 0) { // No pending alpha encodes either, so we're done addPacket(chunk, null, meta); } else { // There are still alpha encodes pending, so we can't add the packet immediately since // we'd end up with out-of-order packets. Instead, let's queue a null alpha chunk to be // added in the future, after the current encoder workload has completed: nullAlphaChunkQueue.push(encodedAlphaChunkCount + alphaEncoderQueue); colorChunkQueue.push({ chunk, meta }); } } }, error: (error) => { error.stack = encoderError.stack; // Provide a more useful stack trace this.error ??= error; }, }); this.encoder.configure(encoderConfig); if (this.encodingConfig.alpha === 'keep') { // We need to encode alpha as well, which we do with a separate encoder this.alphaEncoder = new VideoEncoder({ // We ignore the alpha chunk's metadata // eslint-disable-next-line @typescript-eslint/no-unused-vars output: (chunk, meta) => { alphaEncoderQueue--; // There has to be a color chunk because the encoders are wired in series const colorChunk = colorChunkQueue.shift(); assert(colorChunk !== undefined); addPacket(colorChunk.chunk, chunk, colorChunk.meta); // See if there are any null alpha chunks queued up encodedAlphaChunkCount++; while ( nullAlphaChunkQueue.length > 0 && nullAlphaChunkQueue[0] === encodedAlphaChunkCount ) { nullAlphaChunkQueue.shift(); const colorChunk = colorChunkQueue.shift(); assert(colorChunk !== undefined); addPacket(colorChunk.chunk, null, colorChunk.meta); } }, error: (error) => { error.stack = encoderError.stack; // Provide a more useful stack trace this.error ??= error; }, }); this.alphaEncoder.configure(encoderConfig); } } assert(this.source._connectedTrack); this.muxer = this.source._connectedTrack.output._muxer; this.encoderInitialized = true; })(); } async flushAndClose(forceClose: boolean) { if (!forceClose) this.checkForEncoderError(); if (this.customEncoder) { if (!forceClose) { void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush()); } await this.customEncoderCallSerializer.call(() => this.customEncoder!.close()); } else if (this.encoder) { if (!forceClose) { // These are wired in series, therefore they must also be flushed in series await this.encoder.flush(); await this.alphaEncoder?.flush(); } if (this.encoder.state !== 'closed') { this.encoder.close(); } if (this.alphaEncoder && this.alphaEncoder.state !== 'closed') { this.alphaEncoder.close(); } this.alphaFrameQueue.forEach(x => x?.close()); this.splitter?.close(); } if (!forceClose) this.checkForEncoderError(); } getQueueSize() { if (this.customEncoder) { return this.customEncoderQueueSize; } else { // Because the color and alpha encoders are wired in series, there's no need to also include the alpha // encoder's queue size here return this.encoder?.encodeQueueSize ?? 0; } } checkForEncoderError() { if (this.error) { if (this.errorNeedsNewStack) { this.error.stack = new Error().stack; // Provide an even more useful stack trace } throw this.error; } } } /** Utility class for splitting a composite frame into separate color and alpha components. */ class ColorAlphaSplitter { canvas: OffscreenCanvas | HTMLCanvasElement; private gl: WebGL2RenderingContext; private colorProgram: WebGLProgram; private alphaProgram: WebGLProgram; private vao: WebGLVertexArrayObject; private sourceTexture: WebGLTexture; private lastFrame: VideoFrame | null = null; private alphaResolutionLocation: WebGLUniformLocation; constructor(initialWidth: number, initialHeight: number) { if (typeof OffscreenCanvas !== 'undefined') { this.canvas = new OffscreenCanvas(initialWidth, initialHeight); } else { this.canvas = document.createElement('canvas'); this.canvas.width = initialWidth; this.canvas.height = initialHeight; } const gl = this.canvas.getContext('webgl2', { alpha: true, // Needed due to the YUV thing we do for alpha }) as unknown as WebGL2RenderingContext | null; // Casting because of some TypeScript weirdness if (!gl) { throw new Error('Couldn\'t acquire WebGL 2 context.'); } this.gl = gl; this.colorProgram = this.createColorProgram(); this.alphaProgram = this.createAlphaProgram(); this.vao = this.createVAO(); this.sourceTexture = this.createTexture(); this.alphaResolutionLocation = this.gl.getUniformLocation(this.alphaProgram, 'u_resolution')!; this.gl.useProgram(this.colorProgram); this.gl.uniform1i(this.gl.getUniformLocation(this.colorProgram, 'u_sourceTexture'), 0); this.gl.useProgram(this.alphaProgram); this.gl.uniform1i(this.gl.getUniformLocation(this.alphaProgram, 'u_sourceTexture'), 0); } private createVertexShader(): WebGLShader { return this.createShader(this.gl.VERTEX_SHADER, `#version 300 es in vec2 a_position; in vec2 a_texCoord; out vec2 v_texCoord; void main() { gl_Position = vec4(a_position, 0.0, 1.0); v_texCoord = a_texCoord; } `); } private createColorProgram(): WebGLProgram { const vertexShader = this.createVertexShader(); // This shader is simple, simply copy the color information while setting alpha to 1 const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es precision highp float; uniform sampler2D u_sourceTexture; in vec2 v_texCoord; out vec4 fragColor; void main() { vec4 source = texture(u_sourceTexture, v_texCoord); fragColor = vec4(source.rgb, 1.0); } `); const program = this.gl.createProgram(); this.gl.attachShader(program, vertexShader); this.gl.attachShader(program, fragmentShader); this.gl.linkProgram(program); return program; } private createAlphaProgram(): WebGLProgram { const vertexShader = this.createVertexShader(); // This shader's more complex. The main reason is that this shader writes data in I420 (yuv420) pixel format // instead of regular RGBA. In other words, we use the shader to write out I420 data into an RGBA canvas, which // we then later read out with JavaScript. The reason being that browsers weirdly encode canvases and mess up // the color spaces, and the only way to have full control over the color space is by outputting YUV data // directly (avoiding the RGB conversion). Doing this conversion in JS is painfully slow, so let's utlize the // GPU since we're already calling it anyway. const fragmentShader = this.createShader(this.gl.FRAGMENT_SHADER, `#version 300 es precision highp float; uniform sampler2D u_sourceTexture; uniform vec2 u_resolution; // The width and height of the canvas in vec2 v_texCoord; out vec4 fragColor; // This function determines the value for a single byte in the YUV stream float getByteValue(float byteOffset) { float width = u_resolution.x; float height = u_resolution.y; float yPlaneSize = width * height; if (byteOffset < yPlaneSize) { // This byte is in the luma plane. Find the corresponding pixel coordinates to sample from float y = floor(byteOffset / width); float x = mod(byteOffset, width); // Add 0.5 to sample the center of the texel vec2 sampleCoord = (vec2(x, y) + 0.5) / u_resolution; // The luma value is the alpha from the source texture return texture(u_sourceTexture, sampleCoord).a; } else { // Write a fixed value for chroma and beyond return 128.0 / 255.0; } } void main() { // Each fragment writes 4 bytes (R, G, B, A) float pixelIndex = floor(gl_FragCoord.y) * u_resolution.x + floor(gl_FragCoord.x); float baseByteOffset = pixelIndex * 4.0; vec4 result; for (int i = 0; i < 4; i++) { float currentByteOffset = baseByteOffset + float(i); result[i] = getByteValue(currentByteOffset); } fragColor = result; } `); const program = this.gl.createProgram(); this.gl.attachShader(program, vertexShader); this.gl.attachShader(program, fragmentShader); this.gl.linkProgram(program); return program; } private createShader(type: number, source: string): WebGLShader { const shader = this.gl.createShader(type)!; this.gl.shaderSource(shader, source); this.gl.compileShader(shader); if (!this.gl.getShaderParameter(shader, this.gl.COMPILE_STATUS)) { console.error('Shader compile error:', this.gl.getShaderInfoLog(shader)); } return shader; } private createVAO(): WebGLVertexArrayObject { const vao = this.gl.createVertexArray(); this.gl.bindVertexArray(vao); const vertices = new Float32Array([ -1, -1, 0, 1, 1, -1, 1, 1, -1, 1, 0, 0, 1, 1, 1, 0, ]); const buffer = this.gl.createBuffer(); this.gl.bindBuffer(this.gl.ARRAY_BUFFER, buffer); this.gl.bufferData(this.gl.ARRAY_BUFFER, vertices, this.gl.STATIC_DRAW); const positionLocation = this.gl.getAttribLocation(this.colorProgram, 'a_position'); const texCoordLocation = this.gl.getAttribLocation(this.colorProgram, 'a_texCoord'); this.gl.enableVertexAttribArray(positionLocation); this.gl.vertexAttribPointer(positionLocation, 2, this.gl.FLOAT, false, 16, 0); this.gl.enableVertexAttribArray(texCoordLocation); this.gl.vertexAttribPointer(texCoordLocation, 2, this.gl.FLOAT, false, 16, 8); return vao; } private createTexture(): WebGLTexture { const texture = this.gl.createTexture(); this.gl.bindTexture(this.gl.TEXTURE_2D, texture); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_S, this.gl.CLAMP_TO_EDGE); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_T, this.gl.CLAMP_TO_EDGE); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MIN_FILTER, this.gl.LINEAR); this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MAG_FILTER, this.gl.LINEAR); return texture; } private updateTexture(sourceFrame: VideoFrame): void { if (this.lastFrame === sourceFrame) { return; } if (sourceFrame.displayWidth !== this.canvas.width || sourceFrame.displayHeight !== this.canvas.height) { this.canvas.width = sourceFrame.displayWidth; this.canvas.height = sourceFrame.displayHeight; } this.gl.activeTexture(this.gl.TEXTURE0); this.gl.bindTexture(this.gl.TEXTURE_2D, this.sourceTexture); this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, this.gl.RGBA, this.gl.UNSIGNED_BYTE, sourceFrame); this.lastFrame = sourceFrame; } extractColor(sourceFrame: VideoFrame) { this.updateTexture(sourceFrame); this.gl.useProgram(this.colorProgram); this.gl.viewport(0, 0, this.canvas.width, this.canvas.height); this.gl.clear(this.gl.COLOR_BUFFER_BIT); this.gl.bindVertexArray(this.vao); this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4); return new VideoFrame(this.canvas, { timestamp: sourceFrame.timestamp, duration: sourceFrame.duration ?? undefined, alpha: 'discard', }); } extractAlpha(sourceFrame: VideoFrame) { this.updateTexture(sourceFrame); this.gl.useProgram(this.alphaProgram); this.gl.uniform2f(this.alphaResolutionLocation, this.canvas.width, this.canvas.height); this.gl.viewport(0, 0, this.canvas.width, this.canvas.height); this.gl.clear(this.gl.COLOR_BUFFER_BIT); this.gl.bindVertexArray(this.vao); this.gl.drawArrays(this.gl.TRIANGLE_STRIP, 0, 4); const { width, height } = this.canvas; const chromaSamples = Math.ceil(width / 2) * Math.ceil(height / 2); const yuvSize = width * height + chromaSamples * 2; const requiredHeight = Math.ceil(yuvSize / (width * 4)); let yuv = new Uint8Array(4 * width * requiredHeight); this.gl.readPixels(0, 0, width, requiredHeight, this.gl.RGBA, this.gl.UNSIGNED_BYTE, yuv); yuv = yuv.subarray(0, yuvSize); assert(yuv[width * height] === 128); // Where chroma data starts assert(yuv[yuv.length - 1] === 128); // Assert the YUV data has been fully written // Defining this separately because TypeScript doesn't know `transfer` and I can't be bothered to do declaration // merging right now const init = { format: 'I420' as const, codedWidth: width, codedHeight: height, timestamp: sourceFrame.timestamp, duration: sourceFrame.duration ?? undefined, transfer: [yuv.buffer], }; return new VideoFrame(yuv, init); } close() { this.gl.getExtension('WEBGL_lose_context')?.loseContext(); this.gl = null as unknown as WebGL2RenderingContext; } } /** * This source can be used to add raw, unencoded video samples (frames) to an output video track. These frames will * automatically be encoded and then piped into the output. * @group Media sources * @public */ export class VideoSampleSource extends VideoSource { /** @internal */ private _encoder: VideoEncoderWrapper; /** * Creates a new {@link VideoSampleSource} whose samples are encoded according to the specified * {@link VideoEncodingConfig}. */ constructor(encodingConfig: VideoEncodingConfig) { validateVideoEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new VideoEncoderWrapper(this, encodingConfig); } /** * Encodes a video sample (frame) and then adds it to the output. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(videoSample: VideoSample, encodeOptions?: VideoEncoderEncodeOptions) { if (!(videoSample instanceof VideoSample)) { throw new TypeError('videoSample must be a VideoSample.'); } return this._encoder.add(videoSample, false, encodeOptions); } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * This source can be used to add video frames to the output track from a fixed canvas element. Since canvases are often * used for rendering, this source provides a convenient wrapper around {@link VideoSampleSource}. * @group Media sources * @public */ export class CanvasSource extends VideoSource { /** @internal */ private _encoder: VideoEncoderWrapper; /** @internal */ private _canvas: HTMLCanvasElement | OffscreenCanvas; /** * Creates a new {@link CanvasSource} from a canvas element or `OffscreenCanvas` whose samples are encoded * according to the specified {@link VideoEncodingConfig}. */ constructor(canvas: HTMLCanvasElement | OffscreenCanvas, encodingConfig: VideoEncodingConfig) { if ( !(typeof HTMLCanvasElement !== 'undefined' && canvas instanceof HTMLCanvasElement) && !(typeof OffscreenCanvas !== 'undefined' && canvas instanceof OffscreenCanvas) ) { throw new TypeError('canvas must be an HTMLCanvasElement or OffscreenCanvas.'); } validateVideoEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new VideoEncoderWrapper(this, encodingConfig); this._canvas = canvas; } /** * Captures the current canvas state as a video sample (frame), encodes it and adds it to the output. * * @param timestamp - The timestamp of the sample, in seconds. * @param duration - The duration of the sample, in seconds. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(timestamp: number, duration = 0, encodeOptions?: VideoEncoderEncodeOptions) { if (!Number.isFinite(timestamp) || timestamp < 0) { throw new TypeError('timestamp must be a non-negative number.'); } if (!Number.isFinite(duration) || duration < 0) { throw new TypeError('duration must be a non-negative number.'); } const sample = new VideoSample(this._canvas, { timestamp, duration }); return this._encoder.add(sample, true, encodeOptions); } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * Video source that encodes the frames of a * [`MediaStreamVideoTrack`](https://developer.mozilla.org/en-US/docs/Web/API/MediaStreamTrack) and pipes them into the * output. This is useful for capturing live or real-time data such as webcams or screen captures. Frames will * automatically start being captured once the connected {@link Output} is started, and will keep being captured until * the {@link Output} is finalized or this source is closed. * @group Media sources * @public */ export class MediaStreamVideoTrackSource extends VideoSource { /** @internal */ private _encoder: VideoEncoderWrapper; /** @internal */ private _abortController: AbortController | null = null; /** @internal */ private _track: MediaStreamVideoTrack; /** @internal */ private _workerTrackId: number | null = null; /** @internal */ private _workerListener: ((event: MessageEvent) => void) | null = null; /** @internal */ private _promiseWithResolvers = promiseWithResolvers(); /** @internal */ private _errorPromiseAccessed = false; /** @internal */ private _paused = false; /** @internal */ private _lastSampleTimestamp: number | null = null; /** @internal */ private _pauseOffset = 0; /** A promise that rejects upon any error within this source. This promise never resolves. */ get errorPromise() { this._errorPromiseAccessed = true; return this._promiseWithResolvers.promise; } /** Whether this source is currently paused as a result of calling `.pause()`. */ get paused() { return this._paused; } /** * Creates a new {@link MediaStreamVideoTrackSource} from a * [`MediaStreamVideoTrack`](https://developer.mozilla.org/en-US/docs/Web/API/MediaStreamTrack), which will pull * video samples from the stream in real time and encode them according to {@link VideoEncodingConfig}. */ constructor(track: MediaStreamVideoTrack, encodingConfig: VideoEncodingConfig) { if (!(track instanceof MediaStreamTrack) || track.kind !== 'video') { throw new TypeError('track must be a video MediaStreamTrack.'); } validateVideoEncodingConfig(encodingConfig); encodingConfig = { ...encodingConfig, latencyMode: 'realtime', }; super(encodingConfig.codec); this._encoder = new VideoEncoderWrapper(this, encodingConfig); this._track = track; } /** @internal */ override async _start() { if (!this._errorPromiseAccessed) { console.warn( 'Make sure not to ignore the `errorPromise` field on MediaStreamVideoTrackSource, so that any internal' + ' errors get bubbled up properly.', ); } this._abortController = new AbortController(); let firstVideoFrameTimestamp: number | null = null; let errored = false; const onVideoFrame = (videoFrame: VideoFrame) => { if (errored) { videoFrame.close(); return; } const currentTimestamp = videoFrame.timestamp / 1e6; if (this._paused) { const frameSeen = firstVideoFrameTimestamp !== null; if (frameSeen) { if (this._lastSampleTimestamp !== null) { // In addition to dropping this frame, let's also keep track of the time we have lost due to the // pause. Doing it like this instead of simply keeping track of the paused time is better since // it retains the frame rate of the underlying source. const timeDelta = currentTimestamp - this._lastSampleTimestamp; // We modify this field instead of _timestampOffset since we still might have data in flight // in the encoder, with which we don't want to mess. this._pauseOffset -= timeDelta; } this._lastSampleTimestamp = currentTimestamp; } videoFrame.close(); return; } if (firstVideoFrameTimestamp === null) { firstVideoFrameTimestamp = currentTimestamp; const muxer = this._connectedTrack!.output._muxer; if (muxer.firstMediaStreamTimestamp === null) { muxer.firstMediaStreamTimestamp = performance.now() / 1000; this._timestampOffset = -firstVideoFrameTimestamp; } else { this._timestampOffset = (performance.now() / 1000 - muxer.firstMediaStreamTimestamp) - firstVideoFrameTimestamp; } } this._lastSampleTimestamp = currentTimestamp; if (this._encoder.getQueueSize() >= 4) { // Drop frames if the encoder is overloaded videoFrame.close(); return; } const sample = new VideoSample(videoFrame, { timestamp: currentTimestamp + this._pauseOffset, }); void this._encoder.add(sample, true) .catch((error) => { errored = true; this._abortController?.abort(); this._promiseWithResolvers.reject(error); if (this._workerTrackId !== null) { // Tell the worker to stop the track sendMessageToMediaStreamTrackProcessorWorker({ type: 'stopTrack', trackId: this._workerTrackId, }); } }); }; if (typeof MediaStreamTrackProcessor !== 'undefined') { // We can do it here directly, perfect const processor = new MediaStreamTrackProcessor({ track: this._track }); const consumer = new WritableStream({ write: onVideoFrame }); processor.readable.pipeTo(consumer, { signal: this._abortController.signal, }).catch((error) => { // Handle AbortError silently if (error instanceof DOMException && error.name === 'AbortError') return; this._promiseWithResolvers.reject(error); }); } else { // It might still be supported in a worker, so let's check that const supportedInWorker = await mediaStreamTrackProcessorIsSupportedInWorker(); if (supportedInWorker) { this._workerTrackId = nextMediaStreamTrackProcessorWorkerId++; sendMessageToMediaStreamTrackProcessorWorker({ type: 'videoTrack', trackId: this._workerTrackId, track: this._track, }); this._workerListener = (event: MessageEvent) => { const message = event.data as MediaStreamTrackProcessorWorkerMessage; if (message.type === 'videoFrame' && message.trackId === this._workerTrackId) { onVideoFrame(message.videoFrame); } else if (message.type === 'error' && message.trackId === this._workerTrackId) { this._promiseWithResolvers.reject(message.error); } }; mediaStreamTrackProcessorWorker!.addEventListener('message', this._workerListener); } else { throw new Error('MediaStreamTrackProcessor is required but not supported by this browser.'); } } } /** * Pauses the capture of video frames - any video frames emitted by the underlying media stream will be ignored * while paused. This does *not* close the underlying `MediaStreamVideoTrack`, it just ignores its output. */ pause() { this._paused = true; } /** Resumes the capture of video frames after being paused. */ resume() { this._paused = false; } /** @internal */ override async _flushAndClose(forceClose: boolean) { if (this._abortController) { this._abortController.abort(); this._abortController = null; } if (this._workerTrackId !== null) { assert(this._workerListener); sendMessageToMediaStreamTrackProcessorWorker({ type: 'stopTrack', trackId: this._workerTrackId, }); // Wait for the worker to stop the track await new Promise((resolve) => { const listener = (event: MessageEvent) => { const message = event.data as MediaStreamTrackProcessorWorkerMessage; if (message.type === 'trackStopped' && message.trackId === this._workerTrackId) { assert(this._workerListener); mediaStreamTrackProcessorWorker!.removeEventListener('message', this._workerListener); mediaStreamTrackProcessorWorker!.removeEventListener('message', listener); resolve(); } }; mediaStreamTrackProcessorWorker!.addEventListener('message', listener); }); } await this._encoder.flushAndClose(forceClose); } } /** * Base class for audio sources - sources for audio tracks. * @group Media sources * @public */ export abstract class AudioSource extends MediaSource { /** @internal */ override _connectedTrack: OutputAudioTrack | null = null; /** @internal */ _codec: AudioCodec; /** Internal constructor. */ constructor(codec: AudioCodec) { super(); if (!AUDIO_CODECS.includes(codec)) { throw new TypeError(`Invalid audio codec '${codec}'. Must be one of: ${AUDIO_CODECS.join(', ')}.`); } this._codec = codec; } } /** * The most basic audio source; can be used to directly pipe encoded packets into the output file. * @group Media sources * @public */ export class EncodedAudioPacketSource extends AudioSource { /** Creates a new {@link EncodedAudioPacketSource} whose packets are encoded using `codec`. */ constructor(codec: AudioCodec) { super(codec); } /** * Adds an encoded packet to the output audio track. Packets must be added in *decode order*. * * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid * decoder config. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(packet: EncodedPacket, meta?: EncodedAudioChunkMetadata) { if (!(packet instanceof EncodedPacket)) { throw new TypeError('packet must be an EncodedPacket.'); } if (packet.isMetadataOnly) { throw new TypeError('Metadata-only packets cannot be added.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('meta, when provided, must be an object.'); } this._ensureValidAdd(); return this._connectedTrack!.output._muxer.addEncodedAudioPacket(this._connectedTrack!, packet, meta); } } class AudioEncoderWrapper { private ensureEncoderPromise: Promise | null = null; private encoderInitialized = false; private encoder: AudioEncoder | null = null; private muxer: Muxer | null = null; private lastNumberOfChannels: number | null = null; private lastSampleRate: number | null = null; private isPcmEncoder = false; private outputSampleSize: number | null = null; private writeOutputValue: ((view: DataView, byteOffset: number, value: number) => void) | null = null; private customEncoder: CustomAudioEncoder | null = null; private customEncoderCallSerializer = new CallSerializer(); private customEncoderQueueSize = 0; private lastEndSampleIndex: number | null = null; /** * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context. * However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught. * So, we keep track of the encoder error and throw it as soon as we get the chance. */ private error: Error | null = null; private errorNeedsNewStack = true; constructor(private source: AudioSource, private encodingConfig: AudioEncodingConfig) {} async add(audioSample: AudioSample, shouldClose: boolean) { try { this.checkForEncoderError(); this.source._ensureValidAdd(); // Ensure audio parameters remain constant if (this.lastNumberOfChannels !== null && this.lastSampleRate !== null) { if ( audioSample.numberOfChannels !== this.lastNumberOfChannels || audioSample.sampleRate !== this.lastSampleRate ) { throw new Error( `Audio parameters must remain constant. Expected ${this.lastNumberOfChannels} channels at` + ` ${this.lastSampleRate} Hz, got ${audioSample.numberOfChannels} channels at` + ` ${audioSample.sampleRate} Hz.`, ); } } else { this.lastNumberOfChannels = audioSample.numberOfChannels; this.lastSampleRate = audioSample.sampleRate; } if (!this.encoderInitialized) { if (!this.ensureEncoderPromise) { this.ensureEncoder(audioSample); } // No, this "if" statement is not useless. Sometimes, the above call to `ensureEncoder` might have // synchronously completed and the encoder is already initialized. In this case, we don't need to await // the promise anymore. This also fixes nasty async race condition bugs when multiple code paths are // calling this method: It's important that the call that initialized the encoder go through this // code first. if (!this.encoderInitialized) { await this.ensureEncoderPromise; } } assert(this.encoderInitialized); // Handle padding of gaps with silence to avoid audio drift over time, like in // https://github.com/Vanilagy/mediabunny/issues/176 // TODO An open question is how encoders deal with the first AudioData having a non-zero timestamp, and with // AudioDatas that have an overlapping timestamp range. { const startSampleIndex = Math.round( audioSample.timestamp * audioSample.sampleRate, ); const endSampleIndex = Math.round( (audioSample.timestamp + audioSample.duration) * audioSample.sampleRate, ); if (this.lastEndSampleIndex === null) { this.lastEndSampleIndex = endSampleIndex; } else { const sampleDiff = startSampleIndex - this.lastEndSampleIndex; if (sampleDiff >= 64) { // The gap is big enough, let's add a correction sample const fillSample = new AudioSample({ data: new Float32Array(sampleDiff * audioSample.numberOfChannels), format: 'f32-planar', sampleRate: audioSample.sampleRate, numberOfChannels: audioSample.numberOfChannels, numberOfFrames: sampleDiff, timestamp: this.lastEndSampleIndex / audioSample.sampleRate, }); await this.add(fillSample, true); // Recursive call } this.lastEndSampleIndex += audioSample.numberOfFrames; } } if (this.customEncoder) { this.customEncoderQueueSize++; // We clone the sample so it cannot be closed on us from the outside before it reaches the encoder const clonedSample = audioSample.clone(); const promise = this.customEncoderCallSerializer .call(() => this.customEncoder!.encode(clonedSample)) .then(() => this.customEncoderQueueSize--) .catch((error: Error) => this.error ??= error) .finally(() => { clonedSample.close(); // `audioSample` gets closed in the finally block at the end of the method }); if (this.customEncoderQueueSize >= 4) { await promise; } await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure } else if (this.isPcmEncoder) { await this.doPcmEncoding(audioSample, shouldClose); } else { assert(this.encoder); const audioData = audioSample.toAudioData(); this.encoder.encode(audioData); audioData.close(); if (shouldClose) { audioSample.close(); } if (this.encoder.encodeQueueSize >= 4) { await new Promise(resolve => this.encoder!.addEventListener('dequeue', resolve, { once: true })); } await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure } } finally { if (shouldClose) { // Make sure it's always closed, even if there was an error audioSample.close(); } } } private async doPcmEncoding(audioSample: AudioSample, shouldClose: boolean) { assert(this.outputSampleSize); assert(this.writeOutputValue); // Need to extract data from the audio data before we close it const { numberOfChannels, numberOfFrames, sampleRate, timestamp } = audioSample; const CHUNK_SIZE = 2048; const outputs: { frameCount: number; view: DataView; }[] = []; // Prepare all of the output buffers, each being bounded by CHUNK_SIZE so we don't generate huge packets for (let frame = 0; frame < numberOfFrames; frame += CHUNK_SIZE) { const frameCount = Math.min(CHUNK_SIZE, audioSample.numberOfFrames - frame); const outputSize = frameCount * numberOfChannels * this.outputSampleSize; const outputBuffer = new ArrayBuffer(outputSize); const outputView = new DataView(outputBuffer); outputs.push({ frameCount, view: outputView }); } const allocationSize = audioSample.allocationSize(({ planeIndex: 0, format: 'f32-planar' })); const floats = new Float32Array(allocationSize / Float32Array.BYTES_PER_ELEMENT); for (let i = 0; i < numberOfChannels; i++) { audioSample.copyTo(floats, { planeIndex: i, format: 'f32-planar' }); for (let j = 0; j < outputs.length; j++) { const { frameCount, view } = outputs[j]!; for (let k = 0; k < frameCount; k++) { this.writeOutputValue( view, (k * numberOfChannels + i) * this.outputSampleSize, floats[j * CHUNK_SIZE + k]!, ); } } } if (shouldClose) { audioSample.close(); } const meta: EncodedAudioChunkMetadata = { decoderConfig: { codec: this.encodingConfig.codec, numberOfChannels, sampleRate, }, }; for (let i = 0; i < outputs.length; i++) { const { frameCount, view } = outputs[i]!; const outputBuffer = view.buffer; const startFrame = i * CHUNK_SIZE; const packet = new EncodedPacket( new Uint8Array(outputBuffer), 'key', timestamp + startFrame / sampleRate, frameCount / sampleRate, ); this.encodingConfig.onEncodedPacket?.(packet, meta); await this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta); // With backpressure } } private ensureEncoder(audioSample: AudioSample) { const encoderError = new Error(); this.ensureEncoderPromise = (async () => { const { numberOfChannels, sampleRate } = audioSample; const encoderConfig = buildAudioEncoderConfig({ numberOfChannels, sampleRate, ...this.encodingConfig, }); this.encodingConfig.onEncoderConfig?.(encoderConfig); const MatchingCustomEncoder = customAudioEncoders.find(x => x.supports( this.encodingConfig.codec, encoderConfig, )); if (MatchingCustomEncoder) { // @ts-expect-error "Can't create instance of abstract class 🤓" this.customEncoder = new MatchingCustomEncoder() as CustomAudioEncoder; // @ts-expect-error It's technically readonly this.customEncoder.codec = this.encodingConfig.codec; // @ts-expect-error It's technically readonly this.customEncoder.config = encoderConfig; // @ts-expect-error It's technically readonly this.customEncoder.onPacket = (packet, meta) => { if (!(packet instanceof EncodedPacket)) { throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('The second argument passed to onPacket must be an object or undefined.'); } this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta) .catch((error) => { this.error ??= error; this.errorNeedsNewStack = false; }); }; await this.customEncoder.init(); } else if ((PCM_AUDIO_CODECS as readonly string[]).includes(this.encodingConfig.codec)) { this.initPcmEncoder(); } else { if (typeof AudioEncoder === 'undefined') { throw new Error('AudioEncoder is not supported by this browser.'); } const support = await AudioEncoder.isConfigSupported(encoderConfig); if (!support.supported) { throw new Error( `This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,` + ` ${encoderConfig.numberOfChannels} channels, ${encoderConfig.sampleRate} Hz) is not` + ` supported by this browser. Consider using another codec or changing your audio parameters.`, ); } this.encoder = new AudioEncoder({ output: (chunk, meta) => { // WebKit emits an invalid description for AAC (https://bugs.webkit.org/show_bug.cgi?id=302253), // which we try to detect here. If detected, we'll provide our own description instead, derived // from the codec string and audio parameters. if (this.encodingConfig.codec === 'aac' && meta?.decoderConfig) { let needsDescriptionOverwrite = false; if (!meta.decoderConfig.description || meta.decoderConfig.description.byteLength < 2) { needsDescriptionOverwrite = true; } else { const audioSpecificConfig = parseAacAudioSpecificConfig( toUint8Array(meta.decoderConfig.description), ); needsDescriptionOverwrite = audioSpecificConfig.objectType === 0; } if (needsDescriptionOverwrite) { const objectType = Number(last(encoderConfig.codec.split('.'))); meta.decoderConfig.description = buildAacAudioSpecificConfig({ objectType, numberOfChannels: meta.decoderConfig.numberOfChannels, sampleRate: meta.decoderConfig.sampleRate, }); } } const packet = EncodedPacket.fromEncodedChunk(chunk); this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta) .catch((error) => { this.error ??= error; this.errorNeedsNewStack = false; }); }, error: (error) => { error.stack = encoderError.stack; // Provide a more useful stack trace this.error ??= error; }, }); this.encoder.configure(encoderConfig); } assert(this.source._connectedTrack); this.muxer = this.source._connectedTrack.output._muxer; this.encoderInitialized = true; })(); } private initPcmEncoder() { this.isPcmEncoder = true; const codec = this.encodingConfig.codec as PcmAudioCodec; const { dataType, sampleSize, littleEndian } = parsePcmCodec(codec); this.outputSampleSize = sampleSize; // All these functions receive a float sample as input and map it into the desired format switch (sampleSize) { case 1: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => view.setUint8(byteOffset, clamp((value + 1) * 127.5, 0, 255)); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => { view.setInt8(byteOffset, clamp(Math.round(value * 128), -128, 127)); }; } else if (dataType === 'ulaw') { this.writeOutputValue = (view, byteOffset, value) => { const int16 = clamp(Math.floor(value * 32767), -32768, 32767); view.setUint8(byteOffset, toUlaw(int16)); }; } else if (dataType === 'alaw') { this.writeOutputValue = (view, byteOffset, value) => { const int16 = clamp(Math.floor(value * 32767), -32768, 32767); view.setUint8(byteOffset, toAlaw(int16)); }; } else { assert(false); } }; break; case 2: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => view.setUint16(byteOffset, clamp((value + 1) * 32767.5, 0, 65535), littleEndian); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => view.setInt16(byteOffset, clamp(Math.round(value * 32767), -32768, 32767), littleEndian); } else { assert(false); } }; break; case 3: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => setUint24(view, byteOffset, clamp((value + 1) * 8388607.5, 0, 16777215), littleEndian); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => setInt24( view, byteOffset, clamp(Math.round(value * 8388607), -8388608, 8388607), littleEndian, ); } else { assert(false); } }; break; case 4: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => view.setUint32(byteOffset, clamp((value + 1) * 2147483647.5, 0, 4294967295), littleEndian); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => view.setInt32( byteOffset, clamp(Math.round(value * 2147483647), -2147483648, 2147483647), littleEndian, ); } else if (dataType === 'float') { this.writeOutputValue = (view, byteOffset, value) => view.setFloat32(byteOffset, value, littleEndian); } else { assert(false); } }; break; case 8: { if (dataType === 'float') { this.writeOutputValue = (view, byteOffset, value) => view.setFloat64(byteOffset, value, littleEndian); } else { assert(false); } }; break; default: { assertNever(sampleSize); assert(false); }; } } async flushAndClose(forceClose: boolean) { if (!forceClose) this.checkForEncoderError(); if (this.customEncoder) { if (!forceClose) { void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush()); } await this.customEncoderCallSerializer.call(() => this.customEncoder!.close()); } else if (this.encoder) { if (!forceClose) { await this.encoder.flush(); } if (this.encoder.state !== 'closed') { this.encoder.close(); } } if (!forceClose) this.checkForEncoderError(); } getQueueSize() { if (this.customEncoder) { return this.customEncoderQueueSize; } else if (this.isPcmEncoder) { return 0; } else { return this.encoder?.encodeQueueSize ?? 0; } } checkForEncoderError() { if (this.error) { if (this.errorNeedsNewStack) { this.error.stack = new Error().stack; // Provide an even more useful stack trace } throw this.error; } } } /** * This source can be used to add raw, unencoded audio samples to an output audio track. These samples will * automatically be encoded and then piped into the output. * @group Media sources * @public */ export class AudioSampleSource extends AudioSource { /** @internal */ private _encoder: AudioEncoderWrapper; /** * Creates a new {@link AudioSampleSource} whose samples are encoded according to the specified * {@link AudioEncodingConfig}. */ constructor(encodingConfig: AudioEncodingConfig) { validateAudioEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new AudioEncoderWrapper(this, encodingConfig); } /** * Encodes an audio sample and then adds it to the output. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(audioSample: AudioSample) { if (!(audioSample instanceof AudioSample)) { throw new TypeError('audioSample must be an AudioSample.'); } return this._encoder.add(audioSample, false); } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * This source can be used to add audio data from an AudioBuffer to the output track. This is useful when working with * the Web Audio API. * @group Media sources * @public */ export class AudioBufferSource extends AudioSource { /** @internal */ private _encoder: AudioEncoderWrapper; /** @internal */ private _accumulatedTime = 0; /** * Creates a new {@link AudioBufferSource} whose `AudioBuffer` instances are encoded according to the specified * {@link AudioEncodingConfig}. */ constructor(encodingConfig: AudioEncodingConfig) { validateAudioEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new AudioEncoderWrapper(this, encodingConfig); } /** * Converts an AudioBuffer to audio samples, encodes them and adds them to the output. The first AudioBuffer will * be played at timestamp 0, and any subsequent AudioBuffer will have a timestamp equal to the total duration of * all previous AudioBuffers. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ async add(audioBuffer: AudioBuffer) { if (!(audioBuffer instanceof AudioBuffer)) { throw new TypeError('audioBuffer must be an AudioBuffer.'); } const iterator = AudioSample._fromAudioBuffer(audioBuffer, this._accumulatedTime); this._accumulatedTime += audioBuffer.duration; for (const audioSample of iterator) { await this._encoder.add(audioSample, true); } } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * Audio source that encodes the data of a * [`MediaStreamAudioTrack`](https://developer.mozilla.org/en-US/docs/Web/API/MediaStreamTrack) and pipes it into the * output. This is useful for capturing live or real-time audio such as microphones or audio from other media elements. * Audio will automatically start being captured once the connected {@link Output} is started, and will keep being * captured until the {@link Output} is finalized or this source is closed. * @group Media sources * @public */ export class MediaStreamAudioTrackSource extends AudioSource { /** @internal */ private _encoder: AudioEncoderWrapper; /** @internal */ private _abortController: AbortController | null = null; /** @internal */ private _track: MediaStreamAudioTrack; /** @internal */ private _audioContext: AudioContext | null = null; /** @internal */ private _scriptProcessorNode: ScriptProcessorNode | null = null; // Deprecated but goated /** @internal */ private _promiseWithResolvers = promiseWithResolvers(); /** @internal */ private _errorPromiseAccessed = false; /** @internal */ private _paused = false; /** @internal */ private _lastSampleTimestamp: number | null = null; /** @internal */ private _pauseOffset = 0; /** A promise that rejects upon any error within this source. This promise never resolves. */ get errorPromise() { this._errorPromiseAccessed = true; return this._promiseWithResolvers.promise; } /** Whether this source is currently paused as a result of calling `.pause()`. */ get paused() { return this._paused; } /** * Creates a new {@link MediaStreamAudioTrackSource} from a `MediaStreamAudioTrack`, which will pull audio samples * from the stream in real time and encode them according to {@link AudioEncodingConfig}. */ constructor(track: MediaStreamAudioTrack, encodingConfig: AudioEncodingConfig) { if (!(track instanceof MediaStreamTrack) || track.kind !== 'audio') { throw new TypeError('track must be an audio MediaStreamTrack.'); } validateAudioEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new AudioEncoderWrapper(this, encodingConfig); this._track = track; } /** @internal */ override async _start() { if (!this._errorPromiseAccessed) { console.warn( 'Make sure not to ignore the `errorPromise` field on MediaStreamVideoTrackSource, so that any internal' + ' errors get bubbled up properly.', ); } this._abortController = new AbortController(); let firstAudioDataTimestamp: number | null = null; let errored = false; const onAudioSample = (audioSample: AudioSample) => { if (errored) { audioSample.close(); return; } const currentTimestamp = audioSample.timestamp; if (this._paused) { const dataSeen = firstAudioDataTimestamp !== null; if (dataSeen) { if (this._lastSampleTimestamp !== null) { // In addition to dropping this sample, let's also keep track of the time we have lost due to // the pause. Doing it like this instead of simply keeping track of the paused time is better // since it retains the sample rate of the underlying source. const timeDelta = currentTimestamp - this._lastSampleTimestamp; // We modify this field instead of _timestampOffset since we still might have data in flight // in the encoder, with which we don't want to mess. this._pauseOffset -= timeDelta; } this._lastSampleTimestamp = currentTimestamp; } audioSample.close(); return; } if (firstAudioDataTimestamp === null) { firstAudioDataTimestamp = audioSample.timestamp; const muxer = this._connectedTrack!.output._muxer; if (muxer.firstMediaStreamTimestamp === null) { muxer.firstMediaStreamTimestamp = performance.now() / 1000; this._timestampOffset = -firstAudioDataTimestamp; } else { this._timestampOffset = (performance.now() / 1000 - muxer.firstMediaStreamTimestamp) - firstAudioDataTimestamp; } } this._lastSampleTimestamp = currentTimestamp; if (this._encoder.getQueueSize() >= 4) { // Drop data if the encoder is overloaded audioSample.close(); return; } audioSample.setTimestamp(currentTimestamp + this._pauseOffset); void this._encoder.add(audioSample, true) .catch((error) => { errored = true; this._abortController?.abort(); this._promiseWithResolvers.reject(error); void this._audioContext?.suspend(); }); }; if (typeof MediaStreamTrackProcessor !== 'undefined') { // Great, MediaStreamTrackProcessor is supported, this is the preferred way of doing things const processor = new MediaStreamTrackProcessor({ track: this._track }); const consumer = new WritableStream({ write: audioData => onAudioSample(new AudioSample(audioData)), }); processor.readable.pipeTo(consumer, { signal: this._abortController.signal, }).catch((error) => { // Handle AbortError silently if (error instanceof DOMException && error.name === 'AbortError') return; this._promiseWithResolvers.reject(error); }); } else { // Let's fall back to an AudioContext approach // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access const AudioContext = window.AudioContext || (window as any).webkitAudioContext; this._audioContext = new AudioContext({ sampleRate: this._track.getSettings().sampleRate }); const sourceNode = this._audioContext.createMediaStreamSource(new MediaStream([this._track])); this._scriptProcessorNode = this._audioContext.createScriptProcessor(4096); if (this._audioContext.state === 'suspended') { await this._audioContext.resume(); } sourceNode.connect(this._scriptProcessorNode); this._scriptProcessorNode.connect(this._audioContext.destination); let totalDuration = 0; this._scriptProcessorNode.onaudioprocess = (event) => { const iterator = AudioSample._fromAudioBuffer(event.inputBuffer, totalDuration); totalDuration += event.inputBuffer.duration; for (const audioSample of iterator) { onAudioSample(audioSample); } }; } } /** * Pauses the capture of audio data - any audio data emitted by the underlying media stream will be ignored * while paused. This does *not* close the underlying `MediaStreamAudioTrack`, it just ignores its output. */ pause() { this._paused = true; } /** Resumes the capture of audio data after being paused. */ resume() { this._paused = false; } /** @internal */ override async _flushAndClose(forceClose: boolean) { if (this._abortController) { this._abortController.abort(); this._abortController = null; } if (this._audioContext) { assert(this._scriptProcessorNode); this._scriptProcessorNode.disconnect(); await this._audioContext.suspend(); } await this._encoder.flushAndClose(forceClose); } } // === MEDIA STREAM TRACK PROCESSOR WORKER === type MediaStreamTrackProcessorWorkerMessage = { type: 'support'; supported: boolean; } | { type: 'videoFrame'; trackId: number; videoFrame: VideoFrame; } | { type: 'trackStopped'; trackId: number; } | { type: 'error'; trackId: number; error: Error; }; type MediaStreamTrackProcessorControllerMessage = { type: 'videoTrack'; trackId: number; track: MediaStreamVideoTrack; } | { type: 'stopTrack'; trackId: number; }; const mediaStreamTrackProcessorWorkerCode = () => { const sendMessage = (message: MediaStreamTrackProcessorWorkerMessage, transfer?: Transferable[]) => { if (transfer) { self.postMessage(message, { transfer }); } else { self.postMessage(message); } }; // Immediately send a message to the main thread, letting them know of the support sendMessage({ type: 'support', supported: typeof MediaStreamTrackProcessor !== 'undefined', }); const abortControllers = new Map(); const activeTracks = new Map(); self.addEventListener('message', (event) => { const message = event.data as MediaStreamTrackProcessorControllerMessage; switch (message.type) { case 'videoTrack': { activeTracks.set(message.trackId, message.track); const processor = new MediaStreamTrackProcessor({ track: message.track }); const consumer = new WritableStream({ write: (videoFrame) => { if (!activeTracks.has(message.trackId)) { videoFrame.close(); return; } // Send it to the main thread sendMessage({ type: 'videoFrame', trackId: message.trackId, videoFrame, }, [videoFrame]); }, }); const abortController = new AbortController(); abortControllers.set(message.trackId, abortController); processor.readable.pipeTo(consumer, { signal: abortController.signal, }).catch((error: Error) => { // Handle AbortError silently if (error instanceof DOMException && error.name === 'AbortError') return; sendMessage({ type: 'error', trackId: message.trackId, error, }); }); }; break; case 'stopTrack': { const abortController = abortControllers.get(message.trackId); if (abortController) { abortController.abort(); abortControllers.delete(message.trackId); } const track = activeTracks.get(message.trackId); track?.stop(); activeTracks.delete(message.trackId); sendMessage({ type: 'trackStopped', trackId: message.trackId, }); }; break; default: assertNever(message); } }); }; let nextMediaStreamTrackProcessorWorkerId = 0; let mediaStreamTrackProcessorWorker: Worker | null = null; const initMediaStreamTrackProcessorWorker = () => { const blob = new Blob( [`(${mediaStreamTrackProcessorWorkerCode.toString()})()`], { type: 'application/javascript' }, ); const url = URL.createObjectURL(blob); mediaStreamTrackProcessorWorker = new Worker(url); }; let mediaStreamTrackProcessorIsSupportedInWorkerCache: boolean | null = null; const mediaStreamTrackProcessorIsSupportedInWorker = async () => { if (mediaStreamTrackProcessorIsSupportedInWorkerCache !== null) { return mediaStreamTrackProcessorIsSupportedInWorkerCache; } if (!mediaStreamTrackProcessorWorker) { initMediaStreamTrackProcessorWorker(); } return new Promise((resolve) => { assert(mediaStreamTrackProcessorWorker); const listener = (event: MessageEvent) => { const message = event.data as MediaStreamTrackProcessorWorkerMessage; if (message.type === 'support') { mediaStreamTrackProcessorIsSupportedInWorkerCache = message.supported; mediaStreamTrackProcessorWorker!.removeEventListener('message', listener); resolve(message.supported); } }; mediaStreamTrackProcessorWorker.addEventListener('message', listener); }); }; const sendMessageToMediaStreamTrackProcessorWorker = ( message: MediaStreamTrackProcessorControllerMessage, transfer?: Transferable[], ) => { assert(mediaStreamTrackProcessorWorker); if (transfer) { mediaStreamTrackProcessorWorker.postMessage(message, transfer); } else { mediaStreamTrackProcessorWorker.postMessage(message); } }; /** * Base class for subtitle sources - sources for subtitle tracks. * @group Media sources * @public */ export abstract class SubtitleSource extends MediaSource { /** @internal */ override _connectedTrack: OutputSubtitleTrack | null = null; /** @internal */ _codec: SubtitleCodec; /** Internal constructor. */ constructor(codec: SubtitleCodec) { super(); if (!SUBTITLE_CODECS.includes(codec)) { throw new TypeError(`Invalid subtitle codec '${codec}'. Must be one of: ${SUBTITLE_CODECS.join(', ')}.`); } this._codec = codec; } } /** * This source can be used to add subtitles from a subtitle text file. * @group Media sources * @public */ export class TextSubtitleSource extends SubtitleSource { /** @internal */ private _parser: SubtitleParser; /** @internal */ private _error: Error | null = null; /** Creates a new {@link TextSubtitleSource} where added text chunks are in the specified `codec`. */ constructor(codec: SubtitleCodec) { super(codec); this._parser = new SubtitleParser({ codec, output: (cue, metadata) => { void this._connectedTrack?.output._muxer.addSubtitleCue(this._connectedTrack, cue, metadata) .catch((error) => { this._error ??= error; }); }, }); } /** * Parses the subtitle text according to the specified codec and adds it to the output track. You don't have to * add the entire subtitle file at once here; you can provide it in chunks. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(text: string) { if (typeof text !== 'string') { throw new TypeError('text must be a string.'); } this._checkForError(); this._ensureValidAdd(); this._parser.parse(text); return this._connectedTrack!.output._muxer.mutex.currentPromise; } /** @internal */ _checkForError() { if (this._error) { throw this._error; } } /** @internal */ override async _flushAndClose(forceClose: boolean) { if (!forceClose) { this._checkForError(); } } }