Feature(audio): forward client PCM to web viewers with continuous playback

This commit is contained in:
yuanyuanxiang
2026-06-02 01:56:10 +02:00
parent da024fb3fb
commit 9aca587654
4 changed files with 505 additions and 5 deletions

View File

@@ -2609,7 +2609,8 @@ DWORD WINAPI CScreenManager::AudioThreadProc(LPVOID lpParam)
} }
#endif #endif
} }
if (pThis->m_pCaptureClient == nullptr)
break;
pThis->m_pCaptureClient->ReleaseBuffer(numFramesAvailable); pThis->m_pCaptureClient->ReleaseBuffer(numFramesAvailable);
hr = pThis->m_pCaptureClient->GetNextPacketSize(&packetLength); hr = pThis->m_pCaptureClient->GetNextPacketSize(&packetLength);

View File

@@ -18,6 +18,7 @@
#include <md5.h> #include <md5.h>
#include <cstdint> // for uint16_t #include <cstdint> // for uint16_t
#include <vector> #include <vector>
#include <mutex> // for std::mutex, std::lock_guard
#include "WebService.h" #include "WebService.h"
// 文件接收消息数据结构 // 文件接收消息数据结构
@@ -3494,9 +3495,53 @@ void CScreenSpyDlg::StopAudioPlayback()
#endif #endif
m_nAudioCompression = 0; m_nAudioCompression = 0;
// 重置网页端音频格式标志(线程安全的清理)
{
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
m_bAudioFormatSent = FALSE;
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
}
Mprintf("[ScreenSpy] 音频播放已停止\n"); Mprintf("[ScreenSpy] 音频播放已停止\n");
} }
// Turns audio off on behalf of a web-viewer command (the "disable only"
// half of the IDM_AUDIO_TOGGLE logic). Does nothing when audio is
// already disabled.
void CScreenSpyDlg::DisableAudio()
{
    if (!m_Settings.AudioEnabled)
        return;

    m_Settings.AudioEnabled = FALSE;
    SendAudioCtrl(CYCLEAUDIO_DISABLE, 1);
    StopAudioPlayback();

    // Forget the format last announced to web viewers so the next
    // packet sent after a re-enable carries a fresh format header.
    // The state is shared with SendAudioToWeb(), hence the mutex.
    {
        std::lock_guard<std::mutex> guard(m_AudioWebMutex);
        m_bAudioFormatSent = FALSE;
        memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
    }

    Mprintf("[Audio Web] 禁用音频(来自 web 命令)\n");
}
// Turns audio on on behalf of a web-viewer command (the "enable only"
// half of the IDM_AUDIO_TOGGLE logic). Does nothing when audio is
// already enabled.
void CScreenSpyDlg::EnableAudio()
{
    if (m_Settings.AudioEnabled)
        return;

    m_Settings.AudioEnabled = TRUE;
    SendAudioCtrl(CYCLEAUDIO_ENABLE, 1);

    // Drop the cached web format so SendAudioToWeb() re-sends the
    // format header with the first packet of the new session.
    {
        std::lock_guard<std::mutex> guard(m_AudioWebMutex);
        m_bAudioFormatSent = FALSE;
        memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
    }

    Mprintf("[Audio Web] 启用音频(来自 web 命令)\n");
}
void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len) void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
{ {
if (len < 1) return; if (len < 1) return;
@@ -3535,12 +3580,20 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
UINT32 audioLen = len - offset; UINT32 audioLen = len - offset;
if (audioLen == 0) return; if (audioLen == 0) return;
// 保存"上线格式"字节Opus 模式下是原始压缩包PCM 模式下是原始 PCM
// 这就是要透传给 web 的数据 —— web 端用 MSE+WebM 直接播 Opus
// 不需要服务器解码后再发 PCM。本地 waveOut 仍然需要 PCM因此下面
// 还是会解码一遍。
BYTE* pWireData = pAudioData;
UINT32 wireLen = audioLen;
BYTE wireCompression = (BYTE)m_nAudioCompression;
// 帧对齐参数 // 帧对齐参数
DWORD blockAlign = m_AudioFormat.nBlockAlign; DWORD blockAlign = m_AudioFormat.nBlockAlign;
if (blockAlign == 0) blockAlign = 4; // 默认 stereo 16-bit if (blockAlign == 0) blockAlign = 4; // 默认 stereo 16-bit
#if USING_OPUS #if USING_OPUS
// Opus 解码 // Opus 解码(仅供本地 waveOut 使用web 仍会收到原始压缩包)
if (m_nAudioCompression == AUDIO_COMPRESS_OPUS && m_pOpusDecoder && m_pOpusDecodeBuffer) { if (m_nAudioCompression == AUDIO_COMPRESS_OPUS && m_pOpusDecoder && m_pOpusDecodeBuffer) {
COpusDecoder* pDecoder = (COpusDecoder*)m_pOpusDecoder; COpusDecoder* pDecoder = (COpusDecoder*)m_pOpusDecoder;
int decodedSamples = pDecoder->Decode(pAudioData, audioLen, m_pOpusDecodeBuffer, 960 * 2); int decodedSamples = pDecoder->Decode(pAudioData, audioLen, m_pOpusDecodeBuffer, 960 * 2);
@@ -3583,10 +3636,104 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
Mprintf("[Audio] 预缓冲完成,开始播放 (缓冲: %u bytes)\n", m_nRingDataLen); Mprintf("[Audio] 预缓冲完成,开始播放 (缓冲: %u bytes)\n", m_nRingDataLen);
} }
// 发送上线格式Opus 压缩包 / 或原始 PCM到网页
SendAudioToWeb(pWireData, wireLen, &m_AudioFormat, wireCompression);
// 填充可用的 waveOut 缓冲区 // 填充可用的 waveOut 缓冲区
FeedAudioBuffers(); FeedAudioBuffers();
} }
void CScreenSpyDlg::SendAudioToWeb(const BYTE* pAudioData, UINT32 len, const WAVEFORMATEX* pFormat, BYTE compression)
{
if (!WebService().IsRunning()) return;
if (!pAudioData || len == 0) return;
if (!m_ContextObject) return;
if (!m_Settings.AudioEnabled) return;
std::vector<BYTE> packet;
BOOL formatChanged = FALSE;
{
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
if (!m_bAudioFormatSent) {
formatChanged = TRUE;
} else if (pFormat && (
pFormat->nChannels != m_AudioFormatWeb.channels ||
pFormat->nSamplesPerSec != m_AudioFormatWeb.sampleRate ||
pFormat->wBitsPerSample != m_AudioFormatWeb.bitsPerSample ||
compression != m_AudioFormatWeb.compression)) {
formatChanged = TRUE;
}
// 第1字节是否包含格式信息
packet.push_back(formatChanged ? 1 : 0);
if (formatChanged && pFormat) {
if (pFormat->nChannels < 1 || pFormat->nChannels > 8 ||
pFormat->nSamplesPerSec < 8000 || pFormat->nSamplesPerSec > 48000 ||
pFormat->wBitsPerSample != 16) {
Mprintf("[Audio Web] Invalid format: ch=%d, sr=%d, bps=%d\n",
pFormat->nChannels, pFormat->nSamplesPerSec, pFormat->wBitsPerSample);
return;
}
// 12-byte AudioFormat 结构commands.h, pack(1)
AudioFormat fmt;
fmt.channels = (WORD)pFormat->nChannels;
fmt.sampleRate = (DWORD)pFormat->nSamplesPerSec;
fmt.bitsPerSample = (WORD)pFormat->wBitsPerSample;
// blockAlign 对 Opus 是 informational 的(包是变长压缩),按 PCM 推算填上即可。
fmt.blockAlign = (WORD)(fmt.channels * fmt.bitsPerSample / 8);
fmt.compression = compression;
fmt.reserved = 0;
BYTE* pFmt = (BYTE*)&fmt;
packet.insert(packet.end(), pFmt, pFmt + sizeof(fmt));
// padding byte: 保持后续音频数据落在偶数偏移上PCM 模式下 web 端
// 需要 Int16 对齐Opus 模式无所谓但保留兼容旧 web 解析)
packet.push_back(0);
m_AudioFormatWeb = fmt;
m_bAudioFormatSent = TRUE;
Mprintf("[Audio Web] Format sent: ch=%d, sr=%d Hz, compression=%d\n",
fmt.channels, fmt.sampleRate, fmt.compression);
}
} // 释放 mutex
// 添加音频数据(此操作不需要 mutex因为我们已经复制了所有需要的共享状态
packet.insert(packet.end(), pAudioData, pAudioData + len);
// 构造完整帧:[DeviceID:4][FrameType:1][DataLen:4][audio payload...]
// FrameType: 96 = TOKEN_SCREEN_AUDIO用于在网页端识别音频
std::vector<BYTE> frame;
uint64_t deviceID = GetClientID();
uint32_t audioDataLen = (uint32_t)packet.size();
uint8_t frameType = 96; // TOKEN_SCREEN_AUDIO
// [DeviceID:4] little-endian
frame.push_back((BYTE)(deviceID & 0xFF));
frame.push_back((BYTE)((deviceID >> 8) & 0xFF));
frame.push_back((BYTE)((deviceID >> 16) & 0xFF));
frame.push_back((BYTE)((deviceID >> 24) & 0xFF));
// [FrameType:1]
frame.push_back(frameType);
// [DataLen:4] little-endian
frame.push_back((BYTE)(audioDataLen & 0xFF));
frame.push_back((BYTE)((audioDataLen >> 8) & 0xFF));
frame.push_back((BYTE)((audioDataLen >> 16) & 0xFF));
frame.push_back((BYTE)((audioDataLen >> 24) & 0xFF));
// [audio payload]
frame.insert(frame.end(), packet.begin(), packet.end());
// 广播到所有网页客户端
WebService().BroadcastH264Frame(deviceID, frame.data(), frame.size());
}
void CScreenSpyDlg::FeedAudioBuffers() void CScreenSpyDlg::FeedAudioBuffers()
{ {
if (!m_bAudioPlaying || !m_hWaveOut || !m_pRingBuf) return; if (!m_bAudioPlaying || !m_hWaveOut || !m_pRingBuf) return;

View File

@@ -9,6 +9,7 @@
#include "2015RemoteDlg.h" #include "2015RemoteDlg.h"
#include "common/config.h" #include "common/config.h"
#include "common/commands.h" // 包含 AudioFormat 定义
extern "C" extern "C"
{ {
@@ -349,11 +350,22 @@ public:
short* m_pOpusDecodeBuffer = nullptr; // Opus 解码输出缓冲区 short* m_pOpusDecodeBuffer = nullptr; // Opus 解码输出缓冲区
#endif #endif
// 网页端音频发送状态
BOOL m_bAudioFormatSent = FALSE; // 是否已发送格式信息到网页
AudioFormat m_AudioFormatWeb = {}; // 上次发送给网页的格式
// 音频到网页的多线程同步
std::mutex m_AudioWebMutex; // 保护音频发送状态的互斥锁
// 注意m_Settings.AudioEnabled 是全局的音频启用/禁用状态
void OnAudioData(BYTE* pData, UINT32 len); // 处理音频数据 void OnAudioData(BYTE* pData, UINT32 len); // 处理音频数据
BOOL InitAudioPlayback(const AudioFormat* fmt); // 初始化音频播放 BOOL InitAudioPlayback(const AudioFormat* fmt); // 初始化音频播放
void StopAudioPlayback(); // 停止音频播放 void StopAudioPlayback(); // 停止音频播放
void DisableAudio(); // 禁用音频(从网页命令)
void EnableAudio(); // 启用音频(从网页命令)
void SendAudioCtrl(BYTE enable, BYTE persist); // 发送音频控制命令 void SendAudioCtrl(BYTE enable, BYTE persist); // 发送音频控制命令
void FeedAudioBuffers(); // 填充音频缓冲区 void FeedAudioBuffers(); // 填充音频缓冲区
void SendAudioToWeb(const BYTE* pAudioData, UINT32 len, const WAVEFORMATEX* pFormat, BYTE compression); // 发送音频到网页 (compression=AudioCompression)
int GetClientRTT(); // 获取客户端RTT(ms) int GetClientRTT(); // 获取客户端RTT(ms)
void EvaluateQuality(); // 评估并调整质量 void EvaluateQuality(); // 评估并调整质量

View File

@@ -1283,12 +1283,74 @@
<script src="/static/xterm.js"></script> <script src="/static/xterm.js"></script>
<script src="/static/xterm-fit.js"></script> <script src="/static/xterm-fit.js"></script>
<!-- Opus codec for audio decompression -->
<script src="https://cdn.jsdelivr.net/npm/opus.js@0.5.0/dist/opus.js"></script>
<script> <script>
let ws = null, token = null, decoder = null, devices = [], currentDevice = null; let ws = null, token = null, decoder = null, devices = [], currentDevice = null;
let frameCount = 0, lastFrameTime = 0, fps = 0, pingInterval = null; let frameCount = 0, lastFrameTime = 0, fps = 0, pingInterval = null;
const canvas = document.getElementById('screen-canvas'); const canvas = document.getElementById('screen-canvas');
const ctx = canvas.getContext('2d'); const ctx = canvas.getContext('2d');
// ====== Audio & Video Implementation ======
//
// - Video: H.264 / AV1 → VideoDecoder Web API → canvas
// - Audio: client encodes PCM → Opus, server forwards raw Opus packets
// to web, web wraps each packet in a WebM SimpleBlock and
// feeds it to MediaSource → <audio> element (browser decodes
// Opus natively, plays via standard media-element pipeline).
//
// WS binary frame layout (matches C++ ScreenSpyDlg.cpp):
// Video : [deviceID:4][frameType:1][dataLen:4][videoData:N]
// Audio : [deviceID:4][frameType=96:1][dataLen:4]
// [hasFormat:1][AudioFormat:12][padding:1]?[opusPacket:N]
// Term : [magic:4='TRM1'][terminalData:N]
//
// AudioFormat (12 bytes, commands.h, pack(1)):
// channels:2 sampleRate:4 bitsPerSample:2 blockAlign:2
// compression:1 (0=PCM unsupported by web, 1=Opus) reserved:1
// MSE + WebM/Opus playback. Raw Opus packets arrive over WS; we wrap
// each one in a minimal WebM container in JS and feed it to a
// SourceBuffer attached to a hidden <audio> element. The browser
// decodes Opus natively. Tested on desktop Chrome; mobile playback
// is a known follow-up (see commit notes).
// Last format header accepted by handleAudioFrame():
// { compression, channels, sampleRate, bitsPerSample, blockAlign }.
// Reset to null when the WebSocket closes.
let audioFormat = null;
// Audio on/off flag (set by UI); handleAudioFrame() drops frames while false.
let audioEnabled = true;
// A/V sync monitoring (milliseconds).
// NOTE(review): not referenced by the audio code in this section; confirm it is used elsewhere.
let syncDrift = 0;
// Hidden <audio> sink, created inside the first user gesture.
let _audioElement = null;
// MediaSource attached to _audioElement.
let _mediaSource = null;
// SourceBuffer (Opus in WebM); null until a format is known and the MediaSource is open.
let _sourceBuffer = null;
// Chunks waiting for appendBuffer(); only one append is in flight at a time.
const _sourceBufferQueue = [];
// True from the appendBuffer() call until the SourceBuffer's 'updateend' event.
let _sourceBufferBusy = false;
// Set once the init segment for the current format has been queued.
let _initSegmentSent = false;
// Running absolute cluster timestamp (ms); advanced by OPUS_FRAME_MS per packet.
let _opusTimestampMs = 0;
// Duration assumed for every Opus packet: 960 samples @ 48k, matching the client encoder.
const OPUS_FRAME_MS = 20;
// Packets received before the SourceBuffer is ready (capped in pushOpusPacket()).
const _pendingOpusPackets = [];
// Autoplay policies only allow an HTMLAudioElement to start playing from
// inside a user-gesture handler. This registers click/keydown listeners
// that, on the first gesture, create the <audio> element and its
// MediaSource, and on every later gesture resume playback if the element
// has been paused. The listeners stay installed for the page lifetime.
function installAudioGestureUnlock() {
    function handleUserGesture() {
        if (_audioElement) {
            // Already set up: just make sure playback is running.
            if (_audioElement.paused) {
                _audioElement.play().catch(() => {});
            }
            return;
        }
        try {
            _setupAudioElementAndMediaSource();
            console.log('[MSE] <audio> + MediaSource set up by gesture');
        } catch (err) {
            console.error('[MSE] setup failed:', err && err.message);
        }
    }

    const listenerOptions = { passive: true, capture: true };
    for (const eventName of ['click', 'keydown']) {
        window.addEventListener(eventName, handleUserGesture, listenerOptions);
    }
}
installAudioGestureUnlock();
// Pagination and filter state // Pagination and filter state
let currentPage = 1; let currentPage = 1;
let viewMode = 'grid'; // 'grid' or 'list' let viewMode = 'grid'; // 'grid' or 'list'
@@ -1409,7 +1471,7 @@
} }
} }
}; };
ws.onclose = () => { stopPingInterval(); updateWsStatus('disconnected'); scheduleReconnect(); }; ws.onclose = () => { stopPingInterval(); updateWsStatus('disconnected'); stopAllAudio(); audioFormat = null; scheduleReconnect(); };
ws.onerror = (e) => console.error('WS error:', e); ws.onerror = (e) => console.error('WS error:', e);
ws.onmessage = (event) => { ws.onmessage = (event) => {
if (typeof event.data === 'string') handleSignaling(JSON.parse(event.data)); if (typeof event.data === 'string') handleSignaling(JSON.parse(event.data));
@@ -1649,16 +1711,294 @@
return videoBytes[0] === 0x00 ? 'avc' : 'av1'; return videoBytes[0] === 0x00 ? 'avc' : 'av1';
} }
// ============================================================
// Minimal WebM-Opus muxer.
//
// buildInitSegment(sampleRate, channels) -> Uint8Array
//     EBML header + unknown-size Segment header + Info + Tracks
//     (one A_OPUS audio track carrying an OpusHead). Append once,
//     before any cluster.
// buildCluster(opusBytes, absMs) -> Uint8Array
//     One Cluster holding a single keyframe SimpleBlock with the
//     given Opus packet, timestamped absMs (TimecodeScale = 1 ms).
//
// Output is meant for a SourceBuffer of type 'audio/webm; codecs="opus"'.
// ============================================================
const WebMMuxer = (function () {
    // Bytes of an ASCII string.
    function ascii(text) {
        return Array.from(text, ch => ch.charCodeAt(0));
    }

    // EBML variable-length integer: the position of the leading marker
    // bit encodes how many bytes the value occupies.
    function vint(value) {
        if (value < 0x7F) {
            return [0x80 | value];
        }
        if (value < 0x3FFF) {
            return [0x40 | (value >> 8), value & 0xFF];
        }
        if (value < 0x1FFFFF) {
            return [0x20 | (value >> 16), (value >> 8) & 0xFF, value & 0xFF];
        }
        if (value < 0x0FFFFFFF) {
            return [0x10 | (value >> 24), (value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF];
        }
        // Anything larger goes out as an 8-byte VINT (rarely needed here).
        const bytes = [0x01];
        for (let shift = 48; shift >= 0; shift -= 8) {
            bytes.push(Math.floor(value / Math.pow(2, shift)) & 0xFF);
        }
        return bytes;
    }

    // Unsigned integer as exactly n big-endian bytes.
    function uintBE(value, n) {
        const bytes = [];
        let rest = value;
        for (let k = 0; k < n; k++) {
            bytes.unshift(rest & 0xFF);
            rest = Math.floor(rest / 256);
        }
        return bytes;
    }

    // IEEE-754 double as 8 big-endian bytes.
    function f64BE(value) {
        const view = new DataView(new ArrayBuffer(8));
        view.setFloat64(0, value, false);
        return Array.from(new Uint8Array(view.buffer));
    }

    // One EBML element: ID bytes, payload size as VINT, then the payload.
    function elem(idBytes, payload) {
        return [...idBytes, ...vint(payload.length), ...payload];
    }

    // 19-byte OpusHead used as CodecPrivate. Pre-skip stays 0 because the
    // encoder delay is expressed through CodecDelay in the TrackEntry;
    // setting both would skip twice.
    function opusHead(sampleRate, channels) {
        return [
            ...ascii('OpusHead'),
            0x01,                                   // version
            channels & 0xFF,                        // channel count
            0x00, 0x00,                             // pre-skip
            sampleRate & 0xFF,                      // input sample rate (LE)
            (sampleRate >> 8) & 0xFF,
            (sampleRate >> 16) & 0xFF,
            (sampleRate >> 24) & 0xFF,
            0x00, 0x00,                             // output gain (LE)
            0x00                                    // channel mapping family
        ];
    }

    function buildInitSegment(sampleRate, channels) {
        const ebmlHeader = elem([0x1A, 0x45, 0xDF, 0xA3], [
            ...elem([0x42, 0x86], [0x01]),              // EBMLVersion
            ...elem([0x42, 0xF7], [0x01]),              // EBMLReadVersion
            ...elem([0x42, 0xF2], [0x04]),              // EBMLMaxIDLength
            ...elem([0x42, 0xF3], [0x08]),              // EBMLMaxSizeLength
            ...elem([0x42, 0x82], ascii('webm')),       // DocType
            ...elem([0x42, 0x87], [0x04]),              // DocTypeVersion
            ...elem([0x42, 0x85], [0x02])               // DocTypeReadVersion
        ]);

        const segmentInfo = elem([0x15, 0x49, 0xA9, 0x66], [
            ...elem([0x2A, 0xD7, 0xB1], uintBE(1000000, 3)),   // TimecodeScale: 1 ms
            ...elem([0x4D, 0x80], ascii('Yama')),              // MuxingApp
            ...elem([0x57, 0x41], ascii('Yama'))               // WritingApp
        ]);

        const audioSettings = elem([0xE1], [
            ...elem([0xB5], f64BE(sampleRate)),         // SamplingFrequency
            ...elem([0x9F], [channels & 0xFF])          // Channels
        ]);

        const trackEntry = elem([0xAE], [
            ...elem([0xD7], [0x01]),                            // TrackNumber 1
            ...elem([0x73, 0xC5], uintBE(1, 1)),                // TrackUID 1
            ...elem([0x83], [0x02]),                            // TrackType 2 (audio)
            ...elem([0xB9], [0x01]),                            // FlagEnabled
            ...elem([0x88], [0x01]),                            // FlagDefault
            ...elem([0x9C], [0x00]),                            // FlagLacing 0
            ...elem([0x86], ascii('A_OPUS')),                   // CodecID
            ...elem([0x63, 0xA2], opusHead(sampleRate, channels)), // CodecPrivate
            ...elem([0x56, 0xAA], uintBE(6500000, 3)),          // CodecDelay 6.5 ms (ns)
            ...elem([0x56, 0xBB], uintBE(80000000, 4)),         // SeekPreRoll 80 ms (ns)
            ...audioSettings
        ]);

        const tracks = elem([0x16, 0x54, 0xAE, 0x6B], trackEntry);

        // Segment ID followed by the "unknown size" marker, so clusters
        // can keep being appended for as long as the stream lasts.
        const segmentOpen = [
            0x18, 0x53, 0x80, 0x67,
            0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
        ];

        return new Uint8Array([...ebmlHeader, ...segmentOpen, ...segmentInfo, ...tracks]);
    }

    function buildCluster(opusBytes, absMs) {
        // SimpleBlock header: track 1, relative timestamp 0, keyframe flag.
        const blockHeader = [0x81, 0x00, 0x00, 0x80];
        const simpleBlock = elem([0xA3], [...blockHeader, ...Array.from(opusBytes)]);

        const cluster = elem([0x1F, 0x43, 0xB6, 0x75], [
            ...elem([0xE7], uintBE(absMs, 4)),          // Cluster timestamp (absolute, ms)
            ...simpleBlock
        ]);
        return new Uint8Array(cluster);
    }

    return { buildInitSegment, buildCluster };
})();
// Builds the hidden <audio> element and the MediaSource feeding it.
// Must run inside a user-gesture call stack, and play() must be issued
// synchronously (before any await) for autoplay policies to accept it.
// The globals are assigned as soon as each object exists, so a failure
// part-way through leaves whatever was already created in place.
function _setupAudioElementAndMediaSource() {
    const sink = document.createElement('audio');
    _audioElement = sink;
    sink.autoplay = true;
    sink.volume = 1.0;
    sink.style.display = 'none';
    document.body.appendChild(sink);

    const source = new MediaSource();
    _mediaSource = source;
    source.addEventListener('sourceopen', _onSourceOpen);
    sink.src = URL.createObjectURL(source);

    const onPlaying = () => console.log('[MSE] audio.play() ok');
    const onRejected = err => console.error('[MSE] audio.play() rejected:', err && err.message);
    sink.play().then(onPlaying, onRejected);
}
// 'sourceopen' handler for the MediaSource. If an Opus format has
// already been received, the SourceBuffer can be created right away;
// otherwise handleAudioFrame() creates it when the format arrives.
function _onSourceOpen() {
    const state = _mediaSource && _mediaSource.readyState;
    console.log('[MSE] sourceopen, readyState=' + state);

    const opusFormatKnown = Boolean(audioFormat) && audioFormat.compression === 1;
    if (!opusFormatKnown) return;

    _addSourceBufferAndInit();
}
// Creates the WebM/Opus SourceBuffer on the open MediaSource, queues the
// init segment for the current audioFormat, then queues every packet
// that was stashed in _pendingOpusPackets while no SourceBuffer existed.
//
// No-op when: no format is known yet, the MediaSource is missing or not
// 'open', a SourceBuffer already exists, or the browser cannot play
// 'audio/webm; codecs="opus"' through MSE.
function _addSourceBufferAndInit() {
    if (!audioFormat) return;
    if (!_mediaSource || _mediaSource.readyState !== 'open' || _sourceBuffer) return;

    const mime = 'audio/webm; codecs="opus"';
    if (!window.MediaSource || !MediaSource.isTypeSupported(mime)) {
        console.error('[MSE] ' + mime + ' not supported by this browser');
        return;
    }

    let sb;
    try {
        sb = _mediaSource.addSourceBuffer(mime);
    } catch (e) {
        console.error('[MSE] addSourceBuffer failed:', e && e.message);
        return;
    }
    _sourceBuffer = sb;

    sb.addEventListener('updateend', () => {
        // A SourceBuffer that stopAllAudio() removed while an append was in
        // flight still delivers a late 'updateend'. Ignore it: clearing the
        // busy flag here would start an append on the *current* buffer while
        // it is still updating, which throws and drops that chunk.
        if (_sourceBuffer !== sb) return;
        _sourceBufferBusy = false;
        _flushSourceBufferQueue();
    });
    sb.addEventListener('error', e => console.error('[MSE] sourceBuffer error', e));

    // The init segment must precede any media cluster.
    _enqueueAppend(WebMMuxer.buildInitSegment(audioFormat.sampleRate, audioFormat.channels));
    _initSegmentSent = true;
    _opusTimestampMs = 0;

    // Replay packets that arrived before the SourceBuffer was ready.
    while (_pendingOpusPackets.length > 0) {
        const pkt = _pendingOpusPackets.shift();
        _enqueueAppend(WebMMuxer.buildCluster(pkt, _opusTimestampMs));
        _opusTimestampMs += OPUS_FRAME_MS;
    }

    console.log('[MSE] SourceBuffer ready, init segment + ' +
        (_opusTimestampMs / OPUS_FRAME_MS) + ' queued packets appended');
}
// Queues one chunk (init segment or cluster) for the SourceBuffer and
// tries to start an append right away. If an append is already in
// flight, or no SourceBuffer exists, the chunk simply waits in
// _sourceBufferQueue until _flushSourceBufferQueue() is called again.
function _enqueueAppend(data) {
_sourceBufferQueue.push(data);
_flushSourceBufferQueue();
}
// Starts the next queued append, if the SourceBuffer exists and is idle.
// Exactly one chunk is taken per call; the 'updateend' listener calls
// this again to continue. A chunk whose appendBuffer() throws is logged
// and discarded, and the busy flag is released so later chunks can go.
function _flushSourceBufferQueue() {
    const idle = _sourceBuffer && !_sourceBufferBusy;
    if (!idle || _sourceBufferQueue.length === 0) return;

    const chunk = _sourceBufferQueue.shift();
    _sourceBufferBusy = true;
    try {
        _sourceBuffer.appendBuffer(chunk);
    } catch (err) {
        console.error('[MSE] appendBuffer threw:', err && err.message);
        _sourceBufferBusy = false;
    }
}
// Accepts one Opus packet. With a ready SourceBuffer it is wrapped in a
// WebM cluster at the running timestamp and queued for append; otherwise
// a copy is stashed until the SourceBuffer exists. Ignored unless the
// current format is Opus (compression === 1).
function pushOpusPacket(opusBytes) {
    const opusActive = audioFormat && audioFormat.compression === 1;
    if (!opusActive) return;

    const bufferReady = _sourceBuffer && _initSegmentSent;
    if (!bufferReady) {
        // Keep at most ~3 s of audio, discarding the oldest packets first.
        const maxQueued = Math.ceil(3000 / OPUS_FRAME_MS);
        if (_pendingOpusPackets.length >= maxQueued) {
            _pendingOpusPackets.splice(0, _pendingOpusPackets.length - maxQueued + 1);
        }
        _pendingOpusPackets.push(new Uint8Array(opusBytes));
        return;
    }

    _enqueueAppend(WebMMuxer.buildCluster(opusBytes, _opusTimestampMs));
    _opusTimestampMs += OPUS_FRAME_MS;
}
// Tears down the current SourceBuffer and all queued data so a new
// format/codec can be set up, while deliberately keeping the MediaSource
// and the <audio> element: they carry the play() permission obtained from
// the user gesture, and recreating either would need a fresh tap on iOS.
// endOfStream() is never called, because it moves the MediaSource to
// 'ended', after which addSourceBuffer() is no longer allowed.
function stopAllAudio() {
    const canDetach = _sourceBuffer && _mediaSource && _mediaSource.readyState === 'open';
    if (canDetach) {
        try {
            _mediaSource.removeSourceBuffer(_sourceBuffer);
        } catch (ignored) {
            // Best effort: the buffer is dropped below either way.
        }
    }

    // Empty the queues in place (other code holds references to them).
    _sourceBufferQueue.splice(0, _sourceBufferQueue.length);
    _pendingOpusPackets.splice(0, _pendingOpusPackets.length);

    _sourceBuffer = null;
    _sourceBufferBusy = false;
    _initSegmentSent = false;
    _opusTimestampMs = 0;
}
// Handles the payload of one audio WebSocket frame (frame header already
// stripped by the caller):
//   [hasFormat:1][AudioFormat:12][padding:1]?[opusPacket:N]
// A format header updates audioFormat and, when rate/channels/compression
// changed, rebuilds the SourceBuffer. The remaining bytes are handed to
// pushOpusPacket(). Only Opus (compression === 1) is played.
//
// data: ArrayBuffer holding the payload.
function handleAudioFrame(data) {
    if (!audioEnabled) return;

    const bytes = new Uint8Array(data);
    if (bytes.length < 1) return;

    const FORMAT_SIZE = 12;   // AudioFormat (commands.h, pack(1))
    let cursor = 1;           // byte 0 is the hasFormat flag

    if (bytes[0]) {
        if (bytes.length < cursor + FORMAT_SIZE) {
            console.warn('[Audio] truncated format header');
            return;
        }

        const header = new DataView(data, cursor, FORMAT_SIZE);
        const fmt = {
            compression: header.getUint8(10),
            channels: header.getUint16(0, true),
            sampleRate: header.getUint32(2, true),
            bitsPerSample: header.getUint16(6, true),
            blockAlign: header.getUint16(8, true)
        };
        cursor += FORMAT_SIZE + 1;   // format struct plus the padding byte

        if (fmt.channels === 0 || fmt.channels > 8) {
            console.error('[Audio] bad channels:', fmt.channels);
            return;
        }
        if (fmt.sampleRate < 8000 || fmt.sampleRate > 48000) {
            console.error('[Audio] bad sampleRate:', fmt.sampleRate);
            return;
        }

        const sameAsCurrent = audioFormat &&
            audioFormat.sampleRate === fmt.sampleRate &&
            audioFormat.channels === fmt.channels &&
            audioFormat.compression === fmt.compression;
        audioFormat = fmt;

        if (!sameAsCurrent) {
            if (fmt.compression !== 1) {
                console.error('[Audio] PCM payload not supported by web; set USING_OPUS=1 on client');
                stopAllAudio();
                return;
            }

            stopAllAudio();
            // If the MediaSource is not open yet, its sourceopen handler
            // picks up audioFormat when it fires.
            if (_mediaSource && _mediaSource.readyState === 'open') {
                _addSourceBufferAndInit();
            }
            console.log('[Audio] Format → ch=' + fmt.channels +
                ' sr=' + fmt.sampleRate + ' compression=' + fmt.compression);
        }
    }

    const opusActive = audioFormat && audioFormat.compression === 1;
    if (!opusActive) return;
    if (bytes.length <= cursor) return;

    // Whatever remains is a single variable-length Opus packet.
    pushOpusPacket(new Uint8Array(data, cursor));
}
function handleBinaryFrame(data) { function handleBinaryFrame(data) {
// 终端输出帧4 字节 magic 'TRM1' (0x54 0x52 0x4D 0x31) → 转发到 xterm。 // 终端输出帧4 字节 magic 'TRM1' (0x54 0x52 0x4D 0x31) → 转发到 xterm。
// 视频帧首 4 字节是 deviceID (uint32 LE)撞这个具体值的概率极低4 字节 magic
// 比单字节前缀安全得多,无需额外的状态校验。
const u8 = new Uint8Array(data); const u8 = new Uint8Array(data);
if (u8.length >= 4 && if (u8.length >= 4 &&
u8[0] === 0x54 && u8[1] === 0x52 && u8[2] === 0x4D && u8[3] === 0x31) { u8[0] === 0x54 && u8[1] === 0x52 && u8[2] === 0x4D && u8[3] === 0x31) {
if (termState && termState.term) termState.term.write(u8.subarray(4)); if (termState && termState.term) termState.term.write(u8.subarray(4));
return; return;
} }
// Audio frame: frameType byte at offset 4 indicates audio (96 = TOKEN_SCREEN_AUDIO)
// Full frame format: [deviceID:4][frameType:1][dataLen:4][hasFormat:1][AudioFormat?][audio_data...]
if (u8.length > 4 && u8[4] === 96) {
// Skip frame header (9 bytes) and pass audio payload to handler
const audioPayload = data.slice(9);
handleAudioFrame(audioPayload);
return;
}
// Video frame: [deviceID:4][frameType:1][dataLen:4][videoData...]
const view = new DataView(data); const view = new DataView(data);
const deviceId = view.getUint32(0, true); const deviceId = view.getUint32(0, true);
const frameType = view.getUint8(4); const frameType = view.getUint8(4);