Feature(audio): forward client PCM to web viewers with continuous playback
This commit is contained in:
@@ -2609,7 +2609,8 @@ DWORD WINAPI CScreenManager::AudioThreadProc(LPVOID lpParam)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (pThis->m_pCaptureClient == nullptr)
|
||||
break;
|
||||
pThis->m_pCaptureClient->ReleaseBuffer(numFramesAvailable);
|
||||
|
||||
hr = pThis->m_pCaptureClient->GetNextPacketSize(&packetLength);
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <md5.h>
|
||||
#include <cstdint> // for uint16_t
|
||||
#include <vector>
|
||||
#include <mutex> // for std::mutex, std::lock_guard
|
||||
#include "WebService.h"
|
||||
|
||||
// 文件接收消息数据结构
|
||||
@@ -3494,9 +3495,53 @@ void CScreenSpyDlg::StopAudioPlayback()
|
||||
#endif
|
||||
m_nAudioCompression = 0;
|
||||
|
||||
// 重置网页端音频格式标志(线程安全的清理)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||
m_bAudioFormatSent = FALSE;
|
||||
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
|
||||
}
|
||||
|
||||
Mprintf("[ScreenSpy] 音频播放已停止\n");
|
||||
}
|
||||
|
||||
void CScreenSpyDlg::DisableAudio()
|
||||
{
|
||||
// 复用 IDM_AUDIO_TOGGLE 的逻辑,但仅禁用
|
||||
if (m_Settings.AudioEnabled) {
|
||||
m_Settings.AudioEnabled = FALSE;
|
||||
SendAudioCtrl(CYCLEAUDIO_DISABLE, 1);
|
||||
StopAudioPlayback();
|
||||
|
||||
// 清理网页端格式状态(在 mutex 保护下)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||
m_bAudioFormatSent = FALSE;
|
||||
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
|
||||
}
|
||||
|
||||
Mprintf("[Audio Web] 禁用音频(来自 web 命令)\n");
|
||||
}
|
||||
}
|
||||
|
||||
void CScreenSpyDlg::EnableAudio()
|
||||
{
|
||||
// 复用 IDM_AUDIO_TOGGLE 的逻辑,但仅启用
|
||||
if (!m_Settings.AudioEnabled) {
|
||||
m_Settings.AudioEnabled = TRUE;
|
||||
SendAudioCtrl(CYCLEAUDIO_ENABLE, 1);
|
||||
|
||||
// 强制重新发送格式信息(清理缓存)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||
m_bAudioFormatSent = FALSE;
|
||||
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
|
||||
}
|
||||
|
||||
Mprintf("[Audio Web] 启用音频(来自 web 命令)\n");
|
||||
}
|
||||
}
|
||||
|
||||
void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
|
||||
{
|
||||
if (len < 1) return;
|
||||
@@ -3535,12 +3580,20 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
|
||||
UINT32 audioLen = len - offset;
|
||||
if (audioLen == 0) return;
|
||||
|
||||
// 保存"上线格式"字节(Opus 模式下是原始压缩包,PCM 模式下是原始 PCM)。
|
||||
// 这就是要透传给 web 的数据 —— web 端用 MSE+WebM 直接播 Opus,
|
||||
// 不需要服务器解码后再发 PCM。本地 waveOut 仍然需要 PCM,因此下面
|
||||
// 还是会解码一遍。
|
||||
BYTE* pWireData = pAudioData;
|
||||
UINT32 wireLen = audioLen;
|
||||
BYTE wireCompression = (BYTE)m_nAudioCompression;
|
||||
|
||||
// 帧对齐参数
|
||||
DWORD blockAlign = m_AudioFormat.nBlockAlign;
|
||||
if (blockAlign == 0) blockAlign = 4; // 默认 stereo 16-bit
|
||||
|
||||
#if USING_OPUS
|
||||
// Opus 解码
|
||||
// Opus 解码(仅供本地 waveOut 使用;web 仍会收到原始压缩包)
|
||||
if (m_nAudioCompression == AUDIO_COMPRESS_OPUS && m_pOpusDecoder && m_pOpusDecodeBuffer) {
|
||||
COpusDecoder* pDecoder = (COpusDecoder*)m_pOpusDecoder;
|
||||
int decodedSamples = pDecoder->Decode(pAudioData, audioLen, m_pOpusDecodeBuffer, 960 * 2);
|
||||
@@ -3583,10 +3636,104 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
|
||||
Mprintf("[Audio] 预缓冲完成,开始播放 (缓冲: %u bytes)\n", m_nRingDataLen);
|
||||
}
|
||||
|
||||
// 发送上线格式(Opus 压缩包 / 或原始 PCM)到网页
|
||||
SendAudioToWeb(pWireData, wireLen, &m_AudioFormat, wireCompression);
|
||||
|
||||
// 填充可用的 waveOut 缓冲区
|
||||
FeedAudioBuffers();
|
||||
}
|
||||
|
||||
void CScreenSpyDlg::SendAudioToWeb(const BYTE* pAudioData, UINT32 len, const WAVEFORMATEX* pFormat, BYTE compression)
|
||||
{
|
||||
if (!WebService().IsRunning()) return;
|
||||
if (!pAudioData || len == 0) return;
|
||||
if (!m_ContextObject) return;
|
||||
if (!m_Settings.AudioEnabled) return;
|
||||
|
||||
std::vector<BYTE> packet;
|
||||
BOOL formatChanged = FALSE;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||
|
||||
if (!m_bAudioFormatSent) {
|
||||
formatChanged = TRUE;
|
||||
} else if (pFormat && (
|
||||
pFormat->nChannels != m_AudioFormatWeb.channels ||
|
||||
pFormat->nSamplesPerSec != m_AudioFormatWeb.sampleRate ||
|
||||
pFormat->wBitsPerSample != m_AudioFormatWeb.bitsPerSample ||
|
||||
compression != m_AudioFormatWeb.compression)) {
|
||||
formatChanged = TRUE;
|
||||
}
|
||||
|
||||
// 第1字节:是否包含格式信息
|
||||
packet.push_back(formatChanged ? 1 : 0);
|
||||
|
||||
if (formatChanged && pFormat) {
|
||||
if (pFormat->nChannels < 1 || pFormat->nChannels > 8 ||
|
||||
pFormat->nSamplesPerSec < 8000 || pFormat->nSamplesPerSec > 48000 ||
|
||||
pFormat->wBitsPerSample != 16) {
|
||||
Mprintf("[Audio Web] Invalid format: ch=%d, sr=%d, bps=%d\n",
|
||||
pFormat->nChannels, pFormat->nSamplesPerSec, pFormat->wBitsPerSample);
|
||||
return;
|
||||
}
|
||||
|
||||
// 12-byte AudioFormat 结构(commands.h, pack(1))
|
||||
AudioFormat fmt;
|
||||
fmt.channels = (WORD)pFormat->nChannels;
|
||||
fmt.sampleRate = (DWORD)pFormat->nSamplesPerSec;
|
||||
fmt.bitsPerSample = (WORD)pFormat->wBitsPerSample;
|
||||
// blockAlign 对 Opus 是 informational 的(包是变长压缩),按 PCM 推算填上即可。
|
||||
fmt.blockAlign = (WORD)(fmt.channels * fmt.bitsPerSample / 8);
|
||||
fmt.compression = compression;
|
||||
fmt.reserved = 0;
|
||||
|
||||
BYTE* pFmt = (BYTE*)&fmt;
|
||||
packet.insert(packet.end(), pFmt, pFmt + sizeof(fmt));
|
||||
// padding byte: 保持后续音频数据落在偶数偏移上(PCM 模式下 web 端
|
||||
// 需要 Int16 对齐;Opus 模式无所谓但保留兼容旧 web 解析)
|
||||
packet.push_back(0);
|
||||
|
||||
m_AudioFormatWeb = fmt;
|
||||
m_bAudioFormatSent = TRUE;
|
||||
|
||||
Mprintf("[Audio Web] Format sent: ch=%d, sr=%d Hz, compression=%d\n",
|
||||
fmt.channels, fmt.sampleRate, fmt.compression);
|
||||
}
|
||||
} // 释放 mutex
|
||||
|
||||
// 添加音频数据(此操作不需要 mutex,因为我们已经复制了所有需要的共享状态)
|
||||
packet.insert(packet.end(), pAudioData, pAudioData + len);
|
||||
|
||||
// 构造完整帧:[DeviceID:4][FrameType:1][DataLen:4][audio payload...]
|
||||
// FrameType: 96 = TOKEN_SCREEN_AUDIO,用于在网页端识别音频
|
||||
std::vector<BYTE> frame;
|
||||
|
||||
uint64_t deviceID = GetClientID();
|
||||
uint32_t audioDataLen = (uint32_t)packet.size();
|
||||
uint8_t frameType = 96; // TOKEN_SCREEN_AUDIO
|
||||
|
||||
// [DeviceID:4] little-endian
|
||||
frame.push_back((BYTE)(deviceID & 0xFF));
|
||||
frame.push_back((BYTE)((deviceID >> 8) & 0xFF));
|
||||
frame.push_back((BYTE)((deviceID >> 16) & 0xFF));
|
||||
frame.push_back((BYTE)((deviceID >> 24) & 0xFF));
|
||||
|
||||
// [FrameType:1]
|
||||
frame.push_back(frameType);
|
||||
|
||||
// [DataLen:4] little-endian
|
||||
frame.push_back((BYTE)(audioDataLen & 0xFF));
|
||||
frame.push_back((BYTE)((audioDataLen >> 8) & 0xFF));
|
||||
frame.push_back((BYTE)((audioDataLen >> 16) & 0xFF));
|
||||
frame.push_back((BYTE)((audioDataLen >> 24) & 0xFF));
|
||||
|
||||
// [audio payload]
|
||||
frame.insert(frame.end(), packet.begin(), packet.end());
|
||||
|
||||
// 广播到所有网页客户端
|
||||
WebService().BroadcastH264Frame(deviceID, frame.data(), frame.size());
|
||||
}
|
||||
|
||||
void CScreenSpyDlg::FeedAudioBuffers()
|
||||
{
|
||||
if (!m_bAudioPlaying || !m_hWaveOut || !m_pRingBuf) return;
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "2015RemoteDlg.h"
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/commands.h" // 包含 AudioFormat 定义
|
||||
|
||||
extern "C"
|
||||
{
|
||||
@@ -349,11 +350,22 @@ public:
|
||||
short* m_pOpusDecodeBuffer = nullptr; // Opus 解码输出缓冲区
|
||||
#endif
|
||||
|
||||
// 网页端音频发送状态
|
||||
BOOL m_bAudioFormatSent = FALSE; // 是否已发送格式信息到网页
|
||||
AudioFormat m_AudioFormatWeb = {}; // 上次发送给网页的格式
|
||||
|
||||
// 音频到网页的多线程同步
|
||||
std::mutex m_AudioWebMutex; // 保护音频发送状态的互斥锁
|
||||
// 注意:m_Settings.AudioEnabled 是全局的音频启用/禁用状态
|
||||
|
||||
void OnAudioData(BYTE* pData, UINT32 len); // 处理音频数据
|
||||
BOOL InitAudioPlayback(const AudioFormat* fmt); // 初始化音频播放
|
||||
void StopAudioPlayback(); // 停止音频播放
|
||||
void DisableAudio(); // 禁用音频(从网页命令)
|
||||
void EnableAudio(); // 启用音频(从网页命令)
|
||||
void SendAudioCtrl(BYTE enable, BYTE persist); // 发送音频控制命令
|
||||
void FeedAudioBuffers(); // 填充音频缓冲区
|
||||
void SendAudioToWeb(const BYTE* pAudioData, UINT32 len, const WAVEFORMATEX* pFormat, BYTE compression); // 发送音频到网页 (compression=AudioCompression)
|
||||
|
||||
int GetClientRTT(); // 获取客户端RTT(ms)
|
||||
void EvaluateQuality(); // 评估并调整质量
|
||||
|
||||
@@ -1283,12 +1283,74 @@
|
||||
<script src="/static/xterm.js"></script>
|
||||
<script src="/static/xterm-fit.js"></script>
|
||||
|
||||
<!-- Opus codec for audio decompression -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/opus.js@0.5.0/dist/opus.js"></script>
|
||||
|
||||
<script>
|
||||
let ws = null, token = null, decoder = null, devices = [], currentDevice = null;
|
||||
let frameCount = 0, lastFrameTime = 0, fps = 0, pingInterval = null;
|
||||
const canvas = document.getElementById('screen-canvas');
|
||||
const ctx = canvas.getContext('2d');
|
||||
|
||||
// ====== Audio & Video Implementation ======
|
||||
//
|
||||
// - Video: H.264 / AV1 → VideoDecoder Web API → canvas
|
||||
// - Audio: client encodes PCM → Opus, server forwards raw Opus packets
|
||||
// to web, web wraps each packet in a WebM SimpleBlock and
|
||||
// feeds it to MediaSource → <audio> element (browser decodes
|
||||
// Opus natively, plays via standard media-element pipeline).
|
||||
//
|
||||
// WS binary frame layout (matches C++ ScreenSpyDlg.cpp):
|
||||
// Video : [deviceID:4][frameType:1][dataLen:4][videoData:N]
|
||||
// Audio : [deviceID:4][frameType=96:1][dataLen:4]
|
||||
// [hasFormat:1][AudioFormat:12][padding:1]?[opusPacket:N]
|
||||
// Term : [magic:4='TRM1'][terminalData:N]
|
||||
//
|
||||
// AudioFormat (12 bytes, commands.h, pack(1)):
|
||||
// channels:2 sampleRate:4 bitsPerSample:2 blockAlign:2
|
||||
// compression:1 (0=PCM unsupported by web, 1=Opus) reserved:1
|
||||
|
||||
// MSE + WebM/Opus playback. Raw Opus packets arrive over WS; we wrap
|
||||
// each one in a minimal WebM container in JS and feed it to a
|
||||
// SourceBuffer attached to a hidden <audio> element. The browser
|
||||
// decodes Opus natively. Tested on desktop Chrome; mobile playback
|
||||
// is a known follow-up (see commit notes).
|
||||
let audioFormat = null; // { compression, channels, sampleRate, bitsPerSample, blockAlign }
|
||||
let audioEnabled = true; // Audio on/off flag (set by UI)
|
||||
let syncDrift = 0; // A/V sync monitoring (milliseconds)
|
||||
let _audioElement = null; // hidden <audio> sink
|
||||
let _mediaSource = null; // MediaSource attached to _audioElement
|
||||
let _sourceBuffer = null; // SourceBuffer (Opus in WebM)
|
||||
const _sourceBufferQueue = []; // appendBuffer queue (one in-flight at a time)
|
||||
let _sourceBufferBusy = false;
|
||||
let _initSegmentSent = false; // first init segment appended for current format
|
||||
let _opusTimestampMs = 0; // running absolute cluster timestamp (ms)
|
||||
const OPUS_FRAME_MS = 20; // 960 samples @ 48k — matches client encoder
|
||||
const _pendingOpusPackets = []; // packets received before SourceBuffer is ready
|
||||
|
||||
// Browser autoplay policies require an HTMLAudioElement to be created
|
||||
// and .play()'d synchronously inside a user-gesture event handler.
|
||||
// We hook the first click/keydown to spin up the element + MediaSource.
|
||||
// Subsequent activity (e.g. tab regaining focus) re-issues play().
|
||||
/**
 * Registers capture-phase, passive click and keydown listeners on window.
 * On a gesture: if no <audio> element exists yet, the element + MediaSource
 * pair is created; otherwise, if the element is paused, play() is re-issued
 * (rejections are ignored).
 */
function installAudioGestureUnlock() {
    function handleGesture() {
        if (_audioElement) {
            if (_audioElement.paused) {
                _audioElement.play().catch(() => {});
            }
            return;
        }
        try {
            _setupAudioElementAndMediaSource();
            console.log('[MSE] <audio> + MediaSource set up by gesture');
        } catch (err) {
            console.error('[MSE] setup failed:', err && err.message);
        }
    }

    const listenerOptions = { passive: true, capture: true };
    for (const eventType of ['click', 'keydown']) {
        window.addEventListener(eventType, handleGesture, listenerOptions);
    }
}
|
||||
installAudioGestureUnlock();
|
||||
|
||||
// Pagination and filter state
|
||||
let currentPage = 1;
|
||||
let viewMode = 'grid'; // 'grid' or 'list'
|
||||
@@ -1409,7 +1471,7 @@
|
||||
}
|
||||
}
|
||||
};
|
||||
ws.onclose = () => { stopPingInterval(); updateWsStatus('disconnected'); scheduleReconnect(); };
|
||||
ws.onclose = () => { stopPingInterval(); updateWsStatus('disconnected'); stopAllAudio(); audioFormat = null; scheduleReconnect(); };
|
||||
ws.onerror = (e) => console.error('WS error:', e);
|
||||
ws.onmessage = (event) => {
|
||||
if (typeof event.data === 'string') handleSignaling(JSON.parse(event.data));
|
||||
@@ -1649,16 +1711,294 @@
|
||||
return videoBytes[0] === 0x00 ? 'avc' : 'av1';
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Minimal WebM-Opus muxer: wraps each Opus packet in a one-block
|
||||
// Cluster so it can be fed to a SourceBuffer of type
|
||||
// 'audio/webm; codecs="opus"'. The init segment (EBML header +
|
||||
// Segment header + Tracks with OpusHead) is built once when the
|
||||
// format is known and appended before any media clusters.
|
||||
// ============================================================
|
||||
const WebMMuxer = (function () {
    // ASCII string -> array of byte values.
    function ascii(text) {
        return Array.from(text, ch => ch.charCodeAt(0));
    }

    // Unsigned integer as exactly `n` big-endian bytes (higher bytes are dropped).
    function uintBE(value, n) {
        const bytes = [];
        for (let k = 0; k < n; k++) {
            bytes.unshift(value & 0xFF);
            value = Math.floor(value / 256);
        }
        return bytes;
    }

    // EBML variable-length integer. Widths 1-4 are tried in order; a width is
    // used only when the value is below its all-ones pattern. Anything larger
    // is written as an 8-byte VINT.
    function vint(value) {
        for (let width = 1; width <= 4; width++) {
            const allOnes = Math.pow(2, 7 * width) - 1;
            if (value < allOnes) {
                const bytes = uintBE(value, width);
                bytes[0] |= 0x80 >> (width - 1);   // length marker bit
                return bytes;
            }
        }
        return [0x01].concat(uintBE(value, 7));
    }

    // IEEE-754 double as 8 big-endian bytes.
    function f64BE(value) {
        const view = new DataView(new ArrayBuffer(8));
        view.setFloat64(0, value, false);
        return [...new Uint8Array(view.buffer)];
    }

    // One EBML element: ID bytes, then payload size as a VINT, then the payload.
    function elem(idBytes, payload) {
        return [...idBytes, ...vint(payload.length), ...payload];
    }

    // 19-byte OpusHead used as CodecPrivate. Pre-skip is written as 0; the
    // encoder delay is carried by CodecDelay in the TrackEntry instead.
    function opusHead(sampleRate, channels) {
        const rateLE = [0, 8, 16, 24].map(shift => (sampleRate >> shift) & 0xFF);
        return [
            ...ascii('OpusHead'),
            0x01,                 // version
            channels & 0xFF,      // channel count
            0x00, 0x00,           // pre-skip
            ...rateLE,            // input sample rate (LE)
            0x00, 0x00,           // output gain (LE)
            0x00                  // channel mapping family
        ];
    }

    // Init segment: EBML header, open-ended Segment header, Info, Tracks.
    function buildInitSegment(sampleRate, channels) {
        const ebmlHeader = elem([0x1A, 0x45, 0xDF, 0xA3], [
            ...elem([0x42, 0x86], [0x01]),            // EBMLVersion
            ...elem([0x42, 0xF7], [0x01]),            // EBMLReadVersion
            ...elem([0x42, 0xF2], [0x04]),            // EBMLMaxIDLength
            ...elem([0x42, 0xF3], [0x08]),            // EBMLMaxSizeLength
            ...elem([0x42, 0x82], ascii('webm')),     // DocType
            ...elem([0x42, 0x87], [0x04]),            // DocTypeVersion
            ...elem([0x42, 0x85], [0x02])             // DocTypeReadVersion
        ]);

        const segmentInfo = elem([0x15, 0x49, 0xA9, 0x66], [
            ...elem([0x2A, 0xD7, 0xB1], uintBE(1000000, 3)),   // TimecodeScale: 1 ms
            ...elem([0x4D, 0x80], ascii('Yama')),              // MuxingApp
            ...elem([0x57, 0x41], ascii('Yama'))               // WritingApp
        ]);

        const audioSettings = elem([0xE1], [
            ...elem([0xB5], f64BE(sampleRate)),       // SamplingFrequency
            ...elem([0x9F], [channels & 0xFF])        // Channels
        ]);

        const trackEntry = [
            ...elem([0xD7], [0x01]),                              // TrackNumber 1
            ...elem([0x73, 0xC5], uintBE(1, 1)),                  // TrackUID 1
            ...elem([0x83], [0x02]),                              // TrackType 2 (audio)
            ...elem([0xB9], [0x01]),                              // FlagEnabled
            ...elem([0x88], [0x01]),                              // FlagDefault
            ...elem([0x9C], [0x00]),                              // FlagLacing 0
            ...elem([0x86], ascii('A_OPUS')),                     // CodecID
            ...elem([0x63, 0xA2], opusHead(sampleRate, channels)),// CodecPrivate
            ...elem([0x56, 0xAA], uintBE(6500000, 3)),            // CodecDelay 6.5 ms (ns)
            ...elem([0x56, 0xBB], uintBE(80000000, 4)),           // SeekPreRoll 80 ms (ns)
            ...audioSettings
        ];
        const tracks = elem([0x16, 0x54, 0xAE, 0x6B], elem([0xAE], trackEntry));

        // Segment ID followed by the unknown-size marker, so clusters can be
        // appended indefinitely.
        const segmentOpen = [
            0x18, 0x53, 0x80, 0x67,
            0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
        ];

        return Uint8Array.from([...ebmlHeader, ...segmentOpen, ...segmentInfo, ...tracks]);
    }

    // One Cluster holding a single SimpleBlock with the given Opus packet.
    // absMs is the cluster's absolute timestamp, written as 4 bytes.
    function buildCluster(opusBytes, absMs) {
        const blockHeader = [0x81, 0x00, 0x00, 0x80];   // TrackNumber=1, relative ts=0, keyframe flag
        const simpleBlock = elem([0xA3], [...blockHeader, ...opusBytes]);
        const timestamp = elem([0xE7], uintBE(absMs, 4));
        return Uint8Array.from(elem([0x1F, 0x43, 0xB6, 0x75], [...timestamp, ...simpleBlock]));
    }

    return { buildInitSegment, buildCluster };
})();
|
||||
|
||||
// Create the hidden <audio> + MediaSource pair INSIDE a user-gesture
|
||||
// call stack. Must complete .play() synchronously before any await.
|
||||
/**
 * Creates the hidden <audio> element, attaches a fresh MediaSource to it via
 * an object URL, and calls play() right away. The outcome of play() is only
 * logged. Stores the element and MediaSource in _audioElement / _mediaSource.
 */
function _setupAudioElementAndMediaSource() {
    const el = document.createElement('audio');
    _audioElement = el;
    Object.assign(el, { autoplay: true, volume: 1.0 });
    el.style.display = 'none';
    document.body.appendChild(el);

    const source = new MediaSource();
    _mediaSource = source;
    source.addEventListener('sourceopen', _onSourceOpen);
    el.src = URL.createObjectURL(source);

    const onPlaying = () => console.log('[MSE] audio.play() ok');
    const onRejected = err => console.error('[MSE] audio.play() rejected:', err && err.message);
    el.play().then(onPlaying, onRejected);
}
|
||||
|
||||
/**
 * 'sourceopen' handler for the MediaSource. Logs the ready state and, when an
 * Opus format (compression === 1) is already known, creates the SourceBuffer.
 */
function _onSourceOpen() {
    const state = _mediaSource && _mediaSource.readyState;
    console.log('[MSE] sourceopen, readyState=' + state);

    const opusFormatKnown = audioFormat && audioFormat.compression === 1;
    if (!opusFormatKnown) return;
    _addSourceBufferAndInit();
}
|
||||
|
||||
/**
 * Adds a WebM/Opus SourceBuffer to the open MediaSource, queues the init
 * segment built from the current audioFormat, then queues every packet that
 * was stashed while no SourceBuffer existed.
 *
 * Does nothing when the MediaSource is missing or not 'open', a SourceBuffer
 * already exists, no audio format is known, or the browser does not support
 * 'audio/webm; codecs="opus"'.
 */
function _addSourceBufferAndInit() {
    if (!_mediaSource || _mediaSource.readyState !== 'open' || _sourceBuffer) return;
    if (!audioFormat) return;   // the init segment needs sampleRate / channels

    const mime = 'audio/webm; codecs="opus"';
    if (!window.MediaSource || !MediaSource.isTypeSupported(mime)) {
        console.error('[MSE] ' + mime + ' not supported by this browser');
        return;
    }

    let sb;
    try {
        sb = _mediaSource.addSourceBuffer(mime);
    } catch (e) {
        console.error('[MSE] addSourceBuffer failed:', e && e.message);
        return;
    }
    _sourceBuffer = sb;

    sb.addEventListener('updateend', () => {
        // A SourceBuffer removed while an append was in flight still fires
        // 'updateend'. Ignore it, otherwise it would clear the busy flag that
        // now belongs to the replacement buffer.
        if (sb !== _sourceBuffer) return;
        _sourceBufferBusy = false;
        _flushSourceBufferQueue();
    });
    sb.addEventListener('error', e => console.error('[MSE] sourceBuffer error', e));

    // Init segment first
    _enqueueAppend(WebMMuxer.buildInitSegment(audioFormat.sampleRate, audioFormat.channels));
    _initSegmentSent = true;
    _opusTimestampMs = 0;

    // Flush packets that arrived before the SourceBuffer was ready
    let flushed = 0;
    while (_pendingOpusPackets.length > 0) {
        const pkt = _pendingOpusPackets.shift();
        _enqueueAppend(WebMMuxer.buildCluster(pkt, _opusTimestampMs));
        _opusTimestampMs += OPUS_FRAME_MS;
        flushed++;
    }
    console.log('[MSE] SourceBuffer ready, init segment + ' +
                flushed + ' queued packets appended');
}
|
||||
|
||||
/**
 * Queues one chunk (init segment or cluster) for the SourceBuffer and
 * immediately tries to hand the oldest queued chunk to it.
 */
function _enqueueAppend(data) {
    const queue = _sourceBufferQueue;
    queue.push(data);
    _flushSourceBufferQueue();
}
|
||||
/**
 * Hands the oldest queued chunk to the SourceBuffer, one append at a time.
 * Does nothing when there is no SourceBuffer, an append is in flight, or the
 * queue is empty. A chunk whose appendBuffer() call throws is logged and
 * dropped; the rest of the queue is retried on the next call.
 */
function _flushSourceBufferQueue() {
    if (!_sourceBuffer || _sourceBufferBusy) return;
    // Also ask the SourceBuffer itself: appendBuffer() throws while
    // `updating` is true, and the hand-maintained busy flag can get out of
    // step (for example after the SourceBuffer has been replaced).
    if (_sourceBuffer.updating) return;
    if (_sourceBufferQueue.length === 0) return;

    const next = _sourceBufferQueue.shift();
    _sourceBufferBusy = true;
    try {
        _sourceBuffer.appendBuffer(next);
    } catch (e) {
        console.error('[MSE] appendBuffer threw:', e && e.message);
        _sourceBufferBusy = false;
    }
}
|
||||
|
||||
/**
 * Accepts one Opus packet. Ignored unless the current format is Opus
 * (compression === 1). With a ready SourceBuffer the packet is wrapped in a
 * cluster at the running timestamp and queued; otherwise a copy is stashed,
 * keeping at most about 3 seconds' worth of packets (oldest dropped first).
 */
function pushOpusPacket(opusBytes) {
    const isOpus = audioFormat && audioFormat.compression === 1;
    if (!isOpus) return;

    const bufferReady = _sourceBuffer && _initSegmentSent;
    if (!bufferReady) {
        const cap = Math.ceil(3000 / OPUS_FRAME_MS);
        // Make room so that the stash never exceeds `cap` entries.
        const excess = _pendingOpusPackets.length - cap + 1;
        if (excess > 0) _pendingOpusPackets.splice(0, excess);
        _pendingOpusPackets.push(new Uint8Array(opusBytes));
        return;
    }

    const cluster = WebMMuxer.buildCluster(opusBytes, _opusTimestampMs);
    _enqueueAppend(cluster);
    _opusTimestampMs += OPUS_FRAME_MS;
}
|
||||
|
||||
// Remove the SourceBuffer (so a new format/codec can be set up) but
|
||||
// KEEP the same MediaSource and <audio> element. They hold our
|
||||
// gesture-acquired play() permission — recreating either would
|
||||
// require a fresh user tap on iOS. Never call endOfStream(), that
|
||||
// transitions MediaSource to 'ended' which forbids future
|
||||
// addSourceBuffer().
|
||||
/**
 * Detaches the current SourceBuffer from the MediaSource (only while the
 * MediaSource is 'open'; removal errors are ignored) and resets all append
 * state: queue, busy flag, init-segment flag, running timestamp and the
 * stash of pending packets. The <audio> element and the MediaSource are
 * left untouched.
 */
function stopAllAudio() {
    const canDetach = _sourceBuffer && _mediaSource && _mediaSource.readyState === 'open';
    if (canDetach) {
        try {
            _mediaSource.removeSourceBuffer(_sourceBuffer);
        } catch (ignored) {
            // best effort
        }
    }

    _sourceBuffer = null;
    _sourceBufferBusy = false;
    _initSegmentSent = false;
    _opusTimestampMs = 0;
    _sourceBufferQueue.splice(0);
    _pendingOpusPackets.splice(0);
}
|
||||
|
||||
/**
 * Handles one audio payload (frame header already stripped):
 *   [hasFormat:1][AudioFormat:12][padding:1]?[opusPacket:N]
 *
 * When a format header is present it is parsed and validated; if sample
 * rate, channel count or compression changed, playback state is reset and a
 * new SourceBuffer is set up (only Opus, compression === 1, is supported).
 * The remaining bytes are forwarded as one Opus packet.
 *
 * @param {ArrayBuffer} data audio payload
 */
function handleAudioFrame(data) {
    if (!audioEnabled) return;

    const u8 = new Uint8Array(data);
    if (u8.length < 1) return;

    const FORMAT_LEN = 12;   // AudioFormat (commands.h, pack(1))
    const PADDING_LEN = 1;   // padding byte that follows the format

    let offset = 0;
    const hasFormat = u8[offset++];

    if (hasFormat) {
        // Require the whole header (format + padding) before acting on it, so
        // that a truncated frame can never trigger an audio re-init.
        if (u8.length < offset + FORMAT_LEN + PADDING_LEN) {
            console.warn('[Audio] truncated format header');
            return;
        }
        const view = new DataView(data, offset, FORMAT_LEN);
        const channels      = view.getUint16(0, true);
        const sampleRate    = view.getUint32(2, true);
        const bitsPerSample = view.getUint16(6, true);
        const blockAlign    = view.getUint16(8, true);
        const compression   = view.getUint8(10);
        offset += FORMAT_LEN + PADDING_LEN;

        if (channels === 0 || channels > 8) { console.error('[Audio] bad channels:', channels); return; }
        if (sampleRate < 8000 || sampleRate > 48000) { console.error('[Audio] bad sampleRate:', sampleRate); return; }

        const fmt = { compression, channels, sampleRate, bitsPerSample, blockAlign };
        const needReinit = !audioFormat ||
            audioFormat.sampleRate !== fmt.sampleRate ||
            audioFormat.channels !== fmt.channels ||
            audioFormat.compression !== fmt.compression;
        audioFormat = fmt;

        if (needReinit) {
            stopAllAudio();
            if (fmt.compression !== 1) {
                console.error('[Audio] PCM payload not supported by web; set USING_OPUS=1 on client');
                return;
            }
            if (_mediaSource && _mediaSource.readyState === 'open') {
                _addSourceBufferAndInit();
            }
            // else: the sourceopen handler picks up audioFormat when it fires
            console.log('[Audio] Format → ch=' + fmt.channels +
                        ' sr=' + fmt.sampleRate + ' compression=' + fmt.compression);
        }
    }

    if (!audioFormat || audioFormat.compression !== 1) return;
    if (u8.length <= offset) return;

    // The remaining bytes are one Opus packet (variable length).
    const opusBytes = new Uint8Array(data, offset);
    pushOpusPacket(opusBytes);
}
|
||||
|
||||
function handleBinaryFrame(data) {
|
||||
// 终端输出帧:4 字节 magic 'TRM1' (0x54 0x52 0x4D 0x31) → 转发到 xterm。
|
||||
// 视频帧首 4 字节是 deviceID (uint32 LE),撞这个具体值的概率极低;4 字节 magic
|
||||
// 比单字节前缀安全得多,无需额外的状态校验。
|
||||
const u8 = new Uint8Array(data);
|
||||
if (u8.length >= 4 &&
|
||||
u8[0] === 0x54 && u8[1] === 0x52 && u8[2] === 0x4D && u8[3] === 0x31) {
|
||||
if (termState && termState.term) termState.term.write(u8.subarray(4));
|
||||
return;
|
||||
}
|
||||
|
||||
// Audio frame: frameType byte at offset 4 indicates audio (96 = TOKEN_SCREEN_AUDIO)
|
||||
// Full frame format: [deviceID:4][frameType:1][dataLen:4][hasFormat:1][AudioFormat?][audio_data...]
|
||||
if (u8.length > 4 && u8[4] === 96) {
|
||||
// Skip frame header (9 bytes) and pass audio payload to handler
|
||||
const audioPayload = data.slice(9);
|
||||
handleAudioFrame(audioPayload);
|
||||
return;
|
||||
}
|
||||
|
||||
// Video frame: [deviceID:4][frameType:1][dataLen:4][videoData...]
|
||||
const view = new DataView(data);
|
||||
const deviceId = view.getUint32(0, true);
|
||||
const frameType = view.getUint8(4);
|
||||
|
||||
Reference in New Issue
Block a user