Feature(audio): forward client PCM to web viewers with continuous playback
This commit is contained in:
@@ -2609,7 +2609,8 @@ DWORD WINAPI CScreenManager::AudioThreadProc(LPVOID lpParam)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
if (pThis->m_pCaptureClient == nullptr)
|
||||||
|
break;
|
||||||
pThis->m_pCaptureClient->ReleaseBuffer(numFramesAvailable);
|
pThis->m_pCaptureClient->ReleaseBuffer(numFramesAvailable);
|
||||||
|
|
||||||
hr = pThis->m_pCaptureClient->GetNextPacketSize(&packetLength);
|
hr = pThis->m_pCaptureClient->GetNextPacketSize(&packetLength);
|
||||||
|
|||||||
@@ -18,6 +18,7 @@
|
|||||||
#include <md5.h>
|
#include <md5.h>
|
||||||
#include <cstdint> // for uint16_t
|
#include <cstdint> // for uint16_t
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <mutex> // for std::mutex, std::lock_guard
|
||||||
#include "WebService.h"
|
#include "WebService.h"
|
||||||
|
|
||||||
// 文件接收消息数据结构
|
// 文件接收消息数据结构
|
||||||
@@ -3494,9 +3495,53 @@ void CScreenSpyDlg::StopAudioPlayback()
|
|||||||
#endif
|
#endif
|
||||||
m_nAudioCompression = 0;
|
m_nAudioCompression = 0;
|
||||||
|
|
||||||
|
// 重置网页端音频格式标志(线程安全的清理)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||||
|
m_bAudioFormatSent = FALSE;
|
||||||
|
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
|
||||||
|
}
|
||||||
|
|
||||||
Mprintf("[ScreenSpy] 音频播放已停止\n");
|
Mprintf("[ScreenSpy] 音频播放已停止\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CScreenSpyDlg::DisableAudio()
|
||||||
|
{
|
||||||
|
// 复用 IDM_AUDIO_TOGGLE 的逻辑,但仅禁用
|
||||||
|
if (m_Settings.AudioEnabled) {
|
||||||
|
m_Settings.AudioEnabled = FALSE;
|
||||||
|
SendAudioCtrl(CYCLEAUDIO_DISABLE, 1);
|
||||||
|
StopAudioPlayback();
|
||||||
|
|
||||||
|
// 清理网页端格式状态(在 mutex 保护下)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||||
|
m_bAudioFormatSent = FALSE;
|
||||||
|
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
|
||||||
|
}
|
||||||
|
|
||||||
|
Mprintf("[Audio Web] 禁用音频(来自 web 命令)\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CScreenSpyDlg::EnableAudio()
|
||||||
|
{
|
||||||
|
// 复用 IDM_AUDIO_TOGGLE 的逻辑,但仅启用
|
||||||
|
if (!m_Settings.AudioEnabled) {
|
||||||
|
m_Settings.AudioEnabled = TRUE;
|
||||||
|
SendAudioCtrl(CYCLEAUDIO_ENABLE, 1);
|
||||||
|
|
||||||
|
// 强制重新发送格式信息(清理缓存)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||||
|
m_bAudioFormatSent = FALSE;
|
||||||
|
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
|
||||||
|
}
|
||||||
|
|
||||||
|
Mprintf("[Audio Web] 启用音频(来自 web 命令)\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
|
void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
|
||||||
{
|
{
|
||||||
if (len < 1) return;
|
if (len < 1) return;
|
||||||
@@ -3535,12 +3580,20 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
|
|||||||
UINT32 audioLen = len - offset;
|
UINT32 audioLen = len - offset;
|
||||||
if (audioLen == 0) return;
|
if (audioLen == 0) return;
|
||||||
|
|
||||||
|
// 保存"上线格式"字节(Opus 模式下是原始压缩包,PCM 模式下是原始 PCM)。
|
||||||
|
// 这就是要透传给 web 的数据 —— web 端用 MSE+WebM 直接播 Opus,
|
||||||
|
// 不需要服务器解码后再发 PCM。本地 waveOut 仍然需要 PCM,因此下面
|
||||||
|
// 还是会解码一遍。
|
||||||
|
BYTE* pWireData = pAudioData;
|
||||||
|
UINT32 wireLen = audioLen;
|
||||||
|
BYTE wireCompression = (BYTE)m_nAudioCompression;
|
||||||
|
|
||||||
// 帧对齐参数
|
// 帧对齐参数
|
||||||
DWORD blockAlign = m_AudioFormat.nBlockAlign;
|
DWORD blockAlign = m_AudioFormat.nBlockAlign;
|
||||||
if (blockAlign == 0) blockAlign = 4; // 默认 stereo 16-bit
|
if (blockAlign == 0) blockAlign = 4; // 默认 stereo 16-bit
|
||||||
|
|
||||||
#if USING_OPUS
|
#if USING_OPUS
|
||||||
// Opus 解码
|
// Opus 解码(仅供本地 waveOut 使用;web 仍会收到原始压缩包)
|
||||||
if (m_nAudioCompression == AUDIO_COMPRESS_OPUS && m_pOpusDecoder && m_pOpusDecodeBuffer) {
|
if (m_nAudioCompression == AUDIO_COMPRESS_OPUS && m_pOpusDecoder && m_pOpusDecodeBuffer) {
|
||||||
COpusDecoder* pDecoder = (COpusDecoder*)m_pOpusDecoder;
|
COpusDecoder* pDecoder = (COpusDecoder*)m_pOpusDecoder;
|
||||||
int decodedSamples = pDecoder->Decode(pAudioData, audioLen, m_pOpusDecodeBuffer, 960 * 2);
|
int decodedSamples = pDecoder->Decode(pAudioData, audioLen, m_pOpusDecodeBuffer, 960 * 2);
|
||||||
@@ -3583,10 +3636,104 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
|
|||||||
Mprintf("[Audio] 预缓冲完成,开始播放 (缓冲: %u bytes)\n", m_nRingDataLen);
|
Mprintf("[Audio] 预缓冲完成,开始播放 (缓冲: %u bytes)\n", m_nRingDataLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 发送上线格式(Opus 压缩包 / 或原始 PCM)到网页
|
||||||
|
SendAudioToWeb(pWireData, wireLen, &m_AudioFormat, wireCompression);
|
||||||
|
|
||||||
// 填充可用的 waveOut 缓冲区
|
// 填充可用的 waveOut 缓冲区
|
||||||
FeedAudioBuffers();
|
FeedAudioBuffers();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CScreenSpyDlg::SendAudioToWeb(const BYTE* pAudioData, UINT32 len, const WAVEFORMATEX* pFormat, BYTE compression)
|
||||||
|
{
|
||||||
|
if (!WebService().IsRunning()) return;
|
||||||
|
if (!pAudioData || len == 0) return;
|
||||||
|
if (!m_ContextObject) return;
|
||||||
|
if (!m_Settings.AudioEnabled) return;
|
||||||
|
|
||||||
|
std::vector<BYTE> packet;
|
||||||
|
BOOL formatChanged = FALSE;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
|
||||||
|
|
||||||
|
if (!m_bAudioFormatSent) {
|
||||||
|
formatChanged = TRUE;
|
||||||
|
} else if (pFormat && (
|
||||||
|
pFormat->nChannels != m_AudioFormatWeb.channels ||
|
||||||
|
pFormat->nSamplesPerSec != m_AudioFormatWeb.sampleRate ||
|
||||||
|
pFormat->wBitsPerSample != m_AudioFormatWeb.bitsPerSample ||
|
||||||
|
compression != m_AudioFormatWeb.compression)) {
|
||||||
|
formatChanged = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 第1字节:是否包含格式信息
|
||||||
|
packet.push_back(formatChanged ? 1 : 0);
|
||||||
|
|
||||||
|
if (formatChanged && pFormat) {
|
||||||
|
if (pFormat->nChannels < 1 || pFormat->nChannels > 8 ||
|
||||||
|
pFormat->nSamplesPerSec < 8000 || pFormat->nSamplesPerSec > 48000 ||
|
||||||
|
pFormat->wBitsPerSample != 16) {
|
||||||
|
Mprintf("[Audio Web] Invalid format: ch=%d, sr=%d, bps=%d\n",
|
||||||
|
pFormat->nChannels, pFormat->nSamplesPerSec, pFormat->wBitsPerSample);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 12-byte AudioFormat 结构(commands.h, pack(1))
|
||||||
|
AudioFormat fmt;
|
||||||
|
fmt.channels = (WORD)pFormat->nChannels;
|
||||||
|
fmt.sampleRate = (DWORD)pFormat->nSamplesPerSec;
|
||||||
|
fmt.bitsPerSample = (WORD)pFormat->wBitsPerSample;
|
||||||
|
// blockAlign 对 Opus 是 informational 的(包是变长压缩),按 PCM 推算填上即可。
|
||||||
|
fmt.blockAlign = (WORD)(fmt.channels * fmt.bitsPerSample / 8);
|
||||||
|
fmt.compression = compression;
|
||||||
|
fmt.reserved = 0;
|
||||||
|
|
||||||
|
BYTE* pFmt = (BYTE*)&fmt;
|
||||||
|
packet.insert(packet.end(), pFmt, pFmt + sizeof(fmt));
|
||||||
|
// padding byte: 保持后续音频数据落在偶数偏移上(PCM 模式下 web 端
|
||||||
|
// 需要 Int16 对齐;Opus 模式无所谓但保留兼容旧 web 解析)
|
||||||
|
packet.push_back(0);
|
||||||
|
|
||||||
|
m_AudioFormatWeb = fmt;
|
||||||
|
m_bAudioFormatSent = TRUE;
|
||||||
|
|
||||||
|
Mprintf("[Audio Web] Format sent: ch=%d, sr=%d Hz, compression=%d\n",
|
||||||
|
fmt.channels, fmt.sampleRate, fmt.compression);
|
||||||
|
}
|
||||||
|
} // 释放 mutex
|
||||||
|
|
||||||
|
// 添加音频数据(此操作不需要 mutex,因为我们已经复制了所有需要的共享状态)
|
||||||
|
packet.insert(packet.end(), pAudioData, pAudioData + len);
|
||||||
|
|
||||||
|
// 构造完整帧:[DeviceID:4][FrameType:1][DataLen:4][audio payload...]
|
||||||
|
// FrameType: 96 = TOKEN_SCREEN_AUDIO,用于在网页端识别音频
|
||||||
|
std::vector<BYTE> frame;
|
||||||
|
|
||||||
|
uint64_t deviceID = GetClientID();
|
||||||
|
uint32_t audioDataLen = (uint32_t)packet.size();
|
||||||
|
uint8_t frameType = 96; // TOKEN_SCREEN_AUDIO
|
||||||
|
|
||||||
|
// [DeviceID:4] little-endian
|
||||||
|
frame.push_back((BYTE)(deviceID & 0xFF));
|
||||||
|
frame.push_back((BYTE)((deviceID >> 8) & 0xFF));
|
||||||
|
frame.push_back((BYTE)((deviceID >> 16) & 0xFF));
|
||||||
|
frame.push_back((BYTE)((deviceID >> 24) & 0xFF));
|
||||||
|
|
||||||
|
// [FrameType:1]
|
||||||
|
frame.push_back(frameType);
|
||||||
|
|
||||||
|
// [DataLen:4] little-endian
|
||||||
|
frame.push_back((BYTE)(audioDataLen & 0xFF));
|
||||||
|
frame.push_back((BYTE)((audioDataLen >> 8) & 0xFF));
|
||||||
|
frame.push_back((BYTE)((audioDataLen >> 16) & 0xFF));
|
||||||
|
frame.push_back((BYTE)((audioDataLen >> 24) & 0xFF));
|
||||||
|
|
||||||
|
// [audio payload]
|
||||||
|
frame.insert(frame.end(), packet.begin(), packet.end());
|
||||||
|
|
||||||
|
// 广播到所有网页客户端
|
||||||
|
WebService().BroadcastH264Frame(deviceID, frame.data(), frame.size());
|
||||||
|
}
|
||||||
|
|
||||||
void CScreenSpyDlg::FeedAudioBuffers()
|
void CScreenSpyDlg::FeedAudioBuffers()
|
||||||
{
|
{
|
||||||
if (!m_bAudioPlaying || !m_hWaveOut || !m_pRingBuf) return;
|
if (!m_bAudioPlaying || !m_hWaveOut || !m_pRingBuf) return;
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#include "2015RemoteDlg.h"
|
#include "2015RemoteDlg.h"
|
||||||
|
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
|
#include "common/commands.h" // 包含 AudioFormat 定义
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
@@ -349,11 +350,22 @@ public:
|
|||||||
short* m_pOpusDecodeBuffer = nullptr; // Opus 解码输出缓冲区
|
short* m_pOpusDecodeBuffer = nullptr; // Opus 解码输出缓冲区
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// 网页端音频发送状态
|
||||||
|
BOOL m_bAudioFormatSent = FALSE; // 是否已发送格式信息到网页
|
||||||
|
AudioFormat m_AudioFormatWeb = {}; // 上次发送给网页的格式
|
||||||
|
|
||||||
|
// 音频到网页的多线程同步
|
||||||
|
std::mutex m_AudioWebMutex; // 保护音频发送状态的互斥锁
|
||||||
|
// 注意:m_Settings.AudioEnabled 是全局的音频启用/禁用状态
|
||||||
|
|
||||||
void OnAudioData(BYTE* pData, UINT32 len); // 处理音频数据
|
void OnAudioData(BYTE* pData, UINT32 len); // 处理音频数据
|
||||||
BOOL InitAudioPlayback(const AudioFormat* fmt); // 初始化音频播放
|
BOOL InitAudioPlayback(const AudioFormat* fmt); // 初始化音频播放
|
||||||
void StopAudioPlayback(); // 停止音频播放
|
void StopAudioPlayback(); // 停止音频播放
|
||||||
|
void DisableAudio(); // 禁用音频(从网页命令)
|
||||||
|
void EnableAudio(); // 启用音频(从网页命令)
|
||||||
void SendAudioCtrl(BYTE enable, BYTE persist); // 发送音频控制命令
|
void SendAudioCtrl(BYTE enable, BYTE persist); // 发送音频控制命令
|
||||||
void FeedAudioBuffers(); // 填充音频缓冲区
|
void FeedAudioBuffers(); // 填充音频缓冲区
|
||||||
|
void SendAudioToWeb(const BYTE* pAudioData, UINT32 len, const WAVEFORMATEX* pFormat, BYTE compression); // 发送音频到网页 (compression=AudioCompression)
|
||||||
|
|
||||||
int GetClientRTT(); // 获取客户端RTT(ms)
|
int GetClientRTT(); // 获取客户端RTT(ms)
|
||||||
void EvaluateQuality(); // 评估并调整质量
|
void EvaluateQuality(); // 评估并调整质量
|
||||||
|
|||||||
@@ -1283,12 +1283,74 @@
|
|||||||
<script src="/static/xterm.js"></script>
|
<script src="/static/xterm.js"></script>
|
||||||
<script src="/static/xterm-fit.js"></script>
|
<script src="/static/xterm-fit.js"></script>
|
||||||
|
|
||||||
|
<!-- Opus codec for audio decompression -->
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/opus.js@0.5.0/dist/opus.js"></script>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
let ws = null, token = null, decoder = null, devices = [], currentDevice = null;
|
let ws = null, token = null, decoder = null, devices = [], currentDevice = null;
|
||||||
let frameCount = 0, lastFrameTime = 0, fps = 0, pingInterval = null;
|
let frameCount = 0, lastFrameTime = 0, fps = 0, pingInterval = null;
|
||||||
const canvas = document.getElementById('screen-canvas');
|
const canvas = document.getElementById('screen-canvas');
|
||||||
const ctx = canvas.getContext('2d');
|
const ctx = canvas.getContext('2d');
|
||||||
|
|
||||||
|
// ====== Audio & Video Implementation ======
|
||||||
|
//
|
||||||
|
// - Video: H.264 / AV1 → VideoDecoder Web API → canvas
|
||||||
|
// - Audio: client encodes PCM → Opus, server forwards raw Opus packets
|
||||||
|
// to web, web wraps each packet in a WebM SimpleBlock and
|
||||||
|
// feeds it to MediaSource → <audio> element (browser decodes
|
||||||
|
// Opus natively, plays via standard media-element pipeline).
|
||||||
|
//
|
||||||
|
// WS binary frame layout (matches C++ ScreenSpyDlg.cpp):
|
||||||
|
// Video : [deviceID:4][frameType:1][dataLen:4][videoData:N]
|
||||||
|
// Audio : [deviceID:4][frameType=96:1][dataLen:4]
|
||||||
|
// [hasFormat:1][AudioFormat:12][padding:1]?[opusPacket:N]
|
||||||
|
// Term : [magic:4='TRM1'][terminalData:N]
|
||||||
|
//
|
||||||
|
// AudioFormat (12 bytes, commands.h, pack(1)):
|
||||||
|
// channels:2 sampleRate:4 bitsPerSample:2 blockAlign:2
|
||||||
|
// compression:1 (0=PCM unsupported by web, 1=Opus) reserved:1
|
||||||
|
|
||||||
|
// MSE + WebM/Opus playback. Raw Opus packets arrive over WS; we wrap
|
||||||
|
// each one in a minimal WebM container in JS and feed it to a
|
||||||
|
// SourceBuffer attached to a hidden <audio> element. The browser
|
||||||
|
// decodes Opus natively. Tested on desktop Chrome; mobile playback
|
||||||
|
// is a known follow-up (see commit notes).
|
||||||
|
let audioFormat = null; // { compression, channels, sampleRate, bitsPerSample, blockAlign }
|
||||||
|
let audioEnabled = true; // Audio on/off flag (set by UI)
|
||||||
|
let syncDrift = 0; // A/V sync monitoring (milliseconds)
|
||||||
|
let _audioElement = null; // hidden <audio> sink
|
||||||
|
let _mediaSource = null; // MediaSource attached to _audioElement
|
||||||
|
let _sourceBuffer = null; // SourceBuffer (Opus in WebM)
|
||||||
|
const _sourceBufferQueue = []; // appendBuffer queue (one in-flight at a time)
|
||||||
|
let _sourceBufferBusy = false;
|
||||||
|
let _initSegmentSent = false; // first init segment appended for current format
|
||||||
|
let _opusTimestampMs = 0; // running absolute cluster timestamp (ms)
|
||||||
|
const OPUS_FRAME_MS = 20; // 960 samples @ 48k — matches client encoder
|
||||||
|
const _pendingOpusPackets = []; // packets received before SourceBuffer is ready
|
||||||
|
|
||||||
|
// Autoplay policies require the HTMLAudioElement to be created and
// .play()'d synchronously inside a user-gesture handler. This installs
// capture-phase, passive click/keydown listeners on window:
//   - first gesture  -> builds the <audio> element + MediaSource;
//   - later gestures -> re-issue play() if the element is paused
//     (e.g. after the tab regained focus).
function installAudioGestureUnlock() {
    function handleUserGesture() {
        if (_audioElement) {
            // Already set up: just resume if needed; rejections are ignored.
            if (_audioElement.paused) {
                _audioElement.play().catch(() => {});
            }
            return;
        }
        try {
            _setupAudioElementAndMediaSource();
            console.log('[MSE] <audio> + MediaSource set up by gesture');
        } catch (err) {
            console.error('[MSE] setup failed:', err && err.message);
        }
    }

    const listenerOptions = { passive: true, capture: true };
    for (const eventName of ['click', 'keydown']) {
        window.addEventListener(eventName, handleUserGesture, listenerOptions);
    }
}
|
||||||
|
installAudioGestureUnlock();
|
||||||
|
|
||||||
// Pagination and filter state
|
// Pagination and filter state
|
||||||
let currentPage = 1;
|
let currentPage = 1;
|
||||||
let viewMode = 'grid'; // 'grid' or 'list'
|
let viewMode = 'grid'; // 'grid' or 'list'
|
||||||
@@ -1409,7 +1471,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
ws.onclose = () => { stopPingInterval(); updateWsStatus('disconnected'); scheduleReconnect(); };
|
ws.onclose = () => { stopPingInterval(); updateWsStatus('disconnected'); stopAllAudio(); audioFormat = null; scheduleReconnect(); };
|
||||||
ws.onerror = (e) => console.error('WS error:', e);
|
ws.onerror = (e) => console.error('WS error:', e);
|
||||||
ws.onmessage = (event) => {
|
ws.onmessage = (event) => {
|
||||||
if (typeof event.data === 'string') handleSignaling(JSON.parse(event.data));
|
if (typeof event.data === 'string') handleSignaling(JSON.parse(event.data));
|
||||||
@@ -1649,16 +1711,294 @@
|
|||||||
return videoBytes[0] === 0x00 ? 'avc' : 'av1';
|
return videoBytes[0] === 0x00 ? 'avc' : 'av1';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
// Minimal WebM/Opus muxer.
//   buildInitSegment(sampleRate, channels) -> Uint8Array
//       EBML header + unknown-size Segment header + Info + Tracks
//       (one audio track, CodecID A_OPUS, OpusHead as CodecPrivate).
//       Built once per format and appended before any cluster.
//   buildCluster(opusBytes, absMs) -> Uint8Array
//       One Cluster holding a single SimpleBlock with one Opus packet,
//       stamped with the absolute timestamp absMs (milliseconds).
// The output is meant for a SourceBuffer of type
// 'audio/webm; codecs="opus"'.
// ============================================================
const WebMMuxer = (function () {
    // Element IDs used below, as raw byte sequences.
    const ID = {
        EBML: [0x1A, 0x45, 0xDF, 0xA3],
        EBMLVersion: [0x42, 0x86],
        EBMLReadVersion: [0x42, 0xF7],
        EBMLMaxIDLength: [0x42, 0xF2],
        EBMLMaxSizeLength: [0x42, 0xF3],
        DocType: [0x42, 0x82],
        DocTypeVersion: [0x42, 0x87],
        DocTypeReadVersion: [0x42, 0x85],
        Segment: [0x18, 0x53, 0x80, 0x67],
        Info: [0x15, 0x49, 0xA9, 0x66],
        TimecodeScale: [0x2A, 0xD7, 0xB1],
        MuxingApp: [0x4D, 0x80],
        WritingApp: [0x57, 0x41],
        Tracks: [0x16, 0x54, 0xAE, 0x6B],
        TrackEntry: [0xAE],
        TrackNumber: [0xD7],
        TrackUID: [0x73, 0xC5],
        TrackType: [0x83],
        FlagEnabled: [0xB9],
        FlagDefault: [0x88],
        FlagLacing: [0x9C],
        CodecID: [0x86],
        CodecPrivate: [0x63, 0xA2],
        CodecDelay: [0x56, 0xAA],
        SeekPreRoll: [0x56, 0xBB],
        Audio: [0xE1],
        SamplingFrequency: [0xB5],
        Channels: [0x9F],
        Cluster: [0x1F, 0x43, 0xB6, 0x75],
        Timestamp: [0xE7],
        SimpleBlock: [0xA3]
    };

    // Character codes of an ASCII string.
    function ascii(text) {
        return Array.from(text, ch => ch.charCodeAt(0));
    }

    // Unsigned integer, big-endian, exactly byteCount bytes.
    function uintBE(value, byteCount) {
        const bytes = [];
        let rest = value;
        for (let k = 0; k < byteCount; k++) {
            bytes.unshift(rest & 0xFF);
            rest = Math.floor(rest / 256);
        }
        return bytes;
    }

    // EBML variable-length integer. The position of the marker bit in the
    // first byte encodes the width. Widths 1-4 are tried in order; each
    // threshold stops one short of the largest value the width can hold.
    // Anything larger falls back to the 8-byte form.
    function vint(value) {
        for (let width = 1; width <= 4; width++) {
            if (value < Math.pow(2, 7 * width) - 1) {
                const bytes = [];
                for (let shift = 8 * (width - 1); shift >= 0; shift -= 8) {
                    bytes.push((value >> shift) & 0xFF);
                }
                bytes[0] |= 0x80 >> (width - 1);
                return bytes;
            }
        }
        const wide = [0x01];
        for (let power = 6; power >= 0; power--) {
            wide.push(Math.floor(value / Math.pow(2, power * 8)) & 0xFF);
        }
        return wide;
    }

    // 64-bit IEEE float, big-endian.
    function f64BE(value) {
        const view = new DataView(new ArrayBuffer(8));
        view.setFloat64(0, value, false);
        return Array.from(new Uint8Array(view.buffer));
    }

    // EBML element = ID bytes + VINT(payload length) + payload bytes.
    function elem(idBytes, payload) {
        return [...idBytes, ...vint(payload.length), ...payload];
    }

    // 19-byte OpusHead used as CodecPrivate. Pre-skip stays 0 here because
    // the encoder delay is carried by CodecDelay in the TrackEntry; setting
    // both would skip twice.
    function opusHead(sampleRate, channels) {
        const rateLE = [0, 8, 16, 24].map(shift => (sampleRate >> shift) & 0xFF);
        return [
            ...ascii('OpusHead'),
            0x01,             // version
            channels & 0xFF,  // channel count
            0x00, 0x00,       // pre-skip (CodecDelay is used instead)
            ...rateLE,        // input sample rate, little-endian
            0x00, 0x00,       // output gain (little-endian)
            0x00              // channel mapping family
        ];
    }

    function buildInitSegment(sampleRate, channels) {
        const ebmlHeader = elem(ID.EBML, [
            ...elem(ID.EBMLVersion, [0x01]),
            ...elem(ID.EBMLReadVersion, [0x01]),
            ...elem(ID.EBMLMaxIDLength, [0x04]),
            ...elem(ID.EBMLMaxSizeLength, [0x08]),
            ...elem(ID.DocType, ascii('webm')),
            ...elem(ID.DocTypeVersion, [0x04]),
            ...elem(ID.DocTypeReadVersion, [0x02])
        ]);

        const info = elem(ID.Info, [
            ...elem(ID.TimecodeScale, uintBE(1000000, 3)), // 1 tick = 1 ms
            ...elem(ID.MuxingApp, ascii('Yama')),
            ...elem(ID.WritingApp, ascii('Yama'))
        ]);

        const audioSettings = elem(ID.Audio, [
            ...elem(ID.SamplingFrequency, f64BE(sampleRate)),
            ...elem(ID.Channels, [channels & 0xFF])
        ]);

        const trackEntry = [
            ...elem(ID.TrackNumber, [0x01]),
            ...elem(ID.TrackUID, uintBE(1, 1)),
            ...elem(ID.TrackType, [0x02]),                        // 2 = audio
            ...elem(ID.FlagEnabled, [0x01]),
            ...elem(ID.FlagDefault, [0x01]),
            ...elem(ID.FlagLacing, [0x00]),
            ...elem(ID.CodecID, ascii('A_OPUS')),
            ...elem(ID.CodecPrivate, opusHead(sampleRate, channels)),
            ...elem(ID.CodecDelay, uintBE(6500000, 3)),           // 6.5 ms in ns
            ...elem(ID.SeekPreRoll, uintBE(80000000, 4)),         // 80 ms in ns
            ...audioSettings
        ];
        const tracks = elem(ID.Tracks, elem(ID.TrackEntry, trackEntry));

        // Segment is opened with the unknown-size marker so clusters can be
        // streamed indefinitely.
        const unknownSize = [0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
        const segmentOpen = [...ID.Segment, ...unknownSize];

        return new Uint8Array([...ebmlHeader, ...segmentOpen, ...info, ...tracks]);
    }

    function buildCluster(opusBytes, absMs) {
        // SimpleBlock header: track number 1, relative timestamp 0, keyframe flag.
        const blockHeader = [0x81, 0x00, 0x00, 0x80];
        const simpleBlock = elem(ID.SimpleBlock, blockHeader.concat(Array.from(opusBytes)));
        const timestamp = elem(ID.Timestamp, uintBE(absMs, 4)); // absolute, ms
        return new Uint8Array(elem(ID.Cluster, timestamp.concat(simpleBlock)));
    }

    return { buildInitSegment, buildCluster };
})();
|
||||||
|
|
||||||
|
// Builds the hidden <audio> element and its MediaSource. Must be called
// from inside a user-gesture call stack, and play() must be issued
// synchronously (before any await) for the gesture to count.
function _setupAudioElementAndMediaSource() {
    const sink = document.createElement('audio');
    _audioElement = sink;
    Object.assign(sink, { autoplay: true, volume: 1.0 });
    sink.style.display = 'none';
    document.body.appendChild(sink);

    const source = new MediaSource();
    _mediaSource = source;
    source.addEventListener('sourceopen', _onSourceOpen);
    sink.src = URL.createObjectURL(source);

    const onPlaying = () => console.log('[MSE] audio.play() ok');
    const onRejected = err => console.error('[MSE] audio.play() rejected:', err && err.message);
    sink.play().then(onPlaying, onRejected);
}
|
||||||
|
|
||||||
|
// 'sourceopen' handler for the MediaSource. If an Opus format
// (compression === 1) is already known, the SourceBuffer is created right
// away; otherwise nothing happens here.
function _onSourceOpen() {
    const state = _mediaSource && _mediaSource.readyState;
    console.log('[MSE] sourceopen, readyState=' + state);

    const opusFormatKnown = audioFormat && audioFormat.compression === 1;
    if (!opusFormatKnown) return;
    _addSourceBufferAndInit();
}
|
||||||
|
|
||||||
|
// Creates the WebM/Opus SourceBuffer on the open MediaSource, appends the
// init segment for the current audioFormat, then drains the packets that
// were stashed while no SourceBuffer existed. Does nothing when the
// MediaSource is missing / not open or a SourceBuffer already exists.
function _addSourceBufferAndInit() {
    const sourceIsOpen = _mediaSource && _mediaSource.readyState === 'open';
    if (!sourceIsOpen || _sourceBuffer) return;

    const mime = 'audio/webm; codecs="opus"';
    const supported = window.MediaSource && MediaSource.isTypeSupported(mime);
    if (!supported) {
        console.error('[MSE] ' + mime + ' not supported by this browser');
        return;
    }

    try {
        _sourceBuffer = _mediaSource.addSourceBuffer(mime);
    } catch (err) {
        console.error('[MSE] addSourceBuffer failed:', err && err.message);
        return;
    }

    _sourceBuffer.addEventListener('error', evt => console.error('[MSE] sourceBuffer error', evt));
    // Each finished append frees the buffer for the next queued chunk.
    _sourceBuffer.addEventListener('updateend', () => {
        _sourceBufferBusy = false;
        _flushSourceBufferQueue();
    });

    // The init segment has to go in before any media cluster.
    const initSegment = WebMMuxer.buildInitSegment(audioFormat.sampleRate, audioFormat.channels);
    _enqueueAppend(initSegment);
    _initSegmentSent = true;
    _opusTimestampMs = 0;

    // Packets that arrived early are stamped from 0 in arrival order.
    const backlog = _pendingOpusPackets.splice(0, _pendingOpusPackets.length);
    backlog.forEach(packet => {
        _enqueueAppend(WebMMuxer.buildCluster(packet, _opusTimestampMs));
        _opusTimestampMs += OPUS_FRAME_MS;
    });

    const drained = _opusTimestampMs / OPUS_FRAME_MS;
    console.log('[MSE] SourceBuffer ready, init segment + ' + drained + ' queued packets appended');
}
|
||||||
|
|
||||||
|
// Queues one chunk (init segment or cluster) for the SourceBuffer and
// immediately tries to start an append. Only one append is in flight at a
// time, so the chunk may wait in _sourceBufferQueue.
function _enqueueAppend(data) {
    _sourceBufferQueue[_sourceBufferQueue.length] = data;
    _flushSourceBufferQueue();
}
|
||||||
|
// Starts appending the oldest queued chunk, provided a SourceBuffer exists,
// no append is in flight and the queue is not empty. The busy flag is
// cleared again by the 'updateend' listener, or right here if appendBuffer
// throws (the chunk that threw is not re-queued).
function _flushSourceBufferQueue() {
    const idle = _sourceBuffer && !_sourceBufferBusy;
    if (!idle || _sourceBufferQueue.length === 0) return;

    const chunk = _sourceBufferQueue.shift();
    _sourceBufferBusy = true;
    try {
        _sourceBuffer.appendBuffer(chunk);
    } catch (err) {
        console.error('[MSE] appendBuffer threw:', err && err.message);
        _sourceBufferBusy = false;
    }
}
|
||||||
|
|
||||||
|
// Feeds one Opus packet into the playback pipeline.
// - Ignored unless the current format is Opus (compression === 1).
// - If the SourceBuffer is ready, the packet is wrapped in a cluster at the
//   running timestamp, which then advances by OPUS_FRAME_MS.
// - Otherwise a copy is stashed in _pendingOpusPackets, keeping roughly the
//   most recent 3 seconds (oldest entries are dropped first).
function pushOpusPacket(opusBytes) {
    const isOpusStream = audioFormat && audioFormat.compression === 1;
    if (!isOpusStream) return;

    const sinkReady = _sourceBuffer && _initSegmentSent;
    if (!sinkReady) {
        const backlogLimit = Math.ceil(3000 / OPUS_FRAME_MS);
        while (_pendingOpusPackets.length >= backlogLimit) {
            _pendingOpusPackets.shift();
        }
        _pendingOpusPackets.push(new Uint8Array(opusBytes));
        return;
    }

    const cluster = WebMMuxer.buildCluster(opusBytes, _opusTimestampMs);
    _enqueueAppend(cluster);
    _opusTimestampMs += OPUS_FRAME_MS;
}
|
||||||
|
|
||||||
|
// Tears down the SourceBuffer so a new format/codec can be set up, and
// resets all muxing state (append queue, busy flag, init-segment flag,
// running timestamp, stashed packets).
// The MediaSource and the <audio> element are deliberately KEPT: they hold
// the play() permission obtained from the user gesture, and recreating
// either would need a fresh user tap on iOS. endOfStream() is never called
// because it moves the MediaSource to 'ended', after which
// addSourceBuffer() is no longer allowed.
function stopAllAudio() {
    const canDetach = _sourceBuffer && _mediaSource && _mediaSource.readyState === 'open';
    if (canDetach) {
        try {
            _mediaSource.removeSourceBuffer(_sourceBuffer);
        } catch (ignored) {}
    }
    _sourceBuffer = null;

    _sourceBufferQueue.splice(0, _sourceBufferQueue.length);
    _pendingOpusPackets.splice(0, _pendingOpusPackets.length);

    _sourceBufferBusy = false;
    _initSegmentSent = false;
    _opusTimestampMs = 0;
}
|
||||||
|
|
||||||
|
// Parses one audio payload (the WS frame with its 9-byte header already
// stripped) and forwards the Opus packet it carries to pushOpusPacket().
//
// Payload layout:
//   [hasFormat:1]
//   [AudioFormat:12][padding:1]   <- present only when hasFormat != 0
//   [opusPacket:N]
// AudioFormat (little-endian): channels:2 sampleRate:4 bitsPerSample:2
//   blockAlign:2 compression:1 reserved:1
//
// `data` is used as an ArrayBuffer (Uint8Array / DataView are built on it).
// A format header that changes sample rate, channel count or compression
// tears down the SourceBuffer and, if the MediaSource is already open,
// builds a new one. Non-Opus streams (compression !== 1) are rejected.
function handleAudioFrame(data) {
    if (!audioEnabled) return;

    const u8 = new Uint8Array(data);
    if (u8.length < 1) return;

    const FORMAT_BYTES = 12;
    // The header occupies the format struct plus one padding byte. Both are
    // consumed below, so both must be present for the header to be complete
    // (previously only the 12 struct bytes were checked).
    const HEADER_BYTES = FORMAT_BYTES + 1;

    let offset = 0;
    const hasFormat = u8[offset++];

    if (hasFormat) {
        if (u8.length < offset + HEADER_BYTES) {
            console.warn('[Audio] truncated format header');
            return;
        }
        const view = new DataView(data, offset, FORMAT_BYTES);
        const channels = view.getUint16(0, true);
        const sampleRate = view.getUint32(2, true);
        const bitsPerSample = view.getUint16(6, true);
        const blockAlign = view.getUint16(8, true);
        const compression = view.getUint8(10);
        offset += HEADER_BYTES;

        if (channels === 0 || channels > 8) { console.error('[Audio] bad channels:', channels); return; }
        if (sampleRate < 8000 || sampleRate > 48000) { console.error('[Audio] bad sampleRate:', sampleRate); return; }

        const fmt = { compression, channels, sampleRate, bitsPerSample, blockAlign };
        const needReinit = !audioFormat ||
            audioFormat.sampleRate !== fmt.sampleRate ||
            audioFormat.channels !== fmt.channels ||
            audioFormat.compression !== fmt.compression;
        audioFormat = fmt;

        if (needReinit) {
            if (fmt.compression !== 1) {
                console.error('[Audio] PCM payload not supported by web; set USING_OPUS=1 on client');
                stopAllAudio();
                return;
            }
            stopAllAudio();
            if (_mediaSource && _mediaSource.readyState === 'open') {
                _addSourceBufferAndInit();
            }
            // Otherwise the sourceopen handler picks up audioFormat when it fires.
            console.log('[Audio] Format → ch=' + fmt.channels +
                ' sr=' + fmt.sampleRate + ' compression=' + fmt.compression);
        }
    }

    // No usable Opus format yet, or nothing after the header: drop the frame.
    if (!audioFormat || audioFormat.compression !== 1) return;
    if (u8.length <= offset) return;

    // Everything that remains is one variable-length Opus packet.
    const opusBytes = new Uint8Array(data, offset);
    pushOpusPacket(opusBytes);
}
|
||||||
|
|
||||||
function handleBinaryFrame(data) {
|
function handleBinaryFrame(data) {
|
||||||
// 终端输出帧:4 字节 magic 'TRM1' (0x54 0x52 0x4D 0x31) → 转发到 xterm。
|
// 终端输出帧:4 字节 magic 'TRM1' (0x54 0x52 0x4D 0x31) → 转发到 xterm。
|
||||||
// 视频帧首 4 字节是 deviceID (uint32 LE),撞这个具体值的概率极低;4 字节 magic
|
|
||||||
// 比单字节前缀安全得多,无需额外的状态校验。
|
|
||||||
const u8 = new Uint8Array(data);
|
const u8 = new Uint8Array(data);
|
||||||
if (u8.length >= 4 &&
|
if (u8.length >= 4 &&
|
||||||
u8[0] === 0x54 && u8[1] === 0x52 && u8[2] === 0x4D && u8[3] === 0x31) {
|
u8[0] === 0x54 && u8[1] === 0x52 && u8[2] === 0x4D && u8[3] === 0x31) {
|
||||||
if (termState && termState.term) termState.term.write(u8.subarray(4));
|
if (termState && termState.term) termState.term.write(u8.subarray(4));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Audio frame: frameType byte at offset 4 indicates audio (96 = TOKEN_SCREEN_AUDIO)
|
||||||
|
// Full frame format: [deviceID:4][frameType:1][dataLen:4][hasFormat:1][AudioFormat?][audio_data...]
|
||||||
|
if (u8.length > 4 && u8[4] === 96) {
|
||||||
|
// Skip frame header (9 bytes) and pass audio payload to handler
|
||||||
|
const audioPayload = data.slice(9);
|
||||||
|
handleAudioFrame(audioPayload);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Video frame: [deviceID:4][frameType:1][dataLen:4][videoData...]
|
||||||
const view = new DataView(data);
|
const view = new DataView(data);
|
||||||
const deviceId = view.getUint32(0, true);
|
const deviceId = view.getUint32(0, true);
|
||||||
const frameType = view.getUint8(4);
|
const frameType = view.getUint8(4);
|
||||||
|
|||||||
Reference in New Issue
Block a user