Feature(audio): forward client PCM to web viewers with continuous playback

This commit is contained in:
yuanyuanxiang
2026-06-02 01:56:10 +02:00
parent da024fb3fb
commit 9aca587654
4 changed files with 505 additions and 5 deletions

View File

@@ -18,6 +18,7 @@
#include <md5.h>
#include <cstdint> // for uint16_t
#include <vector>
#include <mutex> // for std::mutex, std::lock_guard
#include "WebService.h"
// 文件接收消息数据结构
@@ -3494,9 +3495,53 @@ void CScreenSpyDlg::StopAudioPlayback()
#endif
m_nAudioCompression = 0;
// 重置网页端音频格式标志(线程安全的清理)
{
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
m_bAudioFormatSent = FALSE;
memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
}
Mprintf("[ScreenSpy] 音频播放已停止\n");
}
// Switches audio off on behalf of a web command.
// Follows the "disable" half of the IDM_AUDIO_TOGGLE handling (per the
// original note); calling it while audio is already off does nothing.
void CScreenSpyDlg::DisableAudio()
{
    // Already disabled: no control message, no state change, no log line.
    if (!m_Settings.AudioEnabled) {
        return;
    }

    m_Settings.AudioEnabled = FALSE;
    SendAudioCtrl(CYCLEAUDIO_DISABLE, 1);
    StopAudioPlayback();

    // Drop the cached web-side format state while holding the web-audio mutex,
    // so the next audio packet forwarded to the web carries a format header again.
    {
        std::lock_guard<std::mutex> guard(m_AudioWebMutex);
        memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
        m_bAudioFormatSent = FALSE;
    }

    Mprintf("[Audio Web] 禁用音频(来自 web 命令)\n");
}
// Switches audio on on behalf of a web command.
// Follows the "enable" half of the IDM_AUDIO_TOGGLE handling (per the
// original note); calling it while audio is already on does nothing.
void CScreenSpyDlg::EnableAudio()
{
    // Already enabled: no control message, no state change, no log line.
    if (m_Settings.AudioEnabled) {
        return;
    }

    m_Settings.AudioEnabled = TRUE;
    SendAudioCtrl(CYCLEAUDIO_ENABLE, 1);

    // Forget the cached web-side format (under the web-audio mutex) so the
    // format information is sent again with the next forwarded packet.
    {
        std::lock_guard<std::mutex> guard(m_AudioWebMutex);
        memset(&m_AudioFormatWeb, 0, sizeof(m_AudioFormatWeb));
        m_bAudioFormatSent = FALSE;
    }

    Mprintf("[Audio Web] 启用音频(来自 web 命令)\n");
}
void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
{
if (len < 1) return;
@@ -3535,12 +3580,20 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
UINT32 audioLen = len - offset;
if (audioLen == 0) return;
// 保存"上线格式"字节Opus 模式下是原始压缩包PCM 模式下是原始 PCM
// 这就是要透传给 web 的数据 —— web 端用 MSE+WebM 直接播 Opus
// 不需要服务器解码后再发 PCM。本地 waveOut 仍然需要 PCM因此下面
// 还是会解码一遍。
BYTE* pWireData = pAudioData;
UINT32 wireLen = audioLen;
BYTE wireCompression = (BYTE)m_nAudioCompression;
// 帧对齐参数
DWORD blockAlign = m_AudioFormat.nBlockAlign;
if (blockAlign == 0) blockAlign = 4; // 默认 stereo 16-bit
#if USING_OPUS
// Opus 解码
// Opus 解码(仅供本地 waveOut 使用web 仍会收到原始压缩包)
if (m_nAudioCompression == AUDIO_COMPRESS_OPUS && m_pOpusDecoder && m_pOpusDecodeBuffer) {
COpusDecoder* pDecoder = (COpusDecoder*)m_pOpusDecoder;
int decodedSamples = pDecoder->Decode(pAudioData, audioLen, m_pOpusDecodeBuffer, 960 * 2);
@@ -3583,10 +3636,104 @@ void CScreenSpyDlg::OnAudioData(BYTE* pData, UINT32 len)
Mprintf("[Audio] 预缓冲完成,开始播放 (缓冲: %u bytes)\n", m_nRingDataLen);
}
// 发送上线格式Opus 压缩包 / 或原始 PCM到网页
SendAudioToWeb(pWireData, wireLen, &m_AudioFormat, wireCompression);
// 填充可用的 waveOut 缓冲区
FeedAudioBuffers();
}
void CScreenSpyDlg::SendAudioToWeb(const BYTE* pAudioData, UINT32 len, const WAVEFORMATEX* pFormat, BYTE compression)
{
if (!WebService().IsRunning()) return;
if (!pAudioData || len == 0) return;
if (!m_ContextObject) return;
if (!m_Settings.AudioEnabled) return;
std::vector<BYTE> packet;
BOOL formatChanged = FALSE;
{
std::lock_guard<std::mutex> lock(m_AudioWebMutex);
if (!m_bAudioFormatSent) {
formatChanged = TRUE;
} else if (pFormat && (
pFormat->nChannels != m_AudioFormatWeb.channels ||
pFormat->nSamplesPerSec != m_AudioFormatWeb.sampleRate ||
pFormat->wBitsPerSample != m_AudioFormatWeb.bitsPerSample ||
compression != m_AudioFormatWeb.compression)) {
formatChanged = TRUE;
}
// 第1字节是否包含格式信息
packet.push_back(formatChanged ? 1 : 0);
if (formatChanged && pFormat) {
if (pFormat->nChannels < 1 || pFormat->nChannels > 8 ||
pFormat->nSamplesPerSec < 8000 || pFormat->nSamplesPerSec > 48000 ||
pFormat->wBitsPerSample != 16) {
Mprintf("[Audio Web] Invalid format: ch=%d, sr=%d, bps=%d\n",
pFormat->nChannels, pFormat->nSamplesPerSec, pFormat->wBitsPerSample);
return;
}
// 12-byte AudioFormat 结构commands.h, pack(1)
AudioFormat fmt;
fmt.channels = (WORD)pFormat->nChannels;
fmt.sampleRate = (DWORD)pFormat->nSamplesPerSec;
fmt.bitsPerSample = (WORD)pFormat->wBitsPerSample;
// blockAlign 对 Opus 是 informational 的(包是变长压缩),按 PCM 推算填上即可。
fmt.blockAlign = (WORD)(fmt.channels * fmt.bitsPerSample / 8);
fmt.compression = compression;
fmt.reserved = 0;
BYTE* pFmt = (BYTE*)&fmt;
packet.insert(packet.end(), pFmt, pFmt + sizeof(fmt));
// padding byte: 保持后续音频数据落在偶数偏移上PCM 模式下 web 端
// 需要 Int16 对齐Opus 模式无所谓但保留兼容旧 web 解析)
packet.push_back(0);
m_AudioFormatWeb = fmt;
m_bAudioFormatSent = TRUE;
Mprintf("[Audio Web] Format sent: ch=%d, sr=%d Hz, compression=%d\n",
fmt.channels, fmt.sampleRate, fmt.compression);
}
} // 释放 mutex
// 添加音频数据(此操作不需要 mutex因为我们已经复制了所有需要的共享状态
packet.insert(packet.end(), pAudioData, pAudioData + len);
// 构造完整帧:[DeviceID:4][FrameType:1][DataLen:4][audio payload...]
// FrameType: 96 = TOKEN_SCREEN_AUDIO用于在网页端识别音频
std::vector<BYTE> frame;
uint64_t deviceID = GetClientID();
uint32_t audioDataLen = (uint32_t)packet.size();
uint8_t frameType = 96; // TOKEN_SCREEN_AUDIO
// [DeviceID:4] little-endian
frame.push_back((BYTE)(deviceID & 0xFF));
frame.push_back((BYTE)((deviceID >> 8) & 0xFF));
frame.push_back((BYTE)((deviceID >> 16) & 0xFF));
frame.push_back((BYTE)((deviceID >> 24) & 0xFF));
// [FrameType:1]
frame.push_back(frameType);
// [DataLen:4] little-endian
frame.push_back((BYTE)(audioDataLen & 0xFF));
frame.push_back((BYTE)((audioDataLen >> 8) & 0xFF));
frame.push_back((BYTE)((audioDataLen >> 16) & 0xFF));
frame.push_back((BYTE)((audioDataLen >> 24) & 0xFF));
// [audio payload]
frame.insert(frame.end(), packet.begin(), packet.end());
// 广播到所有网页客户端
WebService().BroadcastH264Frame(deviceID, frame.data(), frame.size());
}
void CScreenSpyDlg::FeedAudioBuffers()
{
if (!m_bAudioPlaying || !m_hWaveOut || !m_pRingBuf) return;