Feature: Implement H.264 and AV1 hardware encoding for remote control

Remark: The FFmpeg static libraries must be updated for this change to take effect
This commit is contained in:
yuanyuanxiang
2026-05-28 11:41:33 +02:00
parent d1aa7a2c02
commit 8c7f612449
30 changed files with 2113 additions and 68 deletions

View File

@@ -43,6 +43,66 @@ IMPLEMENT_DYNAMIC(CScreenSpyDlg, CDialog)
#define TIMER_ID 132
// H.264 Annex B keyframe probe.
//
// Scans the buffer for start codes (00 00 01 or 00 00 00 01) and inspects the
// low 5 bits of the NAL header byte that follows each one. A NAL type of
// 5 (IDR), 7 (SPS) or 8 (PPS) marks the buffer as a keyframe.
//
// data: first byte of the encoded buffer.
// len:  number of readable bytes at data.
// Returns true when at least one such NAL unit is found; false otherwise,
// including for buffers too short to hold a start code plus a NAL header.
static bool IsH264Keyframe(const uint8_t* data, size_t len)
{
    // The shortest possible match is a 3-byte start code plus one NAL header
    // byte, so the scan must reach i == len - 4. Each start-code form is
    // bounds-checked on its own below, so a 3-byte start code sitting in the
    // last four bytes of the buffer is still examined.
    for (size_t i = 0; i + 3 < len; ++i) {
        if (data[i] != 0 || data[i + 1] != 0) {
            continue;
        }

        size_t nalOffset = 0;
        if (data[i + 2] == 1) {
            // 00 00 01: header at i + 3, in range thanks to the loop guard.
            nalOffset = i + 3;
        } else if (data[i + 2] == 0 && i + 4 < len && data[i + 3] == 1) {
            // 00 00 00 01: header at i + 4, in range thanks to the explicit check.
            nalOffset = i + 4;
        } else {
            continue;
        }

        const uint8_t nalType = data[nalOffset] & 0x1F;
        if (nalType == 5 || nalType == 7 || nalType == 8) {
            return true;
        }
    }
    return false;
}
// AV1 OBU keyframe probe.
//
// Walks the chain of OBUs in the buffer and reports a keyframe as soon as an
// OBU_SEQUENCE_HEADER (type 1) is seen. The detection relies on the encoder
// emitting a sequence header ahead of every IDR, which keeps the result
// aligned with the H.264 NAL 5/7/8 rule used for Annex B streams.
//
// data: first byte of the encoded buffer.
// len:  number of readable bytes at data.
// Returns false when no sequence header is found or when the chain cannot be
// followed (truncated header, OBU without a size field, size past the end).
static bool IsAv1Keyframe(const uint8_t* data, size_t len)
{
    const uint8_t kObuSequenceHeader = 1;
    const int kMaxLeb128Bytes = 8;

    for (size_t cursor = 0; cursor < len; ) {
        const uint8_t header = data[cursor++];
        if (((header >> 3) & 0x0F) == kObuSequenceHeader) {
            return true;
        }

        // Optional one-byte extension header follows the main header byte.
        if ((header & 0x04) != 0) {
            if (cursor >= len) {
                return false;
            }
            ++cursor;
        }

        // Without a size field the OBU runs to the end of the packet, so
        // there is nothing further to parse.
        if ((header & 0x02) == 0) {
            return false;
        }

        // Decode the LEB128 payload size, 7 bits per byte, low group first.
        uint64_t payloadSize = 0;
        int shift = 0;
        for (int n = 0; n < kMaxLeb128Bytes; ++n, shift += 7) {
            if (cursor >= len) {
                return false;
            }
            const uint8_t byte = data[cursor++];
            payloadSize |= static_cast<uint64_t>(byte & 0x7F) << shift;
            if ((byte & 0x80) == 0) {
                break;
            }
        }

        if (cursor + payloadSize > len) {
            return false;
        }
        cursor += static_cast<size_t>(payloadSize);
    }
    return false;
}
// Codec-agnostic keyframe probe driven by first-byte sniffing.
//
// An H.264 Annex B buffer always begins with 0x00 (the start code). An AV1
// OBU header has bit 7 clear and obu_type 1-15 in bits 3..6 (typical values
// 0x08-0x78), so its first byte is never 0x00. One byte is therefore enough
// to tell the two bitstreams apart, with no protocol field or negotiation
// with the encoder side.
//
// Returns false for an empty buffer; otherwise forwards to the H.264 or AV1
// keyframe detector selected by data[0].
static bool IsAnyKeyframe(const uint8_t* data, size_t len)
{
    if (len == 0) {
        return false;
    }
    if (data[0] == 0x00) {
        return IsH264Keyframe(data, len);
    }
    return IsAv1Keyframe(data, len);
}
// 静态成员变量定义
int CScreenSpyDlg::s_nFastStretch = -1; // -1 表示未初始化
@@ -675,6 +735,12 @@ BOOL CScreenSpyDlg::OnInitDialog()
// 音频菜单项
SysMenu->AppendMenuL(MF_STRING, IDM_AUDIO_TOGGLE, "系统音频(&U)");
SysMenu->CheckMenuItem(IDM_AUDIO_TOGGLE, m_Settings.AudioEnabled ? MF_CHECKED : MF_UNCHECKED);
SysMenu->AppendMenuL(MF_STRING, IDM_ENABLE_H264_HARD, "启用 H264 硬编码");
SysMenu->CheckMenuItem(IDM_ENABLE_H264_HARD, m_Settings.EncodeLevel == LEVEL_H264_HARD ? MF_CHECKED : MF_UNCHECKED);
SysMenu->EnableMenuItem(IDM_ENABLE_H264_HARD, m_Settings.EncodeLevel == LEVEL_AV1_HARD ? MF_GRAYED : MF_ENABLED);
SysMenu->AppendMenuL(MF_STRING, IDM_ENABLE_AV1_HARD, "启用 AV1 硬编码");
SysMenu->CheckMenuItem(IDM_ENABLE_AV1_HARD, m_Settings.EncodeLevel == LEVEL_AV1_HARD ? MF_CHECKED : MF_UNCHECKED);
SysMenu->EnableMenuItem(IDM_ENABLE_AV1_HARD, m_Settings.EncodeLevel == LEVEL_H264_HARD ? MF_GRAYED : MF_ENABLED);
// 初始化勾选状态
UpdateQualityMenuCheck(SysMenu);
@@ -1410,27 +1476,11 @@ VOID CScreenSpyDlg::DrawNextScreenDiff(bool keyFrame)
bChange = TRUE;
}
}
// Broadcast H264 frame to web clients (only for Web session dialogs)
// Format: [DeviceID:4][FrameType:1][DataLen:4][H264Data:N]
// Broadcast video frame to web clients (only for Web session dialogs)
// Format: [DeviceID:4][FrameType:1][DataLen:4][VideoData:N]
// 浏览器侧按首字节嗅探区分 H.264 / AV1因此 packet 内不需要 codec 字段。
if (m_bIsWebSession && NextScreenLength > 0 && WebService().IsRunning()) {
// Detect H264 keyframe by checking NAL unit type
// NAL type 5 = IDR slice (keyframe), NAL type 7 = SPS, NAL type 8 = PPS
bool isKeyFrame = false;
LPBYTE h264Data = (LPBYTE)NextScreenData;
for (ULONG i = 0; i + 4 < NextScreenLength; i++) {
// Look for start code: 0x00 0x00 0x00 0x01 or 0x00 0x00 0x01
if ((h264Data[i] == 0 && h264Data[i+1] == 0 && h264Data[i+2] == 0 && h264Data[i+3] == 1) ||
(h264Data[i] == 0 && h264Data[i+1] == 0 && h264Data[i+2] == 1)) {
int nalOffset = (h264Data[i+2] == 1) ? i + 3 : i + 4;
if (nalOffset < (int)NextScreenLength) {
int nalType = h264Data[nalOffset] & 0x1F;
if (nalType == 5 || nalType == 7 || nalType == 8) {
isKeyFrame = true;
break;
}
}
}
}
bool isKeyFrame = IsAnyKeyframe((const uint8_t*)NextScreenData, NextScreenLength);
std::vector<uint8_t> packet(4 + 1 + 4 + NextScreenLength);
uint32_t deviceIdLow = (uint32_t)(m_ClientID & 0xFFFFFFFF);
@@ -2134,6 +2184,26 @@ void CScreenSpyDlg::OnSysCommand(UINT nID, LPARAM lParam)
}
break;
}
case IDM_ENABLE_H264_HARD: {
m_Settings.EncodeLevel = m_Settings.EncodeLevel ? LEVEL_H264_SOFT : LEVEL_H264_HARD;
SysMenu->CheckMenuItem(IDM_ENABLE_H264_HARD, m_Settings.EncodeLevel == LEVEL_H264_HARD ? MF_CHECKED : MF_UNCHECKED);
SysMenu->CheckMenuItem(IDM_ENABLE_AV1_HARD, m_Settings.EncodeLevel == LEVEL_AV1_HARD ? MF_CHECKED : MF_UNCHECKED);
SysMenu->EnableMenuItem(IDM_ENABLE_H264_HARD, m_Settings.EncodeLevel == LEVEL_AV1_HARD ? MF_GRAYED : MF_ENABLED);
SysMenu->EnableMenuItem(IDM_ENABLE_AV1_HARD, m_Settings.EncodeLevel == LEVEL_H264_HARD ? MF_GRAYED : MF_ENABLED);
BYTE bToken[] = {COMMAND_ENCODE_LEVEL, m_Settings.EncodeLevel };
m_ContextObject->Send2Client(bToken, sizeof(bToken));
break;
}
case IDM_ENABLE_AV1_HARD: {
m_Settings.EncodeLevel = m_Settings.EncodeLevel ? LEVEL_H264_SOFT : LEVEL_AV1_HARD;
SysMenu->CheckMenuItem(IDM_ENABLE_H264_HARD, m_Settings.EncodeLevel == LEVEL_H264_HARD ? MF_CHECKED : MF_UNCHECKED);
SysMenu->CheckMenuItem(IDM_ENABLE_AV1_HARD, m_Settings.EncodeLevel == LEVEL_AV1_HARD ? MF_CHECKED : MF_UNCHECKED);
SysMenu->EnableMenuItem(IDM_ENABLE_H264_HARD, m_Settings.EncodeLevel == LEVEL_AV1_HARD ? MF_GRAYED : MF_ENABLED);
SysMenu->EnableMenuItem(IDM_ENABLE_AV1_HARD, m_Settings.EncodeLevel == LEVEL_H264_HARD ? MF_GRAYED : MF_ENABLED);
BYTE bToken[] = { COMMAND_ENCODE_LEVEL, m_Settings.EncodeLevel };
m_ContextObject->Send2Client(bToken, sizeof(bToken));
break;
}
}
__super::OnSysCommand(nID, lParam);

View File

@@ -135,6 +135,8 @@ enum {
IDM_RESTORE_CONSOLE, // RDP会话归位
IDM_RESET_VIRTUAL_DESKTOP, // 重置虚拟桌面
IDM_AUDIO_TOGGLE, // 音频开关
IDM_ENABLE_H264_HARD,
IDM_ENABLE_AV1_HARD,
};
// 状态信息窗口 - 全屏时显示帧率/速度/质量

View File

@@ -1892,3 +1892,5 @@ FRPC Զ
不支持的位深度需要24位或32位=Bitmap depth is unsupported
未安装x264编解码器 \n下载地址https://sourceforge.net/projects/x264vfw=x264 Encoder is required \nDownload viahttps://sourceforge.net/projects/x264vfw
创建AVI文件失败=Create AVI file failed
启用 H264 硬编码=Enable HW H264 Encoding
启用 AV1 硬编码=Enable HW AV1 Encoding

View File

@@ -1883,3 +1883,5 @@ FRPC Զ
不支持的位深度需要24位或32位=不支持的位深度需要24位或32位
未安装x264编解码器 \n下载地址https://sourceforge.net/projects/x264vfw=未安装x264编解码器 \n下载地址https://sourceforge.net/projects/x264vfw
创建AVI文件失败=创建AVI文件失败
启用 H264 硬编码=启用 H264 硬编码
启用 AV1 硬编码=启用 AV1 硬编码

View File

@@ -290,11 +290,13 @@ func (h *MyHandler) handleBitmapInfo(ctx *connection.Context, data []byte) {
// handleScreenFrame relays one TOKEN_FIRSTSCREEN / TOKEN_NEXTSCREEN packet
// to all browsers watching this device. The on-the-wire packet starts with
// the token byte then a small fixed header (algorithm, cursor pos, cursor
// index) before the H.264 NAL payload. The browser-facing WS packet uses
// the C++-compatible layout: [deviceID:4 LE][frameType:1][dataLen:4 LE][H264:N].
// index) before the video payload (H.264 Annex B or AV1 OBU). The browser-
// facing WS packet uses the C++-compatible layout:
// [deviceID:4 LE][frameType:1][dataLen:4 LE][Video:N].
//
// alwaysKey=true is used for TOKEN_FIRSTSCREEN (always IDR by construction);
// TOKEN_NEXTSCREEN is keyframe iff the NAL stream contains a 5/7/8 unit.
// TOKEN_NEXTSCREEN keyframe detection is delegated to protocol.IsAnyKeyframe
// which sniffs the codec from the first byte (0x00 → H.264, else AV1).
func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwaysKey bool) {
deviceID := h.hub.ScreenDeviceID(ctx)
if deviceID == "" {
@@ -310,8 +312,10 @@ func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwa
// browser sees cursor updates even if we end up dropping frames later.
h.hub.PublishCursor(deviceID, data[10])
h264 := data[skip:]
isKey := alwaysKey || protocol.IsH264Keyframe(h264)
video := data[skip:]
// 按首字节嗅探 H.264 / AV1分发到对应的 keyframe 探测器。浏览器侧用同样方式
// 决定 VideoDecoder codec string因此 server 不必感知客户端实际编码器。
isKey := alwaysKey || protocol.IsAnyKeyframe(video)
// Build the WS packet exactly as the C++ ScreenSpyDlg does — the front-end
// decoder reads these offsets directly.
@@ -321,13 +325,13 @@ func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwa
if isKey {
frameType = 1
}
dataLen := uint32(len(h264))
dataLen := uint32(len(video))
packet := make([]byte, 9+len(h264))
packet := make([]byte, 9+len(video))
binary.LittleEndian.PutUint32(packet[0:4], idLow)
packet[4] = frameType
binary.LittleEndian.PutUint32(packet[5:9], dataLen)
copy(packet[9:], h264)
copy(packet[9:], video)
h.hub.PublishScreenFrame(deviceID, packet, isKey)
}

View File

@@ -350,6 +350,69 @@ func IsH264Keyframe(data []byte) bool {
return false
}
// IsAnyKeyframe reports whether data holds a key frame, picking the detector
// from the first byte. An H.264 Annex B buffer always opens with 0x00 (start
// code prefix), while an AV1 OBU header has bit7=0 and obu_type in [1,15] in
// bits[3:6], placing its first byte in [0x08,0x78] and never at 0x00. This
// keeps the server codec-agnostic, so H.264 and AV1 sessions can be relayed
// to browsers side by side. An empty slice is never a key frame.
func IsAnyKeyframe(data []byte) bool {
	switch {
	case len(data) == 0:
		return false
	case data[0] == 0x00:
		return IsH264Keyframe(data)
	default:
		return IsAv1Keyframe(data)
	}
}
// IsAv1Keyframe follows the OBU chain in data and returns true at the first
// OBU_SEQUENCE_HEADER (type 1). The check relies on the encoder placing a
// sequence header in front of every IDR, so finding one is treated as "this
// packet contains a key frame". It mirrors the C++ IsAv1Keyframe helper in
// ScreenSpyDlg.cpp.
//
// AV1 OBU header byte layout: 0|type:4|ext:1|size:1|reserved:1
//
// The walk gives up and returns false when the chain cannot be followed:
// a truncated header, an OBU without a size field, or a size that runs past
// the end of data.
func IsAv1Keyframe(data []byte) bool {
	const (
		obuSequenceHeader = 1
		maxLeb128Bits     = 7 * 8 // at most eight 7-bit groups
	)

	total := len(data)
	for cursor := 0; cursor < total; {
		header := data[cursor]
		cursor++
		if (header>>3)&0x0F == obuSequenceHeader {
			return true
		}

		// Optional one-byte extension header.
		if header&0x04 != 0 {
			if cursor >= total {
				return false
			}
			cursor++
		}

		// An unsized OBU runs to the end of the packet; nothing more to parse.
		if header&0x02 == 0 {
			return false
		}

		// LEB128 payload size: 7 bits per byte, least significant group first.
		var payload uint64
		for shift := uint(0); shift < maxLeb128Bits; shift += 7 {
			if cursor >= total {
				return false
			}
			b := data[cursor]
			cursor++
			payload |= uint64(b&0x7F) << shift
			if b&0x80 == 0 {
				break
			}
		}

		if uint64(cursor)+payload > uint64(total) {
			return false
		}
		cursor += int(payload)
	}
	return false
}
// LOGIN_INFOR structure size and offsets (matching C++ struct with default alignment)
// Note: C++ struct uses default alignment (4-byte for uint32/int)
const (

View File

@@ -1624,8 +1624,14 @@
},
error: (e) => { console.error('Decoder error:', e); needKeyframe = true; }
});
// codec string 由首帧嗅探得到的 currentCodec 决定:
// 'avc' → 'avc1.42E01E' (H.264 Constrained Baseline Level 3.0)
// 'av1' → 'av01.0.08M.08' (AV1 Main Profile Level 4.0 8-bit)
// 客户端硬件支持 AV1 编码时浏览器收到 AV1 流fallback 到 H.264 时浏览器
// 收到 H.264 流。两条路径在同一前端代码中并存,运维侧无须感知。
const codecStr = currentCodec === 'av1' ? 'av01.0.08M.08' : 'avc1.42E01E';
decoder.configure({
codec: 'avc1.42E01E',
codec: codecStr,
codedWidth: width,
codedHeight: height,
optimizeForLatency: true
@@ -1634,6 +1640,14 @@
let decoderWidth = 0, decoderHeight = 0, needKeyframe = false;
let decodeTimestamp = 0; // Monotonically increasing timestamp for decoder
let currentCodec = null; // 'avc' | 'av1' | nullinitDecoder 读取)
// First-byte codec sniffing. An H.264 Annex B start code always begins with
// 0x00, whereas an AV1 OBU header has bit7=0 and obu_type in [1,15] in
// bits[3:6], so its first byte lies in [0x08,0x78] and is never 0x00.
// A single byte is enough to tell the two apart.
// Returns 'avc' when the first byte is 0x00, otherwise 'av1'.
function detectCodec(videoBytes) {
    if (videoBytes[0] === 0x00) {
        return 'avc';
    }
    return 'av1';
}
function handleBinaryFrame(data) {
// 终端输出帧4 字节 magic 'TRM1' (0x54 0x52 0x4D 0x31) → 转发到 xterm。
@@ -1650,11 +1664,26 @@
const frameType = view.getUint8(4);
const dataLen = view.getUint32(5, true);
const isKeyframe = frameType === 1;
const videoData = new Uint8Array(data, 9, dataLen);
const frameCodec = dataLen > 0 ? detectCodec(videoData) : currentCodec;
// codec 切换(客户端硬件 fallback、首次连接等必须等到 keyframe 才能
// 重建 decoderdelta 帧没有 SPS/PPS 或 SEQ HDR无法独立初始化。
if (decoder && currentCodec && frameCodec !== currentCodec) {
if (!isKeyframe) {
needKeyframe = true;
return;
}
try { decoder.close(); } catch (e) {}
decoder = null;
currentCodec = null;
}
// If decoder is closed or errored, wait for keyframe to reinitialize
if (!decoder || decoder.state === 'closed') {
if (isKeyframe && decoderWidth > 0) {
console.log('Reinitializing decoder on keyframe');
currentCodec = frameCodec;
console.log('Reinitializing decoder on keyframe, codec=' + currentCodec);
initDecoder(decoderWidth, decoderHeight);
needKeyframe = false;
} else {
@@ -1669,7 +1698,6 @@
if (needKeyframe && !isKeyframe) return;
if (isKeyframe) needKeyframe = false;
const h264Data = new Uint8Array(data, 9, dataLen);
try {
// Check decoder queue to avoid overwhelming it (but never skip keyframes)
if (!isKeyframe && decoder.decodeQueueSize > 10) {
@@ -1679,7 +1707,7 @@
decoder.decode(new EncodedVideoChunk({
type: isKeyframe ? 'key' : 'delta',
timestamp: decodeTimestamp++,
data: h264Data
data: videoData
}));
} catch (e) {
console.error('Decode error:', e);