Feature: Implement H.264 and AV1 hardware encoding for remote control

Remark: The FFmpeg static libraries need to be updated for this to take effect
This commit is contained in:
yuanyuanxiang
2026-05-28 11:41:33 +02:00
parent d1aa7a2c02
commit 8c7f612449
30 changed files with 2113 additions and 68 deletions

View File

@@ -290,11 +290,13 @@ func (h *MyHandler) handleBitmapInfo(ctx *connection.Context, data []byte) {
// handleScreenFrame relays one TOKEN_FIRSTSCREEN / TOKEN_NEXTSCREEN packet
// to all browsers watching this device. The on-the-wire packet starts with
// the token byte then a small fixed header (algorithm, cursor pos, cursor
// index) before the H.264 NAL payload. The browser-facing WS packet uses
// the C++-compatible layout: [deviceID:4 LE][frameType:1][dataLen:4 LE][H264:N].
// index) before the video payload (H.264 Annex B or AV1 OBU). The browser-
// facing WS packet uses the C++-compatible layout:
// [deviceID:4 LE][frameType:1][dataLen:4 LE][Video:N].
//
// alwaysKey=true is used for TOKEN_FIRSTSCREEN (always IDR by construction);
// TOKEN_NEXTSCREEN is keyframe iff the NAL stream contains a 5/7/8 unit.
// TOKEN_NEXTSCREEN keyframe detection is delegated to protocol.IsAnyKeyframe
// which sniffs the codec from the first byte (0x00 → H.264, else AV1).
func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwaysKey bool) {
deviceID := h.hub.ScreenDeviceID(ctx)
if deviceID == "" {
@@ -310,8 +312,10 @@ func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwa
// browser sees cursor updates even if we end up dropping frames later.
h.hub.PublishCursor(deviceID, data[10])
h264 := data[skip:]
isKey := alwaysKey || protocol.IsH264Keyframe(h264)
video := data[skip:]
// 按首字节嗅探 H.264 / AV1,分发到对应的 keyframe 探测器。浏览器侧用同样方式
// 决定 VideoDecoder codec string,因此 server 不必感知客户端实际编码器。
isKey := alwaysKey || protocol.IsAnyKeyframe(video)
// Build the WS packet exactly as the C++ ScreenSpyDlg does — the front-end
// decoder reads these offsets directly.
@@ -321,13 +325,13 @@ func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwa
if isKey {
frameType = 1
}
dataLen := uint32(len(h264))
dataLen := uint32(len(video))
packet := make([]byte, 9+len(h264))
packet := make([]byte, 9+len(video))
binary.LittleEndian.PutUint32(packet[0:4], idLow)
packet[4] = frameType
binary.LittleEndian.PutUint32(packet[5:9], dataLen)
copy(packet[9:], h264)
copy(packet[9:], video)
h.hub.PublishScreenFrame(deviceID, packet, isKey)
}