Feature: Implement H.264 and AV1 hardware encoding for remote control

Remark: Need to update FFmpeg static libraries to take effect
This commit is contained in:
yuanyuanxiang
2026-05-28 11:41:33 +02:00
parent d1aa7a2c02
commit 8c7f612449
30 changed files with 2113 additions and 68 deletions

View File

@@ -290,11 +290,13 @@ func (h *MyHandler) handleBitmapInfo(ctx *connection.Context, data []byte) {
// handleScreenFrame relays one TOKEN_FIRSTSCREEN / TOKEN_NEXTSCREEN packet
// to all browsers watching this device. The on-the-wire packet starts with
// the token byte then a small fixed header (algorithm, cursor pos, cursor
// index) before the H.264 NAL payload. The browser-facing WS packet uses
// the C++-compatible layout: [deviceID:4 LE][frameType:1][dataLen:4 LE][H264:N].
// index) before the video payload (H.264 Annex B or AV1 OBU). The browser-
// facing WS packet uses the C++-compatible layout:
// [deviceID:4 LE][frameType:1][dataLen:4 LE][Video:N].
//
// alwaysKey=true is used for TOKEN_FIRSTSCREEN (always IDR by construction);
// TOKEN_NEXTSCREEN is keyframe iff the NAL stream contains a 5/7/8 unit.
// TOKEN_NEXTSCREEN keyframe detection is delegated to protocol.IsAnyKeyframe
// which sniffs the codec from the first byte (0x00 → H.264, else AV1).
func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwaysKey bool) {
deviceID := h.hub.ScreenDeviceID(ctx)
if deviceID == "" {
@@ -310,8 +312,10 @@ func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwa
// browser sees cursor updates even if we end up dropping frames later.
h.hub.PublishCursor(deviceID, data[10])
h264 := data[skip:]
isKey := alwaysKey || protocol.IsH264Keyframe(h264)
video := data[skip:]
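// Sniff H.264 vs. AV1 from the first byte and dispatch to the matching keyframe detector.
// The browser picks its VideoDecoder codec string the same way, so the server does not need to know which encoder the client actually uses.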
isKey := alwaysKey || protocol.IsAnyKeyframe(video)
// Build the WS packet exactly as the C++ ScreenSpyDlg does — the front-end
// decoder reads these offsets directly.
@@ -321,13 +325,13 @@ func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwa
if isKey {
frameType = 1
}
dataLen := uint32(len(h264))
dataLen := uint32(len(video))
packet := make([]byte, 9+len(h264))
packet := make([]byte, 9+len(video))
binary.LittleEndian.PutUint32(packet[0:4], idLow)
packet[4] = frameType
binary.LittleEndian.PutUint32(packet[5:9], dataLen)
copy(packet[9:], h264)
copy(packet[9:], video)
h.hub.PublishScreenFrame(deviceID, packet, isKey)
}

View File

@@ -350,6 +350,69 @@ func IsH264Keyframe(data []byte) bool {
return false
}
// IsAnyKeyframe reports whether data holds a keyframe, picking the detector
// from the first byte of the payload:
//
//   - empty input is never a keyframe;
//   - a leading 0x00 is taken as the start-code prefix of an H.264 Annex B
//     stream and is handed to IsH264Keyframe;
//   - any other leading byte is handed to IsAv1Keyframe.
//
// The split relies on an AV1 OBU header storing obu_type in bits 3-6: as long
// as the type is non-zero the header byte cannot be 0x00, whatever the
// extension and has-size flag bits are. Keeping the sniffing here lets callers
// stay codec-agnostic, so H.264 and AV1 sessions can be relayed side by side.
func IsAnyKeyframe(data []byte) bool {
	switch {
	case len(data) == 0:
		return false
	case data[0] == 0x00:
		return IsH264Keyframe(data)
	default:
		return IsAv1Keyframe(data)
	}
}
// IsAv1Keyframe scans the OBUs in data front to back and reports true as soon
// as it meets an OBU whose type is 1 (OBU_SEQUENCE_HEADER). The premise, per
// the original author, is that FFmpeg's AV1 encoders emit a sequence header in
// front of every IDR, so its presence stands in for "this packet carries a
// key frame". It is meant to mirror the C++ IsAv1Keyframe helper in
// ScreenSpyDlg.cpp.
//
// OBU header byte, most significant bit first:
//
//	forbidden:1 | type:4 | extension:1 | has_size:1 | reserved:1
//
// The function returns false when the data ends without a sequence header,
// when an OBU has no size field (it then runs to the end of the packet, so
// nothing can follow it), or when the data is truncated relative to what the
// headers announce.
func IsAv1Keyframe(data []byte) bool {
	const (
		obuSequenceHeader = 1    // obu_type value being searched for
		extensionFlag     = 0x04 // header bit: one extension byte follows
		hasSizeFlag       = 0x02 // header bit: a LEB128 payload size follows
		maxLEB128Bytes    = 8    // upper bound on bytes consumed for the size
	)

	total := uint64(len(data))
	for off := 0; off < len(data); {
		header := data[off]
		off++

		// The type is examined before anything else is validated, so a
		// sequence header is recognised even if its own body is truncated.
		if (header>>3)&0x0F == obuSequenceHeader {
			return true
		}

		// Step over the optional one-byte extension header.
		if header&extensionFlag != 0 {
			if off >= len(data) {
				return false
			}
			off++
		}

		// Without a size field the OBU extends to the end of the packet.
		if header&hasSizeFlag == 0 {
			return false
		}

		// Decode the LEB128 payload size: 7 value bits per byte, least
		// significant group first, high bit set on every byte but the last.
		var payloadLen uint64
		shift := uint(0)
		for read := 0; read < maxLEB128Bytes; read++ {
			if off >= len(data) {
				return false
			}
			cur := data[off]
			off++
			payloadLen |= uint64(cur&0x7F) << shift
			shift += 7
			if cur&0x80 == 0 {
				break
			}
		}

		// Refuse to skip past the end of the buffer.
		if uint64(off)+payloadLen > total {
			return false
		}
		off += int(payloadLen)
	}
	return false
}
// LOGIN_INFOR structure size and offsets (matching C++ struct with default alignment)
// Note: C++ struct uses default alignment (4-byte for uint32/int)
const (