Feature(Go): Screen frame relay end-to-end with graceful client BYE (Phase 4)

This commit is contained in:
yuanyuanxiang
2026-05-18 01:00:56 +02:00
parent b1f229706c
commit f013512c06
10 changed files with 999 additions and 74 deletions

View File

@@ -2,7 +2,10 @@ package protocol
import (
"bytes"
"crypto/hmac"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"strings"
"golang.org/x/text/encoding/simplifiedchinese"
@@ -44,19 +47,126 @@ func cleanString(s string) string {
return strings.TrimSpace(result.String())
}
// Command tokens - matching the C++ definitions
// Command tokens - matching the C++ definitions (common/commands.h).
const (
// Server -> Client commands
CommandActived byte = 0 // COMMAND_ACTIVED
CommandBye byte = 204 // COMMAND_BYE - disconnect
CommandHeartbeat byte = 216 // CMD_HEARTBEAT_ACK
CommandActived byte = 0 // COMMAND_ACTIVED
CommandScreenSpy byte = 16 // COMMAND_SCREEN_SPY - start screen capture
CommandNext byte = 30 // COMMAND_NEXT - "control-side dialog is open, you may stream"
CommandBye byte = 204 // COMMAND_BYE - disconnect
CommandHeartbeat byte = 216 // CMD_HEARTBEAT_ACK
// Client -> Server tokens
TokenAuth byte = 100 // TOKEN_AUTH - authorization required
TokenHeartbeat byte = 101 // TOKEN_HEARTBEAT
TokenLogin byte = 102 // TOKEN_LOGIN - login packet
TokenAuth byte = 100 // TOKEN_AUTH - authorization required
TokenHeartbeat byte = 101 // TOKEN_HEARTBEAT
TokenLogin byte = 102 // TOKEN_LOGIN - login packet
TokenBitmapInfo byte = 115 // TOKEN_BITMAPINFO - screen sub-connection header
TokenFirstScreen byte = 116 // TOKEN_FIRSTSCREEN - raw BGRA baseline frame (NOT H264)
TokenNextScreen byte = 117 // TOKEN_NEXTSCREEN - non-keyframe H264 (P-frame)
TokenKeyframe byte = 134 // TOKEN_KEYFRAME - H264 IDR (sent on GOP boundary)
TokenConnAuth byte = 246 // TOKEN_CONN_AUTH - sub-connection identity handshake
CmdCursorImage byte = 93 // CMD_CURSOR_IMAGE - custom cursor bitmap (Phase 5+ feature)
)
// Sub-connection authentication (matches common/commands.h ConnAuth* structs).
// Each newly-opened sub-conn first sends a 512-byte ConnAuthPacket, then waits
// for a 256-byte ConnAuthAck before any further command is meaningful.
const (
// Fixed sizes, in bytes, of the handshake request and its acknowledgement.
ConnAuthPacketSize = 512
ConnAuthAckSize = 256
// ConnAuthAck field offsets within the 256-byte buffer.
// NOTE(review): offset 0 has no named constant here — presumably the
// leading command/token byte; confirm against common/commands.h.
ConnAuthAckOffStatus = 1 // uint8
ConnAuthAckOffServerTime = 2 // uint64 LE
// Status codes. Only the success value is declared here.
ConnAuthStatusOK byte = 0
)
// CMD_MASTERSETTING is the server's reply to a fresh client login. The
// client uses the Signature field to prove this server has the shared
// secret; without a valid signature the client's private FileUpload init
// aborts the process. Struct layout matches MasterSettings in
// common/commands.h (pragma pack 4, total 1000 bytes).
const (
// CmdMasterSetting is the command byte identifying the reply.
CmdMasterSetting byte = 215
// MasterSettingsSize is the size of the whole struct in bytes.
MasterSettingsSize = 1000
// Byte offsets, from the start of the struct, of the fields named here.
MasterSettingsOffReportInterval = 0 // int32, seconds
MasterSettingsOffSignature = 508 // Signature[64]
// MasterSettingsSignatureLen is the width of the Signature field in bytes,
// so the field spans bytes [508, 572). It equals the length of the 64-char
// hex digest that SignMessage returns.
MasterSettingsSignatureLen = 64
// DefaultReportIntervalSec matches the C++ default. Sending 0 makes the
// client disable its active-window heartbeat field, breaking RTT /
// ActiveWindow live updates on the web UI.
DefaultReportIntervalSec = 5
)
// SignMessage returns the HMAC-SHA256 of msg keyed with password, rendered
// as 64 lowercase hexadecimal characters. CMD_MASTERSETTING replies carry
// this value so the client can check that the reply was produced by a
// server holding the shared secret.
//
// password is the deployment-time shared secret. It is loaded from the
// YAMA_SIGN_PASSWORD env var rather than compiled into the binary;
// provision it out-of-band and never commit it.
func SignMessage(password string, msg []byte) string {
	signer := hmac.New(sha256.New, []byte(password))
	// hash.Hash documents that Write never returns an error, so the
	// result is discarded on purpose.
	_, _ = signer.Write(msg)
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
// Screen-spy parameters that match the C++ ScreenSpy implementation.
const (
// AlgorithmH264 is the Go name for the C++ ALGORITHM_H264 value.
// NOTE(review): presumably this is the value carried in the one-byte
// algorithm field of the per-frame screen header — confirm at the call sites.
AlgorithmH264 byte = 2 // ALGORITHM_H264 — H264 encoding (the algorithm web uses)
)
// Reserved-field indices we care about (see common/commands.h RES_* enum).
// LOGIN_INFOR.szReserved is a '|'-separated list; clients fill known slots
// even when leaving others blank ("?").
//
// Each constant is the zero-based position of a field within that list.
// Only the slots this package reads are named, so the numbering has gaps.
const (
ResFieldClientType = 0 // RES_CLIENT_TYPE — client kind (Windows / macOS / ...)
ResFieldFilePath = 4 // RES_FILE_PATH — install path
ResFieldInstallTime = 6 // RES_INSTALL_TIME
ResFieldClientLoc = 10 // RES_CLIENT_LOC — geo string
ResFieldClientPubIP = 11 // RES_CLIENT_PUBIP — public IP
ResFieldClientID = 16 // RES_CLIENT_ID — uint64 decimal, matches TOKEN_BITMAPINFO clientID
)
// ScreenFrameHeaderLen is the size of the small per-frame header prepended by
// the device on every TOKEN_NEXTSCREEN buffer, before the H.264 NAL payload.
// Layout (excluding the leading TOKEN_* byte):
//
// [algorithm:1][cursorPos:8 (int32 x, int32 y)][cursorIdx:1] = 10 bytes
//
// (The C++ side counts the token byte into its ulHeadLength=11; we keep the
// constant strictly post-token so the call site reads `skip := 1 + headerLen`
// without confusion.) When the C++ build enables SCREENYSPY_IMPROVE, a 4-byte
// frameID follows the cursor index. Per common/commands.h that option is off
// in production, so those 4 bytes are not counted here.
//
// The expression is written as a sum so each term lines up with one layout
// field: algorithm (1) + cursorPos (8) + cursorIdx (1).
const ScreenFrameHeaderLen = 1 + 8 + 1
// IsH264Keyframe reports whether an Annex-B H.264 bitstream contains a NAL
// unit that marks a keyframe boundary: IDR slice (type 5), SPS (type 7) or
// PPS (type 8). Both the 4-byte (00 00 00 01) and 3-byte (00 00 01) start
// codes are recognised, and the scan stops at the first matching NAL unit.
//
// A start code is only examined while at least five bytes remain from its
// first byte. A 3-byte start code whose NAL header is the very last byte of
// data is therefore not inspected. This mirrors the detection used by the
// C++ ScreenSpy broadcast path, so frame-type bytes stay consistent across
// server implementations.
func IsH264Keyframe(data []byte) bool {
	// Every index touched below is at most pos+4, which stays in range as
	// long as pos < len(data)-4.
	limit := len(data) - 4
	for pos := 0; pos < limit; pos++ {
		if data[pos] != 0 || data[pos+1] != 0 {
			continue // both start-code forms begin with 00 00
		}

		var hdr int // index of the NAL header byte following the start code
		switch {
		case data[pos+2] == 1:
			hdr = pos + 3
		case data[pos+2] == 0 && data[pos+3] == 1:
			hdr = pos + 4
		default:
			continue
		}

		// The low five bits of the NAL header carry nal_unit_type.
		switch data[hdr] & 0x1F {
		case 5, 7, 8:
			return true
		}
	}
	return false
}
// LOGIN_INFOR structure size and offsets (matching C++ struct with default alignment)
// Note: C++ struct uses default alignment (4-byte for uint32/int)
const (

View File

@@ -0,0 +1,47 @@
package protocol
import "testing"
func TestSignMessageHMACVector(t *testing.T) {
	// Known-answer check for key "key" and message "hello". Pinning one
	// fixed HMAC-SHA256 digest guards against SignMessage drifting from the
	// standard construction, which would break interoperability with peers
	// that compute the same digest.
	const want = "9307b3b915efb5171ff14d8cb55fbcc798c6c0ef1456d66ded1a6aa723a58b7b"
	if got := SignMessage("key", []byte("hello")); got != want {
		t.Fatalf("SignMessage(key, hello) = %s, want %s", got, want)
	}
}
func TestSignMessageDeterministic(t *testing.T) {
	const (
		key     = "test-key"
		payload = "2026-01-01 12:00:00|123456789"
	)
	// Sign the same input twice, each time from a fresh byte slice.
	first := SignMessage(key, []byte(payload))
	second := SignMessage(key, []byte(payload))
	if first != second {
		t.Fatalf("non-deterministic: %s != %s", first, second)
	}
	// A SHA-256 digest is 32 bytes, i.e. 64 hex characters.
	if n := len(first); n != 64 {
		t.Fatalf("expected 64 hex chars, got %d (%s)", n, first)
	}
}
func TestIsH264KeyframeBasic(t *testing.T) {
	// Cases run in order and the test stops at the first mismatch.
	cases := []struct {
		stream  []byte
		want    bool
		failMsg string
	}{
		// 4-byte start code + IDR (NAL type 5)
		{[]byte{0x00, 0x00, 0x00, 0x01, 0x65, 0x88}, true, "IDR should be detected as keyframe"},
		// 3-byte start code + SPS (NAL type 7)
		{[]byte{0x00, 0x00, 0x01, 0x67, 0x42}, true, "SPS should be detected as keyframe"},
		// 4-byte start code + non-IDR slice (NAL type 1)
		{[]byte{0x00, 0x00, 0x00, 0x01, 0x41, 0x9b}, false, "non-IDR slice should not be detected as keyframe"},
		// Garbage
		{[]byte{0xde, 0xad, 0xbe, 0xef}, false, "non-H264 bytes should not match"},
	}
	for _, tc := range cases {
		if IsH264Keyframe(tc.stream) != tc.want {
			t.Fatal(tc.failMsg)
		}
	}
}

View File

@@ -20,6 +20,19 @@ type Parser struct {
codec *Codec
}
// findHTTPBodyOffset locates the start of the HTTP body in data: the index
// of the first byte after the earliest `\r\n\r\n` header terminator. It
// returns -1 when no terminator is present, in which case the caller should
// wait for more data. Matches the C++ UnMaskHttp scan in common/mask.h.
func findHTTPBodyOffset(data []byte) int {
	const terminator = "\r\n\r\n"
	// end is the exclusive end of the window under test, so on a match it
	// is also the body offset.
	for end := len(terminator); end <= len(data); end++ {
		if string(data[end-len(terminator):end]) == terminator {
			return end
		}
	}
	return -1
}
// NewParser creates a new parser
func NewParser() *Parser {
return &Parser{
@@ -38,6 +51,22 @@ func (p *Parser) Close() {
func (p *Parser) Parse(ctx *connection.Context) ([]byte, error) {
buf := ctx.InBuffer
// Strip optional HTTP-mask wrapper. The client may disguise each outbound
// chunk as a `POST /<random> HTTP/1.1\r\n...\r\n\r\n` envelope followed
// by the real binary body (see common/mask.h: HttpMask). Each chunk
// carries its own envelope so we strip every time we see the prefix.
if buf.Len() >= 5 {
head := buf.Peek(5)
if len(head) == 5 && head[0] == 'P' && head[1] == 'O' && head[2] == 'S' && head[3] == 'T' && head[4] == ' ' {
bodyOffset := findHTTPBodyOffset(buf.Bytes())
if bodyOffset < 0 {
// Headers not fully arrived yet — wait for more bytes.
return nil, ErrNeedMore
}
buf.Skip(bodyOffset)
}
}
// Need at least minimum bytes to determine protocol
if buf.Len() < MinComLen {
return nil, ErrNeedMore