Feature(Go): Screen frame relay end-to-end with graceful client BYE (Phase 4)

This commit is contained in:
yuanyuanxiang
2026-05-18 01:00:56 +02:00
parent b1f229706c
commit f013512c06
10 changed files with 999 additions and 74 deletions

View File

@@ -1,6 +1,7 @@
package main
import (
"encoding/binary"
"flag"
"fmt"
"os"
@@ -22,10 +23,11 @@ import (
// MyHandler implements the server.Handler interface
type MyHandler struct {
log *logger.Logger
auth *auth.Authenticator
srv *server.Server
hub *hub.Hub
log *logger.Logger
auth *auth.Authenticator
srv *server.Server
hub *hub.Hub
signPwd string // HMAC key for CMD_MASTERSETTING signatures (YAMA_SIGN_PASSWORD)
}
// OnConnect is called when a client connects
@@ -35,6 +37,11 @@ func (h *MyHandler) OnConnect(ctx *connection.Context) {
// OnDisconnect is called when a client disconnects
func (h *MyHandler) OnDisconnect(ctx *connection.Context) {
// Always clean up any screen sub-context mapping first — the connection
// may be a screen sub-conn (which has no ClientInfo) rather than a main
// login connection. UnbindScreenConn is a no-op if not tracked.
h.hub.UnbindScreenConn(ctx)
info := ctx.GetInfo()
if info.ClientID != "" {
h.log.ClientEvent("offline", ctx.ID, ctx.GetPeerIP(),
@@ -60,12 +67,154 @@ func (h *MyHandler) OnReceive(ctx *connection.Context, data []byte) {
h.handleAuth(ctx, data)
case protocol.TokenHeartbeat:
h.handleHeartbeat(ctx, data)
case protocol.TokenConnAuth:
h.handleConnAuth(ctx, data)
case protocol.TokenBitmapInfo:
h.handleBitmapInfo(ctx, data)
case protocol.TokenFirstScreen:
// TOKEN_FIRSTSCREEN delivers a RAW BGRA baseline frame, not an
// H264 unit — bytes ≈ width × height × 4. The C++ MFC dialog
// blits it directly into a DIB; web viewers only consume H264 NAL
// data, so dropping it here is correct. The first real H264 IDR
// arrives shortly after via TOKEN_NEXTSCREEN.
case protocol.TokenNextScreen:
h.handleScreenFrame(ctx, data, false)
case protocol.TokenKeyframe:
// Sent by the client only when frameID % m_GOP == 0; the client's
// DEFAULT_GOP is 0x7FFFFFFF (effectively infinite), so this token
// is essentially unused in practice. Treat as a no-op for now —
// IDRs always arrive in-band via TOKEN_NEXTSCREEN and we catch
// them via the H264 NAL scan in handleScreenFrame.
case protocol.CmdCursorImage:
// Custom cursor bitmaps — relayed in Phase 5+ when the web cursor
// overlay learns to render arbitrary BGRA images. Drop silently for
// now; the standard IDC_* index (data[10] of every frame header) is
// what we actually use right now.
default:
// Other commands are not implemented yet
h.log.Info("Unhandled command %d from client %d", cmd, ctx.ID)
}
}
// handleConnAuth replies to the identity handshake that opens every client
// sub-connection (screen, terminal, file, ...).
//
// Protocol notes: the client's first payload on a sub-connection is a
// 512-byte ConnAuthPacket, after which it waits up to 10 s for a 256-byte
// ConnAuthAck. If no OK ack arrives the client drops the connection, so
// nothing else on that sub-connection can happen until this reply is sent.
//
// The request payload is intentionally ignored. It carries an HMAC signature,
// but the reference server only treats a bad signature as a soft failure
// (logged, commands still allowed) and the signing primitive is a vendored
// component outside this server's scope. The reply is therefore always OK;
// the device identity is picked up later from offset 41 of TOKEN_BITMAPINFO
// once the screen sub-connection continues.
//
// A failed send is logged and otherwise ignored.
func (h *MyHandler) handleConnAuth(ctx *connection.Context, _ []byte) {
	timeStart := protocol.ConnAuthAckOffServerTime

	// Zero-filled ack: token byte, OK status, then the server clock as
	// little-endian Unix seconds. Every other byte stays zero.
	reply := make([]byte, protocol.ConnAuthAckSize)
	reply[0] = protocol.TokenConnAuth
	reply[protocol.ConnAuthAckOffStatus] = protocol.ConnAuthStatusOK
	now := uint64(time.Now().Unix())
	binary.LittleEndian.PutUint64(reply[timeStart:timeStart+8], now)

	err := h.srv.Send(ctx, reply)
	if err == nil {
		return
	}
	h.log.Error("ConnAuth ack send failed for conn=%d: %v", ctx.ID, err)
}
// handleBitmapInfo processes TOKEN_BITMAPINFO, the first packet sent on a new
// screen sub-connection, and ties that connection to its parent device.
//
// Wire layout, counted from the command byte at data[0]:
//
//	[cmd:1][BITMAPINFOHEADER:40][clientID:8 uint64 LE][dlgID:8 uint64 LE][...]
//
// clientID therefore occupies data[41..49] and dlgID data[49..57]. Only
// clientID is read here; its decimal form is the device ID used with the hub.
//
// Steps, in order:
//  1. Packets shorter than 49 bytes are logged and ignored.
//  2. The sub-connection is bound to the device in the hub. If the hub
//     rejects the bind (the main login has not happened, or the device just
//     went offline) the connection is closed instead of being left orphaned.
//  3. Width and height are read from the bitmap header and published.
//  4. A 32-byte COMMAND_NEXT is sent so the client stops waiting for its
//     "dialog open" signal and starts streaming.
func (h *MyHandler) handleBitmapInfo(ctx *connection.Context, data []byte) {
	// Byte offsets into data; each header offset is shifted by one to step
	// over the command byte.
	const (
		offWidth    = 5  // biWidth  (header offset 4), int32 LE
		offHeight   = 9  // biHeight (header offset 8), int32 LE
		offClientID = 41 // first byte after the 40-byte BITMAPINFOHEADER
		minLen      = offClientID + 8
		nextCmdLen  = 32
	)

	if len(data) < minLen {
		h.log.Warn("TOKEN_BITMAPINFO from conn %d too short (%d bytes)", ctx.ID, len(data))
		return
	}

	rawID := binary.LittleEndian.Uint64(data[offClientID:minLen])
	deviceID := strconv.FormatUint(rawID, 10)

	if bound := h.hub.BindScreenConn(deviceID, ctx); !bound {
		h.log.Warn("orphan screen sub-conn %d for unknown device %s; closing", ctx.ID, deviceID)
		ctx.Close()
		return
	}

	// Both dimensions are signed; a negative biHeight marks a top-down DIB,
	// so only its magnitude is reported.
	width := int(int32(binary.LittleEndian.Uint32(data[offWidth : offWidth+4])))
	height := int(int32(binary.LittleEndian.Uint32(data[offHeight : offHeight+4])))
	if height < 0 {
		height = -height
	}

	h.log.Info("screen sub-conn bound: conn=%d device=%s resolution=%dx%d",
		ctx.ID, deviceID, width, height)
	h.hub.PublishResolution(deviceID, width, height)

	// Tell the client its viewer dialog is open. Until it receives this it
	// blocks in Manager::WaitForDialogOpen (client/Manager.cpp:259) and only
	// starts sending H264 via TOKEN_NEXTSCREEN after a full 8 s timeout.
	// The packet mirrors the C++ CScreenSpyDlg::SendNext layout:
	//
	//	[0]=COMMAND_NEXT  [1..9]=dlgID uint64  [9..13]=capabilities uint32
	//	[13..17]=scrollInterval int32  [17..32]=reserved, zero
	//
	// No real dlgID or scroll detection is needed, so everything after the
	// command byte is left zero.
	openNotice := make([]byte, nextCmdLen)
	openNotice[0] = protocol.CommandNext
	err := h.srv.Send(ctx, openNotice)
	if err != nil {
		h.log.Error("COMMAND_NEXT send failed for conn=%d: %v", ctx.ID, err)
	}
}
// handleScreenFrame forwards one screen-frame packet from a bound screen
// sub-connection to the hub for delivery to browsers watching that device.
//
// Incoming packet: the token byte at data[0], then a fixed per-frame header
// of protocol.ScreenFrameHeaderLen bytes (algorithm, cursor position, cursor
// index), then the H.264 NAL payload. Packets from connections that are not
// bound to a device, or that carry no payload after the header, are dropped
// silently.
//
// Outgoing packet, in the C++-compatible layout the front-end decoder reads:
//
//	[deviceID:4 LE][frameType:1][dataLen:4 LE][H264:N]
//
// deviceID is the low 32 bits of the numeric device ID; frameType is 1 for a
// keyframe and 0 otherwise.
//
// alwaysKey forces the frame to be flagged as a keyframe without inspecting
// the payload (intended for TOKEN_FIRSTSCREEN). When it is false the frame is
// a keyframe only if protocol.IsH264Keyframe says so. The one call site in
// OnReceive passes false for TOKEN_NEXTSCREEN; TOKEN_FIRSTSCREEN is dropped
// there and does not reach this function.
func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwaysKey bool) {
	const (
		payloadStart = 1 + protocol.ScreenFrameHeaderLen // token byte + header
		cursorIdxOff = 10                                // 1 token + 1 algorithm + 8 cursor position
		wsHeaderLen  = 9                                 // 4 id + 1 type + 4 length
	)

	deviceID := h.hub.ScreenDeviceID(ctx)
	if deviceID == "" || len(data) <= payloadStart {
		return
	}

	// The cursor index goes out first, before any frame handling.
	h.hub.PublishCursor(deviceID, data[cursorIdxOff])

	payload := data[payloadStart:]
	keyframe := alwaysKey || protocol.IsH264Keyframe(payload)

	var frameType byte
	if keyframe {
		frameType = 1
	}

	// The parse error is deliberately ignored: IDs bound in handleBitmapInfo
	// come from strconv.FormatUint, so parsing is expected to succeed
	// (presumably ScreenDeviceID returns that same string — confirm against
	// the hub). On failure the frame is still published with whatever value
	// ParseUint returned.
	numericID, _ := strconv.ParseUint(deviceID, 10, 64)

	out := make([]byte, wsHeaderLen, wsHeaderLen+len(payload))
	binary.LittleEndian.PutUint32(out[0:4], uint32(numericID))
	out[4] = frameType
	binary.LittleEndian.PutUint32(out[5:9], uint32(len(payload)))
	out = append(out, payload...)

	h.hub.PublishScreenFrame(deviceID, out, keyframe)
}
// handleLogin handles client login (TOKEN_LOGIN = 102)
func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
info, err := protocol.ParseLoginInfo(data)
@@ -74,8 +223,18 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
return
}
// Use MasterID from login request as ClientID for logging
clientID := info.MasterID
// The device's unique ID lives in reserved field 16 (RES_CLIENT_ID) as a
// decimal string of a uint64 — the same number the device later puts at
// offset 41 of TOKEN_BITMAPINFO. Using szMasterID here is WRONG: it is a
// compile-time MASTER_HASH constant shared by every binary built from
// the same source, so all clients would collide in the hub.
clientID := info.GetReservedField(protocol.ResFieldClientID)
if clientID == "" || clientID == "0" {
// Legacy fallback (very old clients that don't fill RES_CLIENT_ID).
// MasterID is still preferable to a per-connection number because it
// at least stays stable across reconnects of the same binary.
clientID = info.MasterID
}
if clientID == "" {
clientID = fmt.Sprintf("conn-%d", ctx.ID)
}
@@ -92,17 +251,17 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
}
// Parse additional info from reserved field
if len(reserved) > 0 {
clientInfo.ClientType = info.GetReservedField(0)
if len(reserved) > protocol.ResFieldClientType {
clientInfo.ClientType = info.GetReservedField(protocol.ResFieldClientType)
}
if len(reserved) > 2 {
clientInfo.CPU = info.GetReservedField(2)
}
if len(reserved) > 4 {
clientInfo.FilePath = info.GetReservedField(4)
if len(reserved) > protocol.ResFieldFilePath {
clientInfo.FilePath = info.GetReservedField(protocol.ResFieldFilePath)
}
if len(reserved) > 11 {
clientInfo.IP = info.GetReservedField(11) // Public IP
if len(reserved) > protocol.ResFieldClientPubIP {
clientInfo.IP = info.GetReservedField(protocol.ResFieldClientPubIP)
}
ctx.SetInfo(clientInfo)
@@ -122,10 +281,10 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
name, group, _ := strings.Cut(info.PCName, "/")
version, capability, _ := strings.Cut(info.ModuleVersion, "-")
// Reserved field 10 (ClientLoc) is the client-reported geo string.
// Client-reported geo string (RES_CLIENT_LOC).
location := ""
if len(reserved) > 10 {
location = info.GetReservedField(10)
if len(reserved) > protocol.ResFieldClientLoc {
location = info.GetReservedField(protocol.ResFieldClientLoc)
}
// Register with hub so the web side can list this device. Sub-connections
@@ -145,9 +304,45 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
PeerIP: ctx.GetPeerIP(),
PublicIP: clientInfo.IP,
ConnectedAt: time.Now(),
})
}, ctx)
// Push CMD_MASTERSETTING with a signature over "StartTime|ClientID".
// The client's private FileUpload init verifies this before allowing
// screen / file operations — without it the binary aborts itself.
h.sendMasterSetting(ctx, info.StartTime, clientID)
}
// sendMasterSetting assembles the CMD_MASTERSETTING reply and sends it on the
// given connection. The packet is one command byte followed by a
// protocol.MasterSettingsSize-byte settings struct (1001 bytes in total per
// the protocol notes).
//
// Only two fields are filled in; everything else stays zero:
//
//   - ReportInterval (int32 LE at struct offset 0) is set to
//     protocol.DefaultReportIntervalSec. A value of 0 would make the client
//     omit the active-window field from its heartbeat, which stops the web
//     UI's live activeWindow updates.
//   - Signature is the result of protocol.SignMessage over
//     "startTime|clientID", keyed with h.signPwd.
//
// When h.signPwd is empty the signature stays zeroed, a warning is logged on
// every call, and the packet is still sent so the client receives a
// well-formed reply. In that case the client's private library is expected to
// refuse screen / file features and abort.
//
// A failed send is logged and otherwise ignored.
func (h *MyHandler) sendMasterSetting(ctx *connection.Context, startTime, clientID string) {
	packet := make([]byte, 1+protocol.MasterSettingsSize)
	packet[0] = protocol.CmdMasterSetting

	// Struct offset 0 sits at packet[1] because of the leading command byte.
	interval := uint32(protocol.DefaultReportIntervalSec)
	binary.LittleEndian.PutUint32(packet[1:5], interval)

	if h.signPwd != "" {
		// Signature[64] is at struct offset 508; +1 for the command byte.
		const sigStart = 1 + protocol.MasterSettingsOffSignature
		signature := protocol.SignMessage(h.signPwd, []byte(startTime+"|"+clientID))
		copy(packet[sigStart:sigStart+protocol.MasterSettingsSignatureLen], []byte(signature))
	} else {
		h.log.Warn("YAMA_SIGN_PASSWORD not set — client may abort on screen/file ops")
	}

	err := h.srv.Send(ctx, packet)
	if err != nil {
		h.log.Error("CMD_MASTERSETTING send failed for conn=%d: %v", ctx.ID, err)
	}
}
// handleAuth handles authorization request (TOKEN_AUTH = 100)
func (h *MyHandler) handleAuth(ctx *connection.Context, data []byte) {
@@ -222,7 +417,7 @@ func (h *MyHandler) handleHeartbeat(ctx *connection.Context, data []byte) {
if len(data) > 1 {
authResult := h.auth.AuthenticateHeartbeat(data[1:])
if authResult.Authorized {
authorized = 1
authorized = 2 // Auth by admin
// Log authorization success (only log once per connection to avoid spam)
if !ctx.IsAuthorized.Load() {
ctx.IsAuthorized.Store(true)
@@ -329,6 +524,16 @@ func main() {
// the HTTP server reads from it.
deviceHub := hub.New()
// HMAC key used to sign the per-login CMD_MASTERSETTING reply. The
// client verifies this signature before enabling its screen / file
// features and aborts the process on mismatch. Kept in an env var so
// the literal stays out of the binary; provision out-of-band and
// never commit it.
signPwd := os.Getenv("YAMA_SIGN_PASSWORD")
if signPwd == "" {
log.Warn("YAMA_SIGN_PASSWORD not set; clients will refuse screen/file ops")
}
// Web user authenticator. Bootstrap admin from env var YAMA_WEB_ADMIN_PASS;
// if unset, fall back to YAMA_PWD (same secret the TCP authorization uses)
// so a single password env var is enough to bring up the whole stack.
@@ -358,16 +563,27 @@ func main() {
// Create handler for this server
handler := &MyHandler{
log: log.WithPrefix(fmt.Sprintf("Handler:%d", port)),
auth: authenticator,
srv: srv,
hub: deviceHub,
log: log.WithPrefix(fmt.Sprintf("Handler:%d", port)),
auth: authenticator,
srv: srv,
hub: deviceHub,
signPwd: signPwd,
}
srv.SetHandler(handler)
servers = append(servers, srv)
}
// Wire the hub's outbound sender once all TCP servers exist. Any server's
// Send method will do — the per-connection encoder uses ctx-local state
// and is independent of which server originally accepted the connection.
if len(servers) > 0 {
s := servers[0]
deviceHub.SetSender(func(ctx *connection.Context, data []byte) error {
return s.Send(ctx, data)
})
}
// Start all TCP servers
for _, srv := range servers {
if err := srv.Start(); err != nil {