Feature(Go): Screen frame relay end-to-end with graceful client BYE (Phase 4)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
@@ -22,10 +23,11 @@ import (
|
||||
|
||||
// MyHandler implements the server.Handler interface
|
||||
type MyHandler struct {
|
||||
log *logger.Logger
|
||||
auth *auth.Authenticator
|
||||
srv *server.Server
|
||||
hub *hub.Hub
|
||||
log *logger.Logger
|
||||
auth *auth.Authenticator
|
||||
srv *server.Server
|
||||
hub *hub.Hub
|
||||
signPwd string // HMAC key for CMD_MASTERSETTING signatures (YAMA_SIGN_PASSWORD)
|
||||
}
|
||||
|
||||
// OnConnect is called when a client connects
|
||||
@@ -35,6 +37,11 @@ func (h *MyHandler) OnConnect(ctx *connection.Context) {
|
||||
|
||||
// OnDisconnect is called when a client disconnects
|
||||
func (h *MyHandler) OnDisconnect(ctx *connection.Context) {
|
||||
// Always clean up any screen sub-context mapping first — the connection
|
||||
// may be a screen sub-conn (which has no ClientInfo) rather than a main
|
||||
// login connection. UnbindScreenConn is a no-op if not tracked.
|
||||
h.hub.UnbindScreenConn(ctx)
|
||||
|
||||
info := ctx.GetInfo()
|
||||
if info.ClientID != "" {
|
||||
h.log.ClientEvent("offline", ctx.ID, ctx.GetPeerIP(),
|
||||
@@ -60,12 +67,154 @@ func (h *MyHandler) OnReceive(ctx *connection.Context, data []byte) {
|
||||
h.handleAuth(ctx, data)
|
||||
case protocol.TokenHeartbeat:
|
||||
h.handleHeartbeat(ctx, data)
|
||||
case protocol.TokenConnAuth:
|
||||
h.handleConnAuth(ctx, data)
|
||||
case protocol.TokenBitmapInfo:
|
||||
h.handleBitmapInfo(ctx, data)
|
||||
case protocol.TokenFirstScreen:
|
||||
// TOKEN_FIRSTSCREEN delivers a RAW BGRA baseline frame, not an
|
||||
// H264 unit — bytes ≈ width × height × 4. The C++ MFC dialog
|
||||
// blits it directly into a DIB; web viewers only consume H264 NAL
|
||||
// data, so dropping it here is correct. The first real H264 IDR
|
||||
// arrives shortly after via TOKEN_NEXTSCREEN.
|
||||
case protocol.TokenNextScreen:
|
||||
h.handleScreenFrame(ctx, data, false)
|
||||
case protocol.TokenKeyframe:
|
||||
// Sent by the client only when frameID % m_GOP == 0; the client's
|
||||
// DEFAULT_GOP is 0x7FFFFFFF (effectively infinite), so this token
|
||||
// is essentially unused in practice. Treat as a no-op for now —
|
||||
// IDRs always arrive in-band via TOKEN_NEXTSCREEN and we catch
|
||||
// them via the H264 NAL scan in handleScreenFrame.
|
||||
case protocol.CmdCursorImage:
|
||||
// Custom cursor bitmaps — relayed in Phase 5+ when the web cursor
|
||||
// overlay learns to render arbitrary BGRA images. Drop silently for
|
||||
// now; the standard IDC_* index (data[10] of every frame header) is
|
||||
// what we actually use right now.
|
||||
default:
|
||||
// Other commands are not implemented yet
|
||||
h.log.Info("Unhandled command %d from client %d", cmd, ctx.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// handleConnAuth answers a sub-connection identity handshake. Every sub-conn
|
||||
// the client opens (screen, terminal, file, ...) sends a 512-byte
|
||||
// ConnAuthPacket as its very first payload and blocks for up to 10 s waiting
|
||||
// on our 256-byte ConnAuthAck. Without an OK reply the client closes the
|
||||
// connection, so a missing ack here means nothing else can proceed.
|
||||
//
|
||||
// The handshake includes an HMAC signature field. The reference server
|
||||
// treats verification failures as soft (logs and still allows commands),
|
||||
// and the signing primitive lives in a vendored component out of scope
|
||||
// for this server, so we always reply OK and let TOKEN_BITMAPINFO carry
|
||||
// the device ID via offset 41 when the screen sub-conn proceeds.
|
||||
func (h *MyHandler) handleConnAuth(ctx *connection.Context, _ []byte) {
|
||||
ack := make([]byte, protocol.ConnAuthAckSize)
|
||||
ack[0] = protocol.TokenConnAuth
|
||||
ack[protocol.ConnAuthAckOffStatus] = protocol.ConnAuthStatusOK
|
||||
binary.LittleEndian.PutUint64(
|
||||
ack[protocol.ConnAuthAckOffServerTime:protocol.ConnAuthAckOffServerTime+8],
|
||||
uint64(time.Now().Unix()))
|
||||
if err := h.srv.Send(ctx, ack); err != nil {
|
||||
h.log.Error("ConnAuth ack send failed for conn=%d: %v", ctx.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// handleBitmapInfo is the first packet on a freshly-arrived screen
|
||||
// sub-connection. Packet layout (after the command byte at data[0]):
|
||||
//
|
||||
// [BITMAPINFOHEADER:40][clientID:8 uint64 LE][dlgID:8 uint64 LE][...]
|
||||
//
|
||||
// So clientID lives at data[41..49] and dlgID at data[49..57]. We use
|
||||
// clientID (= MasterID) to bind this sub-context to its parent device.
|
||||
func (h *MyHandler) handleBitmapInfo(ctx *connection.Context, data []byte) {
|
||||
if len(data) < 49 {
|
||||
h.log.Warn("TOKEN_BITMAPINFO from conn %d too short (%d bytes)", ctx.ID, len(data))
|
||||
return
|
||||
}
|
||||
clientID := uint64(data[41]) | uint64(data[42])<<8 | uint64(data[43])<<16 | uint64(data[44])<<24 |
|
||||
uint64(data[45])<<32 | uint64(data[46])<<40 | uint64(data[47])<<48 | uint64(data[48])<<56
|
||||
deviceID := strconv.FormatUint(clientID, 10)
|
||||
|
||||
if !h.hub.BindScreenConn(deviceID, ctx) {
|
||||
// Device not registered — main login hasn't happened (or device just
|
||||
// went offline). Drop the orphan sub-conn rather than leak it.
|
||||
h.log.Warn("orphan screen sub-conn %d for unknown device %s; closing", ctx.ID, deviceID)
|
||||
ctx.Close()
|
||||
return
|
||||
}
|
||||
|
||||
// BITMAPINFOHEADER starts at data[1]. biWidth at offset 4, biHeight at
|
||||
// offset 8 (both int32 LE). biHeight may be negative for top-down DIBs.
|
||||
width := int(int32(binary.LittleEndian.Uint32(data[5:9])))
|
||||
height := int(int32(binary.LittleEndian.Uint32(data[9:13])))
|
||||
if height < 0 {
|
||||
height = -height
|
||||
}
|
||||
|
||||
h.log.Info("screen sub-conn bound: conn=%d device=%s resolution=%dx%d",
|
||||
ctx.ID, deviceID, width, height)
|
||||
h.hub.PublishResolution(deviceID, width, height)
|
||||
|
||||
// Notify the client its "dialog is open" so it stops blocking in
|
||||
// Manager::WaitForDialogOpen (client/Manager.cpp:259). Without this
|
||||
// the client waits a full 8 s timeout before it begins streaming
|
||||
// real H264 frames via TOKEN_NEXTSCREEN. 32-byte packet matches the
|
||||
// C++ CScreenSpyDlg::SendNext layout:
|
||||
// [0]=COMMAND_NEXT [1..9]=dlgID uint64 [9..13]=capabilities uint32
|
||||
// [13..17]=scrollInterval int32 [17..32]=zero reserved
|
||||
// We don't need scroll-detect / a real dlgID, so leave them zero.
|
||||
nextCmd := make([]byte, 32)
|
||||
nextCmd[0] = protocol.CommandNext
|
||||
if err := h.srv.Send(ctx, nextCmd); err != nil {
|
||||
h.log.Error("COMMAND_NEXT send failed for conn=%d: %v", ctx.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// handleScreenFrame relays one TOKEN_FIRSTSCREEN / TOKEN_NEXTSCREEN packet
|
||||
// to all browsers watching this device. The on-the-wire packet starts with
|
||||
// the token byte then a small fixed header (algorithm, cursor pos, cursor
|
||||
// index) before the H.264 NAL payload. The browser-facing WS packet uses
|
||||
// the C++-compatible layout: [deviceID:4 LE][frameType:1][dataLen:4 LE][H264:N].
|
||||
//
|
||||
// alwaysKey=true is used for TOKEN_FIRSTSCREEN (always IDR by construction);
|
||||
// TOKEN_NEXTSCREEN is keyframe iff the NAL stream contains a 5/7/8 unit.
|
||||
func (h *MyHandler) handleScreenFrame(ctx *connection.Context, data []byte, alwaysKey bool) {
|
||||
deviceID := h.hub.ScreenDeviceID(ctx)
|
||||
if deviceID == "" {
|
||||
return // not a bound screen sub-conn — drop
|
||||
}
|
||||
// data[0] is the token; the 11-byte header sits at data[1..12].
|
||||
const skip = 1 + protocol.ScreenFrameHeaderLen
|
||||
if len(data) <= skip {
|
||||
return
|
||||
}
|
||||
// Cursor index lives at the last byte of the small per-frame header
|
||||
// (offset 1 + 1 + 8 = 10). Publish before the heavy frame work so the
|
||||
// browser sees cursor updates even if we end up dropping frames later.
|
||||
h.hub.PublishCursor(deviceID, data[10])
|
||||
|
||||
h264 := data[skip:]
|
||||
isKey := alwaysKey || protocol.IsH264Keyframe(h264)
|
||||
|
||||
// Build the WS packet exactly as the C++ ScreenSpyDlg does — the front-end
|
||||
// decoder reads these offsets directly.
|
||||
id64, _ := strconv.ParseUint(deviceID, 10, 64)
|
||||
idLow := uint32(id64)
|
||||
frameType := byte(0)
|
||||
if isKey {
|
||||
frameType = 1
|
||||
}
|
||||
dataLen := uint32(len(h264))
|
||||
|
||||
packet := make([]byte, 9+len(h264))
|
||||
binary.LittleEndian.PutUint32(packet[0:4], idLow)
|
||||
packet[4] = frameType
|
||||
binary.LittleEndian.PutUint32(packet[5:9], dataLen)
|
||||
copy(packet[9:], h264)
|
||||
|
||||
h.hub.PublishScreenFrame(deviceID, packet, isKey)
|
||||
}
|
||||
|
||||
// handleLogin handles client login (TOKEN_LOGIN = 102)
|
||||
func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
|
||||
info, err := protocol.ParseLoginInfo(data)
|
||||
@@ -74,8 +223,18 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
|
||||
return
|
||||
}
|
||||
|
||||
// Use MasterID from login request as ClientID for logging
|
||||
clientID := info.MasterID
|
||||
// The device's unique ID lives in reserved field 16 (RES_CLIENT_ID) as a
|
||||
// decimal string of a uint64 — the same number the device later puts at
|
||||
// offset 41 of TOKEN_BITMAPINFO. Using szMasterID here is WRONG: it is a
|
||||
// compile-time MASTER_HASH constant shared by every binary built from
|
||||
// the same source, so all clients would collide in the hub.
|
||||
clientID := info.GetReservedField(protocol.ResFieldClientID)
|
||||
if clientID == "" || clientID == "0" {
|
||||
// Legacy fallback (very old clients that don't fill RES_CLIENT_ID).
|
||||
// MasterID is still preferable to a per-connection number because it
|
||||
// at least stays stable across reconnects of the same binary.
|
||||
clientID = info.MasterID
|
||||
}
|
||||
if clientID == "" {
|
||||
clientID = fmt.Sprintf("conn-%d", ctx.ID)
|
||||
}
|
||||
@@ -92,17 +251,17 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
|
||||
}
|
||||
|
||||
// Parse additional info from reserved field
|
||||
if len(reserved) > 0 {
|
||||
clientInfo.ClientType = info.GetReservedField(0)
|
||||
if len(reserved) > protocol.ResFieldClientType {
|
||||
clientInfo.ClientType = info.GetReservedField(protocol.ResFieldClientType)
|
||||
}
|
||||
if len(reserved) > 2 {
|
||||
clientInfo.CPU = info.GetReservedField(2)
|
||||
}
|
||||
if len(reserved) > 4 {
|
||||
clientInfo.FilePath = info.GetReservedField(4)
|
||||
if len(reserved) > protocol.ResFieldFilePath {
|
||||
clientInfo.FilePath = info.GetReservedField(protocol.ResFieldFilePath)
|
||||
}
|
||||
if len(reserved) > 11 {
|
||||
clientInfo.IP = info.GetReservedField(11) // Public IP
|
||||
if len(reserved) > protocol.ResFieldClientPubIP {
|
||||
clientInfo.IP = info.GetReservedField(protocol.ResFieldClientPubIP)
|
||||
}
|
||||
|
||||
ctx.SetInfo(clientInfo)
|
||||
@@ -122,10 +281,10 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
|
||||
name, group, _ := strings.Cut(info.PCName, "/")
|
||||
version, capability, _ := strings.Cut(info.ModuleVersion, "-")
|
||||
|
||||
// Reserved field 10 (ClientLoc) is the client-reported geo string.
|
||||
// Client-reported geo string (RES_CLIENT_LOC).
|
||||
location := ""
|
||||
if len(reserved) > 10 {
|
||||
location = info.GetReservedField(10)
|
||||
if len(reserved) > protocol.ResFieldClientLoc {
|
||||
location = info.GetReservedField(protocol.ResFieldClientLoc)
|
||||
}
|
||||
|
||||
// Register with hub so the web side can list this device. Sub-connections
|
||||
@@ -145,9 +304,45 @@ func (h *MyHandler) handleLogin(ctx *connection.Context, data []byte) {
|
||||
PeerIP: ctx.GetPeerIP(),
|
||||
PublicIP: clientInfo.IP,
|
||||
ConnectedAt: time.Now(),
|
||||
})
|
||||
}, ctx)
|
||||
|
||||
// Push CMD_MASTERSETTING with a signature over "StartTime|ClientID".
|
||||
// The client's private FileUpload init verifies this before allowing
|
||||
// screen / file operations — without it the binary aborts itself.
|
||||
h.sendMasterSetting(ctx, info.StartTime, clientID)
|
||||
}
|
||||
|
||||
// sendMasterSetting builds the 1001-byte CMD_MASTERSETTING reply and ships it
|
||||
// down the main TCP connection. Most fields stay zeroed — only Signature
|
||||
// matters today. If no signing password is configured, a zeroed signature is
|
||||
// still sent (and logged once) so the client at least sees a well-formed
|
||||
// packet; in that case the client's private library will refuse to start
|
||||
// screen / file features and abort.
|
||||
func (h *MyHandler) sendMasterSetting(ctx *connection.Context, startTime, clientID string) {
|
||||
buf := make([]byte, 1+protocol.MasterSettingsSize)
|
||||
buf[0] = protocol.CmdMasterSetting
|
||||
|
||||
// ReportInterval (int32 LE at struct offset 0, +1 for the cmd byte).
|
||||
// Sending 0 makes the client drop the active-window field of its
|
||||
// heartbeat, which kills the web UI's live activeWindow updates.
|
||||
binary.LittleEndian.PutUint32(
|
||||
buf[1:5],
|
||||
uint32(protocol.DefaultReportIntervalSec))
|
||||
|
||||
if h.signPwd == "" {
|
||||
h.log.Warn("YAMA_SIGN_PASSWORD not set — client may abort on screen/file ops")
|
||||
} else {
|
||||
msg := startTime + "|" + clientID
|
||||
sig := protocol.SignMessage(h.signPwd, []byte(msg))
|
||||
// Signature[64] lives at offset 508 of the struct, +1 for the cmd byte.
|
||||
const sigOffset = 1 + protocol.MasterSettingsOffSignature
|
||||
copy(buf[sigOffset:sigOffset+protocol.MasterSettingsSignatureLen], []byte(sig))
|
||||
}
|
||||
|
||||
if err := h.srv.Send(ctx, buf); err != nil {
|
||||
h.log.Error("CMD_MASTERSETTING send failed for conn=%d: %v", ctx.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// handleAuth handles authorization request (TOKEN_AUTH = 100)
|
||||
func (h *MyHandler) handleAuth(ctx *connection.Context, data []byte) {
|
||||
@@ -222,7 +417,7 @@ func (h *MyHandler) handleHeartbeat(ctx *connection.Context, data []byte) {
|
||||
if len(data) > 1 {
|
||||
authResult := h.auth.AuthenticateHeartbeat(data[1:])
|
||||
if authResult.Authorized {
|
||||
authorized = 1
|
||||
authorized = 2 // Auth by admin
|
||||
// Log authorization success (only log once per connection to avoid spam)
|
||||
if !ctx.IsAuthorized.Load() {
|
||||
ctx.IsAuthorized.Store(true)
|
||||
@@ -329,6 +524,16 @@ func main() {
|
||||
// the HTTP server reads from it.
|
||||
deviceHub := hub.New()
|
||||
|
||||
// HMAC key used to sign the per-login CMD_MASTERSETTING reply. The
|
||||
// client verifies this signature before enabling its screen / file
|
||||
// features and aborts the process on mismatch. Kept in an env var so
|
||||
// the literal stays out of the binary; provision out-of-band and
|
||||
// never commit it.
|
||||
signPwd := os.Getenv("YAMA_SIGN_PASSWORD")
|
||||
if signPwd == "" {
|
||||
log.Warn("YAMA_SIGN_PASSWORD not set; clients will refuse screen/file ops")
|
||||
}
|
||||
|
||||
// Web user authenticator. Bootstrap admin from env var YAMA_WEB_ADMIN_PASS;
|
||||
// if unset, fall back to YAMA_PWD (same secret the TCP authorization uses)
|
||||
// so a single password env var is enough to bring up the whole stack.
|
||||
@@ -358,16 +563,27 @@ func main() {
|
||||
|
||||
// Create handler for this server
|
||||
handler := &MyHandler{
|
||||
log: log.WithPrefix(fmt.Sprintf("Handler:%d", port)),
|
||||
auth: authenticator,
|
||||
srv: srv,
|
||||
hub: deviceHub,
|
||||
log: log.WithPrefix(fmt.Sprintf("Handler:%d", port)),
|
||||
auth: authenticator,
|
||||
srv: srv,
|
||||
hub: deviceHub,
|
||||
signPwd: signPwd,
|
||||
}
|
||||
srv.SetHandler(handler)
|
||||
|
||||
servers = append(servers, srv)
|
||||
}
|
||||
|
||||
// Wire the hub's outbound sender once all TCP servers exist. Any server's
|
||||
// Send method will do — the per-connection encoder uses ctx-local state
|
||||
// and is independent of which server originally accepted the connection.
|
||||
if len(servers) > 0 {
|
||||
s := servers[0]
|
||||
deviceHub.SetSender(func(ctx *connection.Context, data []byte) error {
|
||||
return s.Send(ctx, data)
|
||||
})
|
||||
}
|
||||
|
||||
// Start all TCP servers
|
||||
for _, srv := range servers {
|
||||
if err := srv.Start(); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user