Fix(Go): stable device list ordering + RDP-reset handler

Fix UTF-8 login text decode + stale screen sub-conn retirement
This commit is contained in:
yuanyuanxiang
2026-05-19 16:28:01 +02:00
parent 5af017bf09
commit d757c33bcb
3 changed files with 167 additions and 87 deletions

View File

@@ -10,6 +10,7 @@ package hub
import ( import (
"errors" "errors"
"sort"
"sync" "sync"
"time" "time"
@@ -280,6 +281,14 @@ func (h *Hub) SendToScreen(id string, data []byte) error {
// just sent TOKEN_BITMAPINFO) with the device identified by clientID. // just sent TOKEN_BITMAPINFO) with the device identified by clientID.
// Returns false if the device is not registered — callers should drop the // Returns false if the device is not registered — callers should drop the
// orphan connection in that case. // orphan connection in that case.
//
// If the device already has a screen sub-conn bound (typically because the
// client's monitor-poll logic opened a new one without the previous viewer
// going through CloseScreen — e.g. multiple open/close cycles where each
// cycle picks a different display), the previous sub-conn is retired via
// retireScreenConn. Without this, both sub-conns keep streaming under the
// same device ID and the browser sees H.264 frames from two encoders
// interleaved, rendering as a picture that jumps between monitors.
func (h *Hub) BindScreenConn(deviceID string, ctx *connection.Context) bool { func (h *Hub) BindScreenConn(deviceID string, ctx *connection.Context) bool {
if deviceID == "" || ctx == nil { if deviceID == "" || ctx == nil {
return false return false
@@ -290,12 +299,29 @@ func (h *Hub) BindScreenConn(deviceID string, ctx *connection.Context) bool {
h.mu.Unlock() h.mu.Unlock()
return false return false
} }
old := d.screenConn
d.screenConn = ctx d.screenConn = ctx
// The cached resolution and last IDR belong to the old encoder's
// stream. A viewer joining now must wait for the new sub-conn's
// TOKEN_BITMAPINFO + first IDR; serving the old monitor's keyframe
// to a decoder that's about to receive a different SPS/PPS would
// produce the same mixed-stream corruption retireScreenConn exists
// to prevent.
replacing := old != nil && old != ctx
if replacing {
d.screenWidth = 0
d.screenHeight = 0
d.lastKeyframe = nil
}
h.mu.Unlock() h.mu.Unlock()
h.screenIndexMu.Lock() h.screenIndexMu.Lock()
h.screenIndex[ctx] = deviceID h.screenIndex[ctx] = deviceID
h.screenIndexMu.Unlock() h.screenIndexMu.Unlock()
if replacing {
h.retireScreenConn(old)
}
return true return true
} }
@@ -332,6 +358,49 @@ func (h *Hub) UnbindScreenConn(ctx *connection.Context) {
h.mu.Unlock() h.mu.Unlock()
} }
// retireScreenConn tears down a screen sub-conn that's being replaced or
// closed. Shared by CloseScreen (last viewer left) and BindScreenConn (device
// opened a new sub-conn that's superseding this one, e.g. when the client's
// monitor-poll logic switches to a different display).
//
// Steps and ordering matter:
//
// 1. screenIndex entry is dropped FIRST so any in-flight frames still in the
// device's TCP send buffer arrive at handleScreenFrame with deviceID=""
// and are silently dropped. Without this they'd be relayed under the
// same deviceID as the new sub-conn — that's the "frames from two
// monitors interleaved in one stream" symptom: the browser decoder sees
// a mix of two independent x264 SPS/PPS sequences and renders an
// alternating / glitchy picture.
//
// 2. COMMAND_BYE is sent so the C++ client's IOCPClient exits via the
// clean StopRunning() path. Without it the client treats FIN as a
// network blip and fires m_ReconnectFunc, which opens a fresh sub-conn
// that never sends BITMAPINFO again, keeps the encoder thread alive,
// and ultimately wedges the device for ~10 s — see the original
// CloseScreen comment for the full failure mode.
//
// 3. The actual TCP Close happens 500 ms later on a goroutine, mirroring
// the C++ ScreenSpyDlg.cpp:842 (Sleep(500); CancelIO()) sequence so
// COMMAND_BYE has time to reach the device read loop before FIN does.
//
// No-op for a nil sc so callers can pass d.screenConn unconditionally.
func (h *Hub) retireScreenConn(sc *connection.Context) {
if sc == nil {
return
}
h.screenIndexMu.Lock()
delete(h.screenIndex, sc)
h.screenIndexMu.Unlock()
if h.sender != nil {
_ = h.sender(sc, []byte{protocol.CommandBye})
}
go func(c *connection.Context) {
time.Sleep(500 * time.Millisecond)
c.Close()
}(sc)
}
// Subscribe registers an EventHandler. The returned func removes it. // Subscribe registers an EventHandler. The returned func removes it.
// Multiple handlers are supported; each receives every event. // Multiple handlers are supported; each receives every event.
func (h *Hub) Subscribe(eh EventHandler) (unsubscribe func()) { func (h *Hub) Subscribe(eh EventHandler) (unsubscribe func()) {
@@ -397,6 +466,10 @@ func (h *Hub) Unregister(id string) {
// ListDevices returns a fresh snapshot slice. The caller may mutate it freely; // ListDevices returns a fresh snapshot slice. The caller may mutate it freely;
// it shares no state with the hub. // it shares no state with the hub.
//
// Sort by ConnectedAt asc (ID as tiebreaker) so the order stays stable across
// REST polls and WS pushes — Go's map iteration is intentionally randomized,
// which would otherwise reshuffle the UI list on every refresh.
func (h *Hub) ListDevices() []DeviceInfo { func (h *Hub) ListDevices() []DeviceInfo {
h.mu.RLock() h.mu.RLock()
defer h.mu.RUnlock() defer h.mu.RUnlock()
@@ -404,6 +477,12 @@ func (h *Hub) ListDevices() []DeviceInfo {
for _, d := range h.devices { for _, d := range h.devices {
out = append(out, deviceToInfo(d)) out = append(out, deviceToInfo(d))
} }
sort.Slice(out, func(i, j int) bool {
if out[i].ConnectedAt != out[j].ConnectedAt {
return out[i].ConnectedAt < out[j].ConnectedAt
}
return out[i].ID < out[j].ID
})
return out return out
} }
@@ -460,6 +539,9 @@ func (h *Hub) PublishCursor(deviceID string, index byte) {
// the disconnect callback would see Active=true with stale dimensions/IDR // the disconnect callback would see Active=true with stale dimensions/IDR
// and skip the COMMAND_SCREEN_SPY kick, leaving the page stuck on a "connected" // and skip the COMMAND_SCREEN_SPY kick, leaving the page stuck on a "connected"
// status with no frames ever arriving. // status with no frames ever arriving.
//
// The actual sub-conn teardown is delegated to retireScreenConn, which is
// shared with BindScreenConn's replacement path.
func (h *Hub) CloseScreen(deviceID string) { func (h *Hub) CloseScreen(deviceID string) {
h.mu.Lock() h.mu.Lock()
d, ok := h.devices[deviceID] d, ok := h.devices[deviceID]
@@ -473,52 +555,7 @@ func (h *Hub) CloseScreen(deviceID string) {
d.screenHeight = 0 d.screenHeight = 0
d.lastKeyframe = nil d.lastKeyframe = nil
h.mu.Unlock() h.mu.Unlock()
if sc != nil { h.retireScreenConn(sc)
// Drop the screenIndex entry SYNCHRONOUSLY so any in-flight frames
// still draining out of the device on this sub-conn (between our
// FIN and the device's clean-up) are silently dropped instead of
// being relayed to the freshly initialized browser decoder. Mixing
// frames from the old x264 SPS/PPS sequence with the new session's
// decoder produces the classic "every other quick reconnect goes
// black" symptom — old NAL units come in via the old ctx after we
// nulled d.screenConn but before OnDisconnect fires.
h.screenIndexMu.Lock()
delete(h.screenIndex, sc)
h.screenIndexMu.Unlock()
// Tell the client to shut its screen pipeline down gracefully.
// Without this, the client's IOCPClient sees recv()==0 as a network
// blip and fires m_ReconnectFunc, which:
// 1. Reconnects the sub-conn (~100 ms)
// 2. Re-sends ConnAuthPacket (no BITMAPINFO!)
// 3. Keeps the capture thread alive for ~10 s holding DXGI handles
// 4. ConnAuth eventually times out, ScreenManager exits
// Net effect: a second viewer arriving within ~10 s of leaving lands
// in the dead window where the device is still capturing for the old
// (now unrouted) sub-conn — page sits on "Waiting for video".
//
// COMMAND_BYE is what the C++ server sends via
// CDialogBase::SayByeBye (server/2015Remote/IOCPServer.h:248) before
// it tears down a sub-conn for the same reason. Client-side handler:
// CScreenManager::OnReceive case COMMAND_BYE
// (client/ScreenManager.cpp:812) sets m_bIsWorking=FALSE and calls
// StopRunning() — the clean exit path that does NOT trigger reconnect.
if h.sender != nil {
_ = h.sender(sc, []byte{protocol.CommandBye})
}
// Mirror the C++ flow (ScreenSpyDlg.cpp:842 — Sleep(500); CancelIO()).
// Give the device's read loop a moment to pull COMMAND_BYE off the
// wire before our FIN arrives; otherwise on a fast LAN the BYE byte
// can be coalesced with the FIN and the client's IOCPClient may
// observe recv()==0 first and trigger reconnect anyway.
// Run the close on a goroutine so the caller (web handler) isn't
// blocked for 500 ms. screenIndex is already cleared above, so
// in-flight frames during the grace window are silently dropped.
go func(c *connection.Context) {
time.Sleep(500 * time.Millisecond)
c.Close()
}(sc)
}
} }
// PublishResolution announces a new (or first-ever) screen geometry for a // PublishResolution announces a new (or first-ever) screen geometry for a

View File

@@ -109,6 +109,7 @@ const (
CommandNext byte = 30 // COMMAND_NEXT - "control-side dialog is open, you may stream" CommandNext byte = 30 // COMMAND_NEXT - "control-side dialog is open, you may stream"
CommandShell byte = 40 // COMMAND_SHELL - ask device to open a shell sub-connection CommandShell byte = 40 // COMMAND_SHELL - ask device to open a shell sub-connection
CommandTerminalRsize byte = 81 // CMD_TERMINAL_RESIZE - [cmd:1][cols:2 LE][rows:2 LE] CommandTerminalRsize byte = 81 // CMD_TERMINAL_RESIZE - [cmd:1][cols:2 LE][rows:2 LE]
CmdRestoreConsole byte = 82 // CMD_RESTORE_CONSOLE - RDP session "归位": switch back to the console session and restart capture
CommandBye byte = 204 // COMMAND_BYE - disconnect CommandBye byte = 204 // COMMAND_BYE - disconnect
CommandHeartbeat byte = 216 // CMD_HEARTBEAT_ACK CommandHeartbeat byte = 216 // CMD_HEARTBEAT_ACK
@@ -382,7 +383,22 @@ type LoginInfo struct {
Reserved string // Contains additional info separated by | Reserved string // Contains additional info separated by |
} }
// ParseLoginInfo parses LOGIN_INFOR from data // ParseLoginInfo parses LOGIN_INFOR from data.
//
// Encoding: text fields are GBK on legacy Windows clients and UTF-8 on modern
// clients that set CLIENT_CAP_UTF8 (always on for LNX / MAC). Picking the
// wrong codec mangles non-ASCII characters — e.g. a German location string
// "Nürnberg" sent as UTF-8 (4E C3 BC 72 ...) and force-decoded as GBK turns
// into mojibake. The heartbeat path already honors this via DecodeClientString
// (see cmd/main.go handleHeartbeat); ParseLoginInfo previously did not, so
// every login string from a UTF-8 client was being misread.
//
// To get encoding right we have a chicken-and-egg problem: capability lives
// in ModuleVersion (offset 164) and clientType lives in Reserved field 0
// (offset 476) — but Reserved itself needs that information to decode. Both
// "discriminator" values are pure ASCII (hex digits, "Windows"/"LNX"/"MAC"),
// so we can extract them with a UTF-8 read and then re-decode the actual
// user-text fields with the correct codec.
func ParseLoginInfo(data []byte) (*LoginInfo, error) { func ParseLoginInfo(data []byte) (*LoginInfo, error) {
if len(data) < 100 { // Minimum size check if len(data) < 100 { // Minimum size check
return nil, ErrInvalidData return nil, ErrInvalidData
@@ -392,64 +408,61 @@ func ParseLoginInfo(data []byte) (*LoginInfo, error) {
Token: data[0], Token: data[0],
} }
// Parse OS version info (offset 1, 156 bytes) // CPU MHz, WebCam, Speed — fixed-width binary, encoding-independent.
// The C++ client fills this with a readable string like "Windows 10" via getSystemName()
if len(data) >= OffsetOsVerInfoEx+156 {
info.OsVerInfo = parseOsVersionInfo(data[OffsetOsVerInfoEx : OffsetOsVerInfoEx+156])
}
// Parse CPU MHz (offset 160, 4 bytes)
if len(data) >= OffsetCPUMHz+4 { if len(data) >= OffsetCPUMHz+4 {
info.CPUMHz = binary.LittleEndian.Uint32(data[OffsetCPUMHz:]) info.CPUMHz = binary.LittleEndian.Uint32(data[OffsetCPUMHz:])
} }
// Parse module version (offset 164, 24 bytes)
// This contains date string like "Dec 19 2025"
if len(data) >= OffsetModuleVersion+24 {
info.ModuleVersion = GbkToUTF8(data[OffsetModuleVersion : OffsetModuleVersion+24])
}
// Parse PC name (offset 188, 240 bytes)
if len(data) >= OffsetPCName+240 {
info.PCName = GbkToUTF8(data[OffsetPCName : OffsetPCName+240])
}
// Parse Master ID (offset 428, 20 bytes)
if len(data) >= OffsetMasterID+20 {
info.MasterID = GbkToUTF8(data[OffsetMasterID : OffsetMasterID+20])
}
// Parse WebCam exist (offset 448, 4 bytes)
if len(data) >= OffsetWebCamExist+4 { if len(data) >= OffsetWebCamExist+4 {
info.WebCamExist = binary.LittleEndian.Uint32(data[OffsetWebCamExist:]) != 0 info.WebCamExist = binary.LittleEndian.Uint32(data[OffsetWebCamExist:]) != 0
} }
// Parse Speed (offset 452, 4 bytes)
if len(data) >= OffsetSpeed+4 { if len(data) >= OffsetSpeed+4 {
info.Speed = binary.LittleEndian.Uint32(data[OffsetSpeed:]) info.Speed = binary.LittleEndian.Uint32(data[OffsetSpeed:])
} }
// Parse Start time (offset 456, 20 bytes) // ModuleVersion is "version-capabilityHex" — pure ASCII (e.g. "Dec 19
if len(data) >= OffsetStartTime+20 { // 2025-0006"). Safe to read as UTF-8 regardless of client codec.
info.StartTime = GbkToUTF8(data[OffsetStartTime : OffsetStartTime+20]) if len(data) >= OffsetModuleVersion+24 {
info.ModuleVersion = Utf8CleanString(data[OffsetModuleVersion : OffsetModuleVersion+24])
}
_, capability, _ := strings.Cut(info.ModuleVersion, "-")
// Peek at Reserved field 0 (RES_CLIENT_TYPE: "Windows" / "LNX" / "MAC")
// — pure ASCII, so we can read raw bytes without knowing the codec.
// LNX / MAC clients are implicitly UTF-8 even when capability is absent.
clientType := ""
if len(data) > OffsetReserved {
raw := data[OffsetReserved:min(OffsetReserved+512, len(data))]
if nul := bytes.IndexByte(raw, 0); nul >= 0 {
raw = raw[:nul]
}
head, _, _ := bytes.Cut(raw, []byte("|"))
clientType = string(head)
} }
// Parse Reserved (offset 476, 512 bytes) - contains additional info // Now decode every user-text field with the client's actual codec.
decode := func(b []byte) string { return DecodeClientString(b, capability, clientType) }
if len(data) >= OffsetOsVerInfoEx+156 {
info.OsVerInfo = decode(data[OffsetOsVerInfoEx : OffsetOsVerInfoEx+156])
}
if len(data) >= OffsetPCName+240 {
info.PCName = decode(data[OffsetPCName : OffsetPCName+240])
}
if len(data) >= OffsetMasterID+20 {
info.MasterID = decode(data[OffsetMasterID : OffsetMasterID+20])
}
if len(data) >= OffsetStartTime+20 {
info.StartTime = decode(data[OffsetStartTime : OffsetStartTime+20])
}
if len(data) >= OffsetReserved+512 { if len(data) >= OffsetReserved+512 {
info.Reserved = GbkToUTF8(data[OffsetReserved : OffsetReserved+512]) info.Reserved = decode(data[OffsetReserved : OffsetReserved+512])
} else if len(data) > OffsetReserved { } else if len(data) > OffsetReserved {
info.Reserved = GbkToUTF8(data[OffsetReserved:]) info.Reserved = decode(data[OffsetReserved:])
} }
return info, nil return info, nil
} }
// parseOsVersionInfo parses the OS version info field
// The C++ client fills this with a readable string like "Windows 10" via getSystemName()
func parseOsVersionInfo(data []byte) string {
return GbkToUTF8(data)
}
// ParseReserved parses the reserved field into a slice of strings // ParseReserved parses the reserved field into a slice of strings
func (info *LoginInfo) ParseReserved() []string { func (info *LoginInfo) ParseReserved() []string {
if info.Reserved == "" { if info.Reserved == "" {

View File

@@ -33,7 +33,7 @@ func (h *wsHub) dispatch(c *wsClient, cmd string, raw []byte) {
case "connect": case "connect":
h.handleConnect(c, raw) h.handleConnect(c, raw)
case "rdp_reset": case "rdp_reset":
// silently ignored — UI uses this as a fire-and-forget h.handleRdpReset(c, raw)
case "mouse": case "mouse":
h.handleMouse(c, raw) h.handleMouse(c, raw)
case "key": case "key":
@@ -311,6 +311,36 @@ func (h *wsHub) handleConnect(c *wsClient, raw []byte) {
c.queue(mustJSON(map[string]any{"cmd": "connect_result", "ok": true})) c.queue(mustJSON(map[string]any{"cmd": "connect_result", "ok": true}))
} }
// handleRdpReset asks the device to switch its screen capture back to the
// physical console session ("RDP 会话归位"). Useful when someone has RDP'd into
// the box: the device's screen thread is by default attached to whatever WTS
// session is currently active, so the operator may otherwise see a login
// screen or a different user's desktop instead of the local console.
//
// Fire-and-forget on purpose, matching the C++ server and the browser UI —
// front-end ignores any reply, so we don't send one. Failures (device offline,
// no active screen session, browser hasn't called `connect` yet) are warn-
// logged server-side only.
func (h *wsHub) handleRdpReset(c *wsClient, raw []byte) {
if !h.requireAuth(c, raw, "rdp_reset_result") {
return
}
deviceID := c.watching
if deviceID == "" {
h.log.Warn("rdp_reset: no device watched (addr=%s role=%s)", c.addr, c.role)
return
}
// CMD_RESTORE_CONSOLE must go through the screen sub-conn — the client
// dispatches it from CScreenManager::OnReceive, which only reads from
// the screen sub-conn (see client/ScreenManager.cpp:996). Sending on the
// main conn would silently no-op.
if err := h.devices.SendToScreen(deviceID, []byte{protocol.CmdRestoreConsole}); err != nil {
h.log.Warn("rdp_reset: device=%s: %v", deviceID, err)
return
}
h.log.Info("rdp_reset sent: device=%s", deviceID)
}
func (h *wsHub) handleGetDevices(c *wsClient, raw []byte) { func (h *wsHub) handleGetDevices(c *wsClient, raw []byte) {
if !h.requireAuth(c, raw, "device_list") { if !h.requireAuth(c, raw, "device_list") {
return return