From d757c33bcb4840c08b2bc362ff068eaf5cce4c21 Mon Sep 17 00:00:00 2001 From: yuanyuanxiang <962914132@qq.com> Date: Tue, 19 May 2026 16:28:01 +0200 Subject: [PATCH] Fix(Go): stable device list ordering + RDP-reset handler Fix UTF-8 login text decode + stale screen sub-conn retirement --- server/go/hub/hub.go | 129 +++++++++++++++++++++------------ server/go/protocol/commands.go | 93 ++++++++++++++---------- server/go/web/ws_handlers.go | 32 +++++++- 3 files changed, 167 insertions(+), 87 deletions(-) diff --git a/server/go/hub/hub.go b/server/go/hub/hub.go index 9c25e7d..22c3e61 100644 --- a/server/go/hub/hub.go +++ b/server/go/hub/hub.go @@ -10,6 +10,7 @@ package hub import ( "errors" + "sort" "sync" "time" @@ -280,6 +281,14 @@ func (h *Hub) SendToScreen(id string, data []byte) error { // just sent TOKEN_BITMAPINFO) with the device identified by clientID. // Returns false if the device is not registered — callers should drop the // orphan connection in that case. +// +// If the device already has a screen sub-conn bound (typically because the +// client's monitor-poll logic opened a new one without the previous viewer +// going through CloseScreen — e.g. multiple open/close cycles where each +// cycle picks a different display), the previous sub-conn is retired via +// retireScreenConn. Without this, both sub-conns keep streaming under the +// same device ID and the browser sees H.264 frames from two encoders +// interleaved, rendering as a picture that jumps between monitors. func (h *Hub) BindScreenConn(deviceID string, ctx *connection.Context) bool { if deviceID == "" || ctx == nil { return false @@ -290,12 +299,29 @@ func (h *Hub) BindScreenConn(deviceID string, ctx *connection.Context) bool { h.mu.Unlock() return false } + old := d.screenConn d.screenConn = ctx + // The cached resolution and last IDR belong to the old encoder's + // stream. A viewer joining now must wait for the new sub-conn's + // TOKEN_BITMAPINFO + first IDR; serving the old monitor's keyframe + // to a decoder that's about to receive a different SPS/PPS would + // produce the same mixed-stream corruption retireScreenConn exists + // to prevent. + replacing := old != nil && old != ctx + if replacing { + d.screenWidth = 0 + d.screenHeight = 0 + d.lastKeyframe = nil + } h.mu.Unlock() h.screenIndexMu.Lock() h.screenIndex[ctx] = deviceID h.screenIndexMu.Unlock() + + if replacing { + h.retireScreenConn(old) + } return true } @@ -332,6 +358,49 @@ func (h *Hub) UnbindScreenConn(ctx *connection.Context) { h.mu.Unlock() } +// retireScreenConn tears down a screen sub-conn that's being replaced or +// closed. Shared by CloseScreen (last viewer left) and BindScreenConn (device +// opened a new sub-conn that's superseding this one, e.g. when the client's +// monitor-poll logic switches to a different display). +// +// Steps and ordering matter: +// +// 1. screenIndex entry is dropped FIRST so any in-flight frames still in the +// device's TCP send buffer arrive at handleScreenFrame with deviceID="" +// and are silently dropped. Without this they'd be relayed under the +// same deviceID as the new sub-conn — that's the "frames from two +// monitors interleaved in one stream" symptom: the browser decoder sees +// a mix of two independent x264 SPS/PPS sequences and renders an +// alternating / glitchy picture. +// +// 2. COMMAND_BYE is sent so the C++ client's IOCPClient exits via the +// clean StopRunning() path. Without it the client treats FIN as a +// network blip and fires m_ReconnectFunc, which opens a fresh sub-conn +// that never sends BITMAPINFO again, keeps the encoder thread alive, +// and ultimately wedges the device for ~10 s — see the original +// CloseScreen comment for the full failure mode. +// +// 3. The actual TCP Close happens 500 ms later on a goroutine, mirroring +// the C++ ScreenSpyDlg.cpp:842 (Sleep(500); CancelIO()) sequence so +// COMMAND_BYE has time to reach the device read loop before FIN does. +// +// No-op for a nil sc so callers can pass d.screenConn unconditionally. +func (h *Hub) retireScreenConn(sc *connection.Context) { + if sc == nil { + return + } + h.screenIndexMu.Lock() + delete(h.screenIndex, sc) + h.screenIndexMu.Unlock() + if h.sender != nil { + _ = h.sender(sc, []byte{protocol.CommandBye}) + } + go func(c *connection.Context) { + time.Sleep(500 * time.Millisecond) + c.Close() + }(sc) +} + // Subscribe registers an EventHandler. The returned func removes it. // Multiple handlers are supported; each receives every event. func (h *Hub) Subscribe(eh EventHandler) (unsubscribe func()) { @@ -397,6 +466,10 @@ func (h *Hub) Unregister(id string) { // ListDevices returns a fresh snapshot slice. The caller may mutate it freely; // it shares no state with the hub. +// +// Sort by ConnectedAt asc (ID as tiebreaker) so the order stays stable across +// REST polls and WS pushes — Go's map iteration is intentionally randomized, +// which would otherwise reshuffle the UI list on every refresh. func (h *Hub) ListDevices() []DeviceInfo { h.mu.RLock() defer h.mu.RUnlock() @@ -404,6 +477,12 @@ func (h *Hub) ListDevices() []DeviceInfo { for _, d := range h.devices { out = append(out, deviceToInfo(d)) } + sort.Slice(out, func(i, j int) bool { + if out[i].ConnectedAt != out[j].ConnectedAt { + return out[i].ConnectedAt < out[j].ConnectedAt + } + return out[i].ID < out[j].ID + }) return out } @@ -460,6 +539,9 @@ func (h *Hub) PublishCursor(deviceID string, index byte) { // the disconnect callback would see Active=true with stale dimensions/IDR // and skip the COMMAND_SCREEN_SPY kick, leaving the page stuck on a "connected" // status with no frames ever arriving. +// +// The actual sub-conn teardown is delegated to retireScreenConn, which is +// shared with BindScreenConn's replacement path. func (h *Hub) CloseScreen(deviceID string) { h.mu.Lock() d, ok := h.devices[deviceID] @@ -473,52 +555,7 @@ func (h *Hub) CloseScreen(deviceID string) { d.screenHeight = 0 d.lastKeyframe = nil h.mu.Unlock() - if sc != nil { - // Drop the screenIndex entry SYNCHRONOUSLY so any in-flight frames - // still draining out of the device on this sub-conn (between our - // FIN and the device's clean-up) are silently dropped instead of - // being relayed to the freshly initialized browser decoder. Mixing - // frames from the old x264 SPS/PPS sequence with the new session's - // decoder produces the classic "every other quick reconnect goes - // black" symptom — old NAL units come in via the old ctx after we - // nulled d.screenConn but before OnDisconnect fires. - h.screenIndexMu.Lock() - delete(h.screenIndex, sc) - h.screenIndexMu.Unlock() - - // Tell the client to shut its screen pipeline down gracefully. - // Without this, the client's IOCPClient sees recv()==0 as a network - // blip and fires m_ReconnectFunc, which: - // 1. Reconnects the sub-conn (~100 ms) - // 2. Re-sends ConnAuthPacket (no BITMAPINFO!) - // 3. Keeps the capture thread alive for ~10 s holding DXGI handles - // 4. ConnAuth eventually times out, ScreenManager exits - // Net effect: a second viewer arriving within ~10 s of leaving lands - // in the dead window where the device is still capturing for the old - // (now unrouted) sub-conn — page sits on "Waiting for video". - // - // COMMAND_BYE is what the C++ server sends via - // CDialogBase::SayByeBye (server/2015Remote/IOCPServer.h:248) before - // it tears down a sub-conn for the same reason. Client-side handler: - // CScreenManager::OnReceive case COMMAND_BYE - // (client/ScreenManager.cpp:812) sets m_bIsWorking=FALSE and calls - // StopRunning() — the clean exit path that does NOT trigger reconnect. - if h.sender != nil { - _ = h.sender(sc, []byte{protocol.CommandBye}) - } - // Mirror the C++ flow (ScreenSpyDlg.cpp:842 — Sleep(500); CancelIO()). - // Give the device's read loop a moment to pull COMMAND_BYE off the - // wire before our FIN arrives; otherwise on a fast LAN the BYE byte - // can be coalesced with the FIN and the client's IOCPClient may - // observe recv()==0 first and trigger reconnect anyway. - // Run the close on a goroutine so the caller (web handler) isn't - // blocked for 500 ms. screenIndex is already cleared above, so - // in-flight frames during the grace window are silently dropped. - go func(c *connection.Context) { - time.Sleep(500 * time.Millisecond) - c.Close() - }(sc) - } + h.retireScreenConn(sc) } // PublishResolution announces a new (or first-ever) screen geometry for a diff --git a/server/go/protocol/commands.go b/server/go/protocol/commands.go index 0ae95d9..a4f1eaf 100644 --- a/server/go/protocol/commands.go +++ b/server/go/protocol/commands.go @@ -109,6 +109,7 @@ const ( CommandNext byte = 30 // COMMAND_NEXT - "control-side dialog is open, you may stream" CommandShell byte = 40 // COMMAND_SHELL - ask device to open a shell sub-connection CommandTerminalRsize byte = 81 // CMD_TERMINAL_RESIZE - [cmd:1][cols:2 LE][rows:2 LE] + CmdRestoreConsole byte = 82 // CMD_RESTORE_CONSOLE - RDP session "归位": switch back to the console session and restart capture CommandBye byte = 204 // COMMAND_BYE - disconnect CommandHeartbeat byte = 216 // CMD_HEARTBEAT_ACK @@ -382,7 +383,22 @@ type LoginInfo struct { Reserved string // Contains additional info separated by | } -// ParseLoginInfo parses LOGIN_INFOR from data +// ParseLoginInfo parses LOGIN_INFOR from data. +// +// Encoding: text fields are GBK on legacy Windows clients and UTF-8 on modern +// clients that set CLIENT_CAP_UTF8 (always on for LNX / MAC). Picking the +// wrong codec mangles non-ASCII characters — e.g. a German location string +// "Nürnberg" sent as UTF-8 (4E C3 BC 72 ...) and force-decoded as GBK turns +// into mojibake. The heartbeat path already honors this via DecodeClientString +// (see cmd/main.go handleHeartbeat); ParseLoginInfo previously did not, so +// every login string from a UTF-8 client was being misread. +// +// To get encoding right we have a chicken-and-egg problem: capability lives +// in ModuleVersion (offset 164) and clientType lives in Reserved field 0 +// (offset 476) — but Reserved itself needs that information to decode. Both +// "discriminator" values are pure ASCII (hex digits, "Windows"/"LNX"/"MAC"), +// so we can extract them with a UTF-8 read and then re-decode the actual +// user-text fields with the correct codec. func ParseLoginInfo(data []byte) (*LoginInfo, error) { if len(data) < 100 { // Minimum size check return nil, ErrInvalidData @@ -392,64 +408,61 @@ func ParseLoginInfo(data []byte) (*LoginInfo, error) { Token: data[0], } - // Parse OS version info (offset 1, 156 bytes) - // The C++ client fills this with a readable string like "Windows 10" via getSystemName() - if len(data) >= OffsetOsVerInfoEx+156 { - info.OsVerInfo = parseOsVersionInfo(data[OffsetOsVerInfoEx : OffsetOsVerInfoEx+156]) - } - - // Parse CPU MHz (offset 160, 4 bytes) + // CPU MHz, WebCam, Speed — fixed-width binary, encoding-independent. if len(data) >= OffsetCPUMHz+4 { info.CPUMHz = binary.LittleEndian.Uint32(data[OffsetCPUMHz:]) } - - // Parse module version (offset 164, 24 bytes) - // This contains date string like "Dec 19 2025" - if len(data) >= OffsetModuleVersion+24 { - info.ModuleVersion = GbkToUTF8(data[OffsetModuleVersion : OffsetModuleVersion+24]) - } - - // Parse PC name (offset 188, 240 bytes) - if len(data) >= OffsetPCName+240 { - info.PCName = GbkToUTF8(data[OffsetPCName : OffsetPCName+240]) - } - - // Parse Master ID (offset 428, 20 bytes) - if len(data) >= OffsetMasterID+20 { - info.MasterID = GbkToUTF8(data[OffsetMasterID : OffsetMasterID+20]) - } - - // Parse WebCam exist (offset 448, 4 bytes) if len(data) >= OffsetWebCamExist+4 { info.WebCamExist = binary.LittleEndian.Uint32(data[OffsetWebCamExist:]) != 0 } - - // Parse Speed (offset 452, 4 bytes) if len(data) >= OffsetSpeed+4 { info.Speed = binary.LittleEndian.Uint32(data[OffsetSpeed:]) } - // Parse Start time (offset 456, 20 bytes) - if len(data) >= OffsetStartTime+20 { - info.StartTime = GbkToUTF8(data[OffsetStartTime : OffsetStartTime+20]) + // ModuleVersion is "version-capabilityHex" — pure ASCII (e.g. "Dec 19 + // 2025-0006"). Safe to read as UTF-8 regardless of client codec. + if len(data) >= OffsetModuleVersion+24 { + info.ModuleVersion = Utf8CleanString(data[OffsetModuleVersion : OffsetModuleVersion+24]) + } + _, capability, _ := strings.Cut(info.ModuleVersion, "-") + + // Peek at Reserved field 0 (RES_CLIENT_TYPE: "Windows" / "LNX" / "MAC") + // — pure ASCII, so we can read raw bytes without knowing the codec. + // LNX / MAC clients are implicitly UTF-8 even when capability is absent. + clientType := "" + if len(data) > OffsetReserved { + raw := data[OffsetReserved:min(OffsetReserved+512, len(data))] + if nul := bytes.IndexByte(raw, 0); nul >= 0 { + raw = raw[:nul] + } + head, _, _ := bytes.Cut(raw, []byte("|")) + clientType = string(head) } - // Parse Reserved (offset 476, 512 bytes) - contains additional info + // Now decode every user-text field with the client's actual codec. + decode := func(b []byte) string { return DecodeClientString(b, capability, clientType) } + + if len(data) >= OffsetOsVerInfoEx+156 { + info.OsVerInfo = decode(data[OffsetOsVerInfoEx : OffsetOsVerInfoEx+156]) + } + if len(data) >= OffsetPCName+240 { + info.PCName = decode(data[OffsetPCName : OffsetPCName+240]) + } + if len(data) >= OffsetMasterID+20 { + info.MasterID = decode(data[OffsetMasterID : OffsetMasterID+20]) + } + if len(data) >= OffsetStartTime+20 { + info.StartTime = decode(data[OffsetStartTime : OffsetStartTime+20]) + } if len(data) >= OffsetReserved+512 { - info.Reserved = GbkToUTF8(data[OffsetReserved : OffsetReserved+512]) + info.Reserved = decode(data[OffsetReserved : OffsetReserved+512]) } else if len(data) > OffsetReserved { - info.Reserved = GbkToUTF8(data[OffsetReserved:]) + info.Reserved = decode(data[OffsetReserved:]) } return info, nil } -// parseOsVersionInfo parses the OS version info field -// The C++ client fills this with a readable string like "Windows 10" via getSystemName() -func parseOsVersionInfo(data []byte) string { - return GbkToUTF8(data) -} - // ParseReserved parses the reserved field into a slice of strings func (info *LoginInfo) ParseReserved() []string { if info.Reserved == "" { diff --git a/server/go/web/ws_handlers.go b/server/go/web/ws_handlers.go index 8d9f3f7..8d69349 100644 --- a/server/go/web/ws_handlers.go +++ b/server/go/web/ws_handlers.go @@ -33,7 +33,7 @@ func (h *wsHub) dispatch(c *wsClient, cmd string, raw []byte) { case "connect": h.handleConnect(c, raw) case "rdp_reset": - // silently ignored — UI uses this as a fire-and-forget + h.handleRdpReset(c, raw) case "mouse": h.handleMouse(c, raw) case "key": @@ -311,6 +311,36 @@ func (h *wsHub) handleConnect(c *wsClient, raw []byte) { c.queue(mustJSON(map[string]any{"cmd": "connect_result", "ok": true})) } +// handleRdpReset asks the device to switch its screen capture back to the +// physical console session ("RDP 会话归位"). Useful when someone has RDP'd into +// the box: the device's screen thread is by default attached to whatever WTS +// session is currently active, so the operator may otherwise see a login +// screen or a different user's desktop instead of the local console. +// +// Fire-and-forget on purpose, matching the C++ server and the browser UI — +// front-end ignores any reply, so we don't send one. Failures (device offline, +// no active screen session, browser hasn't called `connect` yet) are warn- +// logged server-side only. +func (h *wsHub) handleRdpReset(c *wsClient, raw []byte) { + if !h.requireAuth(c, raw, "rdp_reset_result") { + return + } + deviceID := c.watching + if deviceID == "" { + h.log.Warn("rdp_reset: no device watched (addr=%s role=%s)", c.addr, c.role) + return + } + // CMD_RESTORE_CONSOLE must go through the screen sub-conn — the client + // dispatches it from CScreenManager::OnReceive, which only reads from + // the screen sub-conn (see client/ScreenManager.cpp:996). Sending on the + // main conn would silently no-op. + if err := h.devices.SendToScreen(deviceID, []byte{protocol.CmdRestoreConsole}); err != nil { + h.log.Warn("rdp_reset: device=%s: %v", deviceID, err) + return + } + h.log.Info("rdp_reset sent: device=%s", deviceID) +} + func (h *wsHub) handleGetDevices(c *wsClient, raw []byte) { if !h.requireAuth(c, raw, "device_list") { return