Fix(Go): stable device list ordering + RDP-reset handler

Fix UTF-8 login text decode + stale screen sub-conn retirement
This commit is contained in:
yuanyuanxiang
2026-05-19 16:28:01 +02:00
parent 5af017bf09
commit d757c33bcb
3 changed files with 167 additions and 87 deletions

View File

@@ -10,6 +10,7 @@ package hub
import (
"errors"
"sort"
"sync"
"time"
@@ -280,6 +281,14 @@ func (h *Hub) SendToScreen(id string, data []byte) error {
// just sent TOKEN_BITMAPINFO) with the device identified by clientID.
// Returns false if the device is not registered — callers should drop the
// orphan connection in that case.
//
// If the device already has a screen sub-conn bound (typically because the
// client's monitor-poll logic opened a new one without the previous viewer
// going through CloseScreen — e.g. multiple open/close cycles where each
// cycle picks a different display), the previous sub-conn is retired via
// retireScreenConn. Without this, both sub-conns keep streaming under the
// same device ID and the browser sees H.264 frames from two encoders
// interleaved, rendering as a picture that jumps between monitors.
func (h *Hub) BindScreenConn(deviceID string, ctx *connection.Context) bool {
if deviceID == "" || ctx == nil {
return false
@@ -290,12 +299,29 @@ func (h *Hub) BindScreenConn(deviceID string, ctx *connection.Context) bool {
h.mu.Unlock()
return false
}
old := d.screenConn
d.screenConn = ctx
// The cached resolution and last IDR belong to the old encoder's
// stream. A viewer joining now must wait for the new sub-conn's
// TOKEN_BITMAPINFO + first IDR; serving the old monitor's keyframe
// to a decoder that's about to receive a different SPS/PPS would
// produce the same mixed-stream corruption retireScreenConn exists
// to prevent.
replacing := old != nil && old != ctx
if replacing {
d.screenWidth = 0
d.screenHeight = 0
d.lastKeyframe = nil
}
h.mu.Unlock()
h.screenIndexMu.Lock()
h.screenIndex[ctx] = deviceID
h.screenIndexMu.Unlock()
if replacing {
h.retireScreenConn(old)
}
return true
}
@@ -332,6 +358,49 @@ func (h *Hub) UnbindScreenConn(ctx *connection.Context) {
h.mu.Unlock()
}
// retireScreenConn tears down a screen sub-conn that's being replaced or
// closed. Shared by CloseScreen (last viewer left) and BindScreenConn (device
// opened a new sub-conn that's superseding this one, e.g. when the client's
// monitor-poll logic switches to a different display).
//
// Steps and ordering matter:
//
// 1. screenIndex entry is dropped FIRST so any in-flight frames still in the
// device's TCP send buffer arrive at handleScreenFrame with deviceID=""
// and are silently dropped. Without this they'd be relayed under the
// same deviceID as the new sub-conn — that's the "frames from two
// monitors interleaved in one stream" symptom: the browser decoder sees
// a mix of two independent x264 SPS/PPS sequences and renders an
// alternating / glitchy picture.
//
// 2. COMMAND_BYE is sent so the C++ client's IOCPClient exits via the
// clean StopRunning() path. Without it the client treats FIN as a
// network blip and fires m_ReconnectFunc, which opens a fresh sub-conn
// that never sends BITMAPINFO again, keeps the encoder thread alive,
// and ultimately wedges the device for ~10 s — see the original
// CloseScreen comment for the full failure mode.
//
// 3. The actual TCP Close happens 500 ms later on a goroutine, mirroring
// the C++ ScreenSpyDlg.cpp:842 (Sleep(500); CancelIO()) sequence so
// COMMAND_BYE has time to reach the device read loop before FIN does.
//
// No-op for a nil sc so callers can pass d.screenConn unconditionally.
func (h *Hub) retireScreenConn(sc *connection.Context) {
if sc == nil {
return
}
h.screenIndexMu.Lock()
delete(h.screenIndex, sc)
h.screenIndexMu.Unlock()
if h.sender != nil {
_ = h.sender(sc, []byte{protocol.CommandBye})
}
go func(c *connection.Context) {
time.Sleep(500 * time.Millisecond)
c.Close()
}(sc)
}
// Subscribe registers an EventHandler. The returned func removes it.
// Multiple handlers are supported; each receives every event.
func (h *Hub) Subscribe(eh EventHandler) (unsubscribe func()) {
@@ -397,6 +466,10 @@ func (h *Hub) Unregister(id string) {
// ListDevices returns a fresh snapshot slice. The caller may mutate it freely;
// it shares no state with the hub.
//
// Sort by ConnectedAt asc (ID as tiebreaker) so the order stays stable across
// REST polls and WS pushes — Go's map iteration is intentionally randomized,
// which would otherwise reshuffle the UI list on every refresh.
func (h *Hub) ListDevices() []DeviceInfo {
h.mu.RLock()
defer h.mu.RUnlock()
@@ -404,6 +477,12 @@ func (h *Hub) ListDevices() []DeviceInfo {
for _, d := range h.devices {
out = append(out, deviceToInfo(d))
}
sort.Slice(out, func(i, j int) bool {
if out[i].ConnectedAt != out[j].ConnectedAt {
return out[i].ConnectedAt < out[j].ConnectedAt
}
return out[i].ID < out[j].ID
})
return out
}
@@ -460,6 +539,9 @@ func (h *Hub) PublishCursor(deviceID string, index byte) {
// the disconnect callback would see Active=true with stale dimensions/IDR
// and skip the COMMAND_SCREEN_SPY kick, leaving the page stuck on a "connected"
// status with no frames ever arriving.
//
// The actual sub-conn teardown is delegated to retireScreenConn, which is
// shared with BindScreenConn's replacement path.
func (h *Hub) CloseScreen(deviceID string) {
h.mu.Lock()
d, ok := h.devices[deviceID]
@@ -473,52 +555,7 @@ func (h *Hub) CloseScreen(deviceID string) {
d.screenHeight = 0
d.lastKeyframe = nil
h.mu.Unlock()
if sc != nil {
// Drop the screenIndex entry SYNCHRONOUSLY so any in-flight frames
// still draining out of the device on this sub-conn (between our
// FIN and the device's clean-up) are silently dropped instead of
// being relayed to the freshly initialized browser decoder. Mixing
// frames from the old x264 SPS/PPS sequence with the new session's
// decoder produces the classic "every other quick reconnect goes
// black" symptom — old NAL units come in via the old ctx after we
// nulled d.screenConn but before OnDisconnect fires.
h.screenIndexMu.Lock()
delete(h.screenIndex, sc)
h.screenIndexMu.Unlock()
// Tell the client to shut its screen pipeline down gracefully.
// Without this, the client's IOCPClient sees recv()==0 as a network
// blip and fires m_ReconnectFunc, which:
// 1. Reconnects the sub-conn (~100 ms)
// 2. Re-sends ConnAuthPacket (no BITMAPINFO!)
// 3. Keeps the capture thread alive for ~10 s holding DXGI handles
// 4. ConnAuth eventually times out, ScreenManager exits
// Net effect: a second viewer arriving within ~10 s of leaving lands
// in the dead window where the device is still capturing for the old
// (now unrouted) sub-conn — page sits on "Waiting for video".
//
// COMMAND_BYE is what the C++ server sends via
// CDialogBase::SayByeBye (server/2015Remote/IOCPServer.h:248) before
// it tears down a sub-conn for the same reason. Client-side handler:
// CScreenManager::OnReceive case COMMAND_BYE
// (client/ScreenManager.cpp:812) sets m_bIsWorking=FALSE and calls
// StopRunning() — the clean exit path that does NOT trigger reconnect.
if h.sender != nil {
_ = h.sender(sc, []byte{protocol.CommandBye})
}
// Mirror the C++ flow (ScreenSpyDlg.cpp:842 — Sleep(500); CancelIO()).
// Give the device's read loop a moment to pull COMMAND_BYE off the
// wire before our FIN arrives; otherwise on a fast LAN the BYE byte
// can be coalesced with the FIN and the client's IOCPClient may
// observe recv()==0 first and trigger reconnect anyway.
// Run the close on a goroutine so the caller (web handler) isn't
// blocked for 500 ms. screenIndex is already cleared above, so
// in-flight frames during the grace window are silently dropped.
go func(c *connection.Context) {
time.Sleep(500 * time.Millisecond)
c.Close()
}(sc)
}
h.retireScreenConn(sc)
}
// PublishResolution announces a new (or first-ever) screen geometry for a