Feature(Go): Screen frame relay end-to-end with graceful client BYE (Phase 4)

This commit is contained in:
yuanyuanxiang
2026-05-18 01:00:56 +02:00
committed by yuanyuanxiang
parent 4ea6ed252c
commit fba4143dd1
10 changed files with 999 additions and 74 deletions

View File

@@ -2,6 +2,9 @@ package web
import (
	"encoding/json"
	"errors"

	"github.com/yuanyuanxiang/SimpleRemoter/server/go/hub"
	"github.com/yuanyuanxiang/SimpleRemoter/server/go/protocol"
)
// dispatch routes one inbound message to its handler. The `raw` payload is
@@ -21,12 +24,10 @@ func (h *wsHub) dispatch(c *wsClient, cmd string, raw []byte) {
case "ping":
// no-op heartbeat; the read itself was the keep-alive signal
case "disconnect":
c.queue([]byte(`{"cmd":"disconnect_result","ok":true}`))
h.handleDisconnect(c, raw)
// Reserved for later phases. Reply with a benign failure so the UI can
// surface a clear error instead of spinning indefinitely.
case "connect":
h.replyNotImplemented(c, "connect_result", "Screen sharing not yet implemented on Go server")
h.handleConnect(c, raw)
case "rdp_reset":
// silently ignored — UI uses this as a fire-and-forget
case "mouse", "key":
@@ -116,6 +117,125 @@ func (h *wsHub) handleLogin(c *wsClient, raw []byte) {
}))
}
// handleConnect kicks off a screen-sharing session for the browser. We send
// COMMAND_SCREEN_SPY to the device's main TCP connection; the device then
// opens a new sub-connection (TOKEN_BITMAPINFO) which the TCP side binds to
// the device via hub.BindScreenConn. Frame relay to the browser is handled
// in Phase 4.2 once frames actually arrive.
//
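// Reply semantics: on a cold start, returning connect_result.ok=true (without
// width/height) triggers the browser's "Waiting for video..." spinner. We
// can't deliver width/height in that case because we don't yet know them —
// they show up in the first TOKEN_BITMAPINFO from the device. When a screen
// session is already active for the device, the cached width/height are
// included in connect_result instead.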
// handleDisconnect stops this client from watching whatever device it was
// attached to, acknowledges with disconnect_result, and — when no remaining
// client has that device in its `watching` field — closes the device's screen
// sub-connection via h.devices.CloseScreen. Closing the TCP sub-conn is the
// signal the C++ device firmware uses to stop screen capture, so this is how
// we ask the device to free its encoder.
//
// The request payload is not inspected.
func (h *wsHub) handleDisconnect(c *wsClient, _ []byte) {
	// Same locking discipline as handleConnect: c.watching is mutated under
	// h.mu so the event-handler readers (OnResolutionChange/OnScreenFrame)
	// observe a consistent value.
	h.mu.Lock()
	deviceID := c.watching
	c.watching = ""
	h.mu.Unlock()

	// Acknowledge before any teardown work.
	c.queue([]byte(`{"cmd":"disconnect_result","ok":true}`))

	// The client was not watching anything: nothing to tear down.
	if deviceID == "" {
		return
	}
	// Somebody else is still watching this device; keep the session alive.
	if h.countWatchers(deviceID) > 0 {
		return
	}
	h.devices.CloseScreen(deviceID)
}
// countWatchers reports how many entries in h.clients currently have their
// `watching` field set to deviceID. It takes h.mu for reading while it scans.
// Called from disconnect paths to decide whether a device's screen session
// can be torn down.
func (h *wsHub) countWatchers(deviceID string) int {
	h.mu.RLock()
	defer h.mu.RUnlock()

	watchers := 0
	for client := range h.clients {
		if client.watching != deviceID {
			continue
		}
		watchers++
	}
	return watchers
}
// handleConnect starts, or joins, a screen-sharing session for the device
// named by the "id" field of the request and answers with connect_result.
//
// Outcomes:
//   - requireAuth returns false: the handler returns without queuing anything
//     itself (presumably requireAuth has already replied — confirm there).
//   - malformed JSON or empty id: connect_result ok=false, msg "Bad request".
//   - a screen session is already active for the device (warm path):
//     connect_result ok=true with the cached width/height, followed by the
//     cached keyframe as a binary message when one is available.
//   - otherwise (cold start): COMMAND_SCREEN_SPY is sent to the device and
//     connect_result ok=true is returned without width/height.
//   - SendToDevice fails: c.watching is reset to "" and connect_result
//     ok=false is returned with "Device offline" or
//     "Failed to start screen capture".
func (h *wsHub) handleConnect(c *wsClient, raw []byte) {
	if !h.requireAuth(c, raw, "connect_result") {
		return
	}

	var in struct {
		ID string `json:"id"`
	}
	if err := json.Unmarshal(raw, &in); err != nil || in.ID == "" {
		c.queue(mustJSON(map[string]any{"cmd": "connect_result", "ok": false, "msg": "Bad request"}))
		return
	}

	// If a screen session is already live for this device (another browser
	// is already watching), reuse it: hand the new viewer the current
	// resolution and the most recent IDR keyframe so its decoder can start
	// rendering immediately, without waiting for the next IDR (~15 s).
	cache := h.devices.ScreenState(in.ID)
	if cache.Active {
		c.queue(mustJSON(map[string]any{
			"cmd": "connect_result", "ok": true,
			"width": cache.Width, "height": cache.Height,
		}))
		if len(cache.Keyframe) > 0 {
			c.queueBinary(cache.Keyframe)
		}
		h.mu.Lock()
		c.watching = in.ID
		h.mu.Unlock()
		return
	}

	// No active session — kick the device to start capturing. We send the
	// same 32-byte COMMAND_SCREEN_SPY payload the C++ WebService sends:
	//   [0]=COMMAND_SCREEN_SPY, [1]=0 (GDI), [2]=ALGORITHM_H264,
	//   [3]=1 (multi-screen), [4..31]=0.
	cmd := make([]byte, 32)
	cmd[0] = protocol.CommandScreenSpy
	cmd[2] = protocol.AlgorithmH264
	cmd[3] = 1

	// CRITICAL: bind c.watching BEFORE asking the device to start capturing.
	// On fast reconnects the device's screen sub-conn handshake completes in
	// <100 ms, so TOKEN_BITMAPINFO and even the first H264 frame can arrive
	// before this handler finishes — and the resolution_changed / frame
	// broadcasts in wsHub filter on c.watching. With the assignment after
	// SendToDevice the new viewer silently misses the very first IDR and
	// resolution_changed, leaving the page stuck on "Waiting for video".
	//
	// The write must share the lock the event handlers use to read
	// c.watching (they iterate h.clients under h.mu.RLock). Without it the
	// write is a data race; on a fast reconnect the reader goroutine can keep
	// observing the previous value ("") long enough to drop the first
	// resolution_changed and the first IDR, which produces the exact "every
	// other quick reconnect goes black" symptom. The C++ server avoids it
	// because it performs the same state mutation under std::mutex and gets
	// the memory barrier as a bonus.
	h.mu.Lock()
	c.watching = in.ID
	h.mu.Unlock()

	if err := h.devices.SendToDevice(in.ID, cmd); err != nil {
		// Roll back the watching flag if we never managed to kick capture.
		h.mu.Lock()
		c.watching = ""
		h.mu.Unlock()

		// errors.Is (rather than !=) so that a wrapped ErrDeviceOffline is
		// still reported to the browser as "Device offline" instead of being
		// logged as an unexpected failure.
		msg := "Device offline"
		if !errors.Is(err, hub.ErrDeviceOffline) {
			msg = "Failed to start screen capture"
			h.log.Error("SendToDevice(%s): %v", in.ID, err)
		}
		c.queue(mustJSON(map[string]any{"cmd": "connect_result", "ok": false, "msg": msg}))
		return
	}

	h.log.Info("[timing] COMMAND_SCREEN_SPY sent to device=%s (cold start)", in.ID)
	c.queue(mustJSON(map[string]any{"cmd": "connect_result", "ok": true}))
}
func (h *wsHub) handleGetDevices(c *wsClient, raw []byte) {
if !h.requireAuth(c, raw, "device_list") {
return