From d7f38ecfdb2eb7403828dbaff9685fc208c42c70 Mon Sep 17 00:00:00 2001 From: yuanyuanxiang <962914132@qq.com> Date: Mon, 18 May 2026 15:03:42 +0200 Subject: [PATCH] Feature(Go): Web terminal relay with PTY mode and graceful close (Phase 6) --- server/go/README.md | 133 +++++++++-------- server/go/cmd/main.go | 124 +++++++++++++++- server/go/hub/hub.go | 254 ++++++++++++++++++++++++++++++++- server/go/hub/hub_test.go | 6 + server/go/protocol/commands.go | 52 +++++-- server/go/web/ws.go | 73 +++++++++- server/go/web/ws_handlers.go | 150 +++++++++++++++++-- 7 files changed, 696 insertions(+), 96 deletions(-) diff --git a/server/go/README.md b/server/go/README.md index 27c6be1..dc94308 100644 --- a/server/go/README.md +++ b/server/go/README.md @@ -43,31 +43,59 @@ server/go/ ## 核心特性 +底层基础设施: + - **高并发**: 基于 Goroutine 池管理并发连接 -- **协议兼容**: 支持原有 C++ 客户端的多种协议标识 (Hell/Hello/Shine/Fuck) -- **协议头解密**: 支持8种协议头加密方式 (V0-V6 + Default) -- **授权验证**: 支持 TOKEN_AUTH 和 Heartbeat HMAC-SHA256 双重授权验证 -- **XOR编码**: 支持 XOREncoder16 数据编码/解码 -- **ZSTD 压缩**: 使用高效的 ZSTD 算法进行数据压缩 -- **GBK编码**: 自动将 Windows 客户端的 GBK 编码转换为 UTF-8 -- **线程安全**: Buffer、连接管理器和 LastActive 均为线程安全设计 -- **优雅关闭**: 支持信号处理和优雅停机,自动释放资源 -- **可配置**: 支持自定义端口、最大连接数、超时时间等 -- **日志系统**: 基于 zerolog,支持文件输出、日志轮转、客户端上下线记录 -- **Web UI 服务**: 内建 HTTP server,编译期 `//go:embed` 嵌入页面和静态资源,免外部文件依赖 +- **协议兼容**: 支持原有客户端的多种协议标识 (Hell/Hello/Shine/Fuck) +- **协议头解密**: 支持 8 种协议头加密方式 (V0-V6 + Default) +- **授权验证**: TOKEN_AUTH 和 Heartbeat HMAC-SHA256 双重授权 +- **XOR 编码 / ZSTD 压缩**: 与客户端完全兼容 +- **字符编码自适应**: 根据客户端能力位选择 UTF-8 直通或 GBK→UTF-8 转换 +- **线程安全 / 优雅关闭 / 多端口监听 / 结构化日志** + +Web 应用能力 (Phase 3-7): + +- **Web 鉴权**: challenge-response 登录 + 不透明 token,与 users.json schema 互通 +- **设备列表与监控**: 在线设备 / RTT / 活动窗口 / 分辨率 实时下发 +- **Web 远程桌面**: 浏览器 WebCodecs 解码 H.264,二进制 WS 帧低延迟中继;late-join 自动重发最近 IDR;优雅 BYE 关闭防止客户端无意义重连 +- **鼠标 / 键盘输入**: Win32 消息映射 (`WM_*` / `VK_*` / `MK_*`),MSG64 48 字节布局直传客户端 +- **Web 终端**: xterm.js + Windows ConPTY / 旧 cmd 管道双模式;二进制 "TRM1" 
帧分流;尺寸自适应;单设备单 viewer +- **用户与分组**: admin 可创建/删除 viewer 账号、配置 allowed_groups,users.json 原子写入 ## 支持的命令 -当前已实现以下命令处理: +### 客户端 → 服务端 -| 命令 | 值 | 说明 | -|------|-----|------| -| TOKEN_AUTH | 100 | 授权请求 (验证 SN + Passcode + HMAC) | -| TOKEN_HEARTBEAT | 101 | 心跳包 (支持 HMAC 授权验证,返回 Authorized 状态) | -| TOKEN_LOGIN | 102 | 客户端登录 | -| CMD_HEARTBEAT_ACK | 216 | 心跳响应 (包含 Authorized 字段) | +| Token | 值 | 用途 | +| ---- | ---- | ---- | +| `TOKEN_AUTH` | 100 | 授权请求(SN + Passcode + HMAC) | +| `TOKEN_HEARTBEAT` | 101 | 心跳包(携带 ActiveWnd / Ping / SN) | +| `TOKEN_LOGIN` | 102 | 主连接登录 | +| `TOKEN_BITMAPINFO` | 115 | 屏幕子连接首包,含分辨率 + clientID | +| `TOKEN_FIRSTSCREEN` | 116 | 原始 BGRA 首帧(Go 侧丢弃) | +| `TOKEN_NEXTSCREEN` | 117 | H.264 屏幕帧 | +| `TOKEN_SHELL_START` | 128 | 旧 cmd-pipe 终端子连接首包 | +| `TOKEN_KEYFRAME` | 134 | GOP 关键帧(DEFAULT_GOP 无限大,实际未用) | +| `TOKEN_TERMINAL_START` | 232 | PTY 终端子连接首包 | +| `TOKEN_TERMINAL_CLOSE` | 233 | 终端关闭通知 | +| `TOKEN_CONN_AUTH` | 246 | 子连接身份握手,含 clientID | +| (raw bytes) | — | 终端 sub-conn 绑定后裸字节即 shell 输出 | -其他命令会被记录为 Debug 日志,可按需扩展。 +### 服务端 → 客户端 + +| Command | 值 | 用途 | +| ---- | ---- | ---- | +| `COMMAND_SCREEN_SPY` | 16 | 启动屏幕捕获 | +| `COMMAND_SCREEN_CONTROL` | 20 | 鼠标 / 键盘输入(MSG64 批次) | +| `COMMAND_NEXT` | 30 | 解除客户端读线程阻塞 | +| `COMMAND_SHELL` | 40 | 请求开启 shell 子连接 | +| `CMD_TERMINAL_RESIZE` | 81 | PTY 尺寸 (cols / rows int16 LE) | +| `COMMAND_BYE` | 204 | 优雅断开屏幕 / 终端 | +| `CMD_MASTERSETTING` | 215 | 主控配置 + HMAC 签名 (1000B) | +| `CMD_HEARTBEAT_ACK` | 216 | 心跳响应(携带 Authorized 字段) | +| `TOKEN_CONN_AUTH` | 246 | 子连接身份握手响应 (256B) | + +未列出的命令字节会被记录为 Debug 日志,按需扩展。 ## 快速开始 @@ -136,6 +164,10 @@ $env:YAMA_PWD="your_super_password" ## 使用示例 +完整的 TCP + Hub + Web 集成示例就是 [`cmd/main.go`](cmd/main.go),那是程序入口本身、也是最权威的范例 —— 包含 handler 装配、hub 注册、web HTTP/WS 服务、信号优雅关闭等。 + +如果只想用 TCP 框架做自定义服务端(不要 Web/Hub),最小示例如下: + ```go package main @@ -150,57 +182,32 @@ import ( "github.com/yuanyuanxiang/SimpleRemoter/server/go/server" ) -// 实现 Handler 接口 -type MyHandler struct { - log 
*logger.Logger -} - -func (h *MyHandler) OnConnect(ctx *connection.Context) { - h.log.ClientEvent("online", ctx.ID, ctx.GetPeerIP()) -} - -func (h *MyHandler) OnDisconnect(ctx *connection.Context) { - h.log.ClientEvent("offline", ctx.ID, ctx.GetPeerIP()) -} +type MyHandler struct{ log *logger.Logger } +func (h *MyHandler) OnConnect(ctx *connection.Context) {} +func (h *MyHandler) OnDisconnect(ctx *connection.Context) {} func (h *MyHandler) OnReceive(ctx *connection.Context, data []byte) { if len(data) == 0 { return } - cmd := data[0] - switch cmd { - case protocol.TokenLogin: + if data[0] == protocol.TokenLogin { info, _ := protocol.ParseLoginInfo(data) - h.log.Info("Client login: %s (%s)", info.PCName, info.OsVerInfo) - case protocol.TokenHeartbeat: - h.log.Debug("Heartbeat from client %d", ctx.ID) + h.log.Info("login: %s (%s)", info.PCName, info.OsVerInfo) } } func main() { - // 配置日志 (控制台 + 文件) - logCfg := logger.DefaultConfig() - logCfg.File = "logs/server.log" - log := logger.New(logCfg) - - // 配置服务器 - config := server.DefaultConfig() - config.Port = 6543 - - // 创建并启动服务器 - srv := server.New(config) + log := logger.New(logger.DefaultConfig()) + srv := server.New(server.DefaultConfig()) srv.SetLogger(log.WithPrefix("Server")) srv.SetHandler(&MyHandler{log: log}) - if err := srv.Start(); err != nil { - log.Fatal("启动失败: %v", err) + log.Fatal("start: %v", err) } - // 等待退出信号 - sigChan := make(chan os.Signal, 1) - signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) - <-sigChan - + sig := make(chan os.Signal, 1) + signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM) + <-sig srv.Stop() } ``` @@ -286,7 +293,7 @@ func main() { | bWebCamExist | 448 | 4 | 是否有摄像头 | | dwSpeed | 452 | 4 | 网速 | | szStartTime | 456 | 20 | 启动时间 | -| szReserved | 476 | 512 | 扩展字段 (用`|`分隔) | +| szReserved | 476 | 512 | 扩展字段(多字段以 `\|` 分隔) | ### Heartbeat 结构 @@ -410,15 +417,19 @@ publicIP := info.GetReservedField(11) // 公网 IP ## 与 C++ 版本对比 | 特性 | C++ (IOCP) | Go | -|------|------------|-----| +| 
---- | ---- | ---- | | 并发模型 | IOCP + 线程池 | Goroutine 池 | -| 压缩算法 | ZSTD | ZSTD | | 跨平台 | Windows | 全平台 | | 内存管理 | 手动 | GC | | 代码复杂度 | 高 | 低 | -| 协议头解密 | 8种方式 | 8种方式 | -| XOR编码 | XOREncoder16 | XOREncoder16 | -| 字符编码 | GBK | GBK -> UTF-8 | +| 压缩 / XOR / 头加密 | 完整 8 套加密方式 + XOREncoder16 + ZSTD | 完全对齐 | +| 字符编码 | GBK | UTF-8 直通 / GBK→UTF-8 (按客户端能力位) | +| 设备列表与监控 | MFC 列表控件 | Web UI | +| Web 远程桌面 | 内嵌浏览器 + H.264 | 完全对齐(WebCodecs 解码) | +| 鼠标键盘转发 | 已实现 | 完全对齐 | +| Web 终端 | 内嵌 xterm.js + ConPTY | 完全对齐(含旧 cmd-pipe 兼容) | +| 用户 / 分组管理 | 已实现 | users.json schema 互通 | +| 文件传输 / 摄像头 / 录音 等 | 已实现 | 暂未实现(按需扩展) | ## 依赖 diff --git a/server/go/cmd/main.go b/server/go/cmd/main.go index cbda53d..56d602c 100644 --- a/server/go/cmd/main.go +++ b/server/go/cmd/main.go @@ -37,17 +37,33 @@ func (h *MyHandler) OnConnect(ctx *connection.Context) { // OnDisconnect is called when a client disconnects func (h *MyHandler) OnDisconnect(ctx *connection.Context) { - // Always clean up any screen sub-context mapping first — the connection - // may be a screen sub-conn (which has no ClientInfo) rather than a main - // login connection. UnbindScreenConn is a no-op if not tracked. + // Always clean up any sub-context mapping first — the connection may + // be a screen / terminal sub-conn rather than a main login connection. + // Both Unbind* calls are no-ops if not tracked. UnbindTerminalConn + // also fires OnTerminalClosed so the browser sees the session end on + // unexpected device-side drops. h.hub.UnbindScreenConn(ctx) + h.hub.UnbindTerminalConn(ctx) info := ctx.GetInfo() - if info.ClientID != "" { + // Only treat this disconnect as a device-going-offline event if this + // ctx is the device's MAIN login connection. Phase 6 added ClientID + // pinning to sub-conns (via ConnAuth — needed for terminal routing), + // so a non-empty ClientID alone no longer distinguishes main from + // sub. Closing a screen / terminal sub-conn must NOT remove the + // device from the hub. 
+ if info.ClientID != "" && h.hub.MainConn(info.ClientID) == ctx { h.log.ClientEvent("offline", ctx.ID, ctx.GetPeerIP(), "clientID", info.ClientID, "computer", info.ComputerName, ) + // Tear down any active sub-conn sessions BEFORE Unregister so the + // browser sees screen/terminal close events alongside the + // device-offline event, instead of frames/output continuing to + // stream from orphaned sub-conn ctxs until they time out on + // their own. Both calls no-op if there's no active session. + h.hub.CloseScreen(info.ClientID) + h.hub.CloseTerminalSession(info.ClientID) h.hub.Unregister(info.ClientID) } } @@ -58,6 +74,27 @@ func (h *MyHandler) OnReceive(ctx *connection.Context, data []byte) { return } + // Terminal-bound sub-conns deliver RAW shell output with no leading + // command byte — see client/ConPTYManager.cpp:328 (Send2Server with + // just the buffer). We must short-circuit BEFORE the command switch + // or the first output byte will be misinterpreted as a token. + // Exception: a length-1 packet whose byte is TOKEN_TERMINAL_CLOSE + // is the device's "shell exited" notification, NOT data. + if devID := h.hub.TerminalDeviceID(ctx); devID != "" { + if len(data) == 1 && data[0] == protocol.TokenTerminalClose { + h.log.Info("terminal closed by device=%s conn=%d", devID, ctx.ID) + h.hub.CloseTerminalSession(devID) + return + } + // Wrap with the 'TRM1' magic the browser uses to demultiplex + // terminal output from screen frames over the shared WS. 
+ packet := make([]byte, 4+len(data)) + copy(packet[:4], protocol.TerminalBinaryMagic[:]) + copy(packet[4:], data) + h.hub.PublishTerminalData(devID, packet) + return + } + cmd := data[0] // Handle commands switch cmd { @@ -71,6 +108,17 @@ func (h *MyHandler) OnReceive(ctx *connection.Context, data []byte) { h.handleConnAuth(ctx, data) case protocol.TokenBitmapInfo: h.handleBitmapInfo(ctx, data) + case protocol.TokenTerminalStart: + h.handleTerminalStart(ctx, true) + case protocol.TokenShellStart: + h.handleTerminalStart(ctx, false) + case protocol.TokenTerminalClose: + // Pre-bind close (rare — device gives up before the server + // finished its half of the handshake). Best-effort cleanup. + if devID := h.deviceIDOfSubConn(ctx); devID != "" { + h.log.Info("pre-bind terminal close: device=%s conn=%d", devID, ctx.ID) + h.hub.CloseTerminalSession(devID) + } case protocol.TokenFirstScreen: // TOKEN_FIRSTSCREEN delivers a RAW BGRA baseline frame, not an // H264 unit — bytes ≈ width × height × 4. The C++ MFC dialog @@ -107,7 +155,24 @@ func (h *MyHandler) OnReceive(ctx *connection.Context, data []byte) { // and the signing primitive lives in a vendored component out of scope // for this server, so we always reply OK and let TOKEN_BITMAPINFO carry // the device ID via offset 41 when the screen sub-conn proceeds. -func (h *MyHandler) handleConnAuth(ctx *connection.Context, _ []byte) { +func (h *MyHandler) handleConnAuth(ctx *connection.Context, data []byte) { + // Pin the parent device's ClientID onto the sub-conn. Without this, + // later 1-byte tokens (TOKEN_TERMINAL_START / TOKEN_SHELL_START) have + // no way to identify which device they belong to — they carry no + // clientID themselves. ConnAuthPacket layout has clientID at offset 1 + // (uint64 LE); see common/commands.h::ConnAuthPacket. 
+ if len(data) >= protocol.ConnAuthOffClientID+8 { + clientID := binary.LittleEndian.Uint64( + data[protocol.ConnAuthOffClientID : protocol.ConnAuthOffClientID+8]) + if clientID != 0 { + // Sub-conns never go through handleLogin, so their ctx.Info + // is otherwise empty. We only need ClientID for routing. + info := ctx.GetInfo() + info.ClientID = strconv.FormatUint(clientID, 10) + ctx.SetInfo(info) + } + } + ack := make([]byte, protocol.ConnAuthAckSize) ack[0] = protocol.TokenConnAuth ack[protocol.ConnAuthAckOffStatus] = protocol.ConnAuthStatusOK @@ -119,6 +184,55 @@ func (h *MyHandler) handleConnAuth(ctx *connection.Context, _ []byte) { } } +// deviceIDOfSubConn resolves the parent device of a sub-conn from the +// ClientID pinned by handleConnAuth. Returns "" for the rare case of a +// legacy client that skipped ConnAuth (the Go server's only target is +// modern clients, so this is effectively a paranoia check). +func (h *MyHandler) deviceIDOfSubConn(ctx *connection.Context) string { + return ctx.GetInfo().ClientID +} + +// handleTerminalStart fires when the device's freshly-spawned shell +// sub-conn announces itself. TOKEN_TERMINAL_START (232) means PTY mode +// (Linux/macOS or Windows ConPTY); TOKEN_SHELL_START (128) means the +// legacy Windows cmd-pipe path. Both packets are 1-byte tokens — the +// device identity comes from ConnAuth's pinned ClientID. +// +// After binding we send: +// - For PTY only: an initial CMD_TERMINAL_RESIZE 80x24 so the shell +// doesn't render at the PTY default before the browser's first fit. +// vim/htop look broken otherwise. The browser will follow up with a +// real term_resize once xterm.js sizes the canvas. +// - Always: COMMAND_NEXT to unblock the device's read thread (the +// ConPTYManager ReadThread sits on m_hEventDlgOpen until then — +// see client/ConPTYManager.cpp:259). 
+func (h *MyHandler) handleTerminalStart(ctx *connection.Context, isPTY bool) { + devID := h.deviceIDOfSubConn(ctx) + if devID == "" { + h.log.Warn("terminal start with no clientID: conn=%d", ctx.ID) + ctx.Close() + return + } + if !h.hub.BindTerminalConn(devID, ctx, isPTY) { + // No pending session — this is a stale sub-conn (e.g. browser + // gave up and closed term_close already). Drop it. + h.log.Warn("orphan terminal sub-conn: device=%s conn=%d isPTY=%v", + devID, ctx.ID, isPTY) + ctx.Close() + return + } + + if isPTY { + if err := h.srv.Send(ctx, protocol.BuildTerminalResize(80, 24)); err != nil { + h.log.Error("initial resize send failed: conn=%d: %v", ctx.ID, err) + } + } + if err := h.srv.Send(ctx, []byte{protocol.CommandNext}); err != nil { + h.log.Error("COMMAND_NEXT send failed on terminal: conn=%d: %v", ctx.ID, err) + } + h.log.Info("terminal bound: device=%s conn=%d isPTY=%v", devID, ctx.ID, isPTY) +} + // handleBitmapInfo is the first packet on a freshly-arrived screen // sub-connection. Packet layout (after the command byte at data[0]): // diff --git a/server/go/hub/hub.go b/server/go/hub/hub.go index f600fd6..9c25e7d 100644 --- a/server/go/hub/hub.go +++ b/server/go/hub/hub.go @@ -77,6 +77,21 @@ type Device struct { // JSON messages. cursorSeen bool lastCursorIndex byte + + // Terminal session state — at most one web terminal per device (MVP + // constraint shared with the C++ server). All three fields are + // guarded by hub.mu. + // + // terminalPending: COMMAND_SHELL has been sent, waiting for the device's + // sub-conn to arrive and announce itself via TOKEN_TERMINAL_START / + // TOKEN_SHELL_START. + // terminalConn: the shell sub-conn ctx after binding. Nil before BIND + // and after teardown. + // terminalIsPTY: distinguishes Linux/macOS/ConPTY (true) from the legacy + // Windows cmd-pipe path. PTY mode supports resize; cmd-pipe ignores it. 
+ terminalPending bool + terminalConn *connection.Context + terminalIsPTY bool } // ScreenCache is a read-only snapshot of a device's last-seen screen state, @@ -171,6 +186,18 @@ type EventHandler interface { // Duplicates (same index as the previous frame) are filtered out by the // hub before reaching subscribers. OnCursorChange(deviceID string, index byte) + // OnTerminalReady fires once the device's shell sub-conn is bound and + // the server has sent COMMAND_NEXT to start its output read loop. + // isPTY=true means PTY mode (Linux/macOS or ConPTY); false means the + // legacy Windows cmd-pipe path which doesn't support resize. + OnTerminalReady(deviceID string, isPTY bool) + // OnTerminalData ships one chunk of raw shell output (already wrapped + // in the WS-binary "TRM1" magic header) to terminal viewers. + OnTerminalData(deviceID string, packet []byte) + // OnTerminalClosed fires when the session ends — either because the + // device sent TOKEN_TERMINAL_CLOSE, the sub-conn dropped, or the + // server explicitly tore it down. + OnTerminalClosed(deviceID string, reason string) } // Hub is a thread-safe registry of online devices. @@ -187,13 +214,19 @@ type Hub struct { // having to walk every device. Empty when no screen sessions exist. screenIndex map[*connection.Context]string screenIndexMu sync.RWMutex + + // Parallel reverse index for terminal sub-conns. Same purpose: O(1) + // lookup from a raw ctx (e.g. on OnDisconnect) back to its device. + terminalIndex map[*connection.Context]string + terminalIndexMu sync.RWMutex } // New returns an empty Hub. 
func New() *Hub { return &Hub{ - devices: make(map[string]*Device), - screenIndex: make(map[*connection.Context]string), + devices: make(map[string]*Device), + screenIndex: make(map[*connection.Context]string), + terminalIndex: make(map[*connection.Context]string), } } @@ -547,6 +580,223 @@ func (h *Hub) UpdateLive(id string, rtt int, activeWindow string) { } } +// ----- Terminal session management (Phase 6) -------------------------------- + +// ErrTerminalBusy is returned by OpenTerminalSession when the device already +// has a pending or active terminal session — MVP enforces single-viewer. +var ErrTerminalBusy = errors.New("terminal already open by another viewer") + +// OpenTerminalSession atomically marks a terminal session as pending for the +// device, then sends COMMAND_SHELL on the main TCP connection so the device +// will spawn a shell sub-conn. Returns nil if the request was sent. On any +// failure the pending flag is rolled back so retries are possible. +// +// Single-viewer constraint: if a pending or bound session already exists, +// returns ErrTerminalBusy. Mirrors C++ CWebService::HandleTermOpen +// (server/2015Remote/WebService.cpp:1838). +func (h *Hub) OpenTerminalSession(deviceID string) error { + if deviceID == "" { + return ErrDeviceOffline + } + h.mu.Lock() + d, ok := h.devices[deviceID] + if !ok || d.conn == nil { + h.mu.Unlock() + return ErrDeviceOffline + } + if d.terminalPending || d.terminalConn != nil { + h.mu.Unlock() + return ErrTerminalBusy + } + d.terminalPending = true + mainConn := d.conn + h.mu.Unlock() + + if h.sender == nil { + // Roll back so a retry isn't permanently blocked. 
+ h.mu.Lock() + d.terminalPending = false + h.mu.Unlock() + return ErrNoSender + } + if err := h.sender(mainConn, []byte{protocol.CommandShell}); err != nil { + h.mu.Lock() + d.terminalPending = false + h.mu.Unlock() + return err + } + return nil +} + +// IsTerminalPending tells the TCP layer whether the next-arriving shell +// sub-conn should be claimed by the web terminal. The C++ side uses this +// in MessageHandle to decide between WebService takeover and opening an +// MFC dialog (server/2015Remote/2015RemoteDlg.cpp:5753). +func (h *Hub) IsTerminalPending(deviceID string) bool { + h.mu.RLock() + defer h.mu.RUnlock() + d, ok := h.devices[deviceID] + return ok && d.terminalPending +} + +// BindTerminalConn promotes the pending session to an active one by +// associating the device's freshly-arrived shell sub-conn. Returns false +// if no pending session exists — callers should drop the orphan ctx. +// +// Subscribers receive OnTerminalReady AFTER binding so they can flip the +// browser into "ready" state immediately on the same TCP roundtrip that +// will deliver the first shell output. +func (h *Hub) BindTerminalConn(deviceID string, ctx *connection.Context, isPTY bool) bool { + if deviceID == "" || ctx == nil { + return false + } + h.mu.Lock() + d, ok := h.devices[deviceID] + if !ok || !d.terminalPending { + h.mu.Unlock() + return false + } + d.terminalConn = ctx + d.terminalIsPTY = isPTY + d.terminalPending = false + h.mu.Unlock() + + h.terminalIndexMu.Lock() + h.terminalIndex[ctx] = deviceID + h.terminalIndexMu.Unlock() + + for _, s := range h.snapshotSubscribers() { + s.OnTerminalReady(deviceID, isPTY) + } + return true +} + +// TerminalDeviceID returns the device ID whose terminal sub-conn this +// context belongs to, or "" otherwise. The TCP layer uses this on every +// inbound packet on a sub-conn — when non-empty, the bytes are raw shell +// output and bypass the usual command-byte switch. 
+func (h *Hub) TerminalDeviceID(ctx *connection.Context) string {
+	h.terminalIndexMu.RLock()
+	defer h.terminalIndexMu.RUnlock()
+	// A ctx that is not in the index yields the map's zero value "".
+	return h.terminalIndex[ctx]
+}
+
+// UnbindTerminalConn removes the terminal mapping (called from the TCP
+// disconnect path for any sub-conn ctx). Fires OnTerminalClosed once if
+// the unbind actually removed something — so subscribers can update the
+// browser even on unexpected device-side drops.
+//
+// "Removed something" refers to the reverse-index entry: the notification
+// is sent whenever ctx was present in terminalIndex, independent of
+// whether the device record still pointed at ctx.
+func (h *Hub) UnbindTerminalConn(ctx *connection.Context) {
+	h.terminalIndexMu.Lock()
+	deviceID, tracked := h.terminalIndex[ctx]
+	if !tracked {
+		h.terminalIndexMu.Unlock()
+		return
+	}
+	delete(h.terminalIndex, ctx)
+	h.terminalIndexMu.Unlock()
+
+	// terminalIndexMu is released before h.mu is taken; the two locks are
+	// never held together here.
+	h.mu.Lock()
+	// Only reset the device's terminal fields when ctx is still the bound
+	// sub-conn, so a different conn recorded on the device is left alone.
+	if d, ok := h.devices[deviceID]; ok && d.terminalConn == ctx {
+		d.terminalConn = nil
+		d.terminalPending = false
+		d.terminalIsPTY = false
+	}
+	h.mu.Unlock()
+
+	for _, s := range h.snapshotSubscribers() {
+		s.OnTerminalClosed(deviceID, "disconnected")
+	}
+}
+
+// SendToTerminal forwards bytes (typically xterm.js keystrokes) to the
+// device's shell sub-conn. Returns ErrDeviceOffline if no session is
+// active for this device, ErrNoSender if the hub has no sender configured,
+// and otherwise whatever the sender returns.
+func (h *Hub) SendToTerminal(id string, data []byte) error {
+	// Snapshot the sub-conn under the read lock; the send itself happens
+	// after the lock is released.
+	h.mu.RLock()
+	d, ok := h.devices[id]
+	var tc *connection.Context
+	if ok {
+		tc = d.terminalConn
+	}
+	h.mu.RUnlock()
+	if !ok || tc == nil {
+		return ErrDeviceOffline
+	}
+	if h.sender == nil {
+		return ErrNoSender
+	}
+	return h.sender(tc, data)
+}
+
+// TerminalIsPTY reports whether the active session is PTY mode (the
+// resize command only applies in PTY mode — legacy cmd-pipe ignores it).
+// It is false for an unknown device and for a session that is not bound.
+func (h *Hub) TerminalIsPTY(id string) bool {
+	h.mu.RLock()
+	defer h.mu.RUnlock()
+	d, ok := h.devices[id]
+	return ok && d.terminalConn != nil && d.terminalIsPTY
+}
+
+// CloseTerminalSession tears down the session from the server side
+// (typically when the requesting browser sends term_close or disconnects).
+// Teardown order: the device's terminal fields are reset under h.mu, then
+// subscribers receive OnTerminalClosed(deviceID, "closed"); if a sub-conn
+// was bound, its reverse-index entry is dropped, COMMAND_BYE is sent, and
+// the conn is closed after a short grace period so the client's
+// IOCPClient reconnect logic doesn't fire. A session that was only pending
+// (no sub-conn bound yet) stops after the notification. Unknown devices
+// and devices without any session are a silent no-op.
+func (h *Hub) CloseTerminalSession(deviceID string) {
+	h.mu.Lock()
+	d, ok := h.devices[deviceID]
+	if !ok {
+		h.mu.Unlock()
+		return
+	}
+	tc := d.terminalConn
+	// hadSession guards against firing spurious OnTerminalClosed events
+	// when there was nothing to tear down — relevant when the main-conn
+	// teardown path calls CloseTerminalSession unconditionally as part of
+	// device-offline cleanup, or when both OnDisconnect and an explicit
+	// browser term_close race for the same teardown.
+	hadSession := tc != nil || d.terminalPending
+	d.terminalConn = nil
+	d.terminalPending = false
+	d.terminalIsPTY = false
+	h.mu.Unlock()
+
+	if !hadSession {
+		return
+	}
+
+	// Subscribers are called after h.mu has been released.
+	for _, s := range h.snapshotSubscribers() {
+		s.OnTerminalClosed(deviceID, "closed")
+	}
+
+	// Pending-only session: no sub-conn was ever bound, so there is no
+	// index entry to drop and nothing to close.
+	if tc == nil {
+		return
+	}
+	h.terminalIndexMu.Lock()
+	delete(h.terminalIndex, tc)
+	h.terminalIndexMu.Unlock()
+
+	// Mirror Hub.CloseScreen: send COMMAND_BYE then close after 500 ms so
+	// the device exits its shell read loop instead of treating the FIN as
+	// a network blip and triggering reconnect.
+	if h.sender != nil {
+		// Best effort: the conn is closed below regardless of whether
+		// the BYE could be delivered, so the send error is ignored.
+		_ = h.sender(tc, []byte{protocol.CommandBye})
+	}
+	// The delayed close runs on its own goroutine so the caller is not
+	// blocked for the grace period.
+	go func(c *connection.Context) {
+		time.Sleep(500 * time.Millisecond)
+		c.Close()
+	}(tc)
+}
+
+// PublishTerminalData fans out one chunk of shell output to subscribers.
+// Caller has already wrapped it in the "TRM1" magic header so the browser
+// can demultiplex from screen frames over the shared WebSocket.
+// The same packet slice is handed to every subscriber; it is not copied
+// here.
+func (h *Hub) PublishTerminalData(deviceID string, packet []byte) {
+	for _, s := range h.snapshotSubscribers() {
+		s.OnTerminalData(deviceID, packet)
+	}
+}
+
 // Count returns the current number of online devices.
func (h *Hub) Count() int { h.mu.RLock() diff --git a/server/go/hub/hub_test.go b/server/go/hub/hub_test.go index cbd8185..8df9c8d 100644 --- a/server/go/hub/hub_test.go +++ b/server/go/hub/hub_test.go @@ -84,6 +84,12 @@ func (c *captureHandler) OnResolutionChange(_ string, _, _ int) {} func (c *captureHandler) OnCursorChange(_ string, _ byte) {} +func (c *captureHandler) OnTerminalReady(_ string, _ bool) {} + +func (c *captureHandler) OnTerminalData(_ string, _ []byte) {} + +func (c *captureHandler) OnTerminalClosed(_ string, _ string) {} + func TestHubSubscribeEvents(t *testing.T) { h := New() c := &captureHandler{} diff --git a/server/go/protocol/commands.go b/server/go/protocol/commands.go index 09fe249..0ae95d9 100644 --- a/server/go/protocol/commands.go +++ b/server/go/protocol/commands.go @@ -107,19 +107,24 @@ const ( CommandScreenSpy byte = 16 // COMMAND_SCREEN_SPY - start screen capture CommandScreenControl byte = 20 // COMMAND_SCREEN_CONTROL - mouse/keyboard input (MSG64 batches) CommandNext byte = 30 // COMMAND_NEXT - "control-side dialog is open, you may stream" + CommandShell byte = 40 // COMMAND_SHELL - ask device to open a shell sub-connection + CommandTerminalRsize byte = 81 // CMD_TERMINAL_RESIZE - [cmd:1][cols:2 LE][rows:2 LE] CommandBye byte = 204 // COMMAND_BYE - disconnect CommandHeartbeat byte = 216 // CMD_HEARTBEAT_ACK // Client -> Server tokens - TokenAuth byte = 100 // TOKEN_AUTH - authorization required - TokenHeartbeat byte = 101 // TOKEN_HEARTBEAT - TokenLogin byte = 102 // TOKEN_LOGIN - login packet - TokenBitmapInfo byte = 115 // TOKEN_BITMAPINFO - screen sub-connection header - TokenFirstScreen byte = 116 // TOKEN_FIRSTSCREEN - raw BGRA baseline frame (NOT H264) - TokenNextScreen byte = 117 // TOKEN_NEXTSCREEN - non-keyframe H264 (P-frame) - TokenKeyframe byte = 134 // TOKEN_KEYFRAME - H264 IDR (sent on GOP boundary) - TokenConnAuth byte = 246 // TOKEN_CONN_AUTH - sub-connection identity handshake - CmdCursorImage byte = 93 // 
CMD_CURSOR_IMAGE - custom cursor bitmap (Phase 5+ feature) + TokenAuth byte = 100 // TOKEN_AUTH - authorization required + TokenHeartbeat byte = 101 // TOKEN_HEARTBEAT + TokenLogin byte = 102 // TOKEN_LOGIN - login packet + TokenBitmapInfo byte = 115 // TOKEN_BITMAPINFO - screen sub-connection header + TokenFirstScreen byte = 116 // TOKEN_FIRSTSCREEN - raw BGRA baseline frame (NOT H264) + TokenNextScreen byte = 117 // TOKEN_NEXTSCREEN - non-keyframe H264 (P-frame) + TokenShellStart byte = 128 // TOKEN_SHELL_START - legacy cmd-pipe shell sub-conn open + TokenKeyframe byte = 134 // TOKEN_KEYFRAME - H264 IDR (sent on GOP boundary) + TokenTerminalStart byte = 232 // TOKEN_TERMINAL_START - modern PTY shell sub-conn open + TokenTerminalClose byte = 233 // TOKEN_TERMINAL_CLOSE - shell exited / close ack + TokenConnAuth byte = 246 // TOKEN_CONN_AUTH - sub-connection identity handshake + CmdCursorImage byte = 93 // CMD_CURSOR_IMAGE - custom cursor bitmap (Phase 5+ feature) ) // Sub-connection authentication (matches common/commands.h ConnAuth* structs). @@ -128,7 +133,13 @@ const ( const ( ConnAuthPacketSize = 512 ConnAuthAckSize = 256 - // ConnAuthAck field offsets within the 256-byte buffer. + // ConnAuthPacket field offsets within the inbound 512-byte buffer. + // Layout (from common/commands.h::ConnAuthPacket): + // [token:1][clientID:8 LE][timestamp:8 LE][nonce:16][signature:64][reserved:415] + ConnAuthOffClientID = 1 // uint64 LE — pin to the sub-conn so later + // // 1-byte tokens (TOKEN_TERMINAL_START etc.) can + // // resolve the parent device. + // ConnAuthAck field offsets within the outbound 256-byte buffer. ConnAuthAckOffStatus = 1 // uint8 ConnAuthAckOffServerTime = 2 // uint64 LE // Status codes. @@ -245,6 +256,27 @@ func BuildScreenControlPacket(message, wParam, lParam uint64, ptX, ptY int32, ti return buf } +// TerminalBinaryMagic is the 4-byte prefix the web UI uses to demultiplex +// terminal output from screen frames over the single WebSocket. 
Matches
+// the C++ side at server/2015Remote/WebService.cpp:2013 ("TRM1"). Screen
+// frames lead with a uint32 LE device ID, so collisions with this exact
+// magic are astronomically rare in practice.
+var TerminalBinaryMagic = [4]byte{'T', 'R', 'M', '1'}
+
+// BuildTerminalResize encodes the 5-byte CMD_TERMINAL_RESIZE packet the
+// client's ConPTYManager/TerminalManager expects on the shell sub-conn:
+//
+//	[CMD_TERMINAL_RESIZE:1][cols:2 LE][rows:2 LE]
+//
+// cols/rows are signed int16 on the wire (the C++ side casts to `short`).
+// Values outside [0, 32767] are clamped to that range instead of being
+// truncated by the int16 conversion, which would otherwise turn e.g. 70000
+// into 4464 or emit a negative size. In-range values encode exactly as
+// before.
+func BuildTerminalResize(cols, rows int) []byte {
+	buf := make([]byte, 5)
+	buf[0] = CommandTerminalRsize
+	binary.LittleEndian.PutUint16(buf[1:3], uint16(clampTerminalDim(cols)))
+	binary.LittleEndian.PutUint16(buf[3:5], uint16(clampTerminalDim(rows)))
+	return buf
+}
+
+// clampTerminalDim limits a terminal dimension to what a non-negative
+// int16 can carry on the wire.
+func clampTerminalDim(v int) int16 {
+	const maxDim = 1<<15 - 1 // largest int16
+	switch {
+	case v < 0:
+		return 0
+	case v > maxDim:
+		return maxDim
+	}
+	return int16(v)
+}
+
 // MakeLParam packs x into the low word and y into the high word — the
 // Windows MAKELPARAM macro the client expects in mouse-message lParams.
 func MakeLParam(x, y int32) uint64 {
diff --git a/server/go/web/ws.go b/server/go/web/ws.go
index 13d07cb..5b7b9c5 100644
--- a/server/go/web/ws.go
+++ b/server/go/web/ws.go
@@ -46,11 +46,12 @@ type wsClient struct {
 	once sync.Once
 
 	// Mutated under wsHub.mu (or only by the read loop owning this client).
-	nonce    string // outstanding challenge — cleared after a successful login
-	token    string // set once authenticated
-	role     string // mirrors session role after login
-	addr     string // client address for logs
-	watching string // device ID this browser is currently streaming, "" when on the list
+	nonce        string // outstanding challenge — cleared after a successful login
+	token        string // set once authenticated
+	role         string // mirrors session role after login
+	addr         string // client address for logs
+	watching     string // device ID this browser is currently streaming, "" when on the list
+	termWatching string // device ID for an open web terminal session, "" otherwise
 }
 
 // queue writes a JSON text frame onto the send buffer. 
Drops silently if the
@@ -176,6 +177,61 @@ func (h *wsHub) OnScreenFrame(deviceID string, packet []byte, _ bool) {
 	}
 }
 
+// OnTerminalReady notifies the requesting browser that its term_open
+// handshake completed. mode is "pty" or "legacy" — xterm.js disables the
+// resize callback in legacy mode (no PTY behind the cmd pipe).
+func (h *wsHub) OnTerminalReady(deviceID string, isPTY bool) {
+	mode := "legacy"
+	if isPTY {
+		mode = "pty"
+	}
+	// The message is built once, before the lock is taken, and the same
+	// value is queued to every matching client.
+	msg := mustJSON(map[string]any{
+		"cmd":  "term_ready",
+		"id":   deviceID,
+		"mode": mode,
+	})
+	h.mu.RLock()
+	defer h.mu.RUnlock()
+	for c := range h.clients {
+		// Only clients that have a token and are attached to this
+		// device's terminal are notified.
+		if c.termWatching == deviceID && c.token != "" {
+			c.queue(msg)
+		}
+	}
+}
+
+// OnTerminalData ships one chunk of raw shell output (already wrapped in
+// the "TRM1" magic header) over the binary WS frame. Single-viewer is
+// enforced upstream so at most one client matches per device.
+func (h *wsHub) OnTerminalData(deviceID string, packet []byte) {
+	h.mu.RLock()
+	defer h.mu.RUnlock()
+	for c := range h.clients {
+		if c.termWatching == deviceID && c.token != "" {
+			c.queueBinary(packet)
+		}
+	}
+}
+
+// OnTerminalClosed fires when the device's shell exits or the sub-conn
+// drops. The browser closes its xterm panel. We also clear termWatching
+// so a subsequent term_open from the same browser isn't rejected as
+// "already open" by stale state.
+func (h *wsHub) OnTerminalClosed(deviceID string, reason string) {
+	// Unlike term_ready, this message carries no device id field.
+	msg := mustJSON(map[string]any{
+		"cmd":    "term_closed",
+		"ok":     true,
+		"reason": reason,
+	})
+	// Write lock rather than RLock: the loop mutates c.termWatching.
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	for c := range h.clients {
+		if c.termWatching == deviceID && c.token != "" {
+			c.termWatching = ""
+			c.queue(msg)
+		}
+	}
+}
+
 // OnDeviceUpdate forwards heartbeat-derived liveness data so the device-list
 // rows can refresh RTT and active-window labels without re-fetching.
func (h *wsHub) OnDeviceUpdate(id string, rtt int, activeWindow string) { @@ -221,6 +277,13 @@ func (h *wsHub) unregister(c *wsClient) { if c.watching != "" && h.countWatchers(c.watching) == 0 { h.devices.CloseScreen(c.watching) } + // Terminal sessions are single-viewer by design, so any open session + // belongs to this client. Tear it down so the next viewer doesn't + // hit ErrTerminalBusy from an abandoned session. + if c.termWatching != "" { + h.devices.CloseTerminalSession(c.termWatching) + c.termWatching = "" + } // Do NOT revoke the token: tokens are session-scoped, not WS-scoped. // Frontend may close+reopen the WS at any time (visibilitychange handler, // brief network blip, reload) and must be able to resume with the same diff --git a/server/go/web/ws_handlers.go b/server/go/web/ws_handlers.go index cb5906f..f1202fa 100644 --- a/server/go/web/ws_handlers.go +++ b/server/go/web/ws_handlers.go @@ -15,8 +15,8 @@ import ( // Phase 3 implements: get_salt, login, get_devices, ping, disconnect. // Phase 4 adds: connect, screen frame relay. // Phase 5 adds: mouse, key (input forwarding to the device screen sub-conn). -// Phase 6/7 commands (term_*, user mgmt) get a friendly "not yet implemented" -// reply so the browser UI doesn't hang silently. +// Phase 6 adds: term_open / term_input / term_resize / term_close (PTY relay). +// Phase 7 covers admin: create_user / delete_user / list_users / get_groups. 
func (h *wsHub) dispatch(c *wsClient, cmd string, raw []byte) { switch cmd { case "get_salt": @@ -39,9 +39,13 @@ func (h *wsHub) dispatch(c *wsClient, cmd string, raw []byte) { case "key": h.handleKey(c, raw) case "term_open": - h.replyNotImplemented(c, "term_closed", "Web terminal not yet implemented on Go server") - case "term_input", "term_resize", "term_close": - // silently ignored — no terminal session + h.handleTermOpen(c, raw) + case "term_input": + h.handleTermInput(c, raw) + case "term_resize": + h.handleTermResize(c, raw) + case "term_close": + h.handleTermClose(c, raw) // Admin operations (Phase 7). case "create_user": @@ -55,14 +59,6 @@ func (h *wsHub) dispatch(c *wsClient, cmd string, raw []byte) { } } -func (h *wsHub) replyNotImplemented(c *wsClient, replyCmd, msg string) { - c.queue(mustJSON(map[string]any{ - "cmd": replyCmd, - "ok": false, - "msg": msg, - })) -} - // requireAdmin combines token validation with a role=="admin" check. The // reply on failure has the standard `{cmd, ok:false, msg}` shape so the // front-end's generic toast handler can surface the reason. @@ -555,6 +551,134 @@ func (h *wsHub) handleGetGroups(c *wsClient, raw []byte) { })) } +// handleTermOpen kicks off a web terminal session. On success the wsClient +// records `termWatching = deviceID` so subsequent term_input / term_resize +// have a target, and the hub sends COMMAND_SHELL to the device. The +// device's shell sub-conn arrives separately and is bound by the TCP layer +// via Hub.BindTerminalConn; that step fires OnTerminalReady to flip the +// browser into "ready" state. +// +// Single-viewer is enforced at the hub. The C++ side matches: +// server/2015Remote/WebService.cpp:1799. 
+func (h *wsHub) handleTermOpen(c *wsClient, raw []byte) {
+	// Every failure below is reported as a `term_closed` reply with ok:false.
+	const replyCmd = "term_closed"
+	if !h.requireAuth(c, raw, replyCmd) {
+		return
+	}
+	var in struct {
+		ID string `json:"id"`
+	}
+	if err := json.Unmarshal(raw, &in); err != nil || in.ID == "" {
+		c.queue(mustJSON(map[string]any{"cmd": replyCmd, "ok": false, "msg": "Bad request"}))
+		return
+	}
+
+	// Pin termWatching BEFORE asking the hub to open the session: the
+	// device's shell sub-conn can arrive in <100 ms on LAN, and
+	// OnTerminalReady filters by termWatching. Same race shape as the
+	// screen path in handleConnect.
+	//
+	// prev is remembered so that a failed open can be rolled back to the
+	// state the client was in before this request.
+	h.mu.Lock()
+	prev := c.termWatching
+	if prev != "" && prev != in.ID {
+		h.mu.Unlock()
+		c.queue(mustJSON(map[string]any{
+			"cmd": replyCmd, "ok": false,
+			"msg": "Close current terminal before opening another",
+		}))
+		return
+	}
+	c.termWatching = in.ID
+	h.mu.Unlock()
+
+	if err := h.devices.OpenTerminalSession(in.ID); err != nil {
+		// Roll back to prev rather than unconditionally to "". When this
+		// client already held in.ID (a duplicate term_open), blanking the
+		// field would orphan its live session: term_input / term_close
+		// would be ignored and unregister would no longer close it.
+		// The equality guard avoids resurrecting a value that
+		// OnTerminalClosed cleared while the hub call was in flight.
+		h.mu.Lock()
+		if c.termWatching == in.ID {
+			c.termWatching = prev
+		}
+		h.mu.Unlock()
+
+		var msg string
+		switch err {
+		case hub.ErrTerminalBusy:
+			msg = "Terminal already open by another viewer"
+		case hub.ErrDeviceOffline:
+			msg = "Device offline"
+		default:
+			// Unexpected failure: log the detail, show a generic message.
+			msg = "Failed to start terminal"
+			h.log.Error("OpenTerminalSession(%s): %v", in.ID, err)
+		}
+		c.queue(mustJSON(map[string]any{"cmd": replyCmd, "ok": false, "msg": msg}))
+		return
+	}
+	// No success reply here: the browser is notified via OnTerminalReady
+	// once the device's shell sub-conn has been bound.
+	h.log.Info("term_open: device=%s role=%s", in.ID, c.role)
+}
+
+// handleTermInput forwards xterm.js keystrokes to the device's shell
+// sub-conn verbatim. The client's ConPTYManager treats anything that
+// isn't a known control byte (CMD_TERMINAL_RESIZE / COMMAND_NEXT) as
+// raw PTY input — see client/ConPTYManager.cpp:244.
+func (h *wsHub) handleTermInput(c *wsClient, raw []byte) {
+	if !h.requireAuth(c, raw, "term_input_result") {
+		return
+	}
+	var in struct {
+		ID   string `json:"id"`
+		Data string `json:"data"`
+	}
+	if err := json.Unmarshal(raw, &in); err != nil {
+		return
+	}
+	if in.ID == "" || in.Data == "" {
+		return
+	}
+	if !h.ownsTerminal(c, in.ID) {
+		return // someone else's session, or no session
+	}
+	// Best-effort: this handler sends no per-keystroke reply, so a send
+	// error is intentionally dropped.
+	_ = h.devices.SendToTerminal(in.ID, []byte(in.Data))
+}
+
+// handleTermResize forwards xterm.js fit/resize events to the device's
+// PTY. Legacy cmd-pipe mode silently ignores resize (the underlying
+// pipes have no notion of geometry). Requests with a non-positive or
+// out-of-int16-range dimension are dropped without a reply.
+func (h *wsHub) handleTermResize(c *wsClient, raw []byte) {
+	// CMD_TERMINAL_RESIZE is documented as carrying cols/rows as int16 LE,
+	// so larger values cannot be represented on the wire.
+	const maxDim = 1<<15 - 1
+
+	if !h.requireAuth(c, raw, "term_resize_result") {
+		return
+	}
+	var in struct {
+		ID   string `json:"id"`
+		Cols int    `json:"cols"`
+		Rows int    `json:"rows"`
+	}
+	if err := json.Unmarshal(raw, &in); err != nil {
+		return
+	}
+	if in.ID == "" || in.Cols <= 0 || in.Rows <= 0 {
+		return
+	}
+	if in.Cols > maxDim || in.Rows > maxDim {
+		return
+	}
+	if !h.ownsTerminal(c, in.ID) {
+		return
+	}
+	if !h.devices.TerminalIsPTY(in.ID) {
+		return // legacy cmd pipe — ignored, same as the C++ guard
+	}
+	// Best-effort, like term_input: no reply is sent for a resize.
+	_ = h.devices.SendToTerminal(in.ID, protocol.BuildTerminalResize(in.Cols, in.Rows))
+}
+
+// handleTermClose tears down the active session. CloseTerminalSession
+// fires OnTerminalClosed which the wsHub broadcast loop turns into the
+// front-end's `term_closed` notification — no need to ack here.
+func (h *wsHub) handleTermClose(c *wsClient, raw []byte) {
+	if !h.requireAuth(c, raw, "term_closed") {
+		return
+	}
+	var in struct {
+		ID string `json:"id"`
+	}
+	if err := json.Unmarshal(raw, &in); err != nil || in.ID == "" {
+		return
+	}
+	// The ownership check releases h.mu before CloseTerminalSession runs:
+	// OnTerminalClosed takes h.mu.Lock, so h.mu must not be held here.
+	if !h.ownsTerminal(c, in.ID) {
+		return
+	}
+	h.devices.CloseTerminalSession(in.ID)
+}
+
+// ownsTerminal reports whether c is currently attached to deviceID's
+// terminal session. termWatching is written under h.mu by handleTermOpen
+// and OnTerminalClosed, so it is read under the same lock here instead
+// of racing with those writers. The lock is released on return, before
+// the caller goes on to call into h.devices.
+func (h *wsHub) ownsTerminal(c *wsClient, deviceID string) bool {
+	h.mu.RLock()
+	defer h.mu.RUnlock()
+	return c.termWatching == deviceID
+}
+
 // tickMillis returns a 32-bit-truncated ms timestamp suitable for the
 // MSG64.time field. The client compares these with GetTickCount(), which
 // is also a 32-bit ms counter — exact origin doesn't matter, only that