package protocol

import (
	"bytes"
	"crypto/hmac"
	"crypto/sha256"
	"encoding/binary"
	"encoding/hex"
	"strconv"
	"strings"

	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)

// GbkToUTF8 decodes a NUL-terminated GBK buffer (typical for Windows
// clients) into a UTF-8 string.
//
// Bytes from the first NUL onward are ignored. The decoded text is passed
// through cleanString, which strips control characters and trims surrounding
// whitespace. An empty buffer (or one that starts with NUL) yields "". If
// the GBK decoder reports an error, the raw bytes are cleaned and returned
// as they are instead.
func GbkToUTF8(data []byte) string {
	// Keep only what precedes the first NUL terminator.
	raw, _, _ := bytes.Cut(data, []byte{0})
	if len(raw) == 0 {
		return ""
	}

	var decoded bytes.Buffer
	src := transform.NewReader(bytes.NewReader(raw), simplifiedchinese.GBK.NewDecoder())
	if _, err := decoded.ReadFrom(src); err != nil {
		// GBK decoding failed: treat the bytes as UTF-8 / ASCII instead.
		return cleanString(string(raw))
	}
	return cleanString(decoded.String())
}

// Utf8CleanString is the UTF-8 counterpart of GbkToUTF8: it cuts the buffer
// at the first NUL and strips non-printables, without any transcoding. It is
// meant for clients that have the CLIENT_CAP_UTF8 capability bit. Decoding
// as GBK in that case would mangle multi-byte sequences (the C++ comment at
// WebService.cpp:1530 calls out this exact "double-encoding" footgun).
func Utf8CleanString(data []byte) string {
	text, _, _ := bytes.Cut(data, []byte{0})
	if len(text) == 0 {
		return ""
	}
	return cleanString(string(text))
}

// cleanString drops every rune below U+0020 other than tab, line feed and
// carriage return, then trims leading and trailing whitespace.
func cleanString(s string) string {
	keep := func(r rune) rune {
		switch {
		case r >= 32, r == '\t', r == '\n', r == '\r':
			return r
		default:
			return -1 // a negative result tells strings.Map to drop the rune
		}
	}
	return strings.TrimSpace(strings.Map(keep, s))
}

// Client capability bitmask values, matching common/commands.h CLIENT_CAP_*.
// Reported in the hex tail of LOGIN_INFOR.moduleVersion (after the '-').
const ( ClientCapV2 uint32 = 0x0001 // CLIENT_CAP_V2 — V2 file transfer ClientCapUTF8 uint32 = 0x0002 // CLIENT_CAP_UTF8 — UTF-8 protocol strings (activeWindow, key-log titles, ...) ClientCapScreenPreview uint32 = 0x0004 // CLIENT_CAP_SCREEN_PREVIEW ) // SupportsCap returns true when the client's reported capability hex string // has the given bit set. An empty / unparseable string means "no caps" and // matches the legacy GBK-Windows convention. func SupportsCap(capability string, bit uint32) bool { if capability == "" { return false } caps, err := strconv.ParseUint(strings.TrimSpace(capability), 16, 32) if err != nil { return false } return uint32(caps)&bit != 0 } // DecodeClientString decodes a fixed-length, NUL-padded buffer the client // sent as part of a binary protocol field (typically ActiveWnd). If the // client signals UTF-8 capability or is known to ship UTF-8 by default // (Linux / macOS), the bytes are treated as UTF-8; otherwise they're // decoded from GBK (CP936 — the legacy Windows default). // // clientType comes from LOGIN_INFOR reserved field 0 (RES_CLIENT_TYPE) and // capability from the hex tail of moduleVersion. Both can be empty. func DecodeClientString(data []byte, capability, clientType string) string { if SupportsCap(capability, ClientCapUTF8) || clientType == "LNX" || clientType == "MAC" { return Utf8CleanString(data) } return GbkToUTF8(data) } // Command tokens - matching the C++ definitions (common/commands.h). 
const (
	// Server -> Client commands
	CommandActived       byte = 0   // COMMAND_ACTIVED
	CommandScreenSpy     byte = 16  // COMMAND_SCREEN_SPY - start screen capture
	CommandScreenControl byte = 20  // COMMAND_SCREEN_CONTROL - mouse/keyboard input (MSG64 batches)
	CommandNext          byte = 30  // COMMAND_NEXT - "control-side dialog is open, you may stream"
	CommandShell         byte = 40  // COMMAND_SHELL - ask device to open a shell sub-connection
	CommandTerminalRsize byte = 81  // CMD_TERMINAL_RESIZE - [cmd:1][cols:2 LE][rows:2 LE]
	CmdRestoreConsole    byte = 82  // CMD_RESTORE_CONSOLE - RDP session "return home": switch back to the console session and restart capture
	CommandBye           byte = 204 // COMMAND_BYE - disconnect
	CommandHeartbeat     byte = 216 // CMD_HEARTBEAT_ACK

	// Client -> Server tokens
	TokenAuth          byte = 100 // TOKEN_AUTH - authorization required
	TokenHeartbeat     byte = 101 // TOKEN_HEARTBEAT
	TokenLogin         byte = 102 // TOKEN_LOGIN - login packet
	TokenBitmapInfo    byte = 115 // TOKEN_BITMAPINFO - screen sub-connection header
	TokenFirstScreen   byte = 116 // TOKEN_FIRSTSCREEN - raw BGRA baseline frame (NOT H264)
	TokenNextScreen    byte = 117 // TOKEN_NEXTSCREEN - non-keyframe H264 (P-frame)
	TokenShellStart    byte = 128 // TOKEN_SHELL_START - legacy cmd-pipe shell sub-conn open
	TokenKeyframe      byte = 134 // TOKEN_KEYFRAME - H264 IDR (sent on GOP boundary)
	TokenTerminalStart byte = 232 // TOKEN_TERMINAL_START - modern PTY shell sub-conn open
	TokenTerminalClose byte = 233 // TOKEN_TERMINAL_CLOSE - shell exited / close ack
	TokenConnAuth      byte = 246 // TOKEN_CONN_AUTH - sub-connection identity handshake
	CmdCursorImage     byte = 93  // CMD_CURSOR_IMAGE - custom cursor bitmap (Phase 5+ feature)
)

// Sub-connection authentication (matches common/commands.h ConnAuth* structs).
// Each newly-opened sub-conn first sends a 512-byte ConnAuthPacket, then waits
// for a 256-byte ConnAuthAck before any further command is meaningful.
const (
	ConnAuthPacketSize = 512
	ConnAuthAckSize    = 256

	// ConnAuthPacket field offsets within the inbound 512-byte buffer.
	// Layout (from common/commands.h::ConnAuthPacket):
	//   [token:1][clientID:8 LE][timestamp:8 LE][nonce:16][signature:64][reserved:415]
	//
	// ConnAuthOffClientID locates the uint64 LE client ID. It is pinned to
	// the sub-conn so later 1-byte tokens (TOKEN_TERMINAL_START etc.) can
	// resolve the parent device.
	ConnAuthOffClientID = 1

	// ConnAuthAck field offsets within the outbound 256-byte buffer.
	ConnAuthAckOffStatus     = 1 // uint8
	ConnAuthAckOffServerTime = 2 // uint64 LE

	// Status codes.
	ConnAuthStatusOK byte = 0
)

// CMD_MASTERSETTING is the server's reply to a fresh client login. The
// client uses the Signature field to prove this server has the shared
// secret; without a valid signature the client's private FileUpload init
// aborts the process. Struct layout matches MasterSettings in
// common/commands.h (pragma pack 4, total 1000 bytes).
const (
	CmdMasterSetting byte = 215

	MasterSettingsSize              = 1000
	MasterSettingsOffReportInterval = 0   // int32, seconds
	MasterSettingsOffSignature      = 508 // Signature[64]
	MasterSettingsSignatureLen      = 64

	// DefaultReportIntervalSec matches the C++ default. Sending 0 makes the
	// client disable its active-window heartbeat field, breaking RTT /
	// ActiveWindow live updates on the web UI.
	DefaultReportIntervalSec = 5
)

// SignMessage returns HMAC-SHA256(password, msg) as a 64-character lowercase
// hex string. It is used to sign CMD_MASTERSETTING replies so the client can
// verify the response came from a legitimate server.
//
// The key is a deployment-time shared secret loaded from the
// YAMA_SIGN_PASSWORD env var so the binary doesn't carry the literal in
// cleartext; provision out-of-band and never commit it.
func SignMessage(password string, msg []byte) string {
	signer := hmac.New(sha256.New, []byte(password))
	signer.Write(msg)
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}

// Screen-spy parameters that match the C++ ScreenSpy implementation.
const (
	AlgorithmH264 byte = 2 // ALGORITHM_H264 — H264 encoding (the algorithm web uses)
)

// Windows message constants used inside MSG64.message. The client dispatches
// on these values verbatim (CScreenManager::ProcessCommand at
// client/ScreenManager.cpp:1617), so these MUST stay bit-identical to the
// WinUser.h definitions even though this Go server is cross-platform.
const (
	WMKeyDown       uint64 = 0x0100
	WMKeyUp         uint64 = 0x0101
	WMSysKeyDown    uint64 = 0x0104
	WMSysKeyUp      uint64 = 0x0105
	WMMouseMove     uint64 = 0x0200
	WMLButtonDown   uint64 = 0x0201
	WMLButtonUp     uint64 = 0x0202
	WMLButtonDblClk uint64 = 0x0203
	WMRButtonDown   uint64 = 0x0204
	WMRButtonUp     uint64 = 0x0205
	WMRButtonDblClk uint64 = 0x0206
	WMMButtonDown   uint64 = 0x0207
	WMMButtonUp     uint64 = 0x0208
	WMMouseWheel    uint64 = 0x020A
)

// Virtual-key codes referenced from the input mapping. Same numeric values
// as the Win32 VK_* constants.
const (
	VKLWin     = 0x5B // VK_LWIN — filtered: never forwarded
	VKRWin     = 0x5C // VK_RWIN — filtered: never forwarded
	VKPrior    = 0x21 // VK_PRIOR (Page Up) — extended-key range start
	VKDown     = 0x28 // VK_DOWN — extended-key range end
	VKInsert   = 0x2D
	VKDelete   = 0x2E
	VKNumLock  = 0x90
	VKRControl = 0xA3
	VKRMenu    = 0xA5
	VKApps     = 0x5D
)

// MK_* wParam bitflags for mouse-button messages.
const (
	MKLButton uint64 = 0x0001
	MKRButton uint64 = 0x0002
	MKMButton uint64 = 0x0010
)

// Msg64Size is the byte length of one MSG64 record, the fixed layout the
// client expects inside a COMMAND_SCREEN_CONTROL packet (common/commands.h
// class MSG64):
//
//	[hwnd:8][message:8][wParam:8][lParam:8][time:8][pt.x:4][pt.y:4]
//
// All uint64 fields are little-endian; pt is two int32 LE. The client's
// ProcessCommand validates `ulLength % 48 == 0` and treats each 48-byte
// block as one MSG64.
const Msg64Size = 48

// BuildScreenControlPacket encodes one COMMAND_SCREEN_CONTROL packet
// carrying a single MSG64 record. The cmd byte is prepended.
// // Wire layout: // // [CMD:1][hwnd:8 LE][message:8 LE][wParam:8 LE][lParam:8 LE][time:8 LE][pt.x:4 LE][pt.y:4 LE] // // time is filled with a monotonic-ish ms value (ms since Unix epoch trimmed // to 32 bits) so the client's GetTickCount() comparisons stay reasonable. func BuildScreenControlPacket(message, wParam, lParam uint64, ptX, ptY int32, timeMs uint32) []byte { buf := make([]byte, 1+Msg64Size) buf[0] = CommandScreenControl // hwnd left zero — the client recomputes hWnd via WindowFromPoint. binary.LittleEndian.PutUint64(buf[1+8:1+16], message) binary.LittleEndian.PutUint64(buf[1+16:1+24], wParam) binary.LittleEndian.PutUint64(buf[1+24:1+32], lParam) binary.LittleEndian.PutUint64(buf[1+32:1+40], uint64(timeMs)) binary.LittleEndian.PutUint32(buf[1+40:1+44], uint32(ptX)) binary.LittleEndian.PutUint32(buf[1+44:1+48], uint32(ptY)) return buf } // TerminalBinaryMagic is the 4-byte prefix the web UI uses to demultiplex // terminal output from screen frames over the single WebSocket. Matches // the C++ side at server/2015Remote/WebService.cpp:2013 ("TRM1"). Screen // frames lead with a uint32 LE device ID, so collisions with this exact // magic are astronomically rare in practice. var TerminalBinaryMagic = [4]byte{'T', 'R', 'M', '1'} // BuildTerminalResize encodes the 5-byte CMD_TERMINAL_RESIZE packet the // client's ConPTYManager/TerminalManager expects on the shell sub-conn: // // [CMD_TERMINAL_RESIZE:1][cols:2 LE][rows:2 LE] // // cols/rows are signed int16 on the wire (the C++ side casts to `short`). func BuildTerminalResize(cols, rows int) []byte { buf := make([]byte, 5) buf[0] = CommandTerminalRsize binary.LittleEndian.PutUint16(buf[1:3], uint16(int16(cols))) binary.LittleEndian.PutUint16(buf[3:5], uint16(int16(rows))) return buf } // MakeLParam packs x into the low word and y into the high word — the // Windows MAKELPARAM macro the client expects in mouse-message lParams. 
func MakeLParam(x, y int32) uint64 { return uint64(uint32(x)&0xFFFF) | (uint64(uint32(y)&0xFFFF) << 16) } // IsExtendedKey returns true when the given Win32 VK code should set the // extended-key bit (bit 24) in a keyboard lParam. Matches the C++ // HandleKey logic (server/2015Remote/WebService.cpp:944). func IsExtendedKey(vk int) bool { if vk >= VKPrior && vk <= VKDown { return true } switch vk { case VKInsert, VKDelete, VKNumLock, VKRControl, VKRMenu, VKApps: return true } return false } // Reserved-field indices we care about (see common/commands.h RES_* enum). // LOGIN_INFOR.szReserved is a '|'-separated list; clients fill known slots // even when leaving others blank ("?"). const ( ResFieldClientType = 0 // RES_CLIENT_TYPE — client kind (Windows / macOS / ...) ResFieldFilePath = 4 // RES_FILE_PATH — install path ResFieldInstallTime = 6 // RES_INSTALL_TIME ResFieldClientLoc = 10 // RES_CLIENT_LOC — geo string ResFieldClientPubIP = 11 // RES_CLIENT_PUBIP — public IP ResFieldResolution = 15 // RES_RESOLUTION — client-formatted screen geometry: "N:W*H" ResFieldClientID = 16 // RES_CLIENT_ID — uint64 decimal, matches TOKEN_BITMAPINFO clientID ) // ScreenFrameHeaderLen is the size of the small per-frame header prepended by // the device on every TOKEN_NEXTSCREEN buffer, before the H.264 NAL payload. // Layout (excluding the leading TOKEN_* byte): // // [algorithm:1][cursorPos:8 (int32 x, int32 y)][cursorIdx:1] = 10 bytes // // (The C++ side counts the token byte into its ulHeadLength=11; we keep the // constant strictly post-token so the call site reads `skip := 1 + headerLen` // without confusion.) SCREENYSPY_IMPROVE adds a 4-byte frameID after the // cursor index, which is the production-off setting per common/commands.h. const ScreenFrameHeaderLen = 1 + 8 + 1 // IsH264Keyframe scans an Annex-B H.264 bitstream for a NAL unit indicating // a keyframe boundary — IDR (type 5), SPS (7) or PPS (8). Returns true on // the first hit. 
// Matches the detection used by the C++ ScreenSpy broadcast
// path so frame-type bytes stay consistent across server implementations.
//
// Both 3-byte (00 00 01) and 4-byte (00 00 00 01) start codes are
// recognised. The NAL type is the low five bits of the byte that follows the
// start code; a start code with no byte after it is ignored. Empty or
// too-short input returns false.
func IsH264Keyframe(data []byte) bool {
	n := len(data)
	// Every 4-byte start code ends in the 3-byte pattern 00 00 01, so looking
	// for that pattern alone finds the NAL header after either form. The
	// bound i+3 < n guarantees the header byte exists and still lets a start
	// code whose header is the very last byte be inspected.
	for i := 0; i+3 < n; i++ {
		if data[i] != 0 || data[i+1] != 0 || data[i+2] != 1 {
			continue
		}
		switch data[i+3] & 0x1F {
		case 5, 7, 8: // IDR slice, SPS, PPS
			return true
		}
	}
	return false
}

// LOGIN_INFOR structure size and offsets (matching C++ struct with default alignment)
// Note: C++ struct uses default alignment (4-byte for uint32/int)
//
// NOTE(review): the offsets below place the end of the reserved field at
// 476+512 = 988 bytes, which does not agree with LoginInfoSize = 980 —
// confirm against common/commands.h.
const (
	LoginInfoSize = 980 // Total size of LOGIN_INFOR struct (with alignment padding)

	// Field offsets (with alignment padding)
	OffsetToken       = 0 // 1 byte (unsigned char)
	OffsetOsVerInfoEx = 1 // 156 bytes (char[156])
	// 3 bytes padding here to align dwCPUMHz to 4-byte boundary
	OffsetCPUMHz        = 160 // 4 bytes (unsigned int) - aligned to 4
	OffsetModuleVersion = 164 // 24 bytes (char[24])
	OffsetPCName        = 188 // 240 bytes (char[240])
	OffsetMasterID      = 428 // 20 bytes (char[20])
	OffsetWebCamExist   = 448 // 4 bytes (int) - aligned to 4
	OffsetSpeed         = 452 // 4 bytes (unsigned int)
	OffsetStartTime     = 456 // 20 bytes (char[20])
	OffsetReserved      = 476 // 512 bytes (char[512])
)

// LoginInfo represents client login information
type LoginInfo struct {
	Token         byte
	OsVerInfo     string // OS version info
	CPUMHz        uint32
	ModuleVersion string
	PCName        string // Computer name
	MasterID      string
	WebCamExist   bool
	Speed         uint32
	StartTime     string
	Reserved      string // Contains additional info separated by |
}

// ParseLoginInfo parses LOGIN_INFOR from data.
//
// Encoding: text fields are GBK on legacy Windows clients and UTF-8 on modern
// clients that set CLIENT_CAP_UTF8 (always on for LNX / MAC). Picking the
// wrong codec mangles non-ASCII characters — e.g.
a German location string // "Nürnberg" sent as UTF-8 (4E C3 BC 72 ...) and force-decoded as GBK turns // into mojibake. The heartbeat path already honors this via DecodeClientString // (see cmd/main.go handleHeartbeat); ParseLoginInfo previously did not, so // every login string from a UTF-8 client was being misread. // // To get encoding right we have a chicken-and-egg problem: capability lives // in ModuleVersion (offset 164) and clientType lives in Reserved field 0 // (offset 476) — but Reserved itself needs that information to decode. Both // "discriminator" values are pure ASCII (hex digits, "Windows"/"LNX"/"MAC"), // so we can extract them with a UTF-8 read and then re-decode the actual // user-text fields with the correct codec. func ParseLoginInfo(data []byte) (*LoginInfo, error) { if len(data) < 100 { // Minimum size check return nil, ErrInvalidData } info := &LoginInfo{ Token: data[0], } // CPU MHz, WebCam, Speed — fixed-width binary, encoding-independent. if len(data) >= OffsetCPUMHz+4 { info.CPUMHz = binary.LittleEndian.Uint32(data[OffsetCPUMHz:]) } if len(data) >= OffsetWebCamExist+4 { info.WebCamExist = binary.LittleEndian.Uint32(data[OffsetWebCamExist:]) != 0 } if len(data) >= OffsetSpeed+4 { info.Speed = binary.LittleEndian.Uint32(data[OffsetSpeed:]) } // ModuleVersion is "version-capabilityHex" — pure ASCII (e.g. "Dec 19 // 2025-0006"). Safe to read as UTF-8 regardless of client codec. if len(data) >= OffsetModuleVersion+24 { info.ModuleVersion = Utf8CleanString(data[OffsetModuleVersion : OffsetModuleVersion+24]) } _, capability, _ := strings.Cut(info.ModuleVersion, "-") // Peek at Reserved field 0 (RES_CLIENT_TYPE: "Windows" / "LNX" / "MAC") // — pure ASCII, so we can read raw bytes without knowing the codec. // LNX / MAC clients are implicitly UTF-8 even when capability is absent. 
clientType := "" if len(data) > OffsetReserved { raw := data[OffsetReserved:min(OffsetReserved+512, len(data))] if nul := bytes.IndexByte(raw, 0); nul >= 0 { raw = raw[:nul] } head, _, _ := bytes.Cut(raw, []byte("|")) clientType = string(head) } // Now decode every user-text field with the client's actual codec. decode := func(b []byte) string { return DecodeClientString(b, capability, clientType) } if len(data) >= OffsetOsVerInfoEx+156 { info.OsVerInfo = decode(data[OffsetOsVerInfoEx : OffsetOsVerInfoEx+156]) } if len(data) >= OffsetPCName+240 { info.PCName = decode(data[OffsetPCName : OffsetPCName+240]) } if len(data) >= OffsetMasterID+20 { info.MasterID = decode(data[OffsetMasterID : OffsetMasterID+20]) } if len(data) >= OffsetStartTime+20 { info.StartTime = decode(data[OffsetStartTime : OffsetStartTime+20]) } if len(data) >= OffsetReserved+512 { info.Reserved = decode(data[OffsetReserved : OffsetReserved+512]) } else if len(data) > OffsetReserved { info.Reserved = decode(data[OffsetReserved:]) } return info, nil } // ParseReserved parses the reserved field into a slice of strings func (info *LoginInfo) ParseReserved() []string { if info.Reserved == "" { return nil } return strings.Split(info.Reserved, "|") } // GetReservedField returns a specific field from reserved data by index // Fields: ClientType(0), SystemBits(1), CPU(2), Memory(3), FilePath(4), // Reserved(5), InstallTime(6), InstallInfo(7), ProgramBits(8), ExpiredDate(9), // ClientLoc(10), ClientPubIP(11), ExeVersion(12), Username(13), IsAdmin(14) func (info *LoginInfo) GetReservedField(index int) string { fields := info.ParseReserved() if index >= 0 && index < len(fields) { return fields[index] } return "" } // Validation structure for TOKEN_AUTH type Validation struct { From string // Start date (20 bytes) To string // End date (20 bytes) Admin string // Admin address (100 bytes) Port uint16 // Admin port (2 bytes) MaxDepth uint16 // Max generation depth (2 bytes), 0=cannot generate sub-master 
Checksum string // HMAC checksum field (16 bytes) } // BuildValidation creates a validation response func BuildValidation(days float64, admin string, port int, maxDepth uint16) []byte { // This would build the validation structure // For now, return a simple structure data := make([]byte, 160) // Size of Validation struct data[0] = TokenAuth // Fill in fields... // From: 20 bytes (offset 0) // To: 20 bytes (offset 20) // Admin: 100 bytes (offset 40) // Port: 2 bytes (offset 140) // MaxDepth: 2 bytes (offset 142) // Checksum: 16 bytes (offset 144) return data }