#import "ScreenHandler.h"
#import "H264Encoder.h"
#import "InputHandler.h"
#import "../client/IOCPClient.h"
#import "../common/commands.h"
#import "../common/FileTransferV2.h"
#import "../common/logger.h"
#import "Permissions.h"

// NOTE(review): the angle-bracket header names of the five system imports were
// lost in the copy of this file that was reviewed. The five below are
// reconstructed from the APIs this file calls (NSScreen/NSLog, CG*/AX*,
// IOPMAssertion*, mach_absolute_time, stat) - confirm against version control.
#import <Cocoa/Cocoa.h>
#import <ApplicationServices/ApplicationServices.h>
#import <IOKit/pwr_mgt/IOPMLib.h>
#import <mach/mach_time.h>
#import <sys/stat.h>

// Standard headers for names used directly in this file.
#include <unistd.h>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <thread>
#include <vector>

// Global client ID (calculated in main.mm)
extern uint64_t g_myClientID;

// ---------------------------------------------------------------------------
// File-local helpers
// ---------------------------------------------------------------------------

// Creates a NoDisplaySleep power assertion and stores its ID in *assertionID.
// Returns the IOKit result code so the caller can log success or failure.
static IOReturn createNoDisplaySleepAssertion(IOPMAssertionID* assertionID)
{
    return IOPMAssertionCreateWithName(
        kIOPMAssertionTypeNoDisplaySleep,
        kIOPMAssertionLevelOn,
        CFSTR("SimpleRemoter - remote desktop session active"),
        assertionID);
}

// Returns true when the current session dictionary reports
// "CGSSessionScreenIsLocked" as a true value. Returns false when the
// dictionary or the key is unavailable.
static bool isSessionScreenLocked()
{
    CFDictionaryRef sessionInfo = CGSessionCopyCurrentDictionary();
    if (!sessionInfo) {
        return false;
    }

    bool locked = false;
    CFTypeRef value = CFDictionaryGetValue(sessionInfo, CFSTR("CGSSessionScreenIsLocked"));
    if (value) {
        // Check the dynamic type before interpreting the value.
        if (CFGetTypeID(value) == CFBooleanGetTypeID()) {
            locked = CFBooleanGetValue((CFBooleanRef)value);
        } else if (CFGetTypeID(value) == CFNumberGetTypeID()) {
            int numeric = 0;
            if (CFNumberGetValue((CFNumberRef)value, kCFNumberIntType, &numeric)) {
                locked = (numeric != 0);
            }
        }
    }
    CFRelease(sessionInfo);
    return locked;
}

// Reads one NUL-terminated string from [cursor, end) without ever reading at
// or past `end`. If no terminator is found, the remaining bytes up to `end`
// are returned. `cursor` is advanced past the string and its terminator.
static std::string takeCString(const char*& cursor, const char* end)
{
    if (cursor >= end) {
        return std::string();
    }
    const size_t remaining = (size_t)(end - cursor);
    const void* terminator = memchr(cursor, '\0', remaining);
    const size_t length = terminator
        ? (size_t)(static_cast<const char*>(terminator) - cursor)
        : remaining;

    std::string value(cursor, length);
    cursor += length;
    if (terminator) {
        cursor += 1;  // skip the NUL
    }
    return value;
}

// Worker body for COMMAND_GET_FILE: expands directories, determines the common
// root and hands the file list to FileTransferV2::SendFilesV2.
// Runs on a detached thread, so it owns its own autorelease pool.
// NOTE(review): `client` is a raw pointer used after the caller has returned;
// nothing here keeps it alive - confirm IOCPClient outlives the transfer.
static void uploadRequestedFiles(const std::vector<std::string>& files,
                                 const std::string& targetDir,
                                 IOCPClient* client)
{
    @autoreleasepool {
        // Collect all files (expand directories)
        std::vector<std::string> allFiles;
        std::vector<std::string> rootCandidates;

        for (const auto& path : files) {
            struct stat st;
            if (stat(path.c_str(), &st) != 0) continue;

            if (S_ISDIR(st.st_mode)) {
                std::string dirPath = path;
                if (dirPath.back() != '/') dirPath += '/';
                // Parent of the directory = everything up to the slash that
                // precedes the trailing one.
                size_t pos = dirPath.rfind('/', dirPath.length() - 2);
                std::string parentPath = (pos != std::string::npos)
                    ? dirPath.substr(0, pos + 1)
                    : dirPath;
                rootCandidates.push_back(parentPath);
                FileTransferV2::CollectFiles(dirPath, allFiles);
            } else {
                rootCandidates.push_back(path);
                allFiles.push_back(path);
            }
        }

        if (allFiles.empty()) {
            NSLog(@"*** No files to send");
            return;
        }

        std::string commonRoot = FileTransferV2::GetCommonRoot(rootCandidates);
        NSLog(@">>> Sending %zu files, root=%s", allFiles.size(), commonRoot.c_str());
        FileTransferV2::SendFilesV2(allFiles, targetDir, commonRoot, client, g_myClientID);
    }
}

// Loads one 32-bit pixel from a byte buffer without assuming alignment.
static inline uint32_t loadPixel(const uint8_t* base, uint32_t pixelIndex)
{
    uint32_t value;
    memcpy(&value, base + (size_t)pixelIndex * 4, sizeof(value));
    return value;
}

// Windows cursor type indices used on the wire (from CursorInfo.h):
//  0: IDC_APPSTARTING, 1: IDC_ARROW, 2: IDC_CROSS, 3: IDC_HAND,
//  4: IDC_HELP, 5: IDC_IBEAM, 6: IDC_ICON, 7: IDC_NO,
//  8: IDC_SIZE, 9: IDC_SIZEALL, 10: IDC_SIZENESW, 11: IDC_SIZENS,
//  12: IDC_SIZENWSE, 13: IDC_SIZEWE, 14: IDC_UPARROW, 15: IDC_WAIT
static const uint8_t kCursorArrow    = 1;
static const uint8_t kCursorHand     = 3;
static const uint8_t kCursorIBeam    = 5;
static const uint8_t kCursorSizeNS   = 11;
static const uint8_t kCursorSizeNWSE = 12;
static const uint8_t kCursorSizeWE   = 13;

// Maps the accessibility role of `element` to a Windows cursor index.
// Returns kCursorArrow when the role is unavailable or not recognised.
static uint8_t cursorIndexForElement(AXUIElementRef element)
{
    uint8_t index = kCursorArrow;

    CFTypeRef roleRef = nullptr;
    if (AXUIElementCopyAttributeValue(element, kAXRoleAttribute, &roleRef) != kAXErrorSuccess
        || !roleRef) {
        return index;
    }
    // Only bridge to NSString when the attribute really is a string.
    if (CFGetTypeID(roleRef) != CFStringGetTypeID()) {
        CFRelease(roleRef);
        return index;
    }

    NSString* role = (__bridge NSString*)roleRef;
    const bool isTextInput = [role isEqualToString:NSAccessibilityTextFieldRole]
                          || [role isEqualToString:NSAccessibilityTextAreaRole];

    if (isTextInput
        || [role isEqualToString:NSAccessibilityStaticTextRole]
        || [role isEqualToString:@"AXWebArea"]) {
        // Check if text is editable
        CFTypeRef editableRef = nullptr;
        if (AXUIElementCopyAttributeValue(element, CFSTR("AXEditable"), &editableRef)
                == kAXErrorSuccess) {
            if (editableRef
                && CFGetTypeID(editableRef) == CFBooleanGetTypeID()
                && CFBooleanGetValue((CFBooleanRef)editableRef)) {
                index = kCursorIBeam;  // editable text
            }
            if (editableRef) CFRelease(editableRef);
        } else if (isTextInput) {
            index = kCursorIBeam;      // text input fields
        }
    } else if ([role isEqualToString:NSAccessibilityLinkRole]
               || [role isEqualToString:@"AXLink"]) {
        index = kCursorHand;           // links
    } else if ([role isEqualToString:NSAccessibilityButtonRole]) {
        index = kCursorHand;           // buttons (clickable)
    } else if ([role isEqualToString:NSAccessibilitySplitterRole]
               || [role isEqualToString:@"AXSplitGroup"]) {
        // Check orientation for resize cursor
        index = kCursorSizeWE;         // default for splitters
        CFTypeRef orientRef = nullptr;
        if (AXUIElementCopyAttributeValue(element, CFSTR("AXOrientation"), &orientRef)
                == kAXErrorSuccess && orientRef) {
            if (CFGetTypeID(orientRef) == CFStringGetTypeID()) {
                NSString* orient = (__bridge NSString*)orientRef;
                if ([orient isEqualToString:@"AXHorizontalOrientation"]) {
                    index = kCursorSizeNS;  // vertical resize
                }
            }
            CFRelease(orientRef);
        }
    } else if ([role isEqualToString:NSAccessibilityGrowAreaRole]) {
        index = kCursorSizeNWSE;       // resize corners
    }

    CFRelease(roleRef);
    return index;
}

// ---------------------------------------------------------------------------
// ScreenHandler
// ---------------------------------------------------------------------------

// Stores the client, sets default capture settings (H264, 15 FPS, QUALITY_GOOD,
// 3 Mbps) and creates the input handler. Screen dimensions are filled in later
// by init().
ScreenHandler::ScreenHandler(IOCPClient* client)
    : m_client(client)
    , m_clientID(0)
    , m_running(false)
    , m_width(0)
    , m_height(0)
    , m_logicalWidth(0)
    , m_logicalHeight(0)
    , m_scaleFactor(1.0)
    , m_displayID(CGMainDisplayID())
    , m_algorithm(ALGORITHM_H264)
    , m_maxFPS(15)
    , m_qualityLevel(QUALITY_GOOD)  // Use fixed QUALITY_GOOD (H264) for web compatibility
    , m_h264Bitrate(3000000)        // 3 Mbps (matches Windows QUALITY_GOOD)
    , m_displayAssertionID(0)
{
    memset(&m_bmpHeader, 0, sizeof(m_bmpHeader));

    // Initialize input handler for mouse/keyboard control
    m_inputHandler = std::make_unique<InputHandler>();
    if (m_inputHandler->init()) {
        NSLog(@"InputHandler initialized with accessibility permission");
    } else {
        NSLog(@"InputHandler: waiting for accessibility permission");
    }
}

// Stops the capture thread and releases the encoder and display assertion.
ScreenHandler::~ScreenHandler()
{
    stop();
}

// Queries the main display, sizes the frame buffers and wakes the display if
// it is asleep or locked.
// Returns false when screen-capture permission is missing or the display
// reports non-positive pixel dimensions.
bool ScreenHandler::init()
{
    // Check permissions
    if (!Permissions::checkScreenCapture()) {
        NSLog(@"Screen capture permission not granted");
        return false;
    }

    // Get main display info
    m_displayID = CGMainDisplayID();

    // Get physical pixel dimensions (what we capture and send)
    CGDisplayModeRef mode = CGDisplayCopyDisplayMode(m_displayID);
    if (mode) {
        m_width = (int)CGDisplayModeGetPixelWidth(mode);
        m_height = (int)CGDisplayModeGetPixelHeight(mode);
        CGDisplayModeRelease(mode);
    } else {
        m_width = (int)CGDisplayPixelsWide(m_displayID);
        m_height = (int)CGDisplayPixelsHigh(m_displayID);
    }

    // Validate before anything is derived from these values.
    if (m_width <= 0 || m_height <= 0) {
        NSLog(@"Invalid screen dimensions: %dx%d", m_width, m_height);
        return false;
    }

    // Get logical point dimensions (what CGEvent uses)
    // NSScreen provides logical dimensions
    NSScreen* mainScreen = [NSScreen mainScreen];
    if (mainScreen) {
        NSRect frame = [mainScreen frame];
        m_logicalWidth = (int)frame.size.width;
        m_logicalHeight = (int)frame.size.height;
    }
    // Fallback: use physical dimensions. This also covers a screen that
    // reports an empty frame, which would otherwise make the scale factor a
    // division by zero.
    if (m_logicalWidth <= 0 || m_logicalHeight <= 0) {
        m_logicalWidth = m_width;
        m_logicalHeight = m_height;
    }

    // Calculate scale factor (Retina displays have factor > 1.0)
    m_scaleFactor = (double)m_width / (double)m_logicalWidth;

    NSLog(@"Screen dimensions: physical=%dx%d, logical=%dx%d, scale=%.2f",
          m_width, m_height, m_logicalWidth, m_logicalHeight, m_scaleFactor);

    // Initialize BITMAPINFOHEADER
    m_bmpHeader.biSize = sizeof(BITMAPINFOHEADER_MAC);
    m_bmpHeader.biWidth = m_width;
    m_bmpHeader.biHeight = m_height;
    m_bmpHeader.biPlanes = 1;
    m_bmpHeader.biBitCount = 32;
    m_bmpHeader.biCompression = 0;  // BI_RGB
    m_bmpHeader.biSizeImage = m_width * m_height * 4;

    // Allocate frame buffers.
    // Diff buffer = token + algorithm + cursor x/y + cursor type + payload.
    m_prevFrame.resize(m_bmpHeader.biSizeImage, 0);
    m_currFrame.resize(m_bmpHeader.biSizeImage, 0);
    m_diffBuffer.resize(1 + 1 + 8 + 1 + m_bmpHeader.biSizeImage * 2);

    // Wake display if needed (do this early, before sending TOKEN_BITMAPINFO)
    const bool wasAsleep = CGDisplayIsAsleep(m_displayID);
    const bool isLocked = isSessionScreenLocked();

    if (wasAsleep || isLocked) {
        NSLog(@"Waking display in init (asleep=%d, locked=%d)...", wasAsleep, isLocked);

        // Create NoDisplaySleep assertion - this wakes the display
        if (m_displayAssertionID == 0) {
            IOReturn result = createNoDisplaySleepAssertion(&m_displayAssertionID);
            if (result == kIOReturnSuccess) {
                NSLog(@"Display assertion created (ID: %u)", m_displayAssertionID);
            } else {
                NSLog(@"Failed to create display assertion (error: 0x%x)", result);
            }
        }

        // Declare user activity to ensure wake
        IOPMAssertionID wakeAssertionID = 0;
        IOPMAssertionDeclareUserActivity(
            CFSTR("SimpleRemoter - waking display"),
            kIOPMUserActiveLocal,
            &wakeAssertionID);
        if (wakeAssertionID) {
            IOPMAssertionRelease(wakeAssertionID);
        }

        // Brief wait for loginwindow to render
        std::this_thread::sleep_for(std::chrono::milliseconds(500));
        NSLog(@"Display wake complete");
    }

    NSLog(@"ScreenHandler initialized: %dx%d", m_width, m_height);
    return true;
}

// Starts the capture thread for `client`. If capture is already running, only
// TOKEN_BITMAPINFO is re-sent and the arguments are ignored.
void ScreenHandler::start(IOCPClient* client, uint64_t clientID)
{
    // If already running, just send TOKEN_BITMAPINFO again
    // This allows server to create additional dialogs (MFC can open while Web is active)
    if (m_running) {
        NSLog(@"ScreenHandler already running, sending TOKEN_BITMAPINFO for new dialog");
        sendBitmapInfo();
        return;
    }

    m_client = client;
    m_clientID = clientID;
    m_running = true;

    // Display wake was already done in init(), just ensure assertion exists
    if (m_displayAssertionID == 0) {
        IOReturn result = createNoDisplaySleepAssertion(&m_displayAssertionID);
        if (result == kIOReturnSuccess) {
            NSLog(@"Display sleep disabled (ID: %u)", m_displayAssertionID);
        } else {
            NSLog(@"Failed to create display assertion (error: 0x%x)", result);
        }
    }

    m_captureThread = std::thread(&ScreenHandler::captureLoop, this);
}

// Signals the capture loop to exit, joins the thread, then closes the encoder
// and releases the display-sleep assertion.
void ScreenHandler::stop()
{
    m_running = false;
    if (m_captureThread.joinable()) {
        m_captureThread.join();
    }

    // Close H264 encoder if open
    if (m_h264Encoder) {
        m_h264Encoder->close();
        m_h264Encoder.reset();
    }

    // Release display sleep assertion - allow screen to turn off
    if (m_displayAssertionID != 0) {
        IOPMAssertionRelease(m_displayAssertionID);
        NSLog(@"Display sleep re-enabled (released ID: %u)", m_displayAssertionID);
        m_displayAssertionID = 0;
    }
}

// Sends the TOKEN_BITMAPINFO packet describing the captured screen.
// Does nothing when no client is set.
void ScreenHandler::sendBitmapInfo()
{
    if (!m_client) return;

    // Build packet: [TOKEN_BITMAPINFO][BITMAPINFOHEADER][clientID][reserved][ScreenSettings]
    // ScreenSettings defined in commands.h (100 bytes), QualityLevel at offset 32
    const size_t headerOffset   = 1;
    const size_t clientIdOffset = headerOffset + sizeof(BITMAPINFOHEADER_MAC);
    const size_t settingsOffset = clientIdOffset + 2 * sizeof(uint64_t);  // clientID + reserved
    const uint32_t len = (uint32_t)(settingsOffset + sizeof(ScreenSettings));

    std::vector<uint8_t> buf(len, 0);
    buf[0] = TOKEN_BITMAPINFO;
    memcpy(&buf[headerOffset], &m_bmpHeader, sizeof(BITMAPINFOHEADER_MAC));

    uint64_t clientID = g_myClientID;
    memcpy(&buf[clientIdOffset], &clientID, sizeof(uint64_t));

    ScreenSettings settings = {};
    settings.MaxFPS = m_maxFPS.load();
    settings.QualityLevel = m_qualityLevel;  // Fixed quality level (e.g., QUALITY_GOOD = 2)
    memcpy(&buf[settingsOffset], &settings, sizeof(ScreenSettings));

    m_client->Send2Server((char*)buf.data(), len);
    NSLog(@"SendBitmapInfo: clientID=%llu, QualityLevel=%d, SettingsSize=%zu",
          clientID, m_qualityLevel, sizeof(ScreenSettings));
}

// Dispatches one server command. data[0] is the command byte; the rest of the
// buffer is the command payload. Unknown commands are ignored.
void ScreenHandler::OnReceive(uint8_t* data, ULONG size)
{
    if (!data || !size) return;

    switch (data[0]) {
    case COMMAND_NEXT:
        // Server ready, handled externally
        NSLog(@"Received COMMAND_NEXT from server");
        if (!m_running) {
            start(m_client, g_myClientID);
        }
        break;

    case COMMAND_SCREEN_CONTROL:
        // Handle mouse/keyboard control commands
        // Protocol: [COMMAND_SCREEN_CONTROL:1][MSG64:48]
        if (size >= 1 + sizeof(MSG64_MAC) && m_inputHandler) {
            MSG64_MAC msg;
            memcpy(&msg, data + 1, sizeof(MSG64_MAC));

            // Convert physical pixel coordinates to logical point coordinates
            // Server sends coordinates in physical pixels (matching our captured screen)
            // CGEvent expects logical points (for Retina displays, physical/scale)
            // NOTE(review): the rewrite is applied to every message, not only
            // to mouse messages - confirm lParam is not meaningful for
            // keyboard events.
            if (m_scaleFactor > 1.0) {
                // Extract coordinates from lParam (MAKELPARAM format: low=x, high=y)
                int x = (int)(msg.lParam & 0xFFFF);
                int y = (int)((msg.lParam >> 16) & 0xFFFF);

                // Scale down to logical coordinates
                x = (int)(x / m_scaleFactor);
                y = (int)(y / m_scaleFactor);

                // Update lParam with scaled coordinates
                msg.lParam = (uint64_t)x | ((uint64_t)y << 16);
                msg.pt_x = x;
                msg.pt_y = y;
            }

            m_inputHandler->handleInputEvent(&msg);
        }
        break;

    case CMD_QUALITY_LEVEL:
        if (size >= 2) {
            int8_t level = (int8_t)data[1];
            bool persist = (size >= 3) ? data[2] : false;
            applyQualityLevel(level, persist);
        }
        break;

    case COMMAND_GET_FILE:
        // Server requests file download: [cmd:1][targetDir\0][file1\0file2\0...\0]
        // Use V2 protocol to upload files
        {
            if (size < 3) break;

            // The payload comes from the network, so every string is read
            // with an explicit bound and never past data + size.
            const char* cursor = (const char*)(data + 1);
            const char* end = (const char*)(data + size);

            // Parse target directory (GBK encoding)
            std::string targetDirGbk = takeCString(cursor, end);
            std::string targetDir = FileTransferV2::gbkToUtf8(targetDirGbk);

            // Parse file list (terminated by an empty string or end of packet)
            std::vector<std::string> files;
            while (cursor < end && *cursor != '\0') {
                std::string fileGbk = takeCString(cursor, end);
                files.push_back(FileTransferV2::gbkToUtf8(fileGbk));
            }

            // TODO: If no file list, get from clipboard (ClipboardHandler not implemented yet)
            if (!files.empty() && !targetDir.empty()) {
                NSLog(@">>> COMMAND_GET_FILE: %zu files -> %s", files.size(), targetDir.c_str());

                // Use V2 protocol to send files, off the receive thread
                IOCPClient* client = m_client;
                std::thread([files, targetDir, client]() {
                    uploadRequestedFiles(files, targetDir, client);
                }).detach();
            } else {
                NSLog(@"*** COMMAND_GET_FILE: no files or empty target");
            }
        }
        break;

    default:
        break;
    }
}

// Records the requested quality level and, for levels inside the profile
// table, applies the matching FPS and algorithm. QUALITY_DISABLED and levels
// outside the table leave FPS and algorithm untouched.
// `persist` is accepted but not used by this implementation.
void ScreenHandler::applyQualityLevel(int8_t level, bool persist)
{
    (void)persist;
    m_qualityLevel = level;

    if (level == QUALITY_DISABLED) {
        NSLog(@"Quality: Disabled");
        return;
    }

    // Quality profiles: [FPS, Algorithm]
    // H264 provides best compression for remote desktop
    // Note: macOS uses slightly higher FPS than Windows for smoother experience
    static const int profiles[QUALITY_COUNT][2] = {
        {5,  ALGORITHM_GRAY},    // Level 0: Emergency (very low bandwidth)
        {10, ALGORITHM_RGB565},  // Level 1: Low
        {15, ALGORITHM_H264},    // Level 2: Medium (office work default)
        {20, ALGORITHM_H264},    // Level 3: Good
        {25, ALGORITHM_H264},    // Level 4: High
        {30, ALGORITHM_H264},    // Level 5: Smooth
    };

    if (level >= 0 && level < QUALITY_COUNT) {
        m_maxFPS.store(profiles[level][0]);
        m_algorithm.store(profiles[level][1]);
        NSLog(@"Quality: Level=%d, FPS=%d, Algo=%d",
              level, profiles[level][0], profiles[level][1]);
    } else {
        NSLog(@"Quality: Adaptive mode");
    }
}

// Captures the display into `buffer` as 32-bit BGRA rows stored bottom-up.
// Returns false when the capture fails, when the image size no longer matches
// m_width x m_height, or when the bitmap context cannot be created.
bool ScreenHandler::captureScreen(std::vector<uint8_t>& buffer)
{
    // Create image from display
    CGImageRef image = CGDisplayCreateImage(m_displayID);
    if (!image) {
        NSLog(@"Failed to capture screen image");
        return false;
    }

    size_t width = CGImageGetWidth(image);
    size_t height = CGImageGetHeight(image);

    if (width != (size_t)m_width || height != (size_t)m_height) {
        // Screen resolution changed, need to reinitialize
        CGImageRelease(image);
        NSLog(@"Screen resolution changed: %zux%zu", width, height);
        return false;
    }

    // Create bitmap context to get raw pixel data
    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
    size_t bytesPerRow = width * 4;

    // Temporary buffer for top-down BGRA
    std::vector<uint8_t> tempBuffer(bytesPerRow * height);

    CGContextRef context = CGBitmapContextCreate(
        tempBuffer.data(),
        width, height,
        8, bytesPerRow,
        colorSpace,
        kCGImageAlphaPremultipliedFirst | kCGBitmapByteOrder32Little  // BGRA
    );
    CGColorSpaceRelease(colorSpace);

    if (!context) {
        CGImageRelease(image);
        NSLog(@"Failed to create bitmap context");
        return false;
    }

    // Draw image into context
    CGContextDrawImage(context, CGRectMake(0, 0, width, height), image);
    CGContextRelease(context);
    CGImageRelease(image);

    // Never write past the destination, whatever size the caller passed in.
    if (buffer.size() < tempBuffer.size()) {
        buffer.resize(tempBuffer.size());
    }

    // Flip vertically (BMP is bottom-up, CGImage is top-down)
    for (size_t srcRow = 0; srcRow < height; srcRow++) {
        size_t dstRow = height - 1 - srcRow;
        memcpy(buffer.data() + dstRow * bytesPerRow,
               tempBuffer.data() + srcRow * bytesPerRow,
               bytesPerRow);
    }

    return true;
}

// Captures one frame and sends it raw as [TOKEN_FIRSTSCREEN][pixels], then
// keeps it as the reference frame for later diffs.
void ScreenHandler::sendFirstScreen()
{
    if (!m_client) return;  // nothing to send to; skip the capture
    if (!captureScreen(m_currFrame)) return;

    uint32_t imgSize = m_bmpHeader.biSizeImage;
    std::vector<uint8_t> buf(1 + imgSize);
    buf[0] = TOKEN_FIRSTSCREEN;
    memcpy(&buf[1], m_currFrame.data(), imgSize);

    m_client->Send2Server((char*)buf.data(), buf.size());

    // Save as previous frame
    m_prevFrame = m_currFrame;
}

// Captures one frame and sends the regions that differ from the previous one:
// [TOKEN_NEXTSCREEN][algorithm][cursorX][cursorY][cursorType][diff runs].
void ScreenHandler::sendDiffFrame()
{
    if (!m_client) return;  // nothing to send to; skip the capture
    if (!captureScreen(m_currFrame)) return;

    uint8_t* out = m_diffBuffer.data();
    out[0] = TOKEN_NEXTSCREEN;

    uint8_t* data = out + 1;

    // Write algorithm type
    uint8_t algo = m_algorithm.load();
    memcpy(data, &algo, sizeof(uint8_t));

    // Write cursor position
    int32_t cursorX, cursorY;
    getCursorPosition(cursorX, cursorY);
    memcpy(data + 1, &cursorX, sizeof(int32_t));
    memcpy(data + 1 + sizeof(int32_t), &cursorY, sizeof(int32_t));

    // Write cursor type
    uint8_t cursorType = getCursorTypeIndex();
    memcpy(data + 1 + 2 * sizeof(int32_t), &cursorType, sizeof(uint8_t));

    // algorithm + cursor x/y + cursor type
    uint32_t headerSize = 1 + 2 * sizeof(int32_t) + 1;
    uint8_t* diffData = data + headerSize;

    uint32_t diffLen = compareBitmap(m_currFrame.data(), m_prevFrame.data(),
                                     diffData, m_bmpHeader.biSizeImage, algo);

    uint32_t totalLen = 1 + headerSize + diffLen;
    m_client->Send2Server((char*)out, totalLen);

    // Update previous frame
    std::swap(m_prevFrame, m_currFrame);
}

// Captures one frame, encodes it with the H264 encoder (opened on first use)
// and sends it. When `keyframe` is true the encoder is asked for a keyframe.
// Nothing is sent when the encoder fails to open or produces no output.
void ScreenHandler::sendH264Frame(bool keyframe)
{
    if (!m_client) return;  // nothing to send to; skip the capture
    if (!captureScreen(m_currFrame)) return;

    // Initialize encoder if needed
    if (!m_h264Encoder) {
        m_h264Encoder = std::make_unique<H264Encoder>();
        int fps = m_maxFPS.load();
        if (fps <= 0) fps = 30;

        if (!m_h264Encoder->open(m_width, m_height, fps, m_h264Bitrate)) {
            NSLog(@"Failed to initialize H264 encoder: %s", m_h264Encoder->getLastError());
            m_h264Encoder.reset();
            return;
        }
        NSLog(@"H264 encoder initialized: %dx%d @ %d fps", m_width, m_height, fps);
    }

    // Force keyframe if requested
    if (keyframe) {
        m_h264Encoder->forceKeyframe();
    }

    // Encode frame
    uint8_t* encodedData = nullptr;
    uint32_t encodedSize = 0;
    uint32_t stride = m_width * 4;

    int result = m_h264Encoder->encode(
        m_currFrame.data(),
        32,  // bpp
        stride,
        m_width,
        m_height,
        &encodedData,
        &encodedSize,
        false  // Don't flip - keep bottom-up format like Windows client
    );

    if (result <= 0 || !encodedData || encodedSize == 0) {
        return;
    }

    // Build packet: [TOKEN_NEXTSCREEN][ALGORITHM_H264][CursorX][CursorY][CursorType][H264Data]
    // Note: H264 always uses TOKEN_NEXTSCREEN because:
    // - Server's TOKEN_KEYFRAME handler does nothing for H264 (just break)
    // - Server's TOKEN_NEXTSCREEN handler calls Decode() for H264
    // - H264 encoder manages keyframes (I-frames) internally
    // - FFmpeg decoder auto-detects I-frames vs P-frames
    uint32_t headerSize = 1 + 1 + 2 * sizeof(int32_t) + 1;
    std::vector<uint8_t> packet(headerSize + encodedSize);

    packet[0] = TOKEN_NEXTSCREEN;
    packet[1] = ALGORITHM_H264;

    // Cursor position
    int32_t cursorX, cursorY;
    getCursorPosition(cursorX, cursorY);
    memcpy(&packet[2], &cursorX, sizeof(int32_t));
    memcpy(&packet[2 + sizeof(int32_t)], &cursorY, sizeof(int32_t));

    // Cursor type
    packet[2 + 2 * sizeof(int32_t)] = getCursorTypeIndex();

    // H264 data
    memcpy(&packet[headerSize], encodedData, encodedSize);

    m_client->Send2Server((char*)packet.data(), packet.size());
}

// Writes the differences between two 32-bit frames to `outBuf` as a sequence
// of runs: [byteOffset:4][length:4][pixel data]. A run is closed once more
// than `gapThreshold` consecutive pixels are identical. Pixel data is raw
// BGRA, or converted to gray / RGB565 depending on `algo`.
// Returns the number of bytes written to `outBuf`.
uint32_t ScreenHandler::compareBitmap(const uint8_t* curr, const uint8_t* prev,
                                      uint8_t* outBuf, uint32_t totalBytes, uint8_t algo)
{
    const uint32_t bytesPerPixel = 4;
    const uint32_t totalPixels = totalBytes / bytesPerPixel;
    const uint32_t gapThreshold = 8;
    // Divisor applied to the byte count to form the length field.
    const uint32_t ratio = (algo == ALGORITHM_GRAY || algo == ALGORITHM_RGB565) ? 4 : 1;

    uint32_t outOffset = 0;
    uint32_t i = 0;

    while (i < totalPixels) {
        // Skip identical pixels
        while (i < totalPixels && loadPixel(curr, i) == loadPixel(prev, i)) {
            i++;
        }
        if (i >= totalPixels) break;

        // Extend the run, tolerating short gaps of unchanged pixels
        uint32_t start = i;
        uint32_t lastDiff = i;

        while (i < totalPixels) {
            if (loadPixel(curr, i) != loadPixel(prev, i)) {
                lastDiff = i;
            } else if (i - lastDiff > gapThreshold) {
                break;
            }
            i++;
        }

        uint32_t end = lastDiff + 1;
        uint32_t count = end - start;
        uint32_t byteOffset = start * bytesPerPixel;
        uint32_t byteCount = count * bytesPerPixel;

        // Write byteOffset
        memcpy(outBuf + outOffset, &byteOffset, sizeof(uint32_t));
        outOffset += sizeof(uint32_t);

        // Write length
        uint32_t lengthField = byteCount / ratio;
        memcpy(outBuf + outOffset, &lengthField, sizeof(uint32_t));
        outOffset += sizeof(uint32_t);

        // Write pixel data
        const uint8_t* srcData = curr + byteOffset;

        if (algo == ALGORITHM_RGB565) {
            convertBGRAtoRGB565(srcData, (uint16_t*)(outBuf + outOffset), count);
            outOffset += count * 2;
        } else if (algo == ALGORITHM_GRAY) {
            convertBGRAtoGray(srcData, outBuf + outOffset, count);
            outOffset += count;
        } else {
            memcpy(outBuf + outOffset, srcData, byteCount);
            outOffset += byteCount;
        }
    }

    return outOffset;
}

// Converts `pixelCount` BGRA pixels to one gray byte each using the integer
// weights 306/601/117 (sum 1024) followed by a shift of 10.
void ScreenHandler::convertBGRAtoGray(const uint8_t* src, uint8_t* dst, uint32_t pixelCount)
{
    for (uint32_t i = 0; i < pixelCount; i++) {
        const uint8_t* px = src + (size_t)i * 4;
        uint8_t b = px[0];
        uint8_t g = px[1];
        uint8_t r = px[2];
        dst[i] = (uint8_t)((306 * r + 601 * g + 117 * b) >> 10);
    }
}

// Converts `pixelCount` BGRA pixels to 16-bit RGB565 values.
// `dst` is not required to be 2-byte aligned: callers point it into the middle
// of a packet buffer, so each value is stored with memcpy.
void ScreenHandler::convertBGRAtoRGB565(const uint8_t* src, uint16_t* dst, uint32_t pixelCount)
{
    uint8_t* out = reinterpret_cast<uint8_t*>(dst);
    for (uint32_t i = 0; i < pixelCount; i++) {
        const uint8_t* px = src + (size_t)i * 4;
        uint16_t b5 = (px[0] >> 3) & 0x1F;
        uint16_t g6 = (px[1] >> 2) & 0x3F;
        uint16_t r5 = (px[2] >> 3) & 0x1F;
        uint16_t packed = (uint16_t)((r5 << 11) | (g6 << 5) | b5);
        memcpy(out + (size_t)i * sizeof(uint16_t), &packed, sizeof(packed));
    }
}

// Returns mach_absolute_time() converted to milliseconds.
uint64_t ScreenHandler::getTickMs()
{
    // Function-local static: initialised once, safely, on first use.
    static const mach_timebase_info_data_t timebase = [] {
        mach_timebase_info_data_t info = {0, 0};
        mach_timebase_info(&info);
        if (info.denom == 0) {
            // Query failed; fall back to 1:1 rather than dividing by zero.
            info.numer = 1;
            info.denom = 1;
        }
        return info;
    }();

    uint64_t now = mach_absolute_time();
    return (now * timebase.numer / timebase.denom) / 1000000;
}

// Cached logical cursor position (shared between getCursorPosition and getCursorTypeIndex)
static CGPoint s_cachedLogicalPos = {0, 0};

// Returns the cursor position in physical pixels, clamped to the captured
// screen. If the position cannot be queried, the last cached position is used.
void ScreenHandler::getCursorPosition(int32_t& x, int32_t& y)
{
    // Get cursor position in logical (point) coordinates
    CGEventRef event = CGEventCreate(nullptr);
    if (event) {
        s_cachedLogicalPos = CGEventGetLocation(event);
        CFRelease(event);  // CFRelease must not be called with NULL
    }

    // Convert to physical pixel coordinates (for Retina displays)
    x = (int32_t)(s_cachedLogicalPos.x * m_scaleFactor);
    y = (int32_t)(s_cachedLogicalPos.y * m_scaleFactor);

    // Clamp to screen bounds
    if (x < 0) x = 0;
    if (y < 0) y = 0;
    if (x >= m_width) x = m_width - 1;
    if (y >= m_height) y = m_height - 1;
}

// Returns the Windows cursor type index to report for the current cursor
// position, inferred from the accessibility role of the UI element under it.
// The lookup is skipped (cached value returned) unless the cursor moved more
// than 5 points or 100 ms have passed since the last lookup.
uint8_t ScreenHandler::getCursorTypeIndex()
{
    // NSCursor.currentSystemCursor doesn't work for background daemons.
    // Use Accessibility API to infer cursor type from the UI element under cursor.

    // Throttle to avoid performance impact (check every 100ms)
    static uint8_t cachedIndex = kCursorArrow;
    static uint64_t lastCheckTime = 0;
    static CGPoint lastPos = {-1, -1};

    // Reuse cursor position from getCursorPosition (called before this)
    CGPoint pos = s_cachedLogicalPos;

    // Throttle: only check if cursor moved significantly or 100ms elapsed
    uint64_t now = getTickMs();
    bool posChanged = (fabs(pos.x - lastPos.x) > 5 || fabs(pos.y - lastPos.y) > 5);
    if (!posChanged && (now - lastCheckTime) < 100) {
        return cachedIndex;
    }
    lastCheckTime = now;
    lastPos = pos;

    uint8_t index = kCursorArrow;  // Default to arrow

    // Get the UI element at cursor position using Accessibility API
    AXUIElementRef systemWide = AXUIElementCreateSystemWide();
    if (systemWide) {
        AXUIElementRef element = nullptr;
        AXError err = AXUIElementCopyElementAtPosition(systemWide,
                                                       (float)pos.x, (float)pos.y,
                                                       &element);
        CFRelease(systemWide);

        if (err == kAXErrorSuccess && element) {
            index = cursorIndexForElement(element);
        }
        if (element) {
            CFRelease(element);
        }
    }

    // Cache the result
    cachedIndex = index;
    return index;
}

// Body of the capture thread: sends one raw frame, then sends H264 or diff
// frames at up to m_maxFPS until m_running becomes false.
void ScreenHandler::captureLoop()
{
    NSLog(@"ScreenHandler CaptureLoop started (%dx%d)", m_width, m_height);

    uint8_t currentAlgo = m_algorithm.load();

    // Always send raw first frame (TOKEN_FIRSTSCREEN) to initialize server display
    // This matches Windows client behavior: first frame is always raw bitmap,
    // even in H264 mode. Server needs TOKEN_FIRSTSCREEN to set m_bIsFirst = FALSE.
    @autoreleasepool {
        sendFirstScreen();
    }

    // Small delay to ensure first frame is processed before H264 stream starts
    usleep(50000);  // 50ms, same as Windows client

    while (m_running) {
        uint64_t start = getTickMs();

        // This thread has no run loop, so drain autoreleased objects once per
        // frame instead of letting them pile up until the thread exits.
        @autoreleasepool {
            uint8_t algo = m_algorithm.load();

            // Check if algorithm changed
            if (algo != currentAlgo) {
                NSLog(@"Algorithm changed: %d -> %d", currentAlgo, algo);
                currentAlgo = algo;

                // If switching to/from H264, reset encoder
                if (algo == ALGORITHM_H264) {
                    // Starting H264 - will be initialized in sendH264Frame
                    sendH264Frame(true);  // First H264 frame is keyframe
                } else if (m_h264Encoder) {
                    // Switching away from H264 - close encoder
                    m_h264Encoder->close();
                    m_h264Encoder.reset();
                    sendFirstScreen();  // Send full frame for DIFF modes
                }
            } else {
                // Normal frame
                if (algo == ALGORITHM_H264) {
                    sendH264Frame(false);
                } else {
                    sendDiffFrame();
                }
            }
        }

        // Sleep for whatever is left of this frame's time budget
        int fps = m_maxFPS.load();
        if (fps <= 0) fps = 10;
        int sleepMs = 1000 / fps;
        int elapsed = (int)(getTickMs() - start);
        int wait = sleepMs - elapsed;
        if (wait > 0) {
            usleep(wait * 1000);
        }
    }

    NSLog(@"ScreenHandler CaptureLoop stopped");
}