Files
SimpleRemoter/macos/ScreenHandler.mm
yuanyuanxiang 92f3df8464 Perf: Optimize macOS screen capture with CGDisplayStream
Core optimization:
- Use CGDisplayStream instead of per-frame CGDisplayCreateImage
- Push model: CPU sleeps when screen is static (condition_variable wait)
- IOSurface capture avoids expensive image creation per frame
- ~47% CPU reduction during active remote desktop (45% → 24%)

Additional optimizations:
- vImageVerticalReflect (SIMD) replaces manual row-by-row flip
- Cache CGColorSpaceRef to avoid per-frame creation/release
- Cache tempBuffer to avoid per-frame memory allocation
- Throttle getCursorTypeIndex to 250ms (Accessibility API is expensive)

Bug fixes:
- Fix unreliable screen capture permission check (use actual capture test)
- Improve permission logging

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-03 23:36:23 +02:00

1145 lines
39 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#import "ScreenHandler.h"
#import "H264Encoder.h"
#import "InputHandler.h"
#import "ClipboardHandler.h"
#import "../client/IOCPClient.h"
#import "../common/commands.h"
#import "../common/FileTransferV2.h"
#import "../common/logger.h"
#import "Permissions.h"
#import <Cocoa/Cocoa.h>
#import <chrono>
#import <CoreGraphics/CoreGraphics.h>
#import <ApplicationServices/ApplicationServices.h>
#import <mach/mach_time.h>
#import <Accelerate/Accelerate.h>
// Global client ID (calculated in main.mm)
extern uint64_t g_myClientID;
ScreenHandler::ScreenHandler(IOCPClient* client)
: m_client(client)
, m_clientID(0)
, m_running(false)
, m_width(0)
, m_height(0)
, m_logicalWidth(0)
, m_logicalHeight(0)
, m_scaleFactor(1.0)
, m_displayID(CGMainDisplayID())
, m_algorithm(ALGORITHM_H264)
, m_maxFPS(15)
, m_qualityLevel(QUALITY_GOOD) // Use fixed QUALITY_GOOD (H264) for web compatibility
, m_h264Bitrate(3000000) // 3 Mbps (matches Windows QUALITY_GOOD)
, m_displayAssertionID(0)
, m_colorSpace(nullptr)
, m_displayStream(nullptr)
, m_streamQueue(nullptr)
, m_latestSurface(nullptr)
, m_hasNewFrame(false)
{
memset(&m_bmpHeader, 0, sizeof(m_bmpHeader));
// Cache color space (avoid per-frame creation)
m_colorSpace = CGColorSpaceCreateDeviceRGB();
// Initialize input handler for mouse/keyboard control
m_inputHandler = std::make_unique<InputHandler>();
if (m_inputHandler->init()) {
NSLog(@"InputHandler initialized with accessibility permission");
} else {
NSLog(@"InputHandler: waiting for accessibility permission");
}
}
ScreenHandler::~ScreenHandler()
{
stop();
cleanupDisplayStream();
// Release cached color space
if (m_colorSpace) {
CGColorSpaceRelease(m_colorSpace);
m_colorSpace = nullptr;
}
}
bool ScreenHandler::init()
{
// Check permissions
if (!Permissions::checkScreenCapture()) {
NSLog(@"Screen capture permission not granted");
return false;
}
// Get main display info
m_displayID = CGMainDisplayID();
// Get physical pixel dimensions (what we capture and send)
CGDisplayModeRef mode = CGDisplayCopyDisplayMode(m_displayID);
if (mode) {
m_width = (int)CGDisplayModeGetPixelWidth(mode);
m_height = (int)CGDisplayModeGetPixelHeight(mode);
CGDisplayModeRelease(mode);
} else {
m_width = (int)CGDisplayPixelsWide(m_displayID);
m_height = (int)CGDisplayPixelsHigh(m_displayID);
}
// Get logical point dimensions (what CGEvent uses)
// NSScreen provides logical dimensions
NSScreen* mainScreen = [NSScreen mainScreen];
if (mainScreen) {
NSRect frame = [mainScreen frame];
m_logicalWidth = (int)frame.size.width;
m_logicalHeight = (int)frame.size.height;
} else {
// Fallback: use physical dimensions
m_logicalWidth = m_width;
m_logicalHeight = m_height;
}
// Calculate scale factor (Retina displays have factor > 1.0)
m_scaleFactor = (double)m_width / (double)m_logicalWidth;
NSLog(@"Screen dimensions: physical=%dx%d, logical=%dx%d, scale=%.2f",
m_width, m_height, m_logicalWidth, m_logicalHeight, m_scaleFactor);
if (m_width <= 0 || m_height <= 0) {
NSLog(@"Invalid screen dimensions: %dx%d", m_width, m_height);
return false;
}
// Initialize BITMAPINFOHEADER
m_bmpHeader.biSize = sizeof(BITMAPINFOHEADER_MAC);
m_bmpHeader.biWidth = m_width;
m_bmpHeader.biHeight = m_height;
m_bmpHeader.biPlanes = 1;
m_bmpHeader.biBitCount = 32;
m_bmpHeader.biCompression = 0; // BI_RGB
m_bmpHeader.biSizeImage = m_width * m_height * 4;
// Allocate frame buffers
m_prevFrame.resize(m_bmpHeader.biSizeImage, 0);
m_currFrame.resize(m_bmpHeader.biSizeImage, 0);
m_diffBuffer.resize(1 + 1 + 8 + 1 + m_bmpHeader.biSizeImage * 2);
// Wake display if needed (do this early, before sending TOKEN_BITMAPINFO)
bool wasAsleep = CGDisplayIsAsleep(m_displayID);
bool isLocked = false;
CFDictionaryRef sessionInfo = CGSessionCopyCurrentDictionary();
if (sessionInfo) {
CFBooleanRef screenLocked = (CFBooleanRef)CFDictionaryGetValue(
sessionInfo, CFSTR("CGSSessionScreenIsLocked"));
if (screenLocked && CFBooleanGetValue(screenLocked)) {
isLocked = true;
}
CFRelease(sessionInfo);
}
if (wasAsleep || isLocked) {
NSLog(@"Waking display in init (asleep=%d, locked=%d)...", wasAsleep, isLocked);
// Create NoDisplaySleep assertion - this wakes the display
if (m_displayAssertionID == 0) {
IOReturn result = IOPMAssertionCreateWithName(
kIOPMAssertionTypeNoDisplaySleep,
kIOPMAssertionLevelOn,
CFSTR("SimpleRemoter - remote desktop session active"),
&m_displayAssertionID
);
if (result == kIOReturnSuccess) {
NSLog(@"Display assertion created (ID: %u)", m_displayAssertionID);
}
}
// Declare user activity to ensure wake
IOPMAssertionID wakeAssertionID = 0;
IOPMAssertionDeclareUserActivity(
CFSTR("SimpleRemoter - waking display"),
kIOPMUserActiveLocal,
&wakeAssertionID
);
if (wakeAssertionID) {
IOPMAssertionRelease(wakeAssertionID);
}
// Brief wait for loginwindow to render
std::this_thread::sleep_for(std::chrono::milliseconds(500));
NSLog(@"Display wake complete");
}
// Initialize CGDisplayStream for efficient capture
if (!initDisplayStream()) {
NSLog(@"Warning: CGDisplayStream init failed, falling back to legacy capture");
}
NSLog(@"ScreenHandler initialized: %dx%d", m_width, m_height);
return true;
}
bool ScreenHandler::initDisplayStream()
{
// Create dispatch queue for stream callbacks
m_streamQueue = dispatch_queue_create("com.ghost.screenstream", DISPATCH_QUEUE_SERIAL);
if (!m_streamQueue) {
NSLog(@"Failed to create dispatch queue for display stream");
return false;
}
// Stream properties
CFMutableDictionaryRef properties = CFDictionaryCreateMutable(
kCFAllocatorDefault, 0,
&kCFTypeDictionaryKeyCallBacks,
&kCFTypeDictionaryValueCallBacks
);
// Request minimum frame interval based on FPS (e.g., 15 FPS = 1/15 sec)
int fps = m_maxFPS.load();
if (fps <= 0) fps = 15;
double interval = 1.0 / (double)fps;
CFNumberRef intervalRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberDoubleType, &interval);
CFDictionarySetValue(properties, kCGDisplayStreamMinimumFrameTime, intervalRef);
CFRelease(intervalRef);
// Show cursor in stream
CFDictionarySetValue(properties, kCGDisplayStreamShowCursor, kCFBooleanFalse);
// Preserve aspect ratio
CFDictionarySetValue(properties, kCGDisplayStreamPreserveAspectRatio, kCFBooleanTrue);
// Create the display stream with BGRA format
__block ScreenHandler* handler = this;
m_displayStream = CGDisplayStreamCreateWithDispatchQueue(
m_displayID,
m_width,
m_height,
'BGRA', // Pixel format
properties,
m_streamQueue,
^(CGDisplayStreamFrameStatus status,
uint64_t displayTime,
IOSurfaceRef frameSurface,
CGDisplayStreamUpdateRef updateRef) {
(void)displayTime;
(void)updateRef;
if (status == kCGDisplayStreamFrameStatusFrameComplete && frameSurface) {
handler->processIOSurface(frameSurface);
} else if (status == kCGDisplayStreamFrameStatusFrameIdle) {
// Screen not changed, still notify for FPS timing
handler->m_hasNewFrame.store(true);
handler->m_surfaceCond.notify_one();
} else if (status == kCGDisplayStreamFrameStatusStopped) {
NSLog(@"CGDisplayStream stopped");
}
}
);
CFRelease(properties);
if (!m_displayStream) {
NSLog(@"Failed to create CGDisplayStream");
m_streamQueue = nullptr; // ARC manages dispatch objects
return false;
}
// Start the stream
CGError err = CGDisplayStreamStart(m_displayStream);
if (err != kCGErrorSuccess) {
NSLog(@"Failed to start CGDisplayStream: %d", err);
CFRelease(m_displayStream);
m_displayStream = nullptr;
m_streamQueue = nullptr; // ARC manages dispatch objects
return false;
}
NSLog(@"CGDisplayStream started: %dx%d @ %d FPS", m_width, m_height, fps);
return true;
}
void ScreenHandler::cleanupDisplayStream()
{
if (m_displayStream) {
CGDisplayStreamStop(m_displayStream);
CFRelease(m_displayStream);
m_displayStream = nullptr;
}
// ARC manages dispatch objects, just nil the pointer
m_streamQueue = nullptr;
std::lock_guard<std::mutex> lock(m_surfaceMutex);
if (m_latestSurface) {
CFRelease(m_latestSurface);
m_latestSurface = nullptr;
}
}
void ScreenHandler::processIOSurface(IOSurfaceRef surface)
{
// Retain the surface and store it
std::lock_guard<std::mutex> lock(m_surfaceMutex);
if (m_latestSurface) {
CFRelease(m_latestSurface);
}
m_latestSurface = (IOSurfaceRef)CFRetain(surface);
m_hasNewFrame.store(true);
m_surfaceCond.notify_one();
}
bool ScreenHandler::captureFromIOSurface(IOSurfaceRef surface, std::vector<uint8_t>& buffer)
{
if (!surface) return false;
// Lock the surface for CPU read
IOSurfaceLock(surface, kIOSurfaceLockReadOnly, nullptr);
size_t width = IOSurfaceGetWidth(surface);
size_t height = IOSurfaceGetHeight(surface);
size_t bytesPerRow = IOSurfaceGetBytesPerRow(surface);
void* baseAddr = IOSurfaceGetBaseAddress(surface);
if (!baseAddr || width != (size_t)m_width || height != (size_t)m_height) {
IOSurfaceUnlock(surface, kIOSurfaceLockReadOnly, nullptr);
return false;
}
// Ensure temp buffer is allocated
size_t requiredSize = m_width * 4 * m_height;
if (m_tempBuffer.size() != requiredSize) {
m_tempBuffer.resize(requiredSize);
}
// Copy from IOSurface to temp buffer (handle different bytesPerRow)
size_t dstBytesPerRow = m_width * 4;
if (bytesPerRow == dstBytesPerRow) {
memcpy(m_tempBuffer.data(), baseAddr, requiredSize);
} else {
// Row by row copy for different strides
uint8_t* src = (uint8_t*)baseAddr;
uint8_t* dst = m_tempBuffer.data();
for (size_t y = 0; y < height; y++) {
memcpy(dst + y * dstBytesPerRow, src + y * bytesPerRow, dstBytesPerRow);
}
}
IOSurfaceUnlock(surface, kIOSurfaceLockReadOnly, nullptr);
// Flip vertically using Accelerate framework (SIMD optimized)
vImage_Buffer src = {
.data = m_tempBuffer.data(),
.height = (vImagePixelCount)height,
.width = (vImagePixelCount)width,
.rowBytes = dstBytesPerRow
};
vImage_Buffer dst = {
.data = buffer.data(),
.height = (vImagePixelCount)height,
.width = (vImagePixelCount)width,
.rowBytes = dstBytesPerRow
};
vImage_Error err = vImageVerticalReflect_ARGB8888(&src, &dst, kvImageNoFlags);
if (err != kvImageNoError) {
// Fallback to manual flip
for (size_t y = 0; y < height; y++) {
memcpy(buffer.data() + (height - 1 - y) * dstBytesPerRow,
m_tempBuffer.data() + y * dstBytesPerRow,
dstBytesPerRow);
}
}
return true;
}
void ScreenHandler::start(IOCPClient* client, uint64_t clientID)
{
// If already running, just send TOKEN_BITMAPINFO again
// This allows server to create additional dialogs (MFC can open while Web is active)
if (m_running) {
NSLog(@"ScreenHandler already running, sending TOKEN_BITMAPINFO for new dialog");
sendBitmapInfo();
return;
}
m_client = client;
m_clientID = clientID;
m_running = true;
// Display wake was already done in init(), just ensure assertion exists
if (m_displayAssertionID == 0) {
IOReturn result = IOPMAssertionCreateWithName(
kIOPMAssertionTypeNoDisplaySleep,
kIOPMAssertionLevelOn,
CFSTR("SimpleRemoter - remote desktop session active"),
&m_displayAssertionID
);
if (result == kIOReturnSuccess) {
NSLog(@"Display sleep disabled (ID: %u)", m_displayAssertionID);
}
}
m_captureThread = std::thread(&ScreenHandler::captureLoop, this);
}
void ScreenHandler::stop()
{
m_running = false;
// Wake up capture thread if waiting
m_surfaceCond.notify_all();
if (m_captureThread.joinable()) {
m_captureThread.join();
}
// Close H264 encoder if open
if (m_h264Encoder) {
m_h264Encoder->close();
m_h264Encoder.reset();
}
// Release display sleep assertion - allow screen to turn off
if (m_displayAssertionID != 0) {
IOPMAssertionRelease(m_displayAssertionID);
NSLog(@"Display sleep re-enabled (released ID: %u)", m_displayAssertionID);
m_displayAssertionID = 0;
}
}
void ScreenHandler::sendBitmapInfo()
{
if (!m_client) return;
// Build packet: [TOKEN_BITMAPINFO][BITMAPINFOHEADER][clientID][reserved][ScreenSettings]
// ScreenSettings defined in commands.h (100 bytes), QualityLevel at offset 32
const uint32_t len = 1 + sizeof(BITMAPINFOHEADER_MAC) + 2 * sizeof(uint64_t) + sizeof(ScreenSettings);
std::vector<uint8_t> buf(len, 0);
buf[0] = TOKEN_BITMAPINFO;
memcpy(&buf[1], &m_bmpHeader, sizeof(BITMAPINFOHEADER_MAC));
uint64_t clientID = g_myClientID;
memcpy(&buf[1 + sizeof(BITMAPINFOHEADER_MAC)], &clientID, sizeof(uint64_t));
ScreenSettings settings = {};
settings.MaxFPS = m_maxFPS.load();
settings.QualityLevel = m_qualityLevel; // Fixed quality level (e.g., QUALITY_GOOD = 2)
memcpy(&buf[1 + sizeof(BITMAPINFOHEADER_MAC) + 2 * sizeof(uint64_t)], &settings, sizeof(ScreenSettings));
m_client->Send2Server((char*)buf.data(), len);
NSLog(@"SendBitmapInfo: clientID=%llu, QualityLevel=%d, SettingsSize=%zu",
clientID, m_qualityLevel, sizeof(ScreenSettings));
}
void ScreenHandler::OnReceive(uint8_t* data, ULONG size)
{
if (!size) return;
switch (data[0]) {
case COMMAND_NEXT:
// Server ready, handled externally
NSLog(@"Received COMMAND_NEXT from server");
if (!m_running) {
start(m_client, g_myClientID);
}
break;
case COMMAND_SCREEN_CONTROL:
// Handle mouse/keyboard control commands
// Protocol: [COMMAND_SCREEN_CONTROL:1][MSG64:48]
if (size >= 1 + sizeof(MSG64_MAC) && m_inputHandler) {
MSG64_MAC msg;
memcpy(&msg, data + 1, sizeof(MSG64_MAC));
// Convert physical pixel coordinates to logical point coordinates
// Server sends coordinates in physical pixels (matching our captured screen)
// CGEvent expects logical points (for Retina displays, physical/scale)
if (m_scaleFactor > 1.0) {
// Extract coordinates from lParam (MAKELPARAM format: low=x, high=y)
int x = (int)(msg.lParam & 0xFFFF);
int y = (int)((msg.lParam >> 16) & 0xFFFF);
// Scale down to logical coordinates
x = (int)(x / m_scaleFactor);
y = (int)(y / m_scaleFactor);
// Update lParam with scaled coordinates
msg.lParam = (uint64_t)x | ((uint64_t)y << 16);
msg.pt_x = x;
msg.pt_y = y;
}
m_inputHandler->handleInputEvent(&msg);
}
break;
case CMD_QUALITY_LEVEL:
if (size >= 2) {
int8_t level = (int8_t)data[1];
bool persist = (size >= 3) ? data[2] : false;
applyQualityLevel(level, persist);
}
break;
case COMMAND_SCREEN_SET_CLIPBOARD:
// 服务端设置剪贴板: [cmd:1][text:N]
if (size > 1) {
if (ClipboardHandler::SetTextRaw((const char*)(data + 1), size - 1)) {
NSLog(@">>> Clipboard SET: %zu bytes", size - 1);
} else {
NSLog(@"*** Clipboard SET failed");
}
}
break;
case COMMAND_SCREEN_GET_CLIPBOARD:
// 服务端请求剪贴板: [cmd:1][hash:64][hmac:16]
// 返回: [TOKEN_CLIPBOARD_TEXT:1][text:N] 或 [COMMAND_GET_FOLDER:1][files]
{
// 优先检查剪贴板中的文件
auto files = ClipboardHandler::GetFiles();
if (!files.empty()) {
// 返回 COMMAND_GET_FOLDER + 文件列表多字符串格式file1\0file2\0\0
std::vector<uint8_t> buf;
buf.push_back(COMMAND_GET_FOLDER);
for (const auto& f : files) {
// 文件路径需要转换为 GBK 编码(服务端预期)
std::string gbkPath = FileTransferV2::utf8ToGbk(f);
buf.insert(buf.end(), gbkPath.begin(), gbkPath.end());
buf.push_back(0); // 每个路径后的 null 终止符
}
buf.push_back(0); // 结束标记
m_client->Send2Server((char*)buf.data(), buf.size());
NSLog(@">>> Clipboard GET: %zu files", files.size());
break;
}
// 没有文件,返回文本
std::string text = ClipboardHandler::GetText();
if (!text.empty()) {
std::vector<uint8_t> buf(1 + text.size());
buf[0] = TOKEN_CLIPBOARD_TEXT;
memcpy(&buf[1], text.data(), text.size());
m_client->Send2Server((char*)buf.data(), buf.size());
NSLog(@">>> Clipboard GET: %zu bytes text", text.size());
} else {
// 返回空剪贴板
uint8_t empty = TOKEN_CLIPBOARD_TEXT;
m_client->Send2Server((char*)&empty, 1);
NSLog(@">>> Clipboard GET: empty");
}
}
break;
case COMMAND_GET_FILE:
// Server requests file download: [cmd:1][targetDir\0][file1\0file2\0...\0]
// Use V2 protocol to upload files
{
if (size < 3) break;
// Parse target directory (GBK encoding)
const char* ptr = (const char*)(data + 1);
const char* end = (const char*)(data + size);
std::string targetDirGbk = ptr;
std::string targetDir = FileTransferV2::gbkToUtf8(targetDirGbk);
ptr += targetDirGbk.length() + 1;
// Parse file list
std::vector<std::string> files;
while (ptr < end && *ptr != '\0') {
std::string fileGbk = ptr;
files.push_back(FileTransferV2::gbkToUtf8(fileGbk));
ptr += fileGbk.length() + 1;
}
// 如果没有文件列表,从剪贴板获取
if (files.empty()) {
files = ClipboardHandler::GetFiles();
}
if (!files.empty() && !targetDir.empty()) {
NSLog(@">>> COMMAND_GET_FILE: %zu files -> %s", files.size(), targetDir.c_str());
// Use V2 protocol to send files
IOCPClient* client = m_client;
std::thread([files, targetDir, client]() {
// Collect all files (expand directories)
std::vector<std::string> allFiles;
std::vector<std::string> rootCandidates;
for (const auto& path : files) {
struct stat st;
if (stat(path.c_str(), &st) != 0) continue;
if (S_ISDIR(st.st_mode)) {
std::string dirPath = path;
if (dirPath.back() != '/') dirPath += '/';
size_t pos = dirPath.rfind('/', dirPath.length() - 2);
std::string parentPath = (pos != std::string::npos) ? dirPath.substr(0, pos + 1) : dirPath;
rootCandidates.push_back(parentPath);
FileTransferV2::CollectFiles(dirPath, allFiles);
} else {
rootCandidates.push_back(path);
allFiles.push_back(path);
}
}
if (allFiles.empty()) {
NSLog(@"*** No files to send");
return;
}
std::string commonRoot = FileTransferV2::GetCommonRoot(rootCandidates);
NSLog(@">>> Sending %zu files, root=%s", allFiles.size(), commonRoot.c_str());
FileTransferV2::SendFilesV2(allFiles, targetDir, commonRoot, client, g_myClientID);
}).detach();
} else {
NSLog(@"*** COMMAND_GET_FILE: no files or empty target");
}
}
break;
default:
break;
}
}
void ScreenHandler::applyQualityLevel(int8_t level, bool persist)
{
m_qualityLevel = level;
// TODO: persist to config file if needed
(void)persist;
if (level == QUALITY_DISABLED) {
// Disabled mode: keep current settings
NSLog(@"Quality: Disabled (keep current)");
return;
}
if (level >= 0 && level < QUALITY_COUNT) {
// Get profile from commands.h (shared with Windows/Linux)
const QualityProfile& profile = GetQualityProfile(level);
// Apply FPS
m_maxFPS.store(profile.maxFPS);
// Apply algorithm (macOS supports all algorithms including H264 via VideoToolbox)
m_algorithm.store(profile.algorithm);
// Update H264 bitrate if applicable
if (profile.algorithm == ALGORITHM_H264 && profile.bitRate > 0) {
m_h264Bitrate = profile.bitRate * 1000; // kbps -> bps
}
NSLog(@"Quality: Level=%d (%s), FPS=%d, Algo=%d, BitRate=%d kbps",
level,
level == QUALITY_ULTRA ? "Ultra" :
level == QUALITY_HIGH ? "High" :
level == QUALITY_GOOD ? "Good" :
level == QUALITY_MEDIUM ? "Medium" :
level == QUALITY_LOW ? "Low" : "Minimal",
profile.maxFPS, profile.algorithm, profile.bitRate);
} else {
// Adaptive mode (level=-1): server adjusts dynamically
NSLog(@"Quality: Adaptive mode");
}
}
bool ScreenHandler::captureScreen(std::vector<uint8_t>& buffer)
{
// Try to use IOSurface from display stream (more efficient)
if (m_displayStream) {
IOSurfaceRef surface = nullptr;
{
std::lock_guard<std::mutex> lock(m_surfaceMutex);
if (m_latestSurface) {
surface = (IOSurfaceRef)CFRetain(m_latestSurface);
}
}
if (surface) {
bool result = captureFromIOSurface(surface, buffer);
CFRelease(surface);
if (result) {
return true;
}
}
// Fall through to legacy method if IOSurface failed
}
// Legacy method: CGDisplayCreateImage (fallback)
CGImageRef image = CGDisplayCreateImage(m_displayID);
if (!image) {
NSLog(@"Failed to capture screen image");
return false;
}
size_t width = CGImageGetWidth(image);
size_t height = CGImageGetHeight(image);
if (width != (size_t)m_width || height != (size_t)m_height) {
CGImageRelease(image);
NSLog(@"Screen resolution changed: %zux%zu", width, height);
return false;
}
size_t bytesPerRow = width * 4;
size_t requiredSize = bytesPerRow * height;
if (m_tempBuffer.size() != requiredSize) {
m_tempBuffer.resize(requiredSize);
}
CGContextRef context = CGBitmapContextCreate(
m_tempBuffer.data(),
width,
height,
8,
bytesPerRow,
m_colorSpace,
kCGImageAlphaPremultipliedFirst | kCGBitmapByteOrder32Little
);
if (!context) {
CGImageRelease(image);
NSLog(@"Failed to create bitmap context");
return false;
}
CGContextDrawImage(context, CGRectMake(0, 0, width, height), image);
CGContextRelease(context);
CGImageRelease(image);
// Flip vertically using Accelerate framework
vImage_Buffer src = {
.data = m_tempBuffer.data(),
.height = (vImagePixelCount)height,
.width = (vImagePixelCount)width,
.rowBytes = bytesPerRow
};
vImage_Buffer dst = {
.data = buffer.data(),
.height = (vImagePixelCount)height,
.width = (vImagePixelCount)width,
.rowBytes = bytesPerRow
};
vImage_Error err = vImageVerticalReflect_ARGB8888(&src, &dst, kvImageNoFlags);
if (err != kvImageNoError) {
for (size_t y = 0; y < height; y++) {
memcpy(buffer.data() + (height - 1 - y) * bytesPerRow,
m_tempBuffer.data() + y * bytesPerRow,
bytesPerRow);
}
}
return true;
}
void ScreenHandler::sendFirstScreen()
{
if (!captureScreen(m_currFrame)) return;
if (!m_client) return;
uint32_t imgSize = m_bmpHeader.biSizeImage;
std::vector<uint8_t> buf(1 + imgSize);
buf[0] = TOKEN_FIRSTSCREEN;
memcpy(&buf[1], m_currFrame.data(), imgSize);
m_client->Send2Server((char*)buf.data(), buf.size());
// Save as previous frame
m_prevFrame = m_currFrame;
}
void ScreenHandler::sendDiffFrame()
{
if (!captureScreen(m_currFrame)) return;
if (!m_client) return;
uint8_t* out = m_diffBuffer.data();
out[0] = TOKEN_NEXTSCREEN;
uint8_t* data = out + 1;
// Write algorithm type
uint8_t algo = m_algorithm.load();
memcpy(data, &algo, sizeof(uint8_t));
// Write cursor position
int32_t cursorX, cursorY;
getCursorPosition(cursorX, cursorY);
memcpy(data + 1, &cursorX, sizeof(int32_t));
memcpy(data + 1 + sizeof(int32_t), &cursorY, sizeof(int32_t));
// Write cursor type
uint8_t cursorType = getCursorTypeIndex();
memcpy(data + 1 + 2 * sizeof(int32_t), &cursorType, sizeof(uint8_t));
uint32_t headerSize = 1 + 2 * sizeof(int32_t) + 1;
uint8_t* diffData = data + headerSize;
uint32_t diffLen = compareBitmap(m_currFrame.data(), m_prevFrame.data(),
diffData, m_bmpHeader.biSizeImage, algo);
uint32_t totalLen = 1 + headerSize + diffLen;
m_client->Send2Server((char*)out, totalLen);
// Update previous frame
std::swap(m_prevFrame, m_currFrame);
}
void ScreenHandler::sendH264Frame(bool keyframe)
{
if (!captureScreen(m_currFrame)) return;
if (!m_client) return;
// Initialize encoder if needed
if (!m_h264Encoder) {
m_h264Encoder = std::make_unique<H264Encoder>();
int fps = m_maxFPS.load();
if (fps <= 0) fps = 30;
if (!m_h264Encoder->open(m_width, m_height, fps, m_h264Bitrate)) {
NSLog(@"Failed to initialize H264 encoder: %s", m_h264Encoder->getLastError());
m_h264Encoder.reset();
return;
}
NSLog(@"H264 encoder initialized: %dx%d @ %d fps", m_width, m_height, fps);
}
// Force keyframe if requested
if (keyframe) {
m_h264Encoder->forceKeyframe();
}
// Encode frame
uint8_t* encodedData = nullptr;
uint32_t encodedSize = 0;
uint32_t stride = m_width * 4;
int result = m_h264Encoder->encode(
m_currFrame.data(),
32, // bpp
stride,
m_width,
m_height,
&encodedData,
&encodedSize,
false // Don't flip - keep bottom-up format like Windows client
);
if (result <= 0 || !encodedData || encodedSize == 0) {
return;
}
// Build packet: [TOKEN_NEXTSCREEN][ALGORITHM_H264][CursorX][CursorY][CursorType][H264Data]
// Note: H264 always uses TOKEN_NEXTSCREEN because:
// - Server's TOKEN_KEYFRAME handler does nothing for H264 (just break)
// - Server's TOKEN_NEXTSCREEN handler calls Decode() for H264
// - H264 encoder manages keyframes (I-frames) internally
// - FFmpeg decoder auto-detects I-frames vs P-frames
uint32_t headerSize = 1 + 1 + 2 * sizeof(int32_t) + 1;
std::vector<uint8_t> packet(headerSize + encodedSize);
packet[0] = TOKEN_NEXTSCREEN;
packet[1] = ALGORITHM_H264;
// Cursor position
int32_t cursorX, cursorY;
getCursorPosition(cursorX, cursorY);
memcpy(&packet[2], &cursorX, sizeof(int32_t));
memcpy(&packet[2 + sizeof(int32_t)], &cursorY, sizeof(int32_t));
// Cursor type
packet[2 + 2 * sizeof(int32_t)] = getCursorTypeIndex();
// H264 data
memcpy(&packet[headerSize], encodedData, encodedSize);
m_client->Send2Server((char*)packet.data(), packet.size());
}
uint32_t ScreenHandler::compareBitmap(const uint8_t* curr, const uint8_t* prev,
uint8_t* outBuf, uint32_t totalBytes, uint8_t algo)
{
const uint32_t bytesPerPixel = 4;
const uint32_t totalPixels = totalBytes / bytesPerPixel;
const uint32_t gapThreshold = 8;
const uint32_t ratio = (algo == ALGORITHM_GRAY || algo == ALGORITHM_RGB565) ? 4 : 1;
uint32_t outOffset = 0;
uint32_t i = 0;
while (i < totalPixels) {
// Skip identical pixels
while (i < totalPixels &&
*(uint32_t*)(curr + i * 4) == *(uint32_t*)(prev + i * 4)) {
i++;
}
if (i >= totalPixels) break;
uint32_t start = i;
uint32_t lastDiff = i;
while (i < totalPixels) {
if (*(uint32_t*)(curr + i * 4) != *(uint32_t*)(prev + i * 4)) {
lastDiff = i;
} else if (i - lastDiff > gapThreshold) {
break;
}
i++;
}
uint32_t end = lastDiff + 1;
uint32_t count = end - start;
uint32_t byteOffset = start * bytesPerPixel;
uint32_t byteCount = count * bytesPerPixel;
// Write byteOffset
memcpy(outBuf + outOffset, &byteOffset, sizeof(uint32_t));
outOffset += sizeof(uint32_t);
// Write length
uint32_t lengthField = byteCount / ratio;
memcpy(outBuf + outOffset, &lengthField, sizeof(uint32_t));
outOffset += sizeof(uint32_t);
// Write pixel data
const uint8_t* srcData = curr + byteOffset;
if (algo == ALGORITHM_RGB565) {
convertBGRAtoRGB565(srcData, (uint16_t*)(outBuf + outOffset), count);
outOffset += count * 2;
} else if (algo == ALGORITHM_GRAY) {
convertBGRAtoGray(srcData, outBuf + outOffset, count);
outOffset += count;
} else {
memcpy(outBuf + outOffset, srcData, byteCount);
outOffset += byteCount;
}
}
return outOffset;
}
void ScreenHandler::convertBGRAtoGray(const uint8_t* src, uint8_t* dst, uint32_t pixelCount)
{
for (uint32_t i = 0; i < pixelCount; i++) {
uint8_t b = src[i * 4 + 0];
uint8_t g = src[i * 4 + 1];
uint8_t r = src[i * 4 + 2];
dst[i] = (uint8_t)((306 * r + 601 * g + 117 * b) >> 10);
}
}
void ScreenHandler::convertBGRAtoRGB565(const uint8_t* src, uint16_t* dst, uint32_t pixelCount)
{
for (uint32_t i = 0; i < pixelCount; i++) {
uint8_t b = src[i * 4 + 0];
uint8_t g = src[i * 4 + 1];
uint8_t r = src[i * 4 + 2];
uint16_t r5 = (r >> 3) & 0x1F;
uint16_t g6 = (g >> 2) & 0x3F;
uint16_t b5 = (b >> 3) & 0x1F;
dst[i] = (r5 << 11) | (g6 << 5) | b5;
}
}
uint64_t ScreenHandler::getTickMs()
{
static mach_timebase_info_data_t timebase = {0, 0};
if (timebase.denom == 0) {
mach_timebase_info(&timebase);
}
uint64_t now = mach_absolute_time();
return (now * timebase.numer / timebase.denom) / 1000000;
}
// Cached logical cursor position (shared between getCursorPosition and getCursorTypeIndex)
static CGPoint s_cachedLogicalPos = {0, 0};
void ScreenHandler::getCursorPosition(int32_t& x, int32_t& y)
{
// Get cursor position in logical (point) coordinates
CGEventRef event = CGEventCreate(nullptr);
s_cachedLogicalPos = CGEventGetLocation(event);
CFRelease(event);
// Convert to physical pixel coordinates (for Retina displays)
x = (int32_t)(s_cachedLogicalPos.x * m_scaleFactor);
y = (int32_t)(s_cachedLogicalPos.y * m_scaleFactor);
// Clamp to screen bounds
if (x < 0) x = 0;
if (y < 0) y = 0;
if (x >= m_width) x = m_width - 1;
if (y >= m_height) y = m_height - 1;
}
uint8_t ScreenHandler::getCursorTypeIndex()
{
// Windows cursor type indices (from CursorInfo.h):
// 0: IDC_APPSTARTING, 1: IDC_ARROW, 2: IDC_CROSS, 3: IDC_HAND,
// 4: IDC_HELP, 5: IDC_IBEAM, 6: IDC_ICON, 7: IDC_NO,
// 8: IDC_SIZE, 9: IDC_SIZEALL, 10: IDC_SIZENESW, 11: IDC_SIZENS,
// 12: IDC_SIZENWSE, 13: IDC_SIZEWE, 14: IDC_UPARROW, 15: IDC_WAIT
// NSCursor.currentSystemCursor doesn't work for background daemons.
// Use Accessibility API to infer cursor type from the UI element under cursor.
// Throttle to avoid performance impact (check every 100ms)
static uint8_t cachedIndex = 1;
static uint64_t lastCheckTime = 0;
static CGPoint lastPos = {-1, -1};
// Reuse cursor position from getCursorPosition (called before this)
CGPoint pos = s_cachedLogicalPos;
// Throttle: only check if cursor moved significantly or 250ms elapsed
// (Accessibility API is expensive, cursor type is just a visual hint)
uint64_t now = getTickMs();
bool posChanged = (fabs(pos.x - lastPos.x) > 10 || fabs(pos.y - lastPos.y) > 10);
if (!posChanged && (now - lastCheckTime) < 250) {
return cachedIndex;
}
lastCheckTime = now;
lastPos = pos;
uint8_t index = 1; // Default to arrow
// Get the UI element at cursor position using Accessibility API
AXUIElementRef systemWide = AXUIElementCreateSystemWide();
AXUIElementRef element = nullptr;
AXError err = AXUIElementCopyElementAtPosition(systemWide, (float)pos.x, (float)pos.y, &element);
CFRelease(systemWide);
if (err == kAXErrorSuccess && element) {
// Get the role of the element
CFTypeRef roleRef = nullptr;
if (AXUIElementCopyAttributeValue(element, kAXRoleAttribute, &roleRef) == kAXErrorSuccess && roleRef) {
NSString* role = (__bridge NSString*)roleRef;
// Map UI element roles to cursor types
if ([role isEqualToString:NSAccessibilityTextFieldRole] ||
[role isEqualToString:NSAccessibilityTextAreaRole] ||
[role isEqualToString:NSAccessibilityStaticTextRole] ||
[role isEqualToString:@"AXWebArea"]) {
// Check if text is editable
CFTypeRef editableRef = nullptr;
if (AXUIElementCopyAttributeValue(element, CFSTR("AXEditable"), &editableRef) == kAXErrorSuccess) {
if (editableRef && CFBooleanGetValue((CFBooleanRef)editableRef)) {
index = 5; // IDC_IBEAM for editable text
}
if (editableRef) CFRelease(editableRef);
} else if ([role isEqualToString:NSAccessibilityTextFieldRole] ||
[role isEqualToString:NSAccessibilityTextAreaRole]) {
index = 5; // IDC_IBEAM for text input fields
}
} else if ([role isEqualToString:NSAccessibilityLinkRole] ||
[role isEqualToString:@"AXLink"]) {
index = 3; // IDC_HAND for links
} else if ([role isEqualToString:NSAccessibilityButtonRole]) {
index = 3; // IDC_HAND for buttons (clickable)
} else if ([role isEqualToString:NSAccessibilitySplitterRole] ||
[role isEqualToString:@"AXSplitGroup"]) {
// Check orientation for resize cursor
CFTypeRef orientRef = nullptr;
if (AXUIElementCopyAttributeValue(element, CFSTR("AXOrientation"), &orientRef) == kAXErrorSuccess && orientRef) {
NSString* orient = (__bridge NSString*)orientRef;
if ([orient isEqualToString:@"AXHorizontalOrientation"]) {
index = 11; // IDC_SIZENS (vertical resize)
} else {
index = 13; // IDC_SIZEWE (horizontal resize)
}
CFRelease(orientRef);
} else {
index = 13; // IDC_SIZEWE default for splitters
}
} else if ([role isEqualToString:NSAccessibilityGrowAreaRole]) {
index = 12; // IDC_SIZENWSE for resize corners
}
CFRelease(roleRef);
}
CFRelease(element);
}
// Cache the result
cachedIndex = index;
return index;
}
void ScreenHandler::captureLoop()
{
NSLog(@"ScreenHandler CaptureLoop started (%dx%d)%s", m_width, m_height,
m_displayStream ? " [CGDisplayStream]" : " [Legacy]");
uint8_t currentAlgo = m_algorithm.load();
// Always send raw first frame (TOKEN_FIRSTSCREEN) to initialize server display
sendFirstScreen();
// Small delay to ensure first frame is processed before H264 stream starts
usleep(50000); // 50ms, same as Windows client
while (m_running) {
uint64_t start = getTickMs();
// Wait for new frame from display stream (push model)
// This is key optimization: CPU sleeps when screen is static
if (m_displayStream) {
std::unique_lock<std::mutex> lock(m_surfaceMutex);
int fps = m_maxFPS.load();
if (fps <= 0) fps = 15;
int waitMs = 1000 / fps;
// Wait for new frame or timeout (maintains FPS even if no change)
m_surfaceCond.wait_for(lock, std::chrono::milliseconds(waitMs), [this] {
return m_hasNewFrame.load() || !m_running;
});
m_hasNewFrame.store(false);
if (!m_running) break;
}
uint8_t algo = m_algorithm.load();
// Check if algorithm changed
if (algo != currentAlgo) {
NSLog(@"Algorithm changed: %d -> %d", currentAlgo, algo);
currentAlgo = algo;
if (algo == ALGORITHM_H264) {
sendH264Frame(true); // First H264 frame is keyframe
} else if (m_h264Encoder) {
m_h264Encoder->close();
m_h264Encoder.reset();
sendFirstScreen();
}
} else {
if (algo == ALGORITHM_H264) {
sendH264Frame(false);
} else {
sendDiffFrame();
}
}
// Only use sleep-based FPS control for legacy mode
if (!m_displayStream) {
int fps = m_maxFPS.load();
if (fps <= 0) fps = 10;
int sleepMs = 1000 / fps;
int elapsed = (int)(getTickMs() - start);
int wait = sleepMs - elapsed;
if (wait > 0) {
usleep(wait * 1000);
}
}
}
NSLog(@"ScreenHandler CaptureLoop stopped");
}