Files
SimpleRemoter/macos/H264Encoder.mm
2026-05-01 01:28:55 +02:00

522 lines
17 KiB
Plaintext

#import "H264Encoder.h"
#import <VideoToolbox/VideoToolbox.h>
#import <CoreMedia/CoreMedia.h>
#import <CoreVideo/CoreVideo.h>
#import <Cocoa/Cocoa.h>
// Constructs an encoder that has no compression session yet.
// The frame rate starts at 30; every other numeric setting starts at zero
// and no keyframe is pending.
H264Encoder::H264Encoder()
    : m_session{nullptr}
    , m_width{0}
    , m_height{0}
    , m_fps{30}
    , m_bitrate{0}
    , m_forceKeyframe{false}
    , m_frameCount{0}
{
    // Begin with an empty error message.
    m_lastError[0] = '\0';
}
// Destructor: hands all cleanup (session and working buffers) to close().
H264Encoder::~H264Encoder()
{
    this->close();
}
// Opens the encoder for frames of the given size, replacing any session that
// is already open.
//
// width, height : frame size in pixels; each is rounded down to an even value
//                 and must be at least 2.
// fps           : expected frame rate; values <= 0 fall back to 30.
// bitrate       : target average bitrate in bits per second; values <= 0
//                 select a default of about 3 bits per pixel.
//
// Returns true once the VideoToolbox session has been created, configured and
// prepared. On failure returns false, stores a message in m_lastError and
// leaves the encoder closed.
bool H264Encoder::open(int width, int height, int fps, int bitrate)
{
    // Drop any previous session and buffers before reconfiguring.
    close();

    // Anything smaller than 2 would round down to an empty (or negative) frame.
    if (width < 2 || height < 2) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "Invalid frame size: %dx%d", width, height);
        NSLog(@"H264Encoder: %s", m_lastError);
        return false;
    }

    // Width and height must be even for H264
    m_width = width & ~1;
    m_height = height & ~1;
    m_fps = fps > 0 ? fps : 30;

    // Work in 64 bits so very large frames cannot overflow the int product.
    const int64_t pixelCount = (int64_t)m_width * (int64_t)m_height;
    if (bitrate > 0) {
        m_bitrate = bitrate;
    } else {
        // ~3 bits per pixel default, clamped to what a 32-bit property can hold.
        const int64_t defaultBitrate = pixelCount * 3;
        m_bitrate = defaultBitrate > INT32_MAX ? INT32_MAX : (int)defaultBitrate;
    }

    // Allocate the I420 working planes (chroma is subsampled 2x2).
    const size_t ySize = (size_t)pixelCount;
    const size_t uvSize = (size_t)(m_width / 2) * (size_t)(m_height / 2);
    m_yPlane.resize(ySize);
    m_uPlane.resize(uvSize);
    m_vPlane.resize(uvSize);

    // Reserve output buffer
    m_outputBuffer.reserve(ySize);

    // Stores a 32-bit integer under `key` in a CF dictionary.
    auto setDictInt32 = [](CFMutableDictionaryRef dict, CFStringRef key, int32_t value) {
        CFNumberRef number = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value);
        if (number) {
            CFDictionarySetValue(dict, key, number);
            CFRelease(number);
        }
    };

    // Encoder specification: prefer the hardware encoder.
    CFMutableDictionaryRef encoderSpec = CFDictionaryCreateMutable(
        kCFAllocatorDefault, 0,
        &kCFTypeDictionaryKeyCallBacks,
        &kCFTypeDictionaryValueCallBacks);
    CFDictionarySetValue(encoderSpec,
                         kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
                         kCFBooleanTrue);

    // Source image attributes: planar I420 at the (even) frame size.
    CFMutableDictionaryRef sourceAttrs = CFDictionaryCreateMutable(
        kCFAllocatorDefault, 0,
        &kCFTypeDictionaryKeyCallBacks,
        &kCFTypeDictionaryValueCallBacks);
    setDictInt32(sourceAttrs, kCVPixelBufferPixelFormatTypeKey,
                 (int32_t)kCVPixelFormatType_420YpCbCr8Planar);
    setDictInt32(sourceAttrs, kCVPixelBufferWidthKey, (int32_t)m_width);
    setDictInt32(sourceAttrs, kCVPixelBufferHeightKey, (int32_t)m_height);

    // Create compression session; encoded frames are delivered to
    // compressionCallback with `this` as its context pointer.
    OSStatus status = VTCompressionSessionCreate(
        kCFAllocatorDefault,
        m_width,
        m_height,
        kCMVideoCodecType_H264,
        encoderSpec,
        sourceAttrs,
        kCFAllocatorDefault,
        compressionCallback,
        this,
        &m_session);
    CFRelease(encoderSpec);
    CFRelease(sourceAttrs);
    if (status != noErr) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "VTCompressionSessionCreate failed: %d", (int)status);
        NSLog(@"H264Encoder: %s", m_lastError);
        // Release the buffers allocated above, matching the other failure path.
        close();
        return false;
    }

    // Session properties are applied best-effort: a rejected property is
    // logged but does not abort open().
    auto applyProperty = [this](CFStringRef key, CFTypeRef value) {
        const OSStatus rc = VTSessionSetProperty(m_session, key, value);
        if (rc != noErr) {
            NSLog(@"H264Encoder: failed to set %@: %d", (__bridge NSString*)key, (int)rc);
        }
    };
    auto applyInt32Property = [&applyProperty](CFStringRef key, int32_t value) {
        CFNumberRef number = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value);
        if (number) {
            applyProperty(key, number);
            CFRelease(number);
        }
    };

    // Real-time encoding
    applyProperty(kVTCompressionPropertyKey_RealTime, kCFBooleanTrue);
    // Profile: Baseline for compatibility
    applyProperty(kVTCompressionPropertyKey_ProfileLevel,
                  kVTProfileLevel_H264_Baseline_AutoLevel);
    // No frame reordering, for low latency
    applyProperty(kVTCompressionPropertyKey_AllowFrameReordering, kCFBooleanFalse);
    // Max keyframe interval (GOP size) - match Windows x264 setting (15 seconds)
    applyInt32Property(kVTCompressionPropertyKey_MaxKeyFrameInterval, (int32_t)(m_fps * 15));
    // Expected frame rate
    applyInt32Property(kVTCompressionPropertyKey_ExpectedFrameRate, (int32_t)m_fps);
    // Average bitrate
    applyInt32Property(kVTCompressionPropertyKey_AverageBitRate, (int32_t)m_bitrate);

    // Data rate limits (for more consistent bitrate):
    // [bytes per second, duration in seconds]
    int64_t dataRateLimit = m_bitrate / 8;
    double duration = 1.0;
    CFNumberRef bytesRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &dataRateLimit);
    CFNumberRef durationRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberFloat64Type, &duration);
    if (bytesRef && durationRef) {
        CFTypeRef limits[2] = { bytesRef, durationRef };
        CFArrayRef limitsArray = CFArrayCreate(kCFAllocatorDefault, limits, 2, &kCFTypeArrayCallBacks);
        if (limitsArray) {
            applyProperty(kVTCompressionPropertyKey_DataRateLimits, limitsArray);
            CFRelease(limitsArray);
        }
    }
    if (bytesRef) {
        CFRelease(bytesRef);
    }
    if (durationRef) {
        CFRelease(durationRef);
    }

    // Prepare to encode
    status = VTCompressionSessionPrepareToEncodeFrames(m_session);
    if (status != noErr) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "VTCompressionSessionPrepareToEncodeFrames failed: %d", (int)status);
        NSLog(@"H264Encoder: %s", m_lastError);
        close();
        return false;
    }

    m_frameCount = 0;
    m_forceKeyframe = true; // First frame is always keyframe
    NSLog(@"H264Encoder opened: %dx%d @ %d fps, bitrate=%d",
          m_width, m_height, m_fps, m_bitrate);
    return true;
}
// Shuts the encoder down: invalidates and releases the compression session
// (when one exists) and empties every working buffer. Calling it on an
// encoder without a session only clears the buffers.
void H264Encoder::close()
{
    if (m_session != nullptr) {
        // Invalidate before dropping our reference to the session.
        VTCompressionSessionInvalidate(m_session);
        CFRelease(m_session);
        m_session = nullptr;
    }

    // Empty the encoded-output buffer and the three I420 planes.
    m_outputBuffer.clear();
    m_vPlane.clear();
    m_uPlane.clear();
    m_yPlane.clear();
}
// Converts a packed 4-bytes-per-pixel BGRA image into the encoder's I420
// planes (m_yPlane, m_uPlane, m_vPlane).
//
// The fixed-point matrix used is the common integer approximation of BT.601
// limited ("video") range:
//   Y = (( 66*R + 129*G +  25*B + 128) >> 8) + 16
//   U = ((-38*R -  74*G + 112*B + 128) >> 8) + 128
//   V = ((112*R -  94*G -  18*B + 128) >> 8) + 128
// Chroma is computed from the average of each 2x2 pixel block.
//
// bgra         : source pixels; byte order within a pixel is B, G, R, then one
//                unused byte.
// stride       : bytes between the starts of consecutive source rows.
// width/height : image size in pixels; the planes must already be sized for it.
// flipVertical : when true the source rows are read bottom-up.
//
// The call is a no-op when the source is null/empty or the planes are too
// small for the requested size.
void H264Encoder::convertBGRAtoI420(const uint8_t* bgra, uint32_t stride,
                                    uint32_t width, uint32_t height,
                                    bool flipVertical)
{
    // One chroma sample per complete 2x2 block; a trailing odd column or row
    // contributes luma only, so chroma writes never leave their plane.
    const size_t chromaWidth = width / 2;
    const size_t chromaHeight = height / 2;

    if (!bgra || width == 0 || height == 0) {
        return;
    }
    if (m_yPlane.size() < (size_t)width * (size_t)height ||
        m_uPlane.size() < chromaWidth * chromaHeight ||
        m_vPlane.size() < chromaWidth * chromaHeight) {
        return;
    }

    auto clampToByte = [](int value) -> uint8_t {
        return (uint8_t)(value < 0 ? 0 : (value > 255 ? 255 : value));
    };

    uint8_t* const yPlane = m_yPlane.data();
    uint8_t* const uPlane = m_uPlane.data();
    uint8_t* const vPlane = m_vPlane.data();

    for (uint32_t row = 0; row < height; ++row) {
        // Source row (handle vertical flip); offsets use size_t to avoid
        // 32-bit wrap on large frames.
        const uint32_t srcRowIndex = flipVertical ? (height - 1 - row) : row;
        const uint8_t* const srcRow = bgra + (size_t)srcRowIndex * stride;

        // Luma: one sample per pixel.
        uint8_t* const yRow = yPlane + (size_t)row * width;
        for (uint32_t col = 0; col < width; ++col) {
            const uint8_t* const px = srcRow + (size_t)col * 4;
            const int b = px[0];
            const int g = px[1];
            const int r = px[2];
            yRow[col] = clampToByte(((66 * r + 129 * g + 25 * b + 128) >> 8) + 16);
        }

        // Chroma: only on even rows that start a complete 2x2 block.
        if ((row & 1u) != 0 || row / 2 >= chromaHeight) {
            continue;
        }

        // Second source row of the block, resolved once per row rather than
        // once per sample. The unsigned wrap from (height - 2 - row) is caught
        // by the >= height test.
        uint32_t pairRowIndex = flipVertical ? (height - 2 - row) : (row + 1);
        if (pairRowIndex >= height) {
            pairRowIndex = srcRowIndex;
        }
        const uint8_t* const pairRow = bgra + (size_t)pairRowIndex * stride;

        uint8_t* const uRow = uPlane + (size_t)(row / 2) * chromaWidth;
        uint8_t* const vRow = vPlane + (size_t)(row / 2) * chromaWidth;
        for (size_t cx = 0; cx < chromaWidth; ++cx) {
            // Two horizontally adjacent BGRA pixels occupy 8 bytes.
            const uint8_t* const top = srcRow + cx * 8;
            const uint8_t* const bottom = pairRow + cx * 8;

            // Average the 2x2 block per channel.
            const int b = (top[0] + top[4] + bottom[0] + bottom[4]) / 4;
            const int g = (top[1] + top[5] + bottom[1] + bottom[5]) / 4;
            const int r = (top[2] + top[6] + bottom[2] + bottom[6]) / 4;

            uRow[cx] = clampToByte(((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128);
            vRow[cx] = clampToByte(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128);
        }
    }
}
// Encodes one BGRA frame and returns the resulting Annex B H.264 data.
//
// bgra          : source pixels, 4 bytes per pixel (B, G, R, unused).
// bpp           : not consulted by this implementation.
//                 NOTE(review): the conversion always assumes 4 bytes per
//                 pixel — confirm callers never pass another format.
// stride        : bytes per source row; must be at least width * 4.
// width, height : must equal the (even) size the encoder was opened with.
// outData       : receives a pointer into the encoder's internal output
//                 buffer; it stays valid until the next encode() or close().
// outSize       : receives the number of bytes at *outData.
// flipVertical  : when true the source rows are read bottom-up.
//
// Returns the number of encoded bytes, or 0 on failure / when the encoder
// produced no output (m_lastError then holds the reason). On a 0 return
// *outData is nullptr and *outSize is 0.
int H264Encoder::encode(const uint8_t* bgra, uint8_t bpp, uint32_t stride,
                        uint32_t width, uint32_t height,
                        uint8_t** outData, uint32_t* outSize,
                        bool flipVertical)
{
    (void)bpp;

    // Give the outputs a defined value on every failure path.
    if (outData) {
        *outData = nullptr;
    }
    if (outSize) {
        *outSize = 0;
    }

    if (!m_session) {
        snprintf(m_lastError, sizeof(m_lastError), "Encoder not initialized");
        return 0;
    }
    if (!bgra || !outData || !outSize) {
        snprintf(m_lastError, sizeof(m_lastError), "Invalid arguments");
        return 0;
    }
    if (width != (uint32_t)m_width || height != (uint32_t)m_height) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "Frame size mismatch: expected %dx%d, got %dx%d",
                 m_width, m_height, (int)width, (int)height);
        return 0;
    }
    // The converter reads width * 4 bytes from every row.
    if ((uint64_t)stride < (uint64_t)width * 4) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "Stride %u too small for %u-pixel rows",
                 (unsigned)stride, (unsigned)width);
        return 0;
    }

    // Convert BGRA to I420
    convertBGRAtoI420(bgra, stride, width, height, flipVertical);

    // Create CVPixelBuffer. The pool keeps the temporary dictionary from
    // piling up when the calling thread has no autorelease pool of its own.
    CVPixelBufferRef pixelBuffer = nullptr;
    CVReturn cvRet = kCVReturnSuccess;
    @autoreleasepool {
        NSDictionary* options = @{
            (id)kCVPixelBufferIOSurfacePropertiesKey: @{}
        };
        cvRet = CVPixelBufferCreate(
            kCFAllocatorDefault,
            m_width,
            m_height,
            kCVPixelFormatType_420YpCbCr8Planar,
            (__bridge CFDictionaryRef)options,
            &pixelBuffer);
    }
    if (cvRet != kCVReturnSuccess || !pixelBuffer) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBufferCreate failed: %d", (int)cvRet);
        return 0;
    }

    // Lock and copy YUV data
    cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0);
    if (cvRet != kCVReturnSuccess) {
        CVPixelBufferRelease(pixelBuffer);
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBufferLockBaseAddress failed: %d", (int)cvRet);
        return 0;
    }
    const size_t planeCount = CVPixelBufferGetPlaneCount(pixelBuffer);
    if (planeCount < 3) {
        CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
        CVPixelBufferRelease(pixelBuffer);
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBuffer has %zu planes, expected 3", planeCount);
        return 0;
    }

    // Copies a tightly packed plane into the pixel buffer row by row, because
    // the destination rows may be padded.
    auto copyPlane = [pixelBuffer](size_t planeIndex, const uint8_t* src,
                                   size_t rowBytes, size_t rows) -> bool {
        uint8_t* dst = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, planeIndex);
        const size_t dstStride = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, planeIndex);
        if (!dst || dstStride < rowBytes) {
            return false;
        }
        for (size_t row = 0; row < rows; ++row) {
            memcpy(dst + row * dstStride, src + row * rowBytes, rowBytes);
        }
        return true;
    };

    const size_t uvWidth = (size_t)(m_width / 2);
    const size_t uvHeight = (size_t)(m_height / 2);
    const bool planesCopied =
        copyPlane(0, m_yPlane.data(), (size_t)m_width, (size_t)m_height) &&
        copyPlane(1, m_uPlane.data(), uvWidth, uvHeight) &&
        copyPlane(2, m_vPlane.data(), uvWidth, uvHeight);
    CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
    if (!planesCopied) {
        CVPixelBufferRelease(pixelBuffer);
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBuffer plane layout is unusable");
        return 0;
    }

    // Prepare frame properties; a pending keyframe request is consumed here.
    const bool keyframeRequested = m_forceKeyframe.exchange(false);
    CFMutableDictionaryRef frameProps = nullptr;
    if (keyframeRequested) {
        frameProps = CFDictionaryCreateMutable(
            kCFAllocatorDefault, 1,
            &kCFTypeDictionaryKeyCallBacks,
            &kCFTypeDictionaryValueCallBacks);
        CFDictionarySetValue(frameProps,
                             kVTEncodeFrameOptionKey_ForceKeyFrame,
                             kCFBooleanTrue);
    }

    // Clear output buffer
    {
        std::lock_guard<std::mutex> lock(m_outputMutex);
        m_outputBuffer.clear();
    }

    // Presentation timestamp
    CMTime pts = CMTimeMake(m_frameCount++, m_fps);

    // Encode frame
    OSStatus status = VTCompressionSessionEncodeFrame(
        m_session,
        pixelBuffer,
        pts,
        kCMTimeInvalid,
        frameProps,
        nullptr,
        nullptr);
    if (frameProps) {
        CFRelease(frameProps);
    }
    CVPixelBufferRelease(pixelBuffer);
    if (status != noErr) {
        // The keyframe request was not honoured; keep it for the next frame.
        if (keyframeRequested) {
            m_forceKeyframe = true;
        }
        snprintf(m_lastError, sizeof(m_lastError),
                 "VTCompressionSessionEncodeFrame failed: %d", (int)status);
        return 0;
    }

    // Wait for encoding to complete so the callback has filled m_outputBuffer.
    const OSStatus flushStatus = VTCompressionSessionCompleteFrames(m_session, kCMTimeInvalid);

    // Return encoded data
    std::lock_guard<std::mutex> lock(m_outputMutex);
    if (m_outputBuffer.empty()) {
        if (keyframeRequested) {
            m_forceKeyframe = true;
        }
        if (flushStatus != noErr) {
            snprintf(m_lastError, sizeof(m_lastError),
                     "VTCompressionSessionCompleteFrames failed: %d", (int)flushStatus);
        } else {
            snprintf(m_lastError, sizeof(m_lastError), "Encoder produced no output");
        }
        return 0;
    }
    *outData = m_outputBuffer.data();
    *outSize = (uint32_t)m_outputBuffer.size();
    return (int)m_outputBuffer.size();
}
// Output callback handed to VTCompressionSessionCreate in open().
// outputCallbackRefCon is the H264Encoder that owns the session. A frame that
// arrives with an error status is logged and dropped; a successful frame with
// a sample buffer is passed to processSampleBuffer().
void H264Encoder::compressionCallback(void* outputCallbackRefCon,
                                      void* sourceFrameRefCon,
                                      OSStatus status,
                                      VTEncodeInfoFlags infoFlags,
                                      CMSampleBufferRef sampleBuffer)
{
    // Neither the per-frame context nor the info flags are used.
    (void)infoFlags;
    (void)sourceFrameRefCon;

    if (status == noErr) {
        if (sampleBuffer) {
            H264Encoder* self = (H264Encoder*)outputCallbackRefCon;
            self->processSampleBuffer(sampleBuffer);
        }
        return;
    }

    NSLog(@"H264Encoder: Compression callback error: %d", (int)status);
}
// Rewrites one encoded sample into m_outputBuffer as an Annex B byte stream.
//
// The sample's payload is length-prefixed (AVCC); every NAL unit is re-emitted
// behind a 00 00 00 01 start code. For keyframes the parameter sets (SPS/PPS)
// from the format description are written first. Holds m_outputMutex for the
// whole rewrite and replaces whatever m_outputBuffer contained.
void H264Encoder::processSampleBuffer(CMSampleBufferRef sampleBuffer)
{
    static const uint8_t kStartCode[4] = {0x00, 0x00, 0x00, 0x01};

    // A sample is a keyframe unless its attachments mark it NotSync. When no
    // attachment array is present it is treated as a non-keyframe, so no
    // parameter sets are prepended.
    bool isKeyframe = false;
    CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, false);
    if (attachments && CFArrayGetCount(attachments) > 0) {
        CFDictionaryRef dict = (CFDictionaryRef)CFArrayGetValueAtIndex(attachments, 0);
        CFBooleanRef notSync = (CFBooleanRef)CFDictionaryGetValue(dict,
                                                                  kCMSampleAttachmentKey_NotSync);
        isKeyframe = (notSync == nullptr || !CFBooleanGetValue(notSync));
    }

    std::lock_guard<std::mutex> lock(m_outputMutex);
    m_outputBuffer.clear();

    // The format description carries the parameter sets and the size of the
    // NAL length prefix (usually 4 bytes).
    CMFormatDescriptionRef formatDesc = CMSampleBufferGetFormatDescription(sampleBuffer);
    int nalLengthSize = 4;
    if (formatDesc) {
        size_t parameterSetCount = 0;
        int headerLength = 0;
        OSStatus status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
            formatDesc, 0, nullptr, nullptr, &parameterSetCount, &headerLength);
        if (status == noErr) {
            if (headerLength > 0 && headerLength <= 4) {
                nalLengthSize = headerLength;
            }
        } else {
            // Count unavailable: still probe the usual SPS (0) and PPS (1).
            parameterSetCount = 2;
        }

        if (isKeyframe) {
            // Emit every parameter set, not just the first SPS and PPS.
            for (size_t index = 0; index < parameterSetCount; ++index) {
                const uint8_t* parameterSet = nullptr;
                size_t parameterSetSize = 0;
                status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
                    formatDesc, index, &parameterSet, &parameterSetSize, nullptr, nullptr);
                if (status == noErr && parameterSet && parameterSetSize > 0) {
                    m_outputBuffer.insert(m_outputBuffer.end(), kStartCode, kStartCode + 4);
                    m_outputBuffer.insert(m_outputBuffer.end(),
                                          parameterSet, parameterSet + parameterSetSize);
                }
            }
        }
    }

    // Get encoded data
    CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    if (!blockBuffer) {
        return;
    }
    const size_t totalLength = CMBlockBufferGetDataLength(blockBuffer);

    // Convert AVCC (length-prefixed) to Annex B (start-code-prefixed). Bytes
    // are fetched with CMBlockBufferCopyDataBytes so a block buffer that is
    // not one contiguous allocation is still read correctly.
    size_t offset = 0;
    while (offset + (size_t)nalLengthSize <= totalLength) {
        // Read NAL unit length (big-endian, variable size)
        uint8_t lengthBytes[4] = {0, 0, 0, 0};
        if (CMBlockBufferCopyDataBytes(blockBuffer, offset, (size_t)nalLengthSize,
                                       lengthBytes) != kCMBlockBufferNoErr) {
            break;
        }
        uint32_t nalLength = 0;
        for (int i = 0; i < nalLengthSize; i++) {
            nalLength = (nalLength << 8) | lengthBytes[i];
        }
        offset += (size_t)nalLengthSize;

        if (nalLength == 0) {
            continue; // empty unit: nothing to emit
        }
        if (nalLength > totalLength - offset) {
            break; // truncated or corrupt length: stop converting
        }

        // Append start code + payload; roll the append back if the copy fails.
        const size_t writePos = m_outputBuffer.size();
        m_outputBuffer.resize(writePos + sizeof(kStartCode) + nalLength);
        memcpy(m_outputBuffer.data() + writePos, kStartCode, sizeof(kStartCode));
        if (CMBlockBufferCopyDataBytes(blockBuffer, offset, nalLength,
                                       m_outputBuffer.data() + writePos + sizeof(kStartCode))
                != kCMBlockBufferNoErr) {
            m_outputBuffer.resize(writePos);
            break;
        }
        offset += nalLength;
    }
}