522 lines
17 KiB
Plaintext
522 lines
17 KiB
Plaintext
#import "H264Encoder.h"
|
|
#import <VideoToolbox/VideoToolbox.h>
|
|
#import <CoreMedia/CoreMedia.h>
|
|
#import <CoreVideo/CoreVideo.h>
|
|
#import <Cocoa/Cocoa.h>
|
|
|
|
// Builds an encoder in the "closed" state: no compression session, zero frame
// size, a 30 fps default rate, no bitrate chosen yet, no pending keyframe
// request, a zero frame counter and an empty last-error string.
H264Encoder::H264Encoder()
    : m_session(nullptr),
      m_width(0),
      m_height(0),
      m_fps(30),
      m_bitrate(0),
      m_forceKeyframe(false),
      m_frameCount(0)
{
    // Start with an empty, NUL-terminated error message.
    m_lastError[0] = '\0';
}
|
|
|
|
// Tears the encoder down by delegating to close(), which releases the
// compression session (if any) and empties the working buffers.
H264Encoder::~H264Encoder()
{
    close();
}
|
|
|
|
// Opens (or re-opens) the encoder for frames of the given size.
//
// Any previously open session is closed first. Width and height are rounded
// down to the nearest even value; sizes smaller than 2x2 are rejected.
//
// @param width    Frame width in pixels (rounded down to even).
// @param height   Frame height in pixels (rounded down to even).
// @param fps      Expected frame rate; values <= 0 fall back to 30.
// @param bitrate  Target average bitrate in bits per second; values <= 0 fall
//                 back to width * height * 3.
// @return true when the compression session was created and prepared; false
//         otherwise, with a description stored in m_lastError.
bool H264Encoder::open(int width, int height, int fps, int bitrate)
{
    close();

    // A frame needs at least one 2x2 block after rounding down to even sizes;
    // this also rejects zero and negative dimensions before any allocation.
    if (width < 2 || height < 2) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "Invalid frame size: %dx%d", width, height);
        NSLog(@"H264Encoder: %s", m_lastError);
        return false;
    }

    // Width and height must be even for H264
    m_width = width & ~1;
    m_height = height & ~1;
    m_fps = fps > 0 ? fps : 30;
    m_bitrate = bitrate > 0 ? bitrate : (m_width * m_height * 3); // ~3 bits per pixel default

    // Allocate the planar YUV staging buffers (full-size Y, quarter-size U and V).
    int ySize = m_width * m_height;
    int uvSize = (m_width / 2) * (m_height / 2);
    m_yPlane.resize(ySize);
    m_uPlane.resize(uvSize);
    m_vPlane.resize(uvSize);

    // Reserve output buffer
    m_outputBuffer.reserve(m_width * m_height);

    // Small helper: wrap a 32-bit integer in a CFNumber (caller releases).
    auto makeInt32 = [](int32_t value) -> CFNumberRef {
        return CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value);
    };

    // Encoder specification: prefer the hardware encoder.
    CFMutableDictionaryRef encoderSpec = CFDictionaryCreateMutable(
        kCFAllocatorDefault, 0,
        &kCFTypeDictionaryKeyCallBacks,
        &kCFTypeDictionaryValueCallBacks);
    CFDictionarySetValue(encoderSpec,
                         kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
                         kCFBooleanTrue);

    // Source image attributes: planar 4:2:0 (I420) at the rounded frame size.
    CFMutableDictionaryRef sourceAttrs = CFDictionaryCreateMutable(
        kCFAllocatorDefault, 0,
        &kCFTypeDictionaryKeyCallBacks,
        &kCFTypeDictionaryValueCallBacks);

    CFNumberRef pixelFormatNum = makeInt32((int32_t)kCVPixelFormatType_420YpCbCr8Planar);
    CFDictionarySetValue(sourceAttrs, kCVPixelBufferPixelFormatTypeKey, pixelFormatNum);
    CFRelease(pixelFormatNum);

    CFNumberRef widthRef = makeInt32((int32_t)m_width);
    CFNumberRef heightRef = makeInt32((int32_t)m_height);
    CFDictionarySetValue(sourceAttrs, kCVPixelBufferWidthKey, widthRef);
    CFDictionarySetValue(sourceAttrs, kCVPixelBufferHeightKey, heightRef);
    CFRelease(widthRef);
    CFRelease(heightRef);

    // Create the compression session; encoded frames are delivered to
    // compressionCallback with `this` as the refcon.
    OSStatus status = VTCompressionSessionCreate(
        kCFAllocatorDefault,
        m_width,
        m_height,
        kCMVideoCodecType_H264,
        encoderSpec,
        sourceAttrs,
        kCFAllocatorDefault,
        compressionCallback,
        this,
        &m_session);

    CFRelease(encoderSpec);
    CFRelease(sourceAttrs);

    if (status != noErr) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "VTCompressionSessionCreate failed: %d", (int)status);
        NSLog(@"H264Encoder: %s", m_lastError);
        // Drop the staging buffers allocated above so a failed open leaves
        // the encoder in the same state as a closed one.
        close();
        return false;
    }

    // Session properties are best-effort: a rejected property does not abort
    // open(), but it is logged instead of being silently ignored.
    auto setProperty = [this](CFStringRef key, CFTypeRef value, const char* label) {
        OSStatus rc = VTSessionSetProperty(m_session, key, value);
        if (rc != noErr) {
            NSLog(@"H264Encoder: failed to set %s: %d", label, (int)rc);
        }
    };

    // Real-time encoding
    setProperty(kVTCompressionPropertyKey_RealTime, kCFBooleanTrue, "RealTime");

    // Profile: Baseline for compatibility
    setProperty(kVTCompressionPropertyKey_ProfileLevel,
                kVTProfileLevel_H264_Baseline_AutoLevel, "ProfileLevel");

    // Allow frame reordering: false for low latency
    setProperty(kVTCompressionPropertyKey_AllowFrameReordering,
                kCFBooleanFalse, "AllowFrameReordering");

    // Max keyframe interval (GOP size) - match Windows x264 setting (15 seconds)
    CFNumberRef keyframeRef = makeInt32((int32_t)(m_fps * 15));
    setProperty(kVTCompressionPropertyKey_MaxKeyFrameInterval, keyframeRef, "MaxKeyFrameInterval");
    CFRelease(keyframeRef);

    // Expected frame rate
    CFNumberRef fpsRef = makeInt32((int32_t)m_fps);
    setProperty(kVTCompressionPropertyKey_ExpectedFrameRate, fpsRef, "ExpectedFrameRate");
    CFRelease(fpsRef);

    // Average bitrate
    CFNumberRef bitrateRef = makeInt32((int32_t)m_bitrate);
    setProperty(kVTCompressionPropertyKey_AverageBitRate, bitrateRef, "AverageBitRate");
    CFRelease(bitrateRef);

    // Data rate limits (for more consistent bitrate)
    // [bytes per second, duration in seconds]
    int64_t dataRateLimit = m_bitrate / 8;
    double duration = 1.0;
    CFNumberRef bytesRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &dataRateLimit);
    CFNumberRef durationRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberFloat64Type, &duration);
    CFTypeRef limits[2] = { bytesRef, durationRef };
    CFArrayRef limitsArray = CFArrayCreate(kCFAllocatorDefault, limits, 2, &kCFTypeArrayCallBacks);
    setProperty(kVTCompressionPropertyKey_DataRateLimits, limitsArray, "DataRateLimits");
    CFRelease(bytesRef);
    CFRelease(durationRef);
    CFRelease(limitsArray);

    // Prepare to encode
    status = VTCompressionSessionPrepareToEncodeFrames(m_session);
    if (status != noErr) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "VTCompressionSessionPrepareToEncodeFrames failed: %d", (int)status);
        NSLog(@"H264Encoder: %s", m_lastError);
        close();
        return false;
    }

    m_frameCount = 0;
    m_forceKeyframe = true; // First frame is always keyframe

    NSLog(@"H264Encoder opened: %dx%d @ %d fps, bitrate=%d",
          m_width, m_height, m_fps, m_bitrate);

    return true;
}
|
|
|
|
// Shuts the encoder down. If a compression session exists it is invalidated,
// released and the member reset to nullptr; afterwards the three YUV staging
// planes and the output buffer are emptied. Calling this on an encoder that
// has no session only empties the buffers.
void H264Encoder::close()
{
    if (m_session != nullptr) {
        VTCompressionSessionInvalidate(m_session);
        CFRelease(m_session);
        m_session = nullptr;
    }

    // Drop all per-session working data.
    m_yPlane.clear();
    m_uPlane.clear();
    m_vPlane.clear();
    m_outputBuffer.clear();
}
|
|
|
|
// Converts a packed 4-bytes-per-pixel image (byte order B, G, R, unused) into
// the planar I420 staging buffers m_yPlane / m_uPlane / m_vPlane.
//
// The conversion uses 8.8 fixed-point integer coefficients:
//   Y = (( 66*R + 129*G +  25*B + 128) >> 8) + 16
//   U = ((-38*R -  74*G + 112*B + 128) >> 8) + 128
//   V = ((112*R -  94*G -  18*B + 128) >> 8) + 128
// (these are the widely used integer approximation of limited-range BT.601).
//
// Chroma is subsampled 2x2: each U/V sample is computed from the per-channel
// average of a 2x2 block of source pixels. Only complete 2x2 blocks are
// converted, so for an odd width/height the last column/row contributes to
// luma only and the chroma planes are never indexed past
// (width / 2) * (height / 2) samples.
//
// @param bgra          First byte of the source image.
// @param stride        Bytes between the starts of consecutive source rows.
// @param width         Pixels per row to convert.
// @param height        Rows to convert.
// @param flipVertical  When true, destination row y is read from source row
//                      (height - 1 - y).
//
// The destination planes are assumed to have been sized for width x height
// by the caller (the visible code does this in open()).
void H264Encoder::convertBGRAtoI420(const uint8_t* bgra, uint32_t stride,
                                    uint32_t width, uint32_t height,
                                    bool flipVertical)
{
    uint8_t* yDst = m_yPlane.data();
    uint8_t* uDst = m_uPlane.data();
    uint8_t* vDst = m_vPlane.data();

    // Clamp an intermediate result into the 0..255 byte range.
    auto clampToByte = [](int value) -> uint8_t {
        return (uint8_t)(value < 0 ? 0 : (value > 255 ? 255 : value));
    };

    // ---- Luma: one sample per source pixel -------------------------------
    for (uint32_t y = 0; y < height; y++) {
        // Source row (handle vertical flip). Offsets are computed in size_t
        // so large frames cannot wrap 32-bit arithmetic.
        const uint32_t srcY = flipVertical ? (height - 1 - y) : y;
        const uint8_t* srcRow = bgra + (size_t)srcY * stride;
        uint8_t* yRow = yDst + (size_t)y * width;

        for (uint32_t x = 0; x < width; x++) {
            const uint8_t* px = srcRow + (size_t)x * 4;
            const int b = px[0];
            const int g = px[1];
            const int r = px[2];

            yRow[x] = clampToByte(((66 * r + 129 * g + 25 * b + 128) >> 8) + 16);
        }
    }

    // ---- Chroma: one U and one V sample per complete 2x2 block -----------
    const uint32_t uvWidth = width / 2;
    const uint32_t uvHeight = height / 2;

    for (uint32_t cy = 0; cy < uvHeight; cy++) {
        // Destination rows 2*cy and 2*cy + 1 form the block; both are in
        // range because cy < height / 2. With a vertical flip they map to
        // source rows (height - 1 - 2*cy) and (height - 2 - 2*cy).
        const uint32_t dstTop = cy * 2;
        const uint32_t srcTop = flipVertical ? (height - 1 - dstTop) : dstTop;
        const uint32_t srcBottom = flipVertical ? (height - 2 - dstTop) : (dstTop + 1);

        // Row pointers are invariant across the block columns.
        const uint8_t* rowA = bgra + (size_t)srcTop * stride;
        const uint8_t* rowB = bgra + (size_t)srcBottom * stride;

        uint8_t* uRow = uDst + (size_t)cy * uvWidth;
        uint8_t* vRow = vDst + (size_t)cy * uvWidth;

        for (uint32_t cx = 0; cx < uvWidth; cx++) {
            // Left pixel of the block in each row; the right pixel follows
            // 4 bytes later and is in range because cx < width / 2.
            const uint8_t* a = rowA + (size_t)cx * 8;
            const uint8_t* c = rowB + (size_t)cx * 8;

            // Per-channel average of the four block pixels.
            const int b = (a[0] + a[4] + c[0] + c[4]) / 4;
            const int g = (a[1] + a[5] + c[1] + c[5]) / 4;
            const int r = (a[2] + a[6] + c[2] + c[6]) / 4;

            uRow[cx] = clampToByte(((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128);
            vRow[cx] = clampToByte(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128);
        }
    }
}
|
|
|
|
// Encodes one frame synchronously and returns the resulting Annex B bytes.
//
// The source image is converted to I420, copied into a freshly created
// CVPixelBuffer, submitted to the compression session, and the call then
// waits for the session to finish so the output produced by the compression
// callback can be returned.
//
// @param bgra          Source pixels, 4 bytes per pixel in B, G, R, unused order.
// @param bpp           Accepted for interface compatibility; not consulted.
//                      NOTE(review): the conversion always reads 4 bytes per
//                      pixel - confirm callers never pass another layout.
// @param stride        Bytes between the starts of consecutive source rows.
// @param width         Source width. Must equal the opened width, or exceed it
//                      by the one odd pixel that open() rounds away.
// @param height        Source height; same rule as width.
// @param outData       Receives a pointer into the encoder's internal output
//                      buffer (set to nullptr on failure). The pointer refers
//                      to m_outputBuffer, which this method clears on its
//                      next call and close() also clears.
// @param outSize       Receives the number of bytes at *outData (0 on failure).
// @param flipVertical  Read the source bottom-up.
// @return Number of encoded bytes, or 0 on failure / when the encoder emitted
//         nothing for this frame (m_lastError is updated in both cases).
int H264Encoder::encode(const uint8_t* bgra, uint8_t bpp, uint32_t stride,
                        uint32_t width, uint32_t height,
                        uint8_t** outData, uint32_t* outSize,
                        bool flipVertical)
{
    (void)bpp;

    // Give the caller well-defined outputs on every failure path.
    if (outData) {
        *outData = nullptr;
    }
    if (outSize) {
        *outSize = 0;
    }

    if (!m_session) {
        snprintf(m_lastError, sizeof(m_lastError), "Encoder not initialized");
        return 0;
    }

    if (!bgra || !outData || !outSize) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "Invalid argument: null input or output pointer");
        return 0;
    }

    // open() rounds the frame size down to even values, so a caller that
    // passes its original (possibly odd) capture size must still be accepted;
    // the extra last column/row is simply not encoded.
    if ((width & ~1u) != (uint32_t)m_width || (height & ~1u) != (uint32_t)m_height) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "Frame size mismatch: expected %dx%d, got %dx%d",
                 m_width, m_height, (int)width, (int)height);
        return 0;
    }

    // The converter reads 4 bytes for each of m_width pixels in every row.
    if ((uint64_t)stride < (uint64_t)m_width * 4) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "Stride %u too small for width %d (need %d bytes)",
                 (unsigned)stride, m_width, m_width * 4);
        return 0;
    }

    // Convert BGRA to I420 at the opened (even) size so the staging planes
    // allocated in open() are exactly filled.
    convertBGRAtoI420(bgra, stride, (uint32_t)m_width, (uint32_t)m_height, flipVertical);

    // Create CVPixelBuffer
    CVPixelBufferRef pixelBuffer = nullptr;
    NSDictionary* options = @{
        (id)kCVPixelBufferIOSurfacePropertiesKey: @{}
    };

    CVReturn cvRet = CVPixelBufferCreate(
        kCFAllocatorDefault,
        m_width,
        m_height,
        kCVPixelFormatType_420YpCbCr8Planar,
        (__bridge CFDictionaryRef)options,
        &pixelBuffer);

    if (cvRet != kCVReturnSuccess) {
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBufferCreate failed: %d", (int)cvRet);
        return 0;
    }

    // Lock and copy YUV data
    cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0);
    if (cvRet != kCVReturnSuccess) {
        CVPixelBufferRelease(pixelBuffer);
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBufferLockBaseAddress failed: %d", (int)cvRet);
        return 0;
    }

    size_t planeCount = CVPixelBufferGetPlaneCount(pixelBuffer);
    if (planeCount < 3) {
        CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
        CVPixelBufferRelease(pixelBuffer);
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBuffer has %zu planes, expected 3", planeCount);
        return 0;
    }

    // Copies a tightly packed staging plane into one plane of the pixel
    // buffer row by row, because the destination row pitch may be larger
    // than the row width. Returns false if the plane cannot be written.
    auto copyPlane = [pixelBuffer](size_t planeIndex, const uint8_t* src,
                                   size_t rowBytes, size_t rows) -> bool {
        uint8_t* dst = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, planeIndex);
        const size_t dstStride = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, planeIndex);
        if (!dst || dstStride < rowBytes) {
            return false;
        }
        for (size_t row = 0; row < rows; row++) {
            memcpy(dst + row * dstStride, src + row * rowBytes, rowBytes);
        }
        return true;
    };

    const size_t uvWidth = (size_t)(m_width / 2);
    const size_t uvHeight = (size_t)(m_height / 2);
    const bool planesCopied =
        copyPlane(0, m_yPlane.data(), (size_t)m_width, (size_t)m_height) &&
        copyPlane(1, m_uPlane.data(), uvWidth, uvHeight) &&
        copyPlane(2, m_vPlane.data(), uvWidth, uvHeight);

    CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);

    if (!planesCopied) {
        CVPixelBufferRelease(pixelBuffer);
        snprintf(m_lastError, sizeof(m_lastError),
                 "CVPixelBuffer plane layout is not usable");
        return 0;
    }

    // Prepare frame properties. The pending keyframe request is consumed
    // here and re-armed below if this frame does not produce any output.
    const bool forceKeyframe = m_forceKeyframe.exchange(false);
    CFMutableDictionaryRef frameProps = nullptr;
    if (forceKeyframe) {
        frameProps = CFDictionaryCreateMutable(
            kCFAllocatorDefault, 1,
            &kCFTypeDictionaryKeyCallBacks,
            &kCFTypeDictionaryValueCallBacks);
        CFDictionarySetValue(frameProps,
                             kVTEncodeFrameOptionKey_ForceKeyFrame,
                             kCFBooleanTrue);
    }

    // Clear output buffer
    {
        std::lock_guard<std::mutex> lock(m_outputMutex);
        m_outputBuffer.clear();
    }

    // Presentation timestamp: frame index over the frame rate.
    CMTime pts = CMTimeMake(m_frameCount++, m_fps);

    // Encode frame
    OSStatus status = VTCompressionSessionEncodeFrame(
        m_session,
        pixelBuffer,
        pts,
        kCMTimeInvalid,
        frameProps,
        nullptr,
        nullptr);

    if (frameProps) {
        CFRelease(frameProps);
    }
    CVPixelBufferRelease(pixelBuffer);

    if (status != noErr) {
        if (forceKeyframe) {
            m_forceKeyframe = true; // the request was not honoured; keep it pending
        }
        snprintf(m_lastError, sizeof(m_lastError),
                 "VTCompressionSessionEncodeFrame failed: %d", (int)status);
        return 0;
    }

    // Wait for encoding to complete
    status = VTCompressionSessionCompleteFrames(m_session, kCMTimeInvalid);

    // Return encoded data
    std::lock_guard<std::mutex> lock(m_outputMutex);
    if (m_outputBuffer.empty()) {
        if (forceKeyframe) {
            m_forceKeyframe = true; // no frame came out; ask again next time
        }
        snprintf(m_lastError, sizeof(m_lastError),
                 "Encoder produced no output (status %d)", (int)status);
        return 0;
    }

    *outData = m_outputBuffer.data();
    *outSize = (uint32_t)m_outputBuffer.size();
    return (int)m_outputBuffer.size();
}
|
|
|
|
// Output callback registered with the compression session. The refcon passed
// at session creation is the owning H264Encoder; a successfully encoded,
// non-null sample buffer is forwarded to its processSampleBuffer(). A failing
// status is logged and the frame is dropped; a null sample buffer is ignored.
void H264Encoder::compressionCallback(void* outputCallbackRefCon,
                                      void* sourceFrameRefCon,
                                      OSStatus status,
                                      VTEncodeInfoFlags infoFlags,
                                      CMSampleBufferRef sampleBuffer)
{
    // Neither the per-frame refcon nor the info flags are used.
    (void)sourceFrameRefCon;
    (void)infoFlags;

    if (status != noErr) {
        NSLog(@"H264Encoder: Compression callback error: %d", (int)status);
        return;
    }

    if (sampleBuffer == nullptr) {
        return;
    }

    H264Encoder* self = static_cast<H264Encoder*>(outputCallbackRefCon);
    self->processSampleBuffer(sampleBuffer);
}
|
|
|
|
// Converts one encoded sample buffer into an Annex B byte stream stored in
// m_outputBuffer (replacing its previous contents, under m_outputMutex).
//
// Layout of the produced stream:
//   - for keyframes, every parameter set of the format description (SPS,
//     PPS, ...) each prefixed with the 4-byte start code 00 00 00 01;
//   - every NAL unit of the sample, with its length prefix replaced by the
//     same start code.
//
// If the sample's payload cannot be read, m_outputBuffer is left empty so the
// caller never sees a stream consisting of parameter sets only.
//
// @param sampleBuffer  Encoded sample delivered by the compression session.
void H264Encoder::processSampleBuffer(CMSampleBufferRef sampleBuffer)
{
    static const uint8_t kStartCode[4] = {0x00, 0x00, 0x00, 0x01};

    // A sample is a keyframe when its first attachment dictionary either has
    // no NotSync entry or has it set to false.
    bool isKeyframe = false;
    CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, false);
    if (attachments && CFArrayGetCount(attachments) > 0) {
        CFDictionaryRef dict = (CFDictionaryRef)CFArrayGetValueAtIndex(attachments, 0);
        CFBooleanRef notSync = (CFBooleanRef)CFDictionaryGetValue(dict,
                                                                  kCMSampleAttachmentKey_NotSync);
        isKeyframe = (notSync == nullptr || !CFBooleanGetValue(notSync));
    }

    std::lock_guard<std::mutex> lock(m_outputMutex);
    m_outputBuffer.clear();

    // The format description carries the parameter sets and the size of the
    // length prefix in front of each NAL unit (usually 4 bytes).
    CMFormatDescriptionRef formatDesc = CMSampleBufferGetFormatDescription(sampleBuffer);

    size_t nalLengthSize = 4;
    size_t parameterSetCount = 0;
    if (formatDesc) {
        int reportedLengthSize = 0;
        OSStatus infoStatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
            formatDesc, 0, nullptr, nullptr, &parameterSetCount, &reportedLengthSize);
        if (infoStatus != noErr) {
            parameterSetCount = 0;
        } else if (reportedLengthSize > 0 && reportedLengthSize <= 4) {
            nalLengthSize = (size_t)reportedLengthSize;
        }
    }

    // If keyframe, prepend every parameter set (SPS, PPS, ...) in index order.
    if (isKeyframe) {
        for (size_t index = 0; index < parameterSetCount; index++) {
            const uint8_t* parameterSet = nullptr;
            size_t parameterSetSize = 0;
            OSStatus setStatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
                formatDesc, index, &parameterSet, &parameterSetSize, nullptr, nullptr);

            if (setStatus == noErr && parameterSet && parameterSetSize > 0) {
                m_outputBuffer.insert(m_outputBuffer.end(), kStartCode, kStartCode + 4);
                m_outputBuffer.insert(m_outputBuffer.end(),
                                      parameterSet, parameterSet + parameterSetSize);
            }
        }
    }

    // Get encoded data
    CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    if (!blockBuffer) {
        m_outputBuffer.clear();
        return;
    }

    size_t totalLength = 0;
    size_t lengthAtOffset = 0;
    char* dataPointer = nullptr;

    OSStatus status = CMBlockBufferGetDataPointer(
        blockBuffer, 0, &lengthAtOffset, &totalLength, &dataPointer);

    if (status != noErr || !dataPointer) {
        m_outputBuffer.clear();
        return;
    }

    // The returned pointer is only valid for lengthAtOffset bytes. When the
    // block buffer is not contiguous that is less than totalLength, so the
    // payload is flattened into a local copy before it is parsed.
    const uint8_t* payload = (const uint8_t*)dataPointer;
    std::vector<uint8_t> flattened;
    if (lengthAtOffset < totalLength) {
        flattened.resize(totalLength);
        status = CMBlockBufferCopyDataBytes(blockBuffer, 0, totalLength, flattened.data());
        if (status != noErr) {
            m_outputBuffer.clear();
            return;
        }
        payload = flattened.data();
    }

    // Convert AVCC format (length-prefixed) to Annex B (start code prefixed)
    size_t offset = 0;
    while (offset + nalLengthSize <= totalLength) {
        // Read NAL unit length (big-endian, variable size)
        uint32_t nalLength = 0;
        for (size_t i = 0; i < nalLengthSize; i++) {
            nalLength = (nalLength << 8) | payload[offset + i];
        }
        offset += nalLengthSize;

        // A length that runs past the end of the payload means the stream is
        // truncated or corrupt; stop rather than read out of bounds.
        if (nalLength > totalLength - offset) {
            break;
        }

        if (nalLength > 0) {
            m_outputBuffer.insert(m_outputBuffer.end(), kStartCode, kStartCode + 4);
            m_outputBuffer.insert(m_outputBuffer.end(),
                                  payload + offset,
                                  payload + offset + nalLength);
        }

        offset += nalLength;
    }
}
|