Fix(client): harden TCP heartbeat against half-dead connections
This commit is contained in:
@@ -47,6 +47,13 @@ CONNECT_ADDRESS g_SETTINGS = { FLAG_GHOST, "91.99.165.207", "443", CLIENT_TYPE_L
|
||||
State g_bExit = S_CLIENT_NORMAL; // the heartbeat loop in main() keeps running only while this equals S_CLIENT_NORMAL
|
||||
static std::atomic<bool> g_needResendLogin(false); // set when the login info must be re-sent after a group change
|
||||
|
||||
// Wall-clock timestamp (ms) of the last received HeartbeatACK; 0 means no value has been seeded yet for a new connection.
|
||||
// The heartbeat loop uses it to detect application-level timeouts: on a half-dead connection send() does not
|
||||
// fail promptly (the data is queued in SNDBUF and the call returns success), so link death has to be inferred
|
||||
// from missing ACKs. Wall-clock rather than monotonic time is used on purpose: per the original author's note,
|
||||
// system_clock keeps advancing while a VM/laptop is suspended, so "no ACK for minutes" is recognized right after
|
||||
// resume; this is the key complement to the kernel-level TCP_USER_TIMEOUT.
|
||||
static std::atomic<uint64_t> g_lastHeartbeatAckMs(0);
|
||||
|
||||
// Client ID (needed by V2 file transfer)
|
||||
uint64_t g_myClientID = 0;
|
||||
|
||||
@@ -390,6 +397,7 @@ int DataProcess(void* user, PBYTE szBuffer, ULONG ulLength)
|
||||
if (ulLength >= 1 + sizeof(HeartbeatACK)) {
|
||||
HeartbeatACK* ack = (HeartbeatACK*)(szBuffer + 1);
|
||||
uint64_t now = GetUnixMs();
|
||||
g_lastHeartbeatAckMs.store(now, std::memory_order_relaxed); // 喂应用层 ACK 看门狗
|
||||
double rtt_ms = (double)(now - ack->Time);
|
||||
g_rttEstimator.update_from_sample(rtt_ms);
|
||||
// 心跳节奏太密日志会刷屏;最多 60s 一行
|
||||
@@ -966,6 +974,9 @@ int main(int argc, char* argv[])
|
||||
ClientAuth::OnNewConnection();
|
||||
ClientObject->SendLoginInfo(logInfo.Speed(clock() - c));
|
||||
|
||||
// 新连接:把 ACK 看门狗喂到当前时间,避免循环刚进来就被误判为超时
|
||||
g_lastHeartbeatAckMs.store(GetUnixMs(), std::memory_order_relaxed);
|
||||
|
||||
// 心跳保活循环:定时发送心跳包,服务端回复后动态更新 RTT
|
||||
while (ClientObject->IsRunning() && ClientObject->IsConnected() && S_CLIENT_NORMAL == g_bExit) {
|
||||
// 检查是否需要重发登录信息(分组变更后)
|
||||
@@ -1000,6 +1011,28 @@ int main(int argc, char* argv[])
|
||||
break;
|
||||
}
|
||||
|
||||
// 应用层 ACK 看门狗:超过 max(60s, interval*3) 没收到 HeartbeatACK 就
|
||||
// 主动断开走重连。专治 TCP send() 在半死连接下永远返回成功的盲区——
|
||||
// VM 挂起恢复 / 笔记本合盖唤醒 / NAT 表项老化等场景,对端早已不在,
|
||||
// 但本端 send() 仍把字节塞进 SNDBUF,IsConnected() 一直为真。
|
||||
// 与服务端 CheckHeartbeat 超时(2015RemoteDlg.cpp 的 max(60, ReportInterval*3))
|
||||
// 对齐:服务端删 host 时本端也能感知到,立即重连而不是等数据卡 ~15 分钟。
|
||||
// 这一层不依赖 TCP_USER_TIMEOUT,跨平台必备。
|
||||
{
|
||||
int ackTimeoutSec = (interval * 3 > 60) ? interval * 3 : 60;
|
||||
const uint64_t ackTimeoutMs = (uint64_t)ackTimeoutSec * 1000ULL;
|
||||
uint64_t lastAck = g_lastHeartbeatAckMs.load(std::memory_order_relaxed);
|
||||
uint64_t nowMs = GetUnixMs();
|
||||
if (lastAck > 0 && nowMs > lastAck && nowMs - lastAck > ackTimeoutMs) {
|
||||
Mprintf(">>> Heartbeat ACK timeout: %llu ms since last ACK "
|
||||
"(threshold=%llu ms), reconnecting\n",
|
||||
(unsigned long long)(nowMs - lastAck),
|
||||
(unsigned long long)ackTimeoutMs);
|
||||
ClientObject->Disconnect();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 构造并发送心跳包(与 Windows 端 KernelManager::SendHeartbeat 格式一致)
|
||||
// ActiveWnd 直接发 UTF-8——与 LOGIN_INFOR.moduleVersion 中声明的
|
||||
// CLIENT_CAP_UTF8 一致;服务端按 cap 位用 CP_UTF8 解码。早期为兼容
|
||||
|
||||
Reference in New Issue
Block a user