1.搜索

搜索:new WebSocket

image-20240517205651868

image-20240517205701776

image-20240517205711634

2.PushFrame

image-20240517205721870

image-20240517205730424

image-20240517205738416

image-20240517205745105

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// A single key/value header entry. PushFrame carries a repeated list of
// these in its headersList field.
message HeadersList {
  string key = 1;    // header name
  string value = 2;  // header value
}

// Outer envelope message. The parsing script later in this document decodes
// the raw bytes as a PushFrame, gzip-decompresses `payload`, and parses the
// result as a Response message.
//
// NOTE(review): the consolidated douyin.proto later in this document spells
// the first two fields `seqId` / `logId`. The wire format only depends on the
// field numbers, but the generated accessor names differ - confirm which
// spelling is intended.
message PushFrame {
  uint64 seqid = 1;
  uint64 logid = 2;
  uint64 service = 3;
  uint64 method = 4;
  repeated HeadersList headersList = 5;  // zero or more key/value headers
  string payloadEncoding = 6;
  string payloadType = 7;
  bytes payload = 8;                     // opaque body; gzip data in the sample
}

3.Response

image-20240517205753812

image-20240517205802779

image-20240517205811389

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// One entry of Response.messagesList. The parsing script in this document
// compares `method` against a name such as "WebcastChatMessage" to decide
// which message type `payload` should be decoded as.
message Message {
  string method = 1;       // selects the type used to decode `payload`
  bytes payload = 2;       // serialized inner message
  int64 msgId = 3;
  int32 msgType = 4;
  int64 offset = 5;
  bool needWrdsStore = 6;
  int64 wrdsVersion = 7;
  string wrdsSubKey = 8;
}

// Message obtained by gzip-decompressing PushFrame.payload (see the parsing
// script in this document). It bundles a batch of Message entries together
// with cursor / timing / acknowledgement fields.
message Response {
  repeated Message messagesList = 1;    // batch of inner messages
  string cursor = 2;
  uint64 fetchInterval = 3;
  uint64 now = 4;
  string internalExt = 5;
  uint32 fetchType = 6;
  map<string, string> routeParams = 7;
  uint64 heartbeatDuration = 8;
  bool needAck = 9;
  string pushServer = 10;
  string liveCursor = 11;
  bool historyNoMore = 12;
}

4.消息处理

image-20240517205834384

image-20240517205842342

image-20240517205852013

image-20240517205903364

image-20240517205912346

4.1 MemberMessage

image-20240517205920594

4.2 ChatMessage

image-20240517205928178

image-20240517205936578

image-20240517205947696

5.ACK心跳

image-20240517205955362

payload是什么?

image-20240517210003216

image-20240517210010433

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Encode a string as UTF-8 and return the resulting bytes.
 *
 * The string is walked by code point (`for...of`), and each code point is
 * turned into its 1-, 2-, 3- or 4-byte UTF-8 sequence. The previous version
 * read `charCodeAt(0)` of each code point, which for characters above U+FFFF
 * returns only the high surrogate: such characters were emitted as a single
 * invalid 3-byte sequence and the low surrogate was lost. Output for strings
 * that contain only BMP characters (including plain ASCII) is unchanged.
 *
 * @param {string} e - Text to encode.
 * @returns {Uint8Array} UTF-8 bytes of `e`.
 */
function getPayLoad(e) {
  const bytes = [];
  for (const ch of e) {
    // codePointAt(0) yields the full code point, including astral characters
    // that are stored as a surrogate pair.
    const cp = ch.codePointAt(0);
    if (cp < 128) {
      // 1 byte: 0xxxxxxx
      bytes.push(cp);
    } else if (cp < 2048) {
      // 2 bytes: 110xxxxx 10xxxxxx
      bytes.push(192 + (cp >> 6), 128 + (cp & 63));
    } else if (cp < 65536) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      bytes.push(224 + (cp >> 12), 128 + ((cp >> 6) & 63), 128 + (cp & 63));
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      bytes.push(
        240 + (cp >> 18),
        128 + ((cp >> 12) & 63),
        128 + ((cp >> 6) & 63),
        128 + (cp & 63)
      );
    }
  }
  return Uint8Array.from(bytes);
}

// Sample input for getPayLoad: eight "name:value" fields joined with "|".
const arg = [
  "internal_src:pushserver",
  "wss_push_room_id:7181868093256338235",
  "wss_push_did:7181865126873220619",
  "wss_push_log_id:8166207734905913069",
  "wss_fetch_ms:1672159517145",
  "wss_push_ms:1672159517209",
  "wss_msg_type:r",
  "wrds_kvs:RoomLinkMicAnchorSettingsSyncData-1672159139787371174_RoomLinkMicSyncData-1672159515222677464_WebcastRoomRankMessage-1672159367938299538_WebcastRoomStatsMessage-1672159511846110620",
].join("|");

// Encode the sample to bytes and print the resulting Uint8Array.
const payload = getPayLoad(arg);
console.log(payload);

image-20240517210021079

image-20240517210028149

6.结构示例(简化)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
syntax = "proto3";

package douyin;

// Key/value header pair; used as the element type of PushFrame.headersList.
message HeadersList {
  string key = 1;    // header name
  string value = 2;  // header value
}

// Outermost message: the parsing script in this document decodes the raw
// bytes as a PushFrame first, then gzip-decompresses `payload` and parses
// the result as a Response.
message PushFrame {
  uint64 seqId = 1;
  uint64 logId = 2;
  uint64 service = 3;
  uint64 method = 4;
  repeated HeadersList headersList = 5;  // zero or more key/value headers
  string payloadEncoding = 6;
  string payloadType = 7;
  bytes payload = 8;                     // opaque body; gzip data in the sample
}


// Element type of Response.messagesList. The parsing script checks `method`
// (for example "WebcastChatMessage") to pick the message type used to
// decode `payload`.
message Message {
  string method = 1;       // selects the type used to decode `payload`
  bytes payload = 2;       // serialized inner message
  int64 msgId = 3;
  int32 msgType = 4;
  int64 offset = 5;
  bool needWrdsStore = 6;
  int64 wrdsVersion = 7;
  string wrdsSubKey = 8;
}

// Decoded form of the gzip-decompressed PushFrame.payload: a batch of
// Message entries plus cursor / timing / acknowledgement fields.
message Response {
  repeated Message messagesList = 1;    // batch of inner messages
  string cursor = 2;
  uint64 fetchInterval = 3;
  uint64 now = 4;
  string internalExt = 5;
  uint32 fetchType = 6;
  map<string, string> routeParams = 7;
  uint64 heartbeatDuration = 8;
  bool needAck = 9;
  string pushServer = 10;
  string liveCursor = 11;
  bool historyNoMore = 12;
}


// Inner message decoded from Message.payload when Message.method is
// "WebcastChatMessage". Field number 1 is not declared in this simplified
// schema.
message ChatMessage {
  User user = 2;             // sender of the chat line
  string content = 3;        // chat text
  bool visibleToSender = 4;
}


// User record embedded in ChatMessage.user. Only fields 1-8 and 14 are
// declared in this simplified schema; field numbers 9-13 are not declared.
message User {
  uint64 id = 1;
  uint64 shortId = 2;
  string nickName = 3;   // display name; printed by the parsing script
  uint32 gender = 4;
  string Signature = 5;
  uint32 Level = 6;
  uint64 Birthday = 7;
  string Telephone = 8;
  string city = 14;
}

7.解析抖音数据

1
protoc --python_out=. douyin.proto

image-20240517210038536

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import gzip
import binascii
from douyin_pb2 import PushFrame, Response, ChatMessage

# WebcastChatMessage
hexStr = "08bf0510e292e6bcb1958e8d7918b84520082a150a0d636f6d70726573735f747970651204677a69702ae2020a0f696d2d696e7465726e616c5f65787412ce02696e7465726e616c5f7372633a707573687365727665727c7773735f707573685f726f6f6d5f69643a373138313832333430383336323031353534307c7773735f707573685f6469643a373134303435393934333735363330313835347c7773735f707573685f6c6f675f69643a383732363334393533353437303235383533307c7773735f66657463685f6d733a313637323135373038313834317c7773735f707573685f6d733a313637323135373038313932307c7773735f6d73675f747970653a727c777264735f6b76733a41756469656e63654769667453796e63446174612d313637323135373037393430393233333936385f57656263617374526f6f6d52616e6b4d6573736167652d313637323135373037373039303839333937345f57656263617374526f6f6d53746174734d6573736167652d313637323135373037373035383332383131312a440a09696d2d637572736f721237742d313637323135373038313932305f722d373138313835393937363636393836353731325f642d315f752d315f682d315f7264632d322a170a06696d2d6e6f77120d313637323135373038313932302a190a0e696d2d6c6976655f637572736f721207642d315f752d31320270623a036d736742de081f8b08000000000000ffe4565d685b551cdf49ba255eb659af0f86f96058df0649ceb9e77e06fc48d30f1bfbb1b5daba821c6fee479236c9bddc7b9336a588b859566bd7e183639b03ad8c49b56c9622b59b43441f36410652f455489ab688b03e8a0fd2366bb611d1aa45d1fb7239e7ff3bbfdff9fffe7f0e7feaf54729ba4f8b2bb2ed4493b2d3a1d9b69cd0e8224d31b502f58b1f2c9d995cff6a49f1cdbf7d62e1c75b9f2d2910bc38b70e3e072b5efabd47bcdf8ddf1ebbfee62550fffd9d93cb3f8343079617e69767af95cf9f5cfdf87ad00f62a3d448d2714c3b1c0a990c1f548d5c219535534a503132217948cb682104e13082706b1590f3b2235b21c7b0034a3620e79d00848823a2a8aa2a62b02a4930ceb02c5445468deb2c94385dd7242538606a89a775cbc83c89212ff002c2989d0123d40b1bf21bea3830b495e00397486512a14a2494b3358b242c59d5485acb6b6992e70862836636f18a63a6f301233e104c65e484e6abf7fb21687cdc77f0f0632b93efafcc8c9767c7cb8b97115b5ebc5c9a98287ff8ee0c3805a8ee9da8eb72d656d2b9f85d6d9e7035a4a1a711520fad4e9f2ddf385d3e3d4e7b1a5a363fdfdec3beedddf2dc7871e162e9c2f4d65de680c77bc2557f
c00716c197001e3c0f5eda5563a601a1fa76c2bf597b62ca858a08334c10fc0d85fdfefd107c01bef55045f73d46d4eff581c337ddd41af0d6d12540f5fe45ebc970c6ac215fe5e5ff565ebae10f9ce9de47d5edd9f314dd5ff953df00af8bbeb92b7d56e5fc9389d64af289dfc1d3d57ac29fc65e9d2a4efeb28f5e757b015db3d6ffb5f4ff57cd5babd6afb9f6ccb93ccff7238421bfee123a7ad8a1f6814863241289b4b546f91eb539cd633ddee934a128968ff73101fd98dedb859d967e22c6da638d562ca10e5e75bbd7dcffe0dbbfe6fef7bcfd6bee7776ad9f20def97d1e6c83337560baeefeb9e1565deb56e1bbb826a68b353bd17022ab1e6d563b8eb5750826db36d81b159f8b75377614743dd73b820b46a7291e4f9b9ade251acd4e7b53dbd1f6be162da91cf297cfdd289d1a2b7e3a816069ea4af1daa5f285f9e5b7664b936f94ae4e2d9ffd2416a75ede79a3a4e5b896269696c8a5658b2849d9a9e1ce114f43341ac1113c7ceeca4717f7f9b6a7293ae304102f308813a0884416112b202011899c24093ccf4b22cf098821eae6ae200a10618424816131c42457c1321287788ee3781e319024ef32f01009104b10320c2496aa0418df8ac77fe787dbd34bf0c8d7ae54d6d1acac9c26b6a584cd9c9db4352baf59a343b64d3696c4328c0c49a9e14d3a06b350c43c0311c7b1b00a52536a18b13c2b8918439663b108b96a346d2436184481e1312b71986305c8708284d12646d71c25493276f83e0faac76b463276823805530b5ba343966a93c1bc1d8ee4d4949655b4d694eef414b24a93ecc8dbc60a120b25066389174965b4ed368c4cb79c1dac8cb755a80025284a5812d87ba13d8eecd835b09c88191121f4ccc3cf825f010000ffff010000ffffa1814e1c530b0000"

# Step 1: convert the hexadecimal string back into the raw bytes.
frame_bytes = binascii.unhexlify(hexStr)

# Step 2: build a PushFrame object from the raw bytes.
frame = PushFrame()
frame.ParseFromString(frame_bytes)

# Step 3: gzip-decompress the payload carried by the PushFrame.
response_bytes = gzip.decompress(frame.payload)

# Step 4: build a Response object from the decompressed bytes.
response = Response()
response.ParseFromString(response_bytes)

# Step 5: read the message contents. Each entry must be decoded with the
# message type that matches its `method`.
# Note: only "WebcastChatMessage" is handled here; other methods can be
# handled the same way with their own message types.
for item in response.messagesList:
    # Skip every entry that is not a chat message.
    if item.method != "WebcastChatMessage":
        continue

    # Decode the entry's payload as a ChatMessage and print "【nickname】content".
    message = ChatMessage()
    message.ParseFromString(item.payload)
    info = f"【{message.user.nickName}】{message.content}"
    print(info)

__END__