适配微信4.0.3正式版,修复表情包和合并转发的聊天记录解析失败的问题
This commit is contained in:
@@ -26,7 +26,7 @@ def parser_emoji(xml_content):
|
||||
'height': 0,
|
||||
'desc': ''
|
||||
}
|
||||
xml_content = xml_content.strip()
|
||||
xml_content = xml_content.strip().replace('&', '&')
|
||||
try:
|
||||
xml_dict = xmltodict.parse(xml_content)
|
||||
emoji_dic = xml_dict.get('msg', {}).get('emoji', {})
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
@Description :
|
||||
"""
|
||||
import html
|
||||
import re
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
import xml.etree.ElementTree as ET
|
||||
@@ -206,13 +207,27 @@ def parser_business(xml_content):
|
||||
return result
|
||||
|
||||
|
||||
def replace_entity(match):
    """Return the replacement text for one numeric character reference.

    ``match`` is a regex match whose group 1 holds the reference body:
    decimal digits, or ``x`` followed by hexadecimal digits.

    References to characters that XML 1.0 permits are returned unchanged so
    the text they encode is preserved. References to forbidden code points
    (NUL, most C0 control characters, surrogates, U+FFFE/U+FFFF, values above
    U+10FFFF) and malformed bodies are replaced with an empty string, because
    those are the ones an XML parser refuses to accept.
    """
    token = match.group(1)
    # ``\d`` in the caller's pattern also matches non-ASCII digits, which are
    # not valid inside an XML character reference.
    if not re.fullmatch(r'x[0-9a-fA-F]+|[0-9]+', token):
        return ''
    try:
        code_point = int(token[1:], 16) if token.startswith('x') else int(token)
    except ValueError:
        # Extremely long digit runs can exceed Python's int conversion limit.
        return ''
    # The ``Char`` production of XML 1.0.
    is_legal = (
        code_point in (0x9, 0xA, 0xD)
        or 0x20 <= code_point <= 0xD7FF
        or 0xE000 <= code_point <= 0xFFFD
        or 0x10000 <= code_point <= 0x10FFFF
    )
    return match.group(0) if is_legal else ''


def process_xml(xml_string):
    """Remove numeric character references that make an XML string unparsable.

    Both decimal (``&#0;``) and hexadecimal (``&#x1;``) references are
    inspected; only those naming characters forbidden by XML 1.0 are removed,
    so legitimate escaped text is kept.

    :param xml_string: raw XML text.
    :return: the text with illegal numeric character references stripped.
    """
    return re.sub(r'&#(x[0-9a-fA-F]+|\d+);', replace_entity, xml_string)
|
||||
|
||||
|
||||
def parser_record_item(recorditem, output_dir, wxid, msg_time, level=0):
|
||||
xml_string = recorditem
|
||||
if isinstance(xml_string, dict):
|
||||
recorditem_dic = xml_string
|
||||
else:
|
||||
recorditem_dic = xmltodict.parse(xml_string)
|
||||
|
||||
try:
|
||||
recorditem_dic = xmltodict.parse(xml_string)
|
||||
except:
|
||||
xml_string = process_xml(xml_string)
|
||||
recorditem_dic = xmltodict.parse(xml_string)
|
||||
# logger.error(recorditem_dic)
|
||||
datalist = recorditem_dic.get('recordinfo', {}).get('datalist', {})
|
||||
count = datalist.get('@count', 0)
|
||||
@@ -522,7 +537,7 @@ def parser_record_item(recorditem, output_dir, wxid, msg_time, level=0):
|
||||
return result
|
||||
|
||||
|
||||
def parser_merged_messages(xml, output_dir, wxid, msg_time, level=0):
|
||||
def parser_merged_messages(xml: str, output_dir, wxid, msg_time, level=0):
|
||||
try:
|
||||
try:
|
||||
data_dic = xmltodict.parse(xml).get('msg', {})
|
||||
@@ -543,8 +558,8 @@ def parser_merged_messages(xml, output_dir, wxid, msg_time, level=0):
|
||||
}
|
||||
except:
|
||||
logger.error(xml)
|
||||
logger.error(new_xml1)
|
||||
logger.error(new_xml2)
|
||||
# logger.error(new_xml1)
|
||||
# logger.error(new_xml2)
|
||||
logger.error(traceback.format_exc())
|
||||
# raise ValueError('合并转发的消息解析失败')
|
||||
return {
|
||||
|
||||
@@ -3,9 +3,11 @@ syntax = "proto3";
|
||||
// Packed per-message info; at most the sub-message matching the message
// type is expected to be populated (NOTE(review): confirm against the code
// that decodes this message).
// Each field is declared exactly once: proto3 rejects duplicate field names
// and duplicate field numbers within a message.
message PackedInfoDataImg2 {
  int32 field1 = 1;
  int32 field2 = 2;
  ImageInfo imageInfo = 3;  // image
  VideoInfo videoInfo = 4;  // video
  AudioInfo audioInfo = 5;  // voice message
  FileInfo fileInfo = 7;    // file
  MergeInfo mergeInfo = 9;  // merged-forward chat records
}
|
||||
|
||||
message ImageInfo {
|
||||
@@ -35,4 +37,13 @@ message FileSubMessage2 {
|
||||
string field1 = 1;
|
||||
string field2 = 2;
|
||||
string field3 = 3;
|
||||
}
|
||||
|
||||
// Payload carried in PackedInfoDataImg2.mergeInfo (merged-forward chat records).
message MergeInfo {
|
||||
// NOTE(review): presumably a directory name/path holding the record's resources — confirm against the caller.
string dir = 1;
|
||||
}
|
||||
|
||||
// Payload carried in PackedInfoDataImg2.audioInfo (voice messages).
message AudioInfo {
|
||||
// Meaning not evident from the schema.
uint32 field1 = 1;
|
||||
string audioTxt = 2; // speech-to-text result for the voice message
|
||||
}
|
||||
@@ -190,7 +190,7 @@ class Singleton:
|
||||
self.contacts[wxid] = manager.get_contact_by_username(wxid)
|
||||
if isinstance(message[12], bytes):
|
||||
message_content = decompress(message[12])
|
||||
message_content = message_content.replace('', '').replace(' ', ' ')
|
||||
message_content = message_content.replace('', '').replace(' ', ' ')
|
||||
# logger.error(message_content)
|
||||
else:
|
||||
message_content = message[12]
|
||||
@@ -198,7 +198,9 @@ class Singleton:
|
||||
2] != MessageType.Pat:
|
||||
# 群聊文字消息格式:<wxid>:<content>
|
||||
message_content = ':'.join(message_content.split(':')[1:]).strip()
|
||||
|
||||
if message_content and message_content.startswith(username):
|
||||
# md 微信不知道在搞什么,弄一些乱七八糟的东西 4.0.3.22
|
||||
message_content = message_content.strip(f'{username}:').replace('<?xml version="1.0"?>', '')
|
||||
return is_sender, wxid, message_content
|
||||
|
||||
|
||||
@@ -876,7 +878,7 @@ class FileMessageFactory(MessageFactory, Singleton):
|
||||
is_sender, wxid, message_content = self.common_attribute(message, username, manager)
|
||||
info = parser_file(message_content)
|
||||
md5 = info.get('md5', '')
|
||||
filename = info.get('filename','')
|
||||
filename = info.get('filename', '')
|
||||
if not filename:
|
||||
try:
|
||||
# 2025年3月微信4.0.3正式版修改了img命名方式才有了这个东西
|
||||
|
||||
Reference in New Issue
Block a user