新增年度关键词
This commit is contained in:
@@ -3,6 +3,8 @@ from collections import Counter
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import jieba
|
||||
|
||||
from app.DataBase import msg_db, MsgType
|
||||
from pyecharts import options as opts
|
||||
from pyecharts.charts import WordCloud, Calendar, Bar, Line, Pie
|
||||
@@ -64,6 +66,46 @@ def wordcloud_(wxid, time_range=None):
|
||||
}
|
||||
|
||||
|
||||
def _read_stopwords(path):
    """Return the set of lines contained in the UTF-8 stop-word file *path*."""
    with open(path, "r", encoding="utf-8") as stopword_file:
        return set(stopword_file.read().splitlines())


def get_wordcloud(text):
    """Build a word-cloud chart for *text* and report its most frequent word.

    The text is segmented with jieba, tokens that are a single character long
    or listed in either stop-word file are dropped, and the (at most) 100
    most frequent remaining words are fed to a pyecharts ``WordCloud``.

    Args:
        text: The string to analyse.

    Returns:
        A dict with three keys:
        ``chart_data_wordcloud`` -- the chart options as produced by
        ``WordCloud.dump_options_with_quotes()``;
        ``keyword`` -- the most frequent word, or ``''`` when no word
        survives filtering;
        ``keyword_max_num`` -- how often that word occurs, or ``0`` when no
        word survives filtering.

    Raises:
        OSError: If a stop-word file cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
    """
    # Segment the text with jieba and count how often each token occurs.
    word_count = Counter(jieba.cut(text))

    # Stop words come from two files whose contents are merged.
    stopwords1 = _read_stopwords('./app/data/stopwords.txt')
    # Locate the second stop-word list: prefer the path relative to the
    # working directory, otherwise fall back to sys._MEIPASS (presumably the
    # PyInstaller bundle directory -- confirm) or this module's directory.
    stopwords_file = './app/resources/data/stopwords.txt'
    if not os.path.exists(stopwords_file):
        resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
        stopwords_file = os.path.join(resource_dir, 'app', 'resources', 'data', 'stopwords.txt')
    stopwords = _read_stopwords(stopwords_file).union(stopwords1)

    # Keep only multi-character tokens that are not stop words, as
    # (word, count) pairs ordered from most to least frequent.
    data = [
        (word, count)
        for word, count in word_count.items()
        if len(word) > 1 and word not in stopwords
    ]
    data.sort(key=lambda x: x[1], reverse=True)
    # The chart shows at most the 100 most frequent words.
    text_data = data[:100]

    # An empty text (or one made only of stop words / single characters)
    # leaves nothing to rank; report an empty keyword instead of raising
    # IndexError on text_data[0].
    if text_data:
        keyword, max_num = text_data[0]
    else:
        keyword, max_num = '', 0

    # Build the word-cloud chart.
    w = (
        WordCloud()
        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[5, 40])
    )
    return {
        'chart_data_wordcloud': w.dump_options_with_quotes(),
        'keyword': keyword,
        'keyword_max_num': max_num,
    }
|
||||
|
||||
|
||||
def wordcloud_christmas(wxid, year='2023'):
|
||||
import jieba
|
||||
txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year)
|
||||
@@ -375,6 +417,8 @@ def my_message_counter(time_range, my_name=''):
|
||||
types_count = {}
|
||||
send_num = 0 # 发送消息的数量
|
||||
weekday_count = {}
|
||||
str_content = ''
|
||||
total_text_num = 0
|
||||
for message in msg_data:
|
||||
type_ = message[2]
|
||||
is_sender = message[4]
|
||||
@@ -393,6 +437,10 @@ def my_message_counter(time_range, my_name=''):
|
||||
weekday_count[weekday] += 1
|
||||
else:
|
||||
weekday_count[weekday] = 1
|
||||
if type_ == 1:
|
||||
total_text_num += len(message[7])
|
||||
if is_sender == 1:
|
||||
str_content += message[7]
|
||||
receive_num = len(msg_data) - send_num
|
||||
data = [[types_.get(key), value] for key, value in types_count.items() if key in types_]
|
||||
if not data:
|
||||
@@ -418,7 +466,7 @@ def my_message_counter(time_range, my_name=''):
|
||||
Pie()
|
||||
.add(
|
||||
"",
|
||||
[['发送', send_num], ['接收',receive_num ]],
|
||||
[['发送', send_num], ['接收', receive_num]],
|
||||
center=["40%", "50%"],
|
||||
)
|
||||
.set_global_opts(
|
||||
@@ -428,9 +476,14 @@ def my_message_counter(time_range, my_name=''):
|
||||
.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}\n{d}%", position='inside'))
|
||||
# .render("./data/聊天统计/pie_scroll_legend.html")
|
||||
)
|
||||
w = get_wordcloud(str_content)
|
||||
return {
|
||||
'chart_data_sender': p2.dump_options_with_quotes(),
|
||||
'chart_data_types': p1.dump_options_with_quotes(),
|
||||
'chart_data_wordcloud': w.get('chart_data_wordcloud'),
|
||||
'keyword': w.get('keyword'),
|
||||
'keyword_max_num': w.get('keyword_max_num'),
|
||||
'total_text_num':total_text_num,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user