新增年度关键词

This commit is contained in:
SiYuan
2024-02-09 15:23:13 +08:00
parent 85cff9aaea
commit 163b2ef269
3 changed files with 123 additions and 12 deletions

View File

@@ -3,6 +3,8 @@ from collections import Counter
import sys
from datetime import datetime
import jieba
from app.DataBase import msg_db, MsgType
from pyecharts import options as opts
from pyecharts.charts import WordCloud, Calendar, Bar, Line, Pie
@@ -64,6 +66,46 @@ def wordcloud_(wxid, time_range=None):
}
def get_wordcloud(text):
    """Build word-cloud chart options and the top keyword for a chat text.

    The text is segmented with jieba, tokens are counted, single-character
    tokens and stop-words are dropped, and the 100 most frequent remaining
    words are fed into a pyecharts ``WordCloud``.

    Args:
        text: The concatenated message text to analyse (may be empty).

    Returns:
        A dict with three keys:
        ``chart_data_wordcloud`` - the chart options dumped by
        ``dump_options_with_quotes()``;
        ``keyword`` - the most frequent word, or ``''`` when no word
        survives filtering;
        ``keyword_max_num`` - that word's count, or ``0`` when there is none.

    Raises:
        OSError: If a stop-word file cannot be opened.
    """
    # Segment the text with jieba and count every token.
    word_count = Counter(jieba.cut(text))

    # First stop-word list, resolved relative to the working directory.
    with open('./app/data/stopwords.txt', "r", encoding="utf-8") as stopword_file:
        stopwords = set(stopword_file.read().splitlines())

    # Second stop-word list. When the relative path does not exist, fall back
    # to a path under sys._MEIPASS (the attribute PyInstaller sets in frozen
    # builds) or, failing that, under this module's own directory.
    stopwords_file = './app/resources/data/stopwords.txt'
    if not os.path.exists(stopwords_file):
        resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
        stopwords_file = os.path.join(resource_dir, 'app', 'resources', 'data', 'stopwords.txt')
    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
        stopwords.update(stopword_file.read().splitlines())

    # Keep only multi-character words that are not stop-words.
    candidates = [
        (word, count)
        for word, count in word_count.items()
        if len(word) > 1 and word not in stopwords
    ]

    # Most frequent first; the sort is stable, so ties keep first-seen order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    text_data = candidates[:100]

    # An empty or fully filtered text yields no words; report a neutral
    # keyword instead of raising IndexError on text_data[0].
    keyword, max_num = text_data[0] if text_data else ('', 0)

    # Build the word-cloud chart.
    w = (
        WordCloud()
        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[5, 40])
    )
    return {
        'chart_data_wordcloud': w.dump_options_with_quotes(),
        'keyword': keyword,
        'keyword_max_num': max_num,
    }
def wordcloud_christmas(wxid, year='2023'):
import jieba
txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year)
@@ -375,6 +417,8 @@ def my_message_counter(time_range, my_name=''):
types_count = {}
send_num = 0 # 发送消息的数量
weekday_count = {}
str_content = ''
total_text_num = 0
for message in msg_data:
type_ = message[2]
is_sender = message[4]
@@ -393,6 +437,10 @@ def my_message_counter(time_range, my_name=''):
weekday_count[weekday] += 1
else:
weekday_count[weekday] = 1
if type_ == 1:
total_text_num += len(message[7])
if is_sender == 1:
str_content += message[7]
receive_num = len(msg_data) - send_num
data = [[types_.get(key), value] for key, value in types_count.items() if key in types_]
if not data:
@@ -418,7 +466,7 @@ def my_message_counter(time_range, my_name=''):
Pie()
.add(
"",
[['发送', send_num], ['接收',receive_num ]],
[['发送', send_num], ['接收', receive_num]],
center=["40%", "50%"],
)
.set_global_opts(
@@ -428,9 +476,14 @@ def my_message_counter(time_range, my_name=''):
.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}\n{d}%", position='inside'))
# .render("./data/聊天统计/pie_scroll_legend.html")
)
w = get_wordcloud(str_content)
return {
'chart_data_sender': p2.dump_options_with_quotes(),
'chart_data_types': p1.dump_options_with_quotes(),
'chart_data_wordcloud': w.get('chart_data_wordcloud'),
'keyword': w.get('keyword'),
'keyword_max_num': w.get('keyword_max_num'),
'total_text_num':total_text_num,
}