1. 参考代码
import base64
import hashlib
import hmac
import json
import random
import re
import time
from datetime import datetime, timedelta
import requests
# Cached Feishu tenant access token; empty until the first renewal stores one.
tenant_access_token = ""
# Cached expiry of the token above. It starts six seconds in the past so the
# very first `expire_time < datetime.now()` check sees the token as expired.
expire_time = datetime.now() + timedelta(seconds=-6)
# https://open.feishu.cn/document/server-docs/authentication-management/access-token/tenant_access_token_internal
def renew_tenant_access_token(timeout=30):
    """Request a new tenant_access_token and cache it in the module globals.

    Posts the app credentials to the Feishu internal-app token endpoint, then
    stores the returned token in ``tenant_access_token`` and its expiry in
    ``expire_time``.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        RuntimeError: If the response body is not JSON, or it does not carry
            a usable token (non-zero ``code`` or missing fields).
    """
    global tenant_access_token
    global expire_time
    credentials = {
        "app_id": "cli_your_app_id",
        "app_secret": "your_app_secret",
    }
    headers = {
        "Content-Type": "application/json; charset=utf-8",
    }
    url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
    # Without a timeout a stalled connection would block the script forever.
    response = requests.post(url, json=credentials, headers=headers, timeout=timeout)
    print("send get_tenant_access_token post")
    # NOTE(review): this prints the raw response body, which contains the
    # access token itself -- consider removing it outside of debugging.
    print("response text: {}".format(response.text))
    try:
        data = response.json()
    except ValueError as err:
        raise RuntimeError(
            "tenant_access_token response is not valid JSON "
            "(HTTP status {})".format(response.status_code)
        ) from err
    # Fail with the API's own code/msg instead of a bare KeyError below.
    if (
        data.get("code", 0) != 0
        or "tenant_access_token" not in data
        or "expire" not in data
    ):
        raise RuntimeError(
            "failed to get tenant_access_token: code={}, msg={}".format(
                data.get("code"), data.get("msg")
            )
        )
    tenant_access_token = data["tenant_access_token"]
    # Treat the token as expired 10 seconds early so that a request is never
    # sent with a token that lapses while it is in flight.
    expire_time = datetime.now() + timedelta(seconds=(data["expire"] - 10))
# Captures the (still JSON-escaped) value of a leading "text" field. The body
# alternation consumes backslash escapes as pairs, so an escaped quote (\")
# inside the message does not end the match prematurely.
_TEXT_CONTENT_PATTERN = re.compile(r'^{"text":"((?:[^"\\]|\\.)*)"')


def _extract_text(content):
    """Return the escaped text of a text-message content string, else None."""
    if not isinstance(content, str):
        return None
    matched = _TEXT_CONTENT_PATTERN.search(content)
    return matched.group(1) if matched else None


def record_chat_history(chat_id, counts_limit, page_size):
    """Write the text messages of one chat to ``record_{chat_id}.txt``.

    Pages through the chat history with ``send_history_request`` and writes
    one line per message whose content starts with a ``"text"`` field. The
    text is written in its JSON-escaped form (e.g. a line break stays as the
    two characters ``\\n``), which keeps every message on a single line.

    Paging stops when the response reports no more data, or once at least
    ``counts_limit`` text messages have been written. The limit is checked
    after each page, so the total may exceed it by up to one page.

    Args:
        chat_id: Identifier of the chat; also used in the output file name.
        counts_limit: Number of text messages after which paging stops.
        page_size: Number of messages requested per page.

    Raises:
        RuntimeError: If a response carries no ``data`` object (for example
            when the API reports an error).
    """
    page_token = None
    counts = 0
    # Explicit UTF-8 so the output does not depend on the platform's default
    # encoding, which may be unable to represent some message characters.
    with open(f"record_{chat_id}.txt", "w", encoding="utf-8") as file:
        while True:
            response = send_history_request(chat_id, page_token, page_size)
            # Parse the body once per page instead of once per field access.
            payload = response.json()
            data = payload.get("data")
            if not isinstance(data, dict):
                raise RuntimeError(
                    f"failed to fetch history of chat {chat_id}: "
                    f"code={payload.get('code')}, msg={payload.get('msg')}"
                )
            page_token = data.get("page_token")
            print(f"page_token:{page_token}")
            contents = []
            for item in data.get("items") or []:
                # Items without a body/content are skipped rather than
                # crashing the whole export.
                text = _extract_text((item.get("body") or {}).get("content"))
                if text is not None:
                    contents.append(text)
            counts += len(contents)
            file.writelines(f"{content}\n" for content in contents)
            if not data.get("has_more") or counts >= counts_limit:
                print(f"record end:{chat_id}")
                break
def send_history_request(chat_id, page_token, page_size, timeout=30):
    """Request one page of message history for a chat.

    Renews the cached tenant access token first if it has expired, then
    issues a GET to the Feishu ``im/v1/messages`` endpoint with
    ``sort_type`` set to ``ByCreateTimeDesc``.

    Args:
        chat_id: Chat whose messages are requested (sent as ``container_id``).
        page_token: Token of the page to fetch, or None for the first page.
        page_size: Number of messages requested per page.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``requests`` response object, unparsed.
    """
    # The globals are only read here; renew_tenant_access_token() is what
    # rebinds them, so no `global` declaration is needed.
    if expire_time < datetime.now():
        renew_tenant_access_token()
    parameters = {
        "container_id_type": "chat",
        "container_id": chat_id,
        "sort_type": "ByCreateTimeDesc",
        "page_size": page_size,
    }
    if page_token is not None:
        parameters["page_token"] = page_token
    headers = {
        "Authorization": "Bearer {}".format(tenant_access_token),
    }
    url = "https://open.feishu.cn/open-apis/im/v1/messages"
    # Without a timeout a stalled connection would block the script forever.
    return requests.get(url, params=parameters, headers=headers, timeout=timeout)
def send_chat_id_request(page_token, page_size, timeout=30):
    """Request one page of chats from the Feishu ``im/v1/chats`` endpoint.

    Renews the cached tenant access token first if it has expired.

    Args:
        page_token: Token of the page to fetch, or None for the first page.
        page_size: Number of chats requested per page, or None to omit the
            parameter from the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``requests`` response object, unparsed.
    """
    # The globals are only read here; renew_tenant_access_token() is what
    # rebinds them, so no `global` declaration is needed.
    if expire_time < datetime.now():
        renew_tenant_access_token()
    parameters = {}
    if page_token is not None:
        parameters["page_token"] = page_token
    if page_size is not None:
        parameters["page_size"] = page_size
    headers = {
        "Authorization": "Bearer {}".format(tenant_access_token),
    }
    url = "https://open.feishu.cn/open-apis/im/v1/chats"
    # Without a timeout a stalled connection would block the script forever.
    return requests.get(url, params=parameters, headers=headers, timeout=timeout)
if __name__ == "__main__":
    # Script entry point: fetch the first page of chats, then export the
    # text history of each chat to its own file.
    page_size = 20
    counts_limit = 10000
    page_token = None

    chat_id_response = send_chat_id_request(page_token, page_size)
    chat_items = chat_id_response.json().get("data").get("items")
    chat_ids = [chat_item.get("chat_id") for chat_item in chat_items]
    print(str(chat_ids))

    for chat_id in chat_ids:
        record_chat_history(chat_id, counts_limit, page_size)
1. 说明
1.1 效果
- 自动拉取机器人所在的群组(目前只拉取前 20 个,可修改代码以拉取全部)
- 拉取每个群组的历史消息中的文本消息(条数由 counts_limit 限制)
- 将每个群组的历史消息各自写进一个同目录下的 txt 文件
正常运行状态如下
1.2 准备
需要创建飞书应用,并添加机器人
https://open.feishu.cn/?lang=zh-CN
需要有 读取群信息 im:chat.group_info:readonly 的权限;如果还没有,请在飞书应用平台上申请
将创建的应用的 app_id 和 app_secret 填入代码
"app_id": "cli_your_app_id",
"app_secret": "your_app_secret",
再把机器人添加到群内
运行即可
1.3 逻辑简述
renew_tenant_access_token()
- 参考官方文档:https://open.feishu.cn/document/server-docs/authentication-management/access-token/tenant_access_token_internal
- 用于向飞书请求限时有效的 tenant_access_token
- 发送其他请求时,需要在 header 中附带上这个 tenant_access_token
- 所以在其他操作前都需要 检查/更新 一下 tenant_access_token
send_chat_id_request()
- 参考官方文档:https://open.feishu.cn/document/server-docs/group/chat/search?appId=cli_a5d05c6f8cf8100e
- 用于请求拉取机器人所在的群组信息,特别是 chat_id 信息
- 每次拉取 page_size 个群组,如果没拉完,飞书返回消息中 has_more 会为 True,并且带有一个 page_token,拉下一波的时候带上 page_token 即可。不过我的代码偷懒了,只拉了第一波
record_chat_history()
- 基于 chat_id 去拉历史消息,并写入文件 record_{chat_id}.txt
- 拉到没有更多消息(has_more 为 False),或者文本消息数量达到或超过 counts_limit,才会停止
send_history_request()
- 参考官方文档:https://open.feishu.cn/document/server-docs/im-v1/message/list?appId=cli_a5d05c6f8cf8100e
- 用于请求拉取机器人所在的群组聊天信息
- 每次拉取 page_size 个群消息,如果没拉完,飞书返回消息中 has_more 会为 True,并且带有一个 page_token,拉下一波的时候带上page_token即可。