Python 飞书机器人拉取群聊历史消息

1. 参考代码

import base64
import hashlib
import hmac
import json
import random
import re
import time
from datetime import datetime, timedelta

import requests

# Cached tenant access token used by the request helpers; empty until renewed.
tenant_access_token = ""
# Starts in the past so the first request treats the token as expired.
expire_time = datetime.now() + timedelta(seconds=-6)



# https://open.feishu.cn/document/server-docs/authentication-management/access-token/tenant_access_token_internal
def renew_tenant_access_token():
    """Fetch a fresh tenant access token and cache it in the module globals.

    Posts the app credentials to the token endpoint, stores the returned
    token in ``tenant_access_token`` and sets ``expire_time`` to 10 seconds
    before the expiry reported by the server.

    Raises:
        RuntimeError: if the response is not JSON or carries no token
            (for example when app_id / app_secret are wrong).
    """
    global tenant_access_token
    global expire_time
    data = {
        "app_id": "cli_your_app_id",
        "app_secret": "your_app_secret",
    }
    headers = {
        "Content-Type": "application/json; charset=utf-8",
    }
    url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.post(url, json=data, headers=headers, timeout=10)
    print("send get_tenant_access_token post")

    try:
        payload = response.json()
    except ValueError as err:
        raise RuntimeError(
            "tenant_access_token response is not JSON (HTTP {})".format(
                response.status_code
            )
        ) from err

    # Log only the status fields; the body contains the bearer token itself.
    print(
        "response status: {}, code: {}, msg: {}".format(
            response.status_code, payload.get("code"), payload.get("msg")
        )
    )

    token = payload.get("tenant_access_token")
    if not token:
        raise RuntimeError(
            "failed to get tenant_access_token: code={}, msg={}".format(
                payload.get("code"), payload.get("msg")
            )
        )

    tenant_access_token = token
    # Renew 10 seconds early so a token is never used right at its expiry.
    expire_time = datetime.now() + timedelta(seconds=(payload["expire"] - 10))


def record_chat_history(chat_id, counts_limit, page_size):
    """Write the text messages of one chat to ``record_{chat_id}.txt``.

    Pages through the chat history with ``send_history_request`` and writes
    the decoded text of every message whose body content is a JSON object
    with a string ``text`` field. Each message is followed by a newline; a
    message that itself contains line breaks spans several lines.

    Paging stops when the server reports no further page, or once at least
    ``counts_limit`` text messages have been written. The page in progress
    is always finished, so the total may exceed the limit slightly.

    Args:
        chat_id: ID of the chat to record; also used in the output file name.
        counts_limit: Number of text messages after which paging stops.
        page_size: Number of messages requested per page.

    Raises:
        RuntimeError: if a history response carries no ``data`` object.
    """
    page_token = None
    counts = 0
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # chat text.
    with open(f"record_{chat_id}.txt", "w", encoding="utf-8") as file:
        while True:
            response = send_history_request(chat_id, page_token, page_size)
            payload = response.json()  # parse the body once per page
            data = payload.get("data")
            if not data:
                raise RuntimeError(
                    "history request for chat {} failed: code={}, msg={}".format(
                        chat_id, payload.get("code"), payload.get("msg")
                    )
                )
            page_token = data.get("page_token") or None
            print(f"page_token:{page_token}")

            contents = []
            for item in data.get("items") or []:
                text = _extract_text((item.get("body") or {}).get("content"))
                if text is not None:
                    contents.append(text)
            counts += len(contents)
            file.writelines(f"{text}\n" for text in contents)

            # Without a cursor the next request would fetch the first page
            # again, so a missing page_token also ends the loop.
            if not data.get("has_more") or not page_token or counts >= counts_limit:
                print(f"record end:{chat_id}")
                break


def _extract_text(content):
    """Return the ``text`` field of a JSON message body, or None if absent."""
    if not content:
        return None
    try:
        parsed = json.loads(content)
    except ValueError:
        return None
    if not isinstance(parsed, dict):
        return None
    text = parsed.get("text")
    return text if isinstance(text, str) else None


def send_history_request(chat_id, page_token, page_size):
    """Request one page of message history for a chat.

    Renews the cached tenant access token first when ``expire_time`` has
    passed, then sends a GET to the ``im/v1/messages`` endpoint with
    ``sort_type`` set to ``ByCreateTimeDesc``.

    Args:
        chat_id: ID of the chat whose history is requested.
        page_token: Pagination cursor from the previous page; a falsy value
            (None, empty string) requests the first page.
        page_size: Number of messages requested per page.

    Returns:
        The ``requests.Response`` of the call; its status is not checked here.
    """
    if expire_time < datetime.now():
        renew_tenant_access_token()
    parameters = {
        "container_id_type": "chat",
        "container_id": chat_id,
        "sort_type": "ByCreateTimeDesc",
        "page_size": page_size,
    }
    # Only send a real cursor; an empty placeholder is not a valid page_token.
    if page_token:
        parameters["page_token"] = page_token
    headers = {
        "Authorization": "Bearer {}".format(tenant_access_token),
    }
    url = "https://open.feishu.cn/open-apis/im/v1/messages"
    # A timeout keeps the script from hanging forever on a stalled connection.
    return requests.get(url, params=parameters, headers=headers, timeout=10)


def send_chat_id_request(page_token, page_size):
    """Request one page of the chat list from the ``im/v1/chats`` endpoint.

    Renews the cached tenant access token first when ``expire_time`` has
    passed.

    Args:
        page_token: Pagination cursor from the previous page; a falsy value
            (None, empty string) requests the first page.
        page_size: Number of chats requested per page, or None to omit the
            parameter from the request.

    Returns:
        The ``requests.Response`` of the call; its status is not checked here.
    """
    if expire_time < datetime.now():
        renew_tenant_access_token()
    parameters = {}
    if page_token:
        parameters["page_token"] = page_token
    # page_size is independent of page_token: it must also be sent on the
    # first page, where no cursor exists yet.
    if page_size is not None:
        parameters["page_size"] = page_size
    headers = {
        "Authorization": "Bearer {}".format(tenant_access_token),
    }
    url = "https://open.feishu.cn/open-apis/im/v1/chats"
    # A timeout keeps the script from hanging forever on a stalled connection.
    return requests.get(url, params=parameters, headers=headers, timeout=10)


if __name__ == "__main__":
    # Script entry point: fetch a single page of chats, then record the
    # history of each chat into its own file.
    page_size = 20
    counts_limit = 10000
    page_token = None
    chat_id_response = send_chat_id_request(page_token, page_size)
    chat_ids = [
        chat.get("chat_id")
        for chat in chat_id_response.json().get("data").get("items")
    ]
    print(str(chat_ids))
    for chat_id in chat_ids:
        record_chat_history(chat_id, counts_limit, page_size)

1. 说明

1.1 效果

  • 自动拉取机器人所在的群组(目前只拉取第一页,即前 20 个;可修改代码,通过 page_token 翻页拉取全部)
  • 拉取每个群组的历史消息中的文本消息(条数由 counts_limit 限制)
  • 将每个群组的历史消息各自写进一个同目录下的 txt 文件

正常运行状态如下

1.2 准备

需要创建飞书应用,并添加机器人

https://open.feishu.cn/?lang=zh-CN

需要有“读取群信息”(im:chat.group_info:readonly)权限;如果还没有,请在飞书开放平台的应用后台申请

将创建的应用的 app_id 和 app_secret 填入代码

    "app_id": "cli_your_app_id",
    "app_secret": "your_app_secret",

再把机器人添加到群内

运行即可

1.3 逻辑简述

renew_tenant_access_token()

send_chat_id_request()

record_chat_history()

  • 基于 chat_id 去拉历史消息,并写入文件 record_{chat_id}.txt
  • 拉到没有更多消息(has_more 为 False),或者已记录的文本消息数量达到或超过 counts_limit 时,才会停止

send_history_request()

发表评论