爬虫爬取qq群内成员信息及批量发送邮件

·996 字·5 分钟· loading · loading ·
python 爬虫 qq 邮箱
spoula
作者
spoula

前言
#

注:代码仅供学习,别拿来做别的事哦!

事情起因是好朋友发来一段代码

import smtplib
from email.message import EmailMessage
import pandas as pd
def send_email(remail, rsubject, rcontent):
    """Send one plain-text e-mail through Gmail's SMTP server on port 587.

    Args:
        remail: Recipient address, used as the ``to`` header.
        rsubject: Subject line.
        rcontent: Plain-text body.
    """
    message = EmailMessage()
    # Headers are assigned in a fixed order: sender label, recipient, subject.
    header_values = (
        ('from', 'The Pythoneer Here'),
        ('to', remail),
        ('subject', rsubject),
    )
    for header_name, header_value in header_values:
        message[header_name] = header_value
    message.set_content(rcontent)

    with smtplib.SMTP(host='smtp.gmail.com', port=587) as server:
        server.ehlo()                            # greet the server
        server.starttls()                        # upgrade the connection to TLS
        server.login("xxxx@gmail.com", "")       # Gmail login id and password
        server.send_message(message)
        print("email send to ", remail)
if __name__ == '__main__':
    # Each row of list.xlsx supplies, by column position:
    # recipient address, subject, body.
    df = pd.read_excel('list.xlsx')
    for _, row in df.iterrows():
        # Use .iloc for positional access: plain row[0] on a Series whose
        # index holds column labels relies on a positional fallback that
        # pandas 2.1+ deprecates.
        send_email(row.iloc[0], row.iloc[1], row.iloc[2])

分析了一下,代码大致意思是从list.xlsx表里读取第一列的邮箱,并将第二列的标题和第三列的内容发送给他。

批量发送
#

既然是批量发送,表格里每个邮箱后面都要写邮件标题和内容,大量重复,即使复制粘贴也太麻烦了。于是为何不单独弄个文件夹放邮件信息(标题和内容),再用一个表格文件存收件人呢?

还有个问题就是之前代码用的是gmail邮箱发送,因为gmail是谷歌的,发送时太慢。于是我将发送邮件服务器改成126邮箱,注册个126邮箱小号来使用吧。别忘了打开smtp服务,具体打开方法百度。

import smtplib
from email import encoders
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
from smtplib import SMTP_SSL
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
import pandas as pd


def send(receiver2):
    """Send the prepared HTML e-mail, with image and attachment, to one recipient.

    Reads the module-level settings ``mail_title``, ``mail_content``, ``img``,
    ``txtAnnex``, ``host_server``, ``sender_qq`` and ``pwd``.

    SMTP failures are reported on stdout and swallowed so that a batch run
    continues with the next recipient.

    Args:
        receiver2: Recipient e-mail address.
    """
    msg = MIMEMultipart()
    msg["Subject"] = Header(mail_title, 'utf-8')
    msg["From"] = sender_qq
    msg["To"] = receiver2
    msg.attach(MIMEText(mail_content, 'html'))
    msg.attach(img)
    msg.attach(txtAnnex)

    try:
        # The context manager issues QUIT and closes the socket even when
        # login() or sendmail() raises, so a failed send no longer leaks the
        # connection.
        with SMTP_SSL(host_server, 465) as smtp:  # implicit-SSL connection
            smtp.set_debuglevel(0)  # 0 = off, 1 = print the SMTP dialogue
            smtp.ehlo(host_server)  # greet the server before logging in
            smtp.login(sender_qq, pwd)
            smtp.sendmail(sender_qq, [receiver2], msg.as_string())
    except smtplib.SMTPException as exc:
        # Include the reason so a failed recipient can be diagnosed.
        print("无法发送邮件给", receiver2, exc)
    else:
        print("邮件发送成功给", receiver2)


host_server = 'smtp.126.com'  # SMTP server of the 126.com mailbox
sender_qq = '邮箱@126.com'  # sender address
pwd = '授权码'  # authorization code passed to smtp.login()

# Subject line
with open("./email/subject.txt", "r", encoding='utf-8') as u:
    mail_title = u.read()

# Message body (attached as HTML by send())
with open("./email/content.txt", "r", encoding='utf-8') as f:
    mail_content = f.read()

# Image part; comment this section out when there is no image
with open(r'./email/kunkun.gif', 'rb') as img_file:
    img_data = img_file.read()
img = MIMEImage(img_data)
img.add_header('Content-ID', 'tutu')

# Attachment part; comment this section out when there is no attachment.
# MIMEBase("application", "octet-stream") already emits the Content-Type
# header; assigning txtAnnex['Content-Type'] again would append a second,
# duplicate header because Message.__setitem__ never replaces existing ones.
txtAnnex = MIMEBase("application", "octet-stream")
with open(r"./email/ji.mp3", "rb") as annex_file:
    txtAnnex.set_payload(annex_file.read())
txtAnnex['Content-Disposition'] = 'attachment; filename= ji.mp3'
encoders.encode_base64(txtAnnex)

# Recipients: the first column of the workbook. Selecting it with .iloc avoids
# the positional item[0] fallback that pandas 2.1+ deprecates.
df = pd.read_excel('./qq/email.xlsx')
for receiver in df.iloc[:, 0]:
    send(receiver)

这是改后的代码,增加了发送图片和附件的功能,并且发送的邮件不再是纯文本,而是html格式,这样邮件就可以像网页一样精彩。代码中的图片和附件如果不需要发送,可以注释掉。

大体就是在当前目录下qq文件夹内读取email.xlsx文件内第一列获取接收者邮箱,然后批量将email文件夹内的邮件文件发出去。

爬取qq邮箱
#

批量发送的脚本是有了,但是有个问题,email.xlsx里面的邮箱列表要我手打一个一个输进去,太麻烦了!

为何不写个爬虫脚本直接将一个群里的人邮箱都爬来呢?

说干就干,直接上脚本

import os
import openpyxl
import pandas as pd
import requests
from openpyxl.workbook import Workbook


class Qun:
    """Client for the qun.qq.com ``qun_mgr`` endpoints used to list group members.

    Holds the raw ``Cookie`` header value and the ``bkn`` token that are sent
    with every request.
    """

    def __init__(self, cookie: str, bkn: str) -> None:
        self.cookie = cookie
        self.bkn = bkn

    def get_data(self, data: dict, params='search_group_members') -> dict:
        """POST ``data`` as a form to ``qun_mgr/<params>`` and return the JSON body.

        Args:
            data: Form fields to send.
            params: Endpoint name appended to the ``qun_mgr`` URL.

        Returns:
            The decoded JSON response.
        """
        url = f'https://qun.qq.com/cgi-bin/qun_mgr/{params}'
        headers = {
            'Cookie': self.cookie,
            'Origin': 'https://qun.qq.com',
            'Referer': 'https://qun.qq.com/member.html',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42'
        }
        # Without a timeout, requests blocks indefinitely on a stalled
        # connection and the crawl would hang silently.
        res = requests.post(url, headers=headers, data=data, timeout=10)
        return res.json()

    def get_count(self, gc: int) -> int:
        """Return the ``search_count`` field reported for group ``gc``.

        The value is ``None`` when the response carries no such field.
        """
        data = {
            'gc': gc,
            'st': '0',
            'end': '20',
            'sort': '0',
            'bkn': self.bkn
        }
        return self.get_data(data).get('search_count')

    def set_data(self, gc: int, num: int):
        """Yield one request form per page of members.

        Pages start at offsets 0, 21, 42, ... and each asks for ``st`` up to
        ``st + 20``, clamped to ``num`` on the last page.
        NOTE(review): the stride of 21 implies the server treats ``end`` as
        inclusive — confirm against the API.

        Args:
            gc: Group number.
            num: Total member count; nothing is yielded when it is 0.
        """
        for st in range(0, num, 21):
            yield {
                'gc': gc,
                'st': st,
                'end': min(st + 20, num),
                'sort': '0',
                'bkn': self.bkn
            }

    def main(self):
        """Collect every member of the module-level group ``gc`` into ``./qq/<gc>.xlsx``.

        Each member's ``uin`` is turned into ``<uin>@qq.com`` and stored in a
        single ``email`` column.

        Raises:
            RuntimeError: If the member count is missing from the response.
        """
        os.makedirs('./qq', exist_ok=True)
        num = self.get_count(gc)
        if num is None:
            # Fail with a clear message instead of the TypeError that
            # range() would raise on None.
            raise RuntimeError(
                "response has no 'search_count' field; cannot page through group members"
            )
        uin_list = []
        for data in self.set_data(gc, num):
            # A page without 'mems' contributes nothing rather than crashing.
            for member in self.get_data(data).get('mems') or []:
                uin = member.get('uin')
                print('抓到' + str(uin) + '了,给爷爬!')
                uin_list.append(str(uin) + '@qq.com')
        # Write the workbook once, after all pages, instead of rewriting the
        # whole file after every page.
        df = pd.DataFrame(uin_list, columns=['email'])
        df.to_excel(f'./qq/{gc}.xlsx', index=False, startcol=0)

# Cookie request header captured from https://qun.qq.com/member.html
COOKIE = '你的cookie'
# The bkn payload parameter seen on https://qun.qq.com/cgi-bin/qun_mgr/get_group_list
BKN = ''
# Group number; read as a module-level name by Qun.main()
gc = '群号'
if __name__ == '__main__':
    Qun(COOKIE, BKN).main()

代码主要是获取这个qq号所在的某个群所有成员的qq号,加上@qq.com就是他们的邮箱了,然后存在当前路径qq文件夹下对应的 群号.xlsx 文件里

cookie的话在 https://qun.qq.com/member.html 抓个包就能获取。BKN我之前在写这代码测试时是可以直接抓包获取到的,但现在似乎没了。我又去找了找,发现大佬逆向了BKN的算法(见大佬的文章),下面是算法:

def bkn(skey):
    """Derive the ``bkn`` token from the ``skey`` cookie value.

    Starting from 5381, every character is folded in as
    ``acc = acc * 33 + ord(ch)``; the low 31 bits of the result are returned.
    """
    acc = 5381
    for ch in skey:
        # acc + (acc << 5) is acc * 33.
        acc = acc * 33 + ord(ch)
    return acc & 0x7FFFFFFF


# The skey value copied out of the cookie.
skey = '从cookie里获取的skey'

# Print the bkn token computed from that skey.
print(bkn(skey))

当中的skey的值在cookie中有

这代码运行起来真的很爽

image-20230715142126551

结合
#

下面就是将俩代码结合起来用了

这是主文件

import os
import pandas as pd
import requests
from pi_fa_quan import fa

class Qun:
    """Client for the qun.qq.com ``qun_mgr`` endpoints used to list group members.

    Holds the raw ``Cookie`` header value and the ``bkn`` token that are sent
    with every request.
    """

    def __init__(self, cookie: str, bkn: str) -> None:
        self.cookie = cookie
        self.bkn = bkn

    def get_data(self, data: dict, params='search_group_members') -> dict:
        """POST ``data`` as a form to ``qun_mgr/<params>`` and return the JSON body.

        Args:
            data: Form fields to send.
            params: Endpoint name appended to the ``qun_mgr`` URL.

        Returns:
            The decoded JSON response.
        """
        url = f'https://qun.qq.com/cgi-bin/qun_mgr/{params}'
        headers = {
            'Cookie': self.cookie,
            'Origin': 'https://qun.qq.com',
            'Referer': 'https://qun.qq.com/member.html',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42'
        }
        # Without a timeout, requests blocks indefinitely on a stalled
        # connection and the crawl would hang silently.
        res = requests.post(url, headers=headers, data=data, timeout=10)
        return res.json()

    def get_count(self, gc: int) -> int:
        """Return the ``search_count`` field reported for group ``gc``.

        The value is ``None`` when the response carries no such field.
        """
        data = {
            'gc': gc,
            'st': '0',
            'end': '20',
            'sort': '0',
            'bkn': self.bkn
        }
        return self.get_data(data).get('search_count')

    def set_data(self, gc: int, num: int):
        """Yield one request form per page of members.

        Pages start at offsets 0, 21, 42, ... and each asks for ``st`` up to
        ``st + 20``, clamped to ``num`` on the last page.
        NOTE(review): the stride of 21 implies the server treats ``end`` as
        inclusive — confirm against the API.

        Args:
            gc: Group number.
            num: Total member count; nothing is yielded when it is 0.
        """
        for st in range(0, num, 21):
            yield {
                'gc': gc,
                'st': st,
                'end': min(st + 20, num),
                'sort': '0',
                'bkn': self.bkn
            }

    def main(self):
        """Collect every member of the module-level group ``gc`` into ``./qq/<gc>.xlsx``.

        Each member's ``uin`` is turned into ``<uin>@qq.com`` and stored in a
        single ``email`` column.

        Raises:
            RuntimeError: If the member count is missing from the response.
        """
        os.makedirs('./qq', exist_ok=True)
        num = self.get_count(gc)
        if num is None:
            # Fail with a clear message instead of the TypeError that
            # range() would raise on None.
            raise RuntimeError(
                "response has no 'search_count' field; cannot page through group members"
            )
        uin_list = []
        for data in self.set_data(gc, num):
            # A page without 'mems' contributes nothing rather than crashing.
            for member in self.get_data(data).get('mems') or []:
                uin = member.get('uin')
                print('抓到' + str(uin) + '了,给爷爬!')
                uin_list.append(str(uin) + '@qq.com')
        # Write the workbook once, after all pages, instead of rewriting the
        # whole file after every page.
        df = pd.DataFrame(uin_list, columns=['email'])
        df.to_excel(f'./qq/{gc}.xlsx', index=False, startcol=0)


# Cookie request header captured from https://qun.qq.com/member.html
COOKIE = ''
# The bkn payload parameter seen on https://qun.qq.com/cgi-bin/qun_mgr/get_group_list
BKN = ''
# Group number; read as a module-level name by Qun.main() and passed to fa()
gc = ''
if __name__ == '__main__':
    # Crawl the member list into ./qq/<gc>.xlsx first, then mail every address in it.
    Qun(COOKIE, BKN).main()
    fa(gc).main()

这是调用的批量发送类:

import smtplib
from email import encoders
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
from smtplib import SMTP_SSL
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
import pandas as pd


class fa:
    """Send one prepared HTML e-mail to every address listed in ``./qq/<gc>.xlsx``."""

    def __init__(self, gc: str) -> None:
        # Base name (without extension) of the recipient workbook under ./qq/.
        self.gc = gc

    def send(self, receiver2):
        """Send the prepared message, with image and attachment, to one recipient.

        Relies on the attributes that ``main`` sets (``mail_title``,
        ``mail_content``, ``img``, ``txtAnnex``, ``host_server``,
        ``sender_qq``, ``pwd``). SMTP failures are reported on stdout and
        swallowed so that the batch continues with the next recipient.

        Args:
            receiver2: Recipient e-mail address.
        """
        msg = MIMEMultipart()
        msg["Subject"] = Header(self.mail_title, 'utf-8')
        msg["From"] = self.sender_qq
        msg["To"] = receiver2
        msg.attach(MIMEText(self.mail_content, 'html'))
        msg.attach(self.img)
        msg.attach(self.txtAnnex)

        try:
            # The context manager issues QUIT and closes the socket even when
            # login() or sendmail() raises, so a failed send no longer leaks
            # the connection.
            with SMTP_SSL(self.host_server, 465) as smtp:  # implicit-SSL connection
                smtp.set_debuglevel(0)  # 0 = off, 1 = print the SMTP dialogue
                smtp.ehlo(self.host_server)  # greet the server before logging in
                smtp.login(self.sender_qq, self.pwd)
                smtp.sendmail(self.sender_qq, [receiver2], msg.as_string())
        except smtplib.SMTPException as exc:
            # Include the reason so a failed recipient can be diagnosed.
            print("无法发送邮件给", receiver2, exc)
        else:
            print("邮件发送成功给", receiver2)

    def main(self):
        """Load the mail assets from ``./email/`` and send them to every recipient.

        Recipients are taken from the first column of ``./qq/<gc>.xlsx``.
        """
        self.host_server = 'smtp.126.com'  # SMTP server of the 126.com mailbox
        self.sender_qq = ''  # sender address
        self.pwd = ''  # authorization code passed to smtp.login()

        # Subject line
        with open("./email/subject.txt", "r", encoding='utf-8') as u:
            self.mail_title = u.read()

        # Message body (attached as HTML by send())
        with open("./email/content.txt", "r", encoding='utf-8') as f:
            self.mail_content = f.read()

        # Image part
        with open(r'./email/kunkun.gif', 'rb') as img_file:
            self.img = MIMEImage(img_file.read())
        self.img.add_header('Content-ID', 'tutu')

        # Attachment part. MIMEBase("application", "octet-stream") already
        # emits the Content-Type header; assigning it again would append a
        # second, duplicate header because Message.__setitem__ never replaces
        # existing ones.
        self.txtAnnex = MIMEBase("application", "octet-stream")
        with open(r"./email/ji.mp3", "rb") as annex_file:
            self.txtAnnex.set_payload(annex_file.read())
        self.txtAnnex['Content-Disposition'] = 'attachment; filename= ji.mp3'
        encoders.encode_base64(self.txtAnnex)

        # Recipients: the first column of the workbook. Selecting it with
        # .iloc avoids the positional item[0] fallback that pandas 2.1+
        # deprecates.
        df = pd.read_excel(f'./qq/{self.gc}.xlsx')
        for receiver in df.iloc[:, 0]:
            self.send(receiver)


# Workbook name used when this module is run directly: recipients are read from ./qq/email.xlsx
gc0 = 'email'
if __name__ == '__main__':
    fa(gc0).main()

下载
#

百度云盘 提取码:6666

总结
#

这次这个脚本主要是能批量发送,可以运用到很多场景,比如举办个小型活动,给群里发通知就可以大大节省时间,初学python这次代码主要基于大佬们的代码修改而来,最后再说明下代码仅供学习,不要用于违法的事。