爬虫爬取qq群内成员信息及批量发送邮件
前言 #
注:代码仅供学习,别拿来做别的事哦!
事情起因是好朋友发来一段代码
import smtplib
from email.message import EmailMessage
import pandas as pd
def send_email(remail, rsubject, rcontent):
    """Send one plain-text e-mail through Gmail's SMTP server.

    Args:
        remail: Recipient address, written to the ``to`` header.
        rsubject: Text for the ``subject`` header.
        rcontent: Plain-text body of the message.
    """
    # Assemble the message: headers first, then the body.
    message = EmailMessage()
    message['from'] = 'The Pythoneer Here'
    message['to'] = remail
    message['subject'] = rsubject
    message.set_content(rcontent)

    # The connection is closed automatically when the ``with`` block exits.
    with smtplib.SMTP(host='smtp.gmail.com', port=587) as server:
        server.ehlo()
        server.starttls()  # switch the connection to TLS before logging in
        server.login("xxxx@gmail.com", "")  # placeholder account / password
        server.send_message(message)
        print("email send to ", remail)
if __name__ == '__main__':
    # Each row of list.xlsx holds, in order: recipient address, subject, body.
    df = pd.read_excel('list.xlsx')
    for _, row in df.iterrows():
        # The row is a Series indexed by column label, so the cells are read
        # by position with .iloc; plain ``row[0]`` as a positional lookup is
        # deprecated in pandas.
        send_email(row.iloc[0], row.iloc[1], row.iloc[2])
分析了一下,代码大致意思是从list.xlsx表里读取第一列的邮箱,并将第二列的标题和第三列的内容发送给他。
批量发送 #
既然是批量发送,表格里每个邮箱后面都要写邮件标题和内容,大量重复,即使复制粘贴也太麻烦了。那为何不单独弄个文件夹存放邮件内容(标题、正文等),再用一个表格文件只存收件人呢?
还有个问题就是之前代码用的是gmail邮箱发送,因为gmail是谷歌的,发送时太慢。于是我将发送邮件服务器改成126邮箱,注册个126邮箱小号来使用吧。别忘了打开smtp服务,具体打开方法百度。
import smtplib
from email import encoders
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
from smtplib import SMTP_SSL
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
import pandas as pd
def send(receiver2):
    """Send the prepared HTML mail, with image and attachment, to one address.

    Reads these module-level names, which must be defined before the call:
    ``mail_title``, ``mail_content``, ``img``, ``txtAnnex``, ``sender_qq``,
    ``pwd`` and ``host_server``.

    Args:
        receiver2: Recipient e-mail address.

    SMTP errors are caught and reported on stdout instead of being raised,
    so one failing recipient does not abort a batch.
    """
    msg = MIMEMultipart()
    msg["Subject"] = Header(mail_title, 'utf-8')
    msg["From"] = sender_qq
    msg["To"] = receiver2
    msg.attach(MIMEText(mail_content, 'html'))
    msg.attach(img)
    msg.attach(txtAnnex)

    try:
        # The context manager closes the connection even when login or
        # sending raises, so a failed recipient does not leak a socket.
        with SMTP_SSL(host_server, 465) as smtp:
            smtp.set_debuglevel(0)  # 0 = quiet, 1 = print the SMTP dialogue
            smtp.ehlo(host_server)
            smtp.login(sender_qq, pwd)
            smtp.sendmail(sender_qq, [receiver2], msg.as_string())
    except smtplib.SMTPException:
        print("无法发送邮件给", receiver2)
    else:
        print("邮件发送成功给", receiver2)
host_server = 'smtp.126.com'  # SMTP server of the 126 mailbox
sender_qq = '邮箱@126.com'  # sender address
pwd = '授权码'  # SMTP authorization code

# Subject line; strip the file's trailing newline so the header is one line.
with open("./email/subject.txt", "r", encoding='utf-8') as u:
    mail_title = u.read().strip()

# HTML body of the mail.
with open("./email/content.txt", "r", encoding='utf-8') as f:
    mail_content = f.read()

# Inline image. If the mail has no image, comment this section out together
# with the matching ``msg.attach(img)`` call in send().
with open('./email/kunkun.gif', 'rb') as img_file:
    img = MIMEImage(img_file.read())
# NOTE(review): Content-ID values are conventionally wrapped in angle
# brackets ('<tutu>'); left unchanged here - confirm against content.txt.
img.add_header('Content-ID', 'tutu')

# Attachment. If the mail has no attachment, comment this section out
# together with the matching ``msg.attach(txtAnnex)`` call in send().
txtAnnex = MIMEBase("application", "octet-stream")
with open("./email/ji.mp3", "rb") as annex_file:
    txtAnnex.set_payload(annex_file.read())
# MIMEBase already wrote the Content-Type header; assigning it again would
# append a second, duplicate header, so only the disposition is added.
txtAnnex.add_header('Content-Disposition', 'attachment', filename='ji.mp3')
encoders.encode_base64(txtAnnex)

# Recipients: first column of the spreadsheet, read by position and with
# blank cells skipped.
df = pd.read_excel('./qq/email.xlsx')
for receiver in df.iloc[:, 0].dropna():
    send(receiver)
这是改后的代码,增加了发送图片和附件的功能,并且发送的邮件不再是纯文本,而是html格式,这样邮件就可以像网页一样精彩。代码中的图片和附件如果不需要发送,可以注释掉。
大体就是在当前目录下qq文件夹内读取email.xlsx文件内第一列获取接收者邮箱,然后批量将email文件夹内的邮件文件发出去。
爬取qq邮箱 #
批量发送的脚本是有了,但是有个问题,email.xlsx里面的邮箱列表要我手打一个一个输进去,太麻烦了!
为何不写个爬虫脚本直接将一个群里的人邮箱都爬来呢?
说干就干,直接上脚本
import os
import openpyxl
import pandas as pd
import requests
from openpyxl.workbook import Workbook
class Qun:
    """Client for the qun.qq.com group-management endpoints.

    Fetches the member list of one group and exports it as e-mail addresses.

    Attributes:
        cookie: Value sent as the ``Cookie`` request header.
        bkn: Value sent as the ``bkn`` form field.
    """

    # Offset added to ``st`` to obtain ``end``; windows therefore run
    # 0-20, 21-41, 42-62, ... (presumably ``end`` is inclusive on the server
    # side - TODO confirm).
    _PAGE_SPAN = 20

    def __init__(self, cookie: str, bkn: str) -> None:
        self.cookie = cookie
        self.bkn = bkn

    def get_data(self, data: dict, params='search_group_members') -> dict:
        """POST ``data`` as a form to a ``qun_mgr`` endpoint and return its JSON.

        Args:
            data: Form fields to send.
            params: Endpoint name appended to the ``qun_mgr`` base URL.
        """
        url = f'https://qun.qq.com/cgi-bin/qun_mgr/{params}'
        headers = {
            'Cookie': self.cookie,
            'Origin': 'https://qun.qq.com',
            'Referer': 'https://qun.qq.com/member.html',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42'
        }
        # A timeout keeps a stalled connection from hanging the run forever.
        res = requests.post(url, headers=headers, data=data, timeout=30)
        return res.json()

    def get_count(self, gc: int) -> int:
        """Return the ``search_count`` field reported for group ``gc``.

        The value is ``None`` when the response carries no such field.
        """
        data = {
            'gc': gc,
            'st': '0',
            'end': '20',
            'sort': '0',
            'bkn': self.bkn
        }
        return self.get_data(data).get('search_count')

    def set_data(self, gc: int, num: int):
        """Yield one form payload per page of the member list.

        Pages are the index windows 0-20, 21-41, 42-62, ...; the ``end`` of
        the last window is clamped to ``num``. Nothing is yielded for
        ``num == 0``.

        Args:
            gc: Group number placed in every payload.
            num: Member count used as the upper bound of the paging.
        """
        for st in range(0, num, self._PAGE_SPAN + 1):
            yield {
                'gc': gc,
                'st': st,
                'end': min(st + self._PAGE_SPAN, num),
                'sort': '0',
                'bkn': self.bkn
            }

    def main(self):
        """Export every member of the group as ``<uin>@qq.com`` to a spreadsheet.

        The group number is read from the module-level name ``gc``; the
        result is written to ``./qq/<gc>.xlsx`` in a single ``email`` column.
        """
        os.makedirs('./qq', exist_ok=True)
        num = self.get_count(gc)
        uin_list = []
        for data in self.set_data(gc, num):
            # A response without a 'mems' list is treated as an empty page
            # instead of crashing on ``None``.
            for member in self.get_data(data).get('mems') or []:
                uin = str(member.get('uin'))
                print('抓到' + uin + '了,给爷爬!')
                uin_list.append(uin + '@qq.com')
        df = pd.DataFrame(uin_list, columns=['email'])
        filename = f'./qq/{gc}.xlsx'
        df.to_excel(filename, index=False, startcol=0)
# Value of the Cookie request header sent to https://qun.qq.com/member.html
COOKIE = '你的cookie'
# ``bkn`` payload parameter of
# https://qun.qq.com/cgi-bin/qun_mgr/get_group_list
BKN = ''
# Group number to crawl
gc = '群号'

if __name__ == '__main__':
    crawler = Qun(COOKIE, BKN)
    crawler.main()
代码主要是获取这个qq号所在的某个群的所有成员的qq号,qq号加上@qq.com就是他们的邮箱了,然后存到当前路径下qq文件夹里以群号命名的xlsx文件中。
cookie的话在 https://qun.qq.com/member.html 抓个包就能获取。BKN我之前写这段代码测试时是可以直接抓包获取到的,但现在似乎没了。我又去找了找,发现有大佬逆向了BKN的算法(见大佬的文章),下面是算法:
def bkn(skey):
    """Compute the ``bkn`` token from an ``skey`` string.

    This is a djb2-style rolling hash: start at 5381, and for every
    character multiply the running value by 33 and add the character's code
    point. The result is reduced to its low 31 bits.

    Args:
        skey: The ``skey`` string to hash.

    Returns:
        A non-negative integer below 2**31; 5381 for an empty string.
    """
    token = 5381
    for ch in skey:
        # t + (t << 5) equals t * 33. Masking on every round leaves the low
        # 31 bits of the final result unchanged while keeping the
        # intermediate value small.
        token = (token * 33 + ord(ch)) & 0x7FFFFFFF
    return token
# The skey value is taken from the cookie.
skey = '从cookie里获取的skey'
print(bkn(skey))
当中的skey的值在cookie中有
这代码运行起来真的很爽
结合 #
下面就是将俩代码结合起来用了
这是主文件
import os
import pandas as pd
import requests
from pi_fa_quan import fa
class Qun:
    """Client for the qun.qq.com group-management endpoints.

    Fetches the member list of one group and exports it as e-mail addresses.

    Attributes:
        cookie: Value sent as the ``Cookie`` request header.
        bkn: Value sent as the ``bkn`` form field.
    """

    # Offset added to ``st`` to obtain ``end``; windows therefore run
    # 0-20, 21-41, 42-62, ... (presumably ``end`` is inclusive on the server
    # side - TODO confirm).
    _PAGE_SPAN = 20

    def __init__(self, cookie: str, bkn: str) -> None:
        self.cookie = cookie
        self.bkn = bkn

    def get_data(self, data: dict, params='search_group_members') -> dict:
        """POST ``data`` as a form to a ``qun_mgr`` endpoint and return its JSON.

        Args:
            data: Form fields to send.
            params: Endpoint name appended to the ``qun_mgr`` base URL.
        """
        url = f'https://qun.qq.com/cgi-bin/qun_mgr/{params}'
        headers = {
            'Cookie': self.cookie,
            'Origin': 'https://qun.qq.com',
            'Referer': 'https://qun.qq.com/member.html',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42'
        }
        # A timeout keeps a stalled connection from hanging the run forever.
        res = requests.post(url, headers=headers, data=data, timeout=30)
        return res.json()

    def get_count(self, gc: int) -> int:
        """Return the ``search_count`` field reported for group ``gc``.

        The value is ``None`` when the response carries no such field.
        """
        data = {
            'gc': gc,
            'st': '0',
            'end': '20',
            'sort': '0',
            'bkn': self.bkn
        }
        return self.get_data(data).get('search_count')

    def set_data(self, gc: int, num: int):
        """Yield one form payload per page of the member list.

        Pages are the index windows 0-20, 21-41, 42-62, ...; the ``end`` of
        the last window is clamped to ``num``. Nothing is yielded for
        ``num == 0``.

        Args:
            gc: Group number placed in every payload.
            num: Member count used as the upper bound of the paging.
        """
        for st in range(0, num, self._PAGE_SPAN + 1):
            yield {
                'gc': gc,
                'st': st,
                'end': min(st + self._PAGE_SPAN, num),
                'sort': '0',
                'bkn': self.bkn
            }

    def main(self):
        """Export every member of the group as ``<uin>@qq.com`` to a spreadsheet.

        The group number is read from the module-level name ``gc``; the
        result is written to ``./qq/<gc>.xlsx`` in a single ``email`` column.
        """
        os.makedirs('./qq', exist_ok=True)
        num = self.get_count(gc)
        uin_list = []
        for data in self.set_data(gc, num):
            # A response without a 'mems' list is treated as an empty page
            # instead of crashing on ``None``.
            for member in self.get_data(data).get('mems') or []:
                uin = str(member.get('uin'))
                print('抓到' + uin + '了,给爷爬!')
                uin_list.append(uin + '@qq.com')
        df = pd.DataFrame(uin_list, columns=['email'])
        filename = f'./qq/{gc}.xlsx'
        df.to_excel(filename, index=False, startcol=0)
# Value of the Cookie request header sent to https://qun.qq.com/member.html
COOKIE = ''
# ``bkn`` payload parameter of
# https://qun.qq.com/cgi-bin/qun_mgr/get_group_list
BKN = ''
# Group number to crawl and then mail
gc = ''

if __name__ == '__main__':
    # First export the group's member addresses, then mail every one of them.
    crawler = Qun(COOKIE, BKN)
    crawler.main()
    mailer = fa(gc)
    mailer.main()
这是调用的批量转发类:
import smtplib
from email import encoders
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
from smtplib import SMTP_SSL
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
import pandas as pd
class fa:
    """Batch mailer that sends one prepared HTML mail to a list of recipients.

    Recipients are read from the first column of ``./qq/<gc>.xlsx``; the
    subject, body, image and attachment are read from the ``./email`` folder.

    Attributes:
        gc: Base name (without extension) of the recipient spreadsheet.
    """

    def __init__(self, gc: str) -> None:
        self.gc = gc

    def send(self, receiver2):
        """Send the prepared mail to one address.

        Uses the attributes assigned by ``main()`` (server, credentials,
        subject, body, image, attachment), so ``main()`` must have set them
        before this is called.

        Args:
            receiver2: Recipient e-mail address.

        SMTP errors are caught and reported on stdout instead of being
        raised, so one failing recipient does not abort the batch.
        """
        msg = MIMEMultipart()
        msg["Subject"] = Header(self.mail_title, 'utf-8')
        msg["From"] = self.sender_qq
        msg["To"] = receiver2
        msg.attach(MIMEText(self.mail_content, 'html'))
        msg.attach(self.img)
        msg.attach(self.txtAnnex)

        try:
            # The context manager closes the connection even when login or
            # sending raises, so a failed recipient does not leak a socket.
            with SMTP_SSL(self.host_server, 465) as smtp:
                smtp.set_debuglevel(0)  # 0 = quiet, 1 = print the SMTP dialogue
                smtp.ehlo(self.host_server)
                smtp.login(self.sender_qq, self.pwd)
                smtp.sendmail(self.sender_qq, [receiver2], msg.as_string())
        except smtplib.SMTPException:
            print("无法发送邮件给", receiver2)
        else:
            print("邮件发送成功给", receiver2)

    def main(self):
        """Load the mail parts from disk and send them to every recipient."""
        self.host_server = 'smtp.126.com'  # SMTP server of the 126 mailbox
        self.sender_qq = ''  # sender address
        self.pwd = ''  # SMTP authorization code

        # Subject line; strip the file's trailing newline so the header is
        # a single line.
        with open("./email/subject.txt", "r", encoding='utf-8') as u:
            self.mail_title = u.read().strip()

        # HTML body of the mail.
        with open("./email/content.txt", "r", encoding='utf-8') as f:
            self.mail_content = f.read()

        # Inline image.
        with open('./email/kunkun.gif', 'rb') as img_file:
            self.img = MIMEImage(img_file.read())
        # NOTE(review): Content-ID values are conventionally wrapped in angle
        # brackets ('<tutu>'); left unchanged here - confirm against
        # content.txt.
        self.img.add_header('Content-ID', 'tutu')

        # Attachment.
        self.txtAnnex = MIMEBase("application", "octet-stream")
        with open("./email/ji.mp3", "rb") as annex_file:
            self.txtAnnex.set_payload(annex_file.read())
        # MIMEBase already wrote the Content-Type header; assigning it again
        # would append a second, duplicate header, so only the disposition
        # is added.
        self.txtAnnex.add_header(
            'Content-Disposition', 'attachment', filename='ji.mp3')
        encoders.encode_base64(self.txtAnnex)

        # Recipients: first column of the spreadsheet, read by position and
        # with blank cells skipped.
        df = pd.read_excel(f'./qq/{self.gc}.xlsx')
        for receiver in df.iloc[:, 0].dropna():
            self.send(receiver)
# Base name of the recipient spreadsheet under ./qq used for a standalone run
gc0 = 'email'

if __name__ == '__main__':
    mailer = fa(gc0)
    mailer.main()
下载 #
百度云盘 提取码:6666
总结 #
这次这个脚本主要是能批量发送,可以运用到很多场景,比如举办个小型活动时给群成员发通知,就可以大大节省时间。我初学python,这次的代码主要是基于大佬们的代码修改而来。最后再说明一下:代码仅供学习,不要用于违法的事。