Skip to content

Commit daa57c5

Browse files
author
cuizhongyi
committed
'初始化,添加几个脚本程序'
0 parents  commit daa57c5

4 files changed

Lines changed: 388 additions & 0 deletions

File tree

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## 1、12306抢票脚本---qiangpiao.py
2+
3+
## 2、段友之家贴吧数据爬取---nhdz.py
4+
5+
## 3、百思不得姐网站图片数据爬取---bsbdj.py

bsbdj.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import requests, bs4, os
2+
3+
# Crawl budejie.com detail pages one after another, saving the first image of
# each page into ./bsbdj and following the "next" button until it runs out.
print('百思不得姐……')
url = 'http://www.budejie.com/detail-27974418.html'
os.makedirs('bsbdj', exist_ok=True)
# The request headers never change between pages, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
}
while True:
    # Download the page.
    print('Downloading page %s...' % url)
    result = requests.get(url, headers=headers)
    soup = bs4.BeautifulSoup(result.text, "html.parser")
    # Locate the image; an <img> without a src attribute is treated like a
    # missing image instead of being handed to requests.get() as None.
    comicElem = soup.select('.j-r-list-c-img img')
    comicUrl = comicElem[0].get('src') if comicElem else None
    if not comicUrl:
        print('Could not find comic image')
        break
    # Download the image and stream it to disk; the with-block closes the file
    # even when a chunk fails to download or write.
    print('Downloading image %s...' % (comicUrl))
    res = requests.get(comicUrl)
    res.raise_for_status()
    with open(os.path.join('bsbdj', os.path.basename(comicUrl)), 'wb') as imageFile:
        for chunk in res.iter_content(100000):
            imageFile.write(chunk)
    # Follow the "next" button. A page without one (or without an href on it)
    # ends the crawl cleanly instead of raising IndexError/TypeError.
    nextLink = soup.select('.c-next-btn-content .c-next-btn')
    nextHref = nextLink[0].get('href') if nextLink else None
    if not nextHref:
        print('Could not find next page link')
        break
    url = 'http://www.budejie.com' + nextHref
# Crawl finished.
print('Done...')

nhdz.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
爬取百度贴吧,段友之家的图片和视频
6+
author: cuizy
7+
time:2018-05-19
8+
"""
9+
10+
import requests
11+
import bs4
12+
import os
13+
14+
15+
def write_file(file_url, file_type):
    """Download one resource and store it in a per-type folder below ``nhdz``.

    Args:
        file_url: URL of the resource to download. Anything from the first
            ``?`` onwards in its last path segment is dropped from the
            saved file name.
        file_type: ``1`` saves into ``nhdz/jpg``, ``2`` into ``nhdz/mp4``;
            any other value saves into ``nhdz/other``.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` when the server
            answers with an error status.
    """
    res = requests.get(file_url)
    res.raise_for_status()
    # Pick the folder by resource type. os.path.join keeps this portable; a
    # literal 'nhdz\\jpg' is a single oddly named directory outside Windows.
    if file_type == 1:
        sub_folder = 'jpg'
    elif file_type == 2:
        sub_folder = 'mp4'
    else:
        sub_folder = 'other'
    file_folder = os.path.join('nhdz', sub_folder)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(file_folder, exist_ok=True)
    # Strip the query string from the file name; if that would leave an empty
    # name (the segment starts with '?'), keep the segment untouched.
    file_name = os.path.basename(file_url)
    file_name = file_name.split('?', 1)[0] or file_name
    file_path = os.path.join(file_folder, file_name)
    print('正在写入资源文件:', file_path)
    # The with-block closes the file even if a chunk fails to arrive.
    with open(file_path, 'wb') as resource_file:
        for chunk in res.iter_content(100000):
            resource_file.write(chunk)
    print('写入完成!')
42+
43+
44+
def download_file(web_url):
    """Crawl a Tieba thread-list page and every following page for media.

    For each page, every ``.vpic_wrap img`` element's ``bpic`` attribute is
    saved as an image and every ``.threadlist_video a`` element's
    ``data-video`` attribute is saved as a video (both through
    ``write_file``). The crawl then moves on to the ``#frs_list_pager .next``
    link and stops when a page has none.

    Pages are walked in a loop rather than by recursion, so a long forum can
    neither exhaust the recursion limit nor keep every parsed page in memory.

    Args:
        web_url: URL of the first list page to crawl.
    """
    while web_url:
        # Download and parse the page.
        print('正在下载网页: %s...' % web_url)
        result = requests.get(web_url)
        soup = bs4.BeautifulSoup(result.text, "html.parser")
        # Image resources.
        img_list = soup.select('.vpic_wrap img')
        if not img_list:
            print('未发现图片资源!')
        for img_info in img_list:
            file_url = img_info.get('bpic')
            # Elements lacking the attribute yield None; skip them instead of
            # passing None on as a URL.
            if file_url:
                write_file(file_url, 1)
        # Video resources.
        video_list = soup.select('.threadlist_video a')
        if not video_list:
            print('未发现视频资源!')
        for video_info in video_list:
            file_url = video_info.get('data-video')
            if file_url:
                write_file(file_url, 2)
        print('下载资源结束:', web_url)
        # Move on to the next page, if there is one.
        next_link = soup.select('#frs_list_pager .next')
        next_href = next_link[0].get('href') if next_link else None
        if not next_href:
            print('下载资料结束!')
            return
        # The scheme is prepended exactly as before; presumably the pager
        # emits protocol-relative links ("//tieba.baidu.com/...") — confirm.
        web_url = 'https:' + next_href
75+
76+
77+
# Script entry point: start crawling at the first list page of the forum.
if __name__ == '__main__':
    start_url = 'https://tieba.baidu.com/f?ie=utf-8&kw=段友之家'
    download_file(web_url=start_url)

qiangpiao.py

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
通过splinter刷12306火车票
6+
可以自动填充账号密码,同时,在登录时,也可以修改账号密码
7+
然后手动识别验证码,并登陆,接下来的事情,交由脚本来做了,静静的等待抢票结果就好(刷票过程中,浏览器不可关闭)
8+
author: cuizy
9+
time: 2018-05-30
10+
"""
11+
12+
import re
13+
from splinter.browser import Browser
14+
from time import sleep
15+
import sys
16+
import httplib2
17+
from urllib import parse
18+
import smtplib
19+
from email.mime.text import MIMEText
20+
21+
22+
class BrushTicket(object):
    """Book a 12306 train ticket by driving a real browser through splinter.

    The user name and password are pre-filled on the login page and the user
    solves the captcha by hand. After that ``start_brush`` keeps clicking the
    query button until the wanted seat type shows availability, fills in the
    order form and sends a mail and an SMS notification.
    """

    # Seat-type label -> (index of that seat's <td> in a result row,
    #                     value of the matching <option> on the order form).
    # An option value of 0 means "leave the order form's seat select alone".
    _SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }
    # Unknown labels fall back to the same pair as '硬卧'.
    _DEFAULT_SEAT_TYPE = (7, 3)

    def __init__(self, user_name, password, passengers, from_time, from_station, to_station, number, seat_type, receiver_mobile, receiver_email):
        """Store the booking request and the fixed site/driver settings.

        Args:
            user_name: 12306 account name, pre-filled on the login page.
            password: 12306 password, pre-filled on the login page.
            passengers: Iterable of passenger names exactly as they appear on
                the order page.
            from_time: Travel date, stored in the ``_jc_save_fromDate`` cookie.
            from_station: Value for the ``_jc_save_fromStation`` cookie.
            to_station: Value for the ``_jc_save_toStation`` cookie.
            number: Train number; normalised with ``str.capitalize``.
            seat_type: Seat-type label (a key of ``_SEAT_TYPES``); unknown
                labels fall back to ``_DEFAULT_SEAT_TYPE``.
            receiver_mobile: Mobile number notified by SMS on success.
            receiver_email: Mail address notified on success.
        """
        # 12306 account credentials.
        self.user_name = user_name
        self.password = password
        # Passenger names.
        self.passengers = passengers
        # Departure and arrival stations.
        self.from_station = from_station
        self.to_station = to_station
        # Travel date.
        self.from_time = from_time
        # Train number.
        self.number = number.capitalize()
        # Result-table column and order-form option for the seat type.
        self.seat_type_index, self.seat_type_value = self._SEAT_TYPES.get(seat_type, self._DEFAULT_SEAT_TYPE)
        # Notification targets.
        self.receiver_mobile = receiver_mobile
        self.receiver_email = receiver_email
        # Main page URLs.
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'
        self.init_my_url = 'https://kyfw.12306.cn/otn/index/initMy12306'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'
        # Browser driver settings; driver downloads:
        # https://sites.google.com/a/chromium.org/chromedriver/downloads
        self.driver_name = 'chrome'
        # Raw string: same value as before, without relying on invalid escape
        # sequences such as "\c" or "\A" that newer Pythons warn about.
        # NOTE(review): machine-specific path — adjust for your installation.
        self.executable_path = r'C:\Users\cuizy\AppData\Local\Programs\Python\Python36\Scripts\chromedriver.exe'

    def do_login(self):
        """Open the login page, pre-fill the credentials and wait for login.

        The captcha is solved by the user; this method polls once per second
        until the browser URL equals ``init_my_url``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        self.driver.fill('loginUserDTO.user_name', self.user_name)
        self.driver.fill('userDTO.password', self.password)
        print('请输入验证码……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def start_brush(self):
        """Log in, then query repeatedly and try to book until leaving the ticket page.

        Errors raised during a single attempt are printed and the polling
        continues; ``sys.exit(1)`` is called when the train does not sell the
        requested seat type at all.
        """
        self.driver = Browser(driver_name=self.driver_name, executable_path=self.executable_path)
        # Browser window size.
        self.driver.driver.set_window_size(900, 700)
        self.do_login()
        self.driver.visit(self.ticket_url)
        try:
            print('开始刷票……')
            # Pre-load the query form through the site's cookies.
            self.driver.cookies.add({"_jc_save_fromStation": self.from_station})
            self.driver.cookies.add({"_jc_save_toStation": self.to_station})
            self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
            self.driver.reload()
            count = 0
            # Keep polling for as long as the browser stays on the query page.
            while self.driver.url.split('?')[0] == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    self._try_book()
                except Exception as error_info:
                    # Best effort: report the failure and poll again.
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def _try_book(self):
        """Inspect the current query result once and book the ticket if possible."""
        car_no_location = self.driver.find_by_id("queryLeftTable")[0].find_by_text(self.number)[1]
        current_tr = car_no_location.find_by_xpath("./../../../../..")
        remaining = current_tr.find_by_tag('td')[self.seat_type_index].text
        if remaining == '--':
            print('无此座位类型出售,已结束当前刷票,请重新开启!')
            sys.exit(1)
        if remaining == '无':
            print('无票,继续尝试……')
            return
        # Tickets available: try to book.
        print('刷到票了(余票数:' + str(remaining) + '),开始尝试预订……')
        current_tr.find_by_css('td.no-br>a')[0].click()
        sleep(1)
        self._select_passengers()
        print('正在提交订单……')
        self.driver.find_by_id('submitOrder_id').click()
        sleep(2)
        # A non-empty result text means the submission was rejected: close the
        # dialog, step back and let the caller poll again.
        submit_false_info = self.driver.find_by_id('orderResultInfo_id')[0].text
        if submit_false_info != '':
            print(submit_false_info)
            self.driver.find_by_id('qr_closeTranforDialog_id').click()
            sleep(0.2)
            self.driver.find_by_id('preStep_id').click()
            sleep(0.3)
            return
        print('正在确认订单……')
        self.driver.find_by_id('qr_submit_id').click()
        print('预订成功,请及时前往支付……')
        self._notify_success()

    def _select_passengers(self):
        """Tick every passenger on the order page and choose the seat type for each."""
        for key_value, p in enumerate(self.passengers, start=1):
            # Select the passenger.
            print('开始选择用户……')
            self.driver.find_by_text(p).last.click()
            # Select the seat type.
            print('开始选择席别……')
            if self.seat_type_value != 0:
                seat_select = self.driver.find_by_id("seatType_" + str(key_value))[0]
                # ".//" keeps the search inside this passenger's <select>; a
                # bare "//" searches the whole document and would always hit
                # the first passenger's option.
                seat_select.find_by_xpath(".//option[@value='" + str(self.seat_type_value) + "']")[0].click()
            sleep(0.5)
            # Names ending in ")" trigger an extra confirmation dialog
            # (presumably the student-ticket prompt — confirm).
            if p.endswith(')'):
                self.driver.find_by_id('dialog_xsertcj_ok').click()

    def _notify_success(self):
        """Send the mail and SMS notifications independently of each other."""
        notifications = (
            (self.send_mail, self.receiver_email, '恭喜您,抢到票了,请及时前往12306支付订单!'),
            (self.send_sms, self.receiver_mobile, '您的验证码是:8888。请不要把验证码泄露给其他人。'),
        )
        for notify, target, text in notifications:
            try:
                notify(target, text)
            except Exception as error_info:
                # A failing channel must not prevent the other one from firing.
                print(error_info)

    def send_sms(self, mobile, sms_info):
        """Send an SMS through the ihuyi test gateway.

        Args:
            mobile: Destination mobile number.
            sms_info: Message text.

        Returns:
            The raw response body returned by the gateway.
        """
        import os

        host = "106.ihuyi.com"
        sms_send_uri = "/webservice/sms.php?method=Submit"
        # NOTE(review): these credentials are committed to source control.
        # They remain only as fallbacks; set the environment variables and
        # rotate/remove the literals.
        account = os.environ.get('IHUYI_SMS_ACCOUNT', "C59782899")
        pass_word = os.environ.get('IHUYI_SMS_PASSWORD', "19d4d9c0796532c7328e8b82e2812655")
        params = parse.urlencode(
            {'account': account, 'password': pass_word, 'content': sms_info, 'mobile': mobile, 'format': 'json'}
        )
        headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"}
        conn = httplib2.HTTPConnectionWithTimeout(host, port=80, timeout=30)
        try:
            conn.request("POST", sms_send_uri, params, headers)
            return conn.getresponse().read()
        finally:
            # Close the connection even when the request or the read fails.
            conn.close()

    def send_mail(self, receiver_address, content):
        """Send the HTML notification mail through the 163.com SMTP server.

        Args:
            receiver_address: Address the notification is sent to.
            content: Text placed inside the HTML body.
        """
        import os

        # Mail server settings.
        host = 'smtp.163.com'
        port = 25
        # NOTE(review): sender address and client authorisation code are
        # committed to source control. They remain only as fallbacks; set the
        # environment variables and rotate/remove the literals.
        sender = os.environ.get('QIANGPIAO_SMTP_SENDER', 'gxcuizy@163.com')
        pwd = os.environ.get('QIANGPIAO_SMTP_PASSWORD', 'CUIzy9118')
        # Message.
        receiver = receiver_address
        body = '<h2>温馨提醒:</h2><p>' + content + '</p>'
        msg = MIMEText(body, 'html', _charset="utf-8")
        msg['subject'] = '抢票成功通知!'
        msg['from'] = sender
        msg['to'] = receiver
        # The with-block ends the SMTP session and closes the socket, which
        # previously stayed open after sending.
        with smtplib.SMTP(host, port) as s:
            s.login(sender, pwd)
            s.sendmail(sender, receiver, msg.as_string())
207+
208+
209+
def _prompt(first_message, retry_message, is_valid=bool):
    """Ask on stdin until ``is_valid(answer)`` is truthy, then return the answer."""
    answer = input(first_message)
    while not is_valid(answer):
        answer = input(retry_message)
    return answer


def _parse_passengers(raw):
    """Split a comma-separated name list, dropping blanks and surrounding spaces."""
    return [name.strip() for name in raw.split(',') if name.strip()]


def _is_valid_date(text):
    """Return True for a real calendar date written exactly as YYYY-MM-DD."""
    from datetime import datetime

    # The pattern enforces the zero-padded layout; strptime then rejects
    # impossible dates such as 2018-13-45.
    if not re.fullmatch(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', text):
        return False
    try:
        datetime.strptime(text, '%Y-%m-%d')
    except ValueError:
        return False
    return True


def _is_valid_mobile(text):
    """Return True for an 11-digit number starting with 1."""
    return re.fullmatch(r'1[0-9]{10}', text) is not None


if __name__ == '__main__':
    # 12306 user name.
    user_name = _prompt('请输入12306用户名:', '12306用户名不能为空,请重新输入:')
    # 12306 login password.
    password = _prompt('请输入12306登陆密码:', '12306登陆密码不能为空,请重新输入:')
    # Passenger names: between one and four non-empty names.
    passengers = _parse_passengers(
        input('请输入乘车人姓名,多人用英文逗号“,”连接,(例如单人“张三”或者多人“张三,李四”):')
    )
    while not 1 <= len(passengers) <= 4:
        print('乘车人最少1位,最多4位!')
        passengers = _parse_passengers(
            input('请重新输入乘车人姓名,多人用英文逗号“,”连接,(例如单人“张三”或者多人“张三,李四”):')
        )
    # Travel date.
    from_time = _prompt(
        '请输入乘车日期(例如“2018-08-08”):',
        '乘车日期不能为空或者时间格式不正确,请重新输入:',
        _is_valid_date,
    )
    # Station cookie values keyed by pinyin initials: the escaped station
    # name, "%2C" (a comma) and the station code.
    city_list = {
        'bj': '%u5317%u4EAC%2CBJP',  # Beijing
        'tj': '%u5929%u6D25%2CTJP',  # Tianjin (this value used to sit under 'hd')
        'hd': '%u90AF%u90F8%2CHDP',  # Handan
        'nn': '%u5357%u5B81%2CNNZ',  # Nanning
        'wh': '%u6B66%u6C49%2CWHN',  # Wuhan
        'cs': '%u957F%u6C99%2CCSQ',  # Changsha
        'ty': '%u592A%u539F%2CTYV',  # Taiyuan
        'yc': '%u8FD0%u57CE%2CYNV',  # Yuncheng
        'gzn': '%u5E7F%u5DDE%u5357%2CIZQ',  # Guangzhou South
        'wzn': '%u68A7%u5DDE%u5357%2CWBZ',  # Wuzhou South
    }
    # Departure station.
    from_input = _prompt(
        '请输入出发站,只需要输入首字母就行(例如北京“bj”):',
        '出发站不能为空或不支持当前出发站(如有需要,请联系管理员!),请重新输入:',
        city_list.__contains__,
    )
    from_station = city_list[from_input]
    # Arrival station.
    to_input = _prompt(
        '请输入终点站,只需要输入首字母就行(例如北京“bj”):',
        '终点站不能为空或不支持当前终点站(如有需要,请联系管理员!),请重新输入:',
        city_list.__contains__,
    )
    to_station = city_list[to_input]
    # Train number.
    number = _prompt('请输入车次号(例如“G110”):', '车次号不能为空,请重新输入:')
    # Seat type.
    seat_type = _prompt('请输入座位类型(例如“软卧”):', '座位类型不能为空,请重新输入:')
    # Mobile number notified once a ticket has been booked.
    receiver_mobile = _prompt(
        '请预留一个手机号码,方便抢到票后进行通知(例如:18888888888):',
        '预留手机号码不能为空或者格式不正确,请重新输入:',
        _is_valid_mobile,
    )
    # Mail address notified once a ticket has been booked.
    receiver_email = _prompt(
        '请预留一个邮箱,方便抢到票后进行通知(例如:test@163.com):',
        '预留邮箱不能为空,请重新输入:',
    )
    # Start polling for the ticket.
    ticket = BrushTicket(user_name, password, passengers, from_time, from_station, to_station, number, seat_type, receiver_mobile, receiver_email)
    ticket.start_brush()

0 commit comments

Comments
 (0)