Python爬虫自动化抓取违章信息

#coding:utf-8
#import scrapy
import datetime
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
# Shared Firefox WebDriver session; it is created as soon as this module runs
# and is used by every AH122 method below.
driver = webdriver.Firefox()
# Plate numbers with unprocessed violations: filled by AH122.fetch_XnyNos and
# read by AH122.fetch_XnyData.
xnyNos = []
class AH122(object):
    """Semi-automated scraper for vehicle violation records on ah.122.gov.cn.

    The workflow is: ``login`` (the operator signs in by hand), then
    ``fetch_XnyNos`` (collect plates whose status is "违法未处理" into the
    module-level ``xnyNos`` list), then ``fetch_XnyData`` (look up each
    collected plate and dump the result rows to ``新能源.xls``).

    All methods drive the module-level ``driver`` WebDriver instance.
    """

    # Value chosen in the "hpzl" drop-down; the script announces this choice
    # as "选择小型新能源".
    _NEW_ENERGY_PLATE_TYPE = '52'
    # Status text (first space-separated field of a row) marking a vehicle
    # that still has unprocessed violations.
    _PENDING_STATUS = "违法未处理"
    # At most this many leading cells of a result row are exported.
    _MAX_COLUMNS = 7

    @staticmethod
    def _pending_plate(row_text):
        """Return the plate from a vehicle-list row, or None if not pending.

        ``row_text`` is the visible text of one table row. The row counts as
        pending when its first space-separated field equals the pending
        status; the plate is then the second field. Rows that lack a second
        field (or have an empty one) yield None instead of raising.
        """
        fields = row_text.split(" ")
        if fields[0] != AH122._PENDING_STATUS:
            return None
        if len(fields) < 2 or not fields[1]:
            return None
        return fields[1]

    @staticmethod
    def _format_row(cells):
        """Format cell texts as one output line.

        Each of the first ``_MAX_COLUMNS`` cells is followed by a tab and the
        line ends with a newline. Rows with fewer cells are written as-is
        rather than raising IndexError.
        """
        kept = cells[:AH122._MAX_COLUMNS]
        return "".join(text + '\t' for text in kept) + '\n'

    def _select_new_energy_type(self):
        """Pick the new-energy entry in the "hpzl" drop-down."""
        dropdown = driver.find_element(By.ID, "hpzl")
        Select(dropdown).select_by_value(self._NEW_ENERGY_PLATE_TYPE)

    def login(self, num, pwd):
        """Open the login page and wait for the operator to sign in by hand.

        ``num`` and ``pwd`` are accepted for interface compatibility but are
        not used: the credentials are typed into the browser manually during
        the 60-second pause.
        """
        driver.get('https://ah.122.gov.cn/m/login?t=2')
        print("打开登录页面,请输入密码并登录")
        time.sleep(60)

    def fetch_XnyNos(self):
        """Collect plates with unprocessed violations into ``xnyNos``.

        Navigates to the vehicle-management view, applies the filters the
        script announces on stdout, runs the search and then walks every
        result page. Plates are appended to the module-level ``xnyNos`` list
        without duplicates. Fixed sleeps give the page time to load.
        """
        driver.get("https://ah.122.gov.cn/views/memfyy/")
        print("机动车管理")
        time.sleep(4)

        switch = driver.find_element(By.XPATH, "//select[@class='switch']")
        Select(switch).select_by_value('3')
        print("选择非营运")
        time.sleep(4)

        driver.find_element(By.XPATH, "//li[@id='sidebar_menu_7']/a").click()
        time.sleep(4)

        print("选择小型新能源")
        self._select_new_energy_type()
        time.sleep(4)

        print("点击查询")
        driver.find_element(By.ID, "btnSearch").click()
        time.sleep(4)

        while True:
            rows = driver.find_elements(
                By.XPATH, "//table[@id='my-moto-list']//tr")
            for row in rows:
                plate = self._pending_plate(row.text)
                if plate is not None and plate not in xnyNos:
                    xnyNos.append(plate)
            print(xnyNos)

            # find_elements returns an empty list instead of raising when the
            # pager link is absent, so a result without a pager ends cleanly.
            next_links = driver.find_elements(By.LINK_TEXT, "下一页")
            if not next_links:
                break
            next_p = next_links[0]
            pageNo = next_p.get_attribute("data-page")
            print("============下一页================")
            print(pageNo)

            # The loop treats data-page == '0' on the "next" link as the
            # signal that the last page has been reached.
            if pageNo == '0':
                break
            next_p.click()
            print("============进入下一页================")
            time.sleep(10)

    def fetch_XnyData(self):
        """Look up every plate in ``xnyNos`` and export the violation rows.

        Opens the violation page, pauses 30 seconds so the operator can pick
        the start date by hand, then queries each plate and writes up to
        seven cells per result row to ``新能源.xls``. Despite the extension
        the file is tab-separated text encoded as GBK.
        """
        driver.get('https://ah.122.gov.cn/views/memfyy/violation.html')
        print("**********************************")
        print("****** 请手动选定开始时间 *******")
        print("**********************************")
        time.sleep(30)
        self._select_new_energy_type()

        # The context manager closes the file even if a Selenium call raises
        # part-way through the run.
        with open('新能源.xls', 'w', encoding='gbk') as output:
            for veh in xnyNos:
                plate_input = driver.find_element(By.ID, "hphm")
                plate_input.clear()
                # The first character of the plate is dropped before typing;
                # presumably the form supplies that prefix itself - TODO
                # confirm against the page.
                plate_input.send_keys(veh[1:])
                driver.find_element(By.CSS_SELECTOR, ".btn-primary").click()
                time.sleep(3)

                rows = driver.find_elements(
                    By.XPATH, "//table[@id='my-msg-list']//tr")
                for row in rows:
                    # Skip the header row.
                    if "车牌号码" in row.text:
                        continue
                    cells = row.find_elements(By.TAG_NAME, "td")
                    # Only rows with more than four cells are exported;
                    # shorter rows are skipped.
                    if len(cells) <= 4:
                        continue
                    output.write(
                        self._format_row([cell.text for cell in cells]))

# Usage banner shown to the operator before the browser automation starts.
print("======================== 使用说明 ==========================")
print("==== 打开登录页面后,手动输入用户名、密码、验证码,点击登录 ======")
print("===== 自动抓取违章车辆&违章数据 =========")
print("===== 自动抓取新能源违章车辆&违章数据 =========")
print("===========================================================")
print("===========================================================")

# Run the three stages in order: manual login, plate collection, data export.
ah122 = AH122()
# The arguments are placeholders; the operator types the real credentials
# into the browser during the login pause.
ah122.login('用户名', '密码')
ah122.fetch_XnyNos()
ah122.fetch_XnyData()

  1. Hello! I could have sworn I’ve been to this blog before but after browsing through some of the posts I realized it’s new to me. Anyway, I’m definitely happy I found it and I’ll be bookmarking and checking back frequently!

  2. Hello! I could have sworn I’ve been to this blog before but after browsing through some of the posts I realized it’s new to me. Anyway, I’m definitely happy I found it and I’ll be bookmarking and checking back frequently!

  3. Hello! I could have sworn I’ve been to this blog before but after browsing through some of the posts I realized it’s new to me. Anyway, I’m definitely happy I found it and I’ll be bookmarking and checking back frequently!

  4. Hello! I could have sworn I’ve been to this blog before but after browsing through some of the posts I realized it’s new to me. Anyway, I’m definitely happy I found it and I’ll be bookmarking and checking back frequently!

  5. Hello! I could have sworn I’ve been to this blog before but after browsing through some of the posts I realized it’s new to me. Anyway, I’m definitely happy I found it and I’ll be bookmarking and checking back frequently!

Leave a Reply

电子邮件地址不会被公开。 必填项已用*标注