博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Python爬虫学习笔记之点触验证码的识别
阅读量:6710 次
发布时间:2019-06-25

本文共 6821 字,大约阅读时间需要 22 分钟。

代码:

Chaojiying.py:

 

#!/usr/bin/env python
# coding:utf-8
"""Minimal client for the Chaojiying captcha-recognition HTTP API."""

import requests
from hashlib import md5


class Chaojiying(object):
    """Thin wrapper around the Chaojiying upload and error-report endpoints.

    The constructor stores the credentials (the password is kept only as an
    MD5 hex digest) and prepares the form fields and headers that are sent
    with every request.
    """

    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=30):
        """
        :param username: Chaojiying account name.
        :param password: Plain-text account password; hashed with MD5 here.
        :param soft_id: Software ID sent as the ``softid`` form field.
        :param timeout: Seconds to wait for each HTTP request before
            ``requests`` raises a timeout error. Without a timeout a stalled
            server would block the caller indefinitely.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def post_pic(self, im, codetype):
        """Upload a captcha image for recognition.

        :param im: Image content as bytes.
        :param codetype: Captcha type code; see
            http://www.chaojiying.com/price.html
        :return: The decoded JSON response.
        """
        # base_params is applied last so the account fields always win,
        # matching the original update() order.
        params = dict({'codetype': codetype}, **self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(self.UPLOAD_URL, data=params, files=files,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

    def report_error(self, im_id):
        """Report a wrongly recognised captcha.

        :param im_id: Image ID of the captcha being reported.
        :return: The decoded JSON response.
        """
        params = dict({'id': im_id}, **self.base_params)
        r = requests.post(self.REPORT_URL, data=params,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

 

 

test.py:

import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from Chaojiying import Chaojiying

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file that is not published.
EMAIL = '1549687918@qq.com'
PASSWORD = 'zhaoxueche110'

CHAOJIYING_USERNAME = 'Azure00'
CHAOJIYING_PASSWORD = 'zhaoxueche110'
# NOTE(review): the leading spaces are kept exactly as published; they look
# like a copy/paste artefact - confirm the service accepts this value.
CHAOJIYING_SOFT_ID = '    897082'
CHAOJIYING_KIND = '9102'


class CrackTouClick():
    """Drive Chrome through the TouClick click-captcha on the login page.

    The captcha is cropped from a page screenshot, sent to Chaojiying for
    recognition, and the returned coordinates are clicked in the browser.
    """

    def __init__(self):
        self.url = 'http://admin.touclick.com/login.html'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def __del__(self):
        # __init__ may have failed before self.browser was assigned.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            browser.close()

    def open(self):
        """
        Load the login page and type in the e-mail and password.
        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    def get_touchclick_button(self):
        """
        Wait for the initial "start verification" button to be clickable.
        :return: the button element
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'touclick-hod-wrap')))
        return button

    def get_touch_element(self):
        """
        Wait for the captcha image container to be present.
        :return: the captcha element
        """
        element = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'touclick-pub-content')))
        return element

    def get_position(self):
        """
        Compute the captcha's bounding box in page coordinates.
        :return: tuple (top, bottom, left, right)
        """
        element = self.get_touch_element()
        # Pause before reading the geometry, as the original code does.
        time.sleep(2)
        location = element.location
        size = element.size
        top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the page.
        :return: the screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        return screenshot

    def get_touch_click_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it to disk.
        :param name: file name the cropped image is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def get_points(self, captcha_result):
        """
        Parse the recognition result into click coordinates.
        :param captcha_result: response dict whose 'pic_str' value looks
            like 'x1,y1|x2,y2'
        :return: list of [x, y] pairs; empty when 'pic_str' is missing or
            empty (recognition failed)
        """
        pic_str = captcha_result.get('pic_str') or ''
        locations = [[int(number) for number in group.split(',')]
                     for group in pic_str.split('|') if group]
        return locations

    def touch_click_words(self, locations):
        """
        Click each recognised position on the captcha image.
        :param locations: list of [x, y] offsets
        :return: None
        """
        # NOTE(review): move_to_element_with_offset measures from the
        # element's top-left corner in Selenium 3 but from its centre in
        # Selenium 4 - confirm the installed version.
        for location in locations:
            print(location)
            # The original called the undefined get_touclick_element();
            # the method defined on this class is get_touch_element().
            ActionChains(self.browser).move_to_element_with_offset(
                self.get_touch_element(), location[0], location[1]).click().perform()
            time.sleep(1)

    def touch_click_verify(self):
        """
        Click the captcha's submit button.
        :return: None
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'touclick-pub-submit')))
        button.click()

    def login(self):
        """
        Submit the login form.
        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, '_submit')))
        submit.click()
        time.sleep(10)
        print("登陆成功")

    def crack(self, max_attempts=3):
        """
        Entry point: solve the captcha, then log in.
        :param max_attempts: how many times to reload the page and retry
            before giving up
        :return: None
        :raises TimeoutException: if no attempt is confirmed as successful
        """
        for _ in range(max_attempts):
            # Load the page and fill in the form; the original never
            # called open(), so the browser stayed on a blank page.
            self.open()
            # Click the button that reveals the captcha.
            button = self.get_touchclick_button()
            button.click()
            # Grab the captcha image as PNG bytes.
            image = self.get_touch_click_image()
            bytes_array = BytesIO()
            image.save(bytes_array, format='PNG')
            # Send it for recognition.
            result = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
            print(result)
            locations = self.get_points(result)
            if not locations:
                # Nothing was recognised; start over with a fresh captcha.
                continue
            self.touch_click_words(locations)
            self.touch_click_verify()
            # wait.until raises TimeoutException instead of returning a
            # falsy value, so a failed check must be caught to retry.
            # NOTE(review): the selector was 'touchlick_hod_note'; changed to
            # match the 'touclick-' naming of the other selectors - confirm
            # against the page.
            try:
                success = self.wait.until(EC.text_to_be_present_in_element(
                    (By.CLASS_NAME, 'touclick-hod-note'), '验证成功'))
            except TimeoutException:
                success = False
            print(success)
            if success:
                self.login()
                return
        raise TimeoutException(
            'captcha verification did not succeed after %d attempts' % max_attempts)


if __name__ == '__main__':
    crack = CrackTouClick()
    crack.crack()

结果等PIL与python 3.7对应版本出来再发哦!

 

转载于:https://www.cnblogs.com/Trojan00/p/9504115.html

你可能感兴趣的文章
读取日志文件,搜索关键字,打印关键字前5行。yield、deque实例
查看>>
(转载) ExtJs大比拼JQuery:Dom文档操作
查看>>
使Android开发方便快捷的8个好工具
查看>>
递归与非递归遍历
查看>>
Nagios图像绘制插件PNP4Nagios部署和测试
查看>>
在SqlServer2008R2中,在一张表上加上insert、update、delete触发器(带游标)
查看>>
常用模块--- 正则模块 正则表达式 re 模块
查看>>
图解aclocal、autoconf、automake、autoheader、configure
查看>>
chapter 17
查看>>
C/C++ cast
查看>>
jfinal的controller默认访问的方法是什么
查看>>
Punycode
查看>>
HTML LIST 输入框自动查询追加框,自动过滤 HTML5
查看>>
file_get_contents调用接口出现的错误
查看>>
SQL Server 2008 调试存储过程(调用用户定义表类型)
查看>>
文件隐藏在一张图片里
查看>>
学c++需要先学c语言吗?
查看>>
ubuntu apt 安装 mpv
查看>>
内部类
查看>>
UNIX网络编程——Socket通信原理和实践
查看>>