8.2 极验滑动验证码的识别 · python3爬虫笔记

### 1.目标通过程序识别并通过极验验证码的验证，包括 * 分析识别思路 * 识别缺口位置 * 生成滑动拖动路径 * 模拟实现滑块拼合通过验证等步骤 ### 2.安装 [selenium安装](/1kai-fa-huan-jing-pei-zhi/12-qing-qiu-ku-de-an-zhuang/122-seleniumde-an-zhuang.md) [chromeDriver](/1kai-fa-huan-jing-pei-zhi/12-qing-qiu-ku-de-an-zhuang/123-chromedriverde-an-zhuang.md) ### 3.了解极验验证码极验验证码官网:[http://www.geetest.com/](http://www.geetest.com/) 它是一个专注于提供验证安全的系统，主要验证方式是拖动滑动拼合图像。若图像完全拼合，则验证成功，即表单成功提交，否者重新验证 ![](/assets/8.2.1.png) ### 4.极验验证码特点 * 三角防护之防模拟 * 三角防护之防伪造 * 三角防护之防爆力 * 点击一下，验证只需要0.4秒 * 全平台兼容，适用各种交互情景 * 面向未来，懂科技，更懂人性 ### 5.识别思路极验官方后台:[https://account.geetest.com/register](https://account.geetest.com/register) ![](/assets/8.2.2.png) 此按钮为智能验证按钮，即同一个会话，一段时间内第二次点击会直接通过验证，如果智能识别不通过，则会弹出滑动验证窗口，需要拖动滑块拼合图形完成二步验证 ![](/assets/8.-2.3.png) 验证成功后，按钮会变成如下的所示状态 ![](/assets/8.2.4.png) 识别验证需要完成三步: * 模拟点击验证按钮 * 识别滑动缺口的位置 * 模拟拖动滑动第一步:可以直接使用selenium模拟点击按钮第二步:识别缺口的位置，需要用到图像的相关处理方法，需要用到图像的相关处理方法第三步:拖动图片 ### 6.初始化目标网址:[https://account.geetest.com/login](https://account.geetest.com/login) 初始化配置 ``` from selenium.webdriver.support.wait import WebDriverWait import config from selenium import webdriver class CrackGreetest(): def __init__(self): self.url = "https://account.geetest.com/login" self.browser = webdriver.Chrome() self.wait = WebDriverWait(self.browser,20) self.email = config.EMAIL self.passwrod = config.PASSWORD ``` ### 7.模拟点击模拟点击初始的验证按钮 ``` def get_geetest_button(self): ''' 获取初始验证码按钮 :return: 按钮对象 ''' button = self.wait.until(expected_conditions.element_to_be_clickable((By.CLASS_NAME,'geetest_radar_tip'))) return button ``` ### 8.识别缺口 ``` def get_position(self,pos): """ 获取验证码位置 :return: 验证码位置元组 """ img = self.wait.until(expected_conditions.presence_of_element_located((By.CLASS_NAME,pos))) time.sleep(2) location = img.location size = img.size top,bottom,left,right = location['y'],location['y']+size['height'],location['x'],location['x']+size['width'] return (top,bottom,left,right) def get_unFull_image(self,name): """ 获取未完整验证码图片 :param name: :return: 图片对象 """ top,bottom,left,right = self.get_position(pos="geetest_canvas_img") print('验证码位置',top,bottom,left,right) screenshot = self.get_screenshot() unfull_captcha = screenshot.crop((left,top,right,bottom)) unfull_captcha.save(name) return unfull_captcha def get_full_image(self,name): ''' 获取完整验证码图片 :param name: :return: ''' # 这里要执行JavaScript脚本才能拿到完整图片的截图 show_Full_img1 = "document.getElementsByClassName('geetest_canvas_fullbg')[0].style.display='block'" self.browser.execute_script(show_Full_img1) show_Full_img2 = "document.getElementsByClassName('geetest_canvas_fullbg')[0].style.opacity=1" self.browser.execute_script(show_Full_img2) # 等待完整图片加载 time.sleep(2) top, bottom, left, right = self.get_position(pos="geetest_canvas_fullbg") print('验证码位置', top, bottom, left, right) screenshot = self.get_screenshot() full_captcha = screenshot.crop((left, top, right, bottom)) full_captcha.save(name) return full_captcha ``` 比较两张图片 ``` def get_gap(self,image1,image2): """ 获取缺口偏移量 :param image1: 不带缺口的图片 :param image2: 带缺口的图片 :return: 像素是否相同 """ # 缺口在滑块右侧，设定遍历初始横坐标left为59 left = 60 # 像素对比阈值 threshold = 60 for i in range(left, image2.size[0]): for j in range(image2.size[1]): rgb1 = image1.load()[i, j] rgb2 = image2.load()[i, j] res1 = abs(rgb2[0] - rgb1[0]) res2 = abs(rgb2[1] - rgb1[1]) res3 = abs(rgb2[2] - rgb1[2]) if not (res1 < threshold and res2 < threshold and res3 < threshold): return i - 7 # 返回缺口偏移距离，这里需测试几次 ``` ### 9.模拟拖动 ``` def get_track(self,distanc): ''' x=v0*t+0.5*a*t*t v=v0+a*t 根据偏移量获取移动轨迹 :param distanc: 偏移量 :return: 移动轨迹 ''' # 移动轨迹 track = [] # 当前位移 current = 0 # 减速阈值 mid = distanc * 4 / 5 # 计算间隔 t = random.randint(2,3)/10 # 初速度 v = 0 while current < distanc: if current < mid: # 加速度为正2 a = 2 else: # 加速度为负3 a = -3 v0 = v # 当前速度v = v0+at v=v0+a*t # 移动距离x=v0*t+1/2*a*t*t move = v0*t+1/2*a*t*t # 当前位移 current+=move # 加入轨迹 track.append(round(move)) return track def move_to_grap(self,slider,track): ''' 拖动滑块到缺口处 :param slider: 滑块 :param track: 轨迹 :return: ''' # 调用ActionChains的click_and_hold()方法按住拖动底部滑块，遍历运动轨迹获取每小段位置距离，调用move_by_offset()方法移动此位移，最后调用release()方法松开鼠标 ActionChains(self.browser).click_and_hold(slider).perform() for x in track: ActionChains(self.browser).move_by_offset(xoffset=x,yoffset=0).perform() time.sleep(0.3) ActionChains(self.browser).release().perform() ``` ### 10.完整代码 ``` import random from io import BytesIO from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions from selenium.webdriver.support.wait import WebDriverWait import config,time from selenium import webdriver from PIL import Image class CrackGreetest(): def __init__(self): self.url = "https://account.geetest.com/login" self.browser = webdriver.Chrome() self.wait = WebDriverWait(self.browser,20) self.email = config.EMAIL self.password = config.PASSWORD def open(self): """ 打开网页输入用户名密码 :return: None """ self.browser.get(self.url) email = self.wait.until(expected_conditions.presence_of_element_located((By.ID,'email'))) password = self.wait.until(expected_conditions.presence_of_element_located((By.ID,'password'))) email.send_keys(self.email) password.send_keys(self.password) def get_geetest_button(self): ''' 获取初始验证码按钮 :return: 按钮对象 ''' button = self.wait.until(expected_conditions.element_to_be_clickable((By.CLASS_NAME,'geetest_radar_tip'))) return button def get_screenshot(self): """ 获取网页截图 :return: 截图对象 """ screenshot = self.browser.get_screenshot_as_png() screenshot = Image.open(BytesIO(screenshot)) return screenshot def get_position(self,pos): """ 获取验证码位置 :return: 验证码位置元组 """ img = self.wait.until(expected_conditions.presence_of_element_located((By.CLASS_NAME,pos))) time.sleep(2) location = img.location size = img.size top,bottom,left,right = location['y'],location['y']+size['height'],location['x'],location['x']+size['width'] return (top,bottom,left,right) def get_unFull_image(self,name): """ 获取未完整验证码图片 :param name: :return: 图片对象 """ top,bottom,left,right = self.get_position(pos="geetest_canvas_img") print('验证码位置',top,bottom,left,right) screenshot = self.get_screenshot() unfull_captcha = screenshot.crop((left,top,right,bottom)) unfull_captcha.save(name) return unfull_captcha def get_full_image(self,name): ''' 获取完整验证码图片 :param name: :return: ''' # 这里要执行JavaScript脚本才能拿到完整图片的截图 show_Full_img1 = "document.getElementsByClassName('geetest_canvas_fullbg')[0].style.display='block'" self.browser.execute_script(show_Full_img1) show_Full_img2 = "document.getElementsByClassName('geetest_canvas_fullbg')[0].style.opacity=1" self.browser.execute_script(show_Full_img2) # 等待完整图片加载 time.sleep(2) top, bottom, left, right = self.get_position(pos="geetest_canvas_fullbg") print('验证码位置', top, bottom, left, right) screenshot = self.get_screenshot() full_captcha = screenshot.crop((left, top, right, bottom)) full_captcha.save(name) return full_captcha def get_slider(self): """ 获取滑块 :return: 滑块对象 """ slider = self.wait.until(expected_conditions.element_to_be_clickable((By.CLASS_NAME,'geetest_slider_button'))) return slider # # 判断两张图片同一位置的像素是否相同。比较两张图的RGB的绝对值是否均小于定义的阈值threshold # # 如果绝对值均在阈值值内，则代表像素点相同，继续遍历，否者代表不相同的像素点，即缺口的位置 # def is_pixel_equal(self,image1,image2,x,y): # ''' # 判断两个像素是否相同 # :param image1: 图片1 # :param image2: 图片2 # :param x: 位置x # :param y: 位置y # :return: 像素是否相同 # ''' # # 取两个图片的像素点 # pixel1 = image1.load()[x,y] # pixel2 = image2.load()[x,y] # # 像素对比阈值 # threshold = 60 # if abs(pixel1[0]-pixel2[0]) < threshold and abs(pixel1[1] - pixel2[1]) < threshold and abs(pixel1[2] - pixel2[2]) < threshold: # return True # else: # return False def get_gap(self,image1,image2): """ 获取缺口偏移量 :param image1: 不带缺口的图片 :param image2: 带缺口的图片 :return: 像素是否相同 """ # 缺口在滑块右侧，设定遍历初始横坐标left为59 left = 60 # 像素对比阈值 threshold = 60 for i in range(left, image2.size[0]): for j in range(image2.size[1]): rgb1 = image1.load()[i, j] rgb2 = image2.load()[i, j] res1 = abs(rgb2[0] - rgb1[0]) res2 = abs(rgb2[1] - rgb1[1]) res3 = abs(rgb2[2] - rgb1[2]) if not (res1 < threshold and res2 < threshold and res3 < threshold): return i - 7 # 返回缺口偏移距离，这里需测试几次 def get_track(self,distanc): ''' x=v0*t+0.5*a*t*t v=v0+a*t 根据偏移量获取移动轨迹 :param distanc: 偏移量 :return: 移动轨迹 ''' # 移动轨迹 track = [] # 当前位移 current = 0 # 减速阈值 mid = distanc * 4 / 5 # 计算间隔 t = random.randint(2,3)/10 # 初速度 v = 0 while current < distanc: if current < mid: # 加速度为正2 a = 2 else: # 加速度为负3 a = -3 v0 = v # 当前速度v = v0+at v=v0+a*t # 移动距离x=v0*t+1/2*a*t*t move = v0*t+1/2*a*t*t # 当前位移 current+=move # 加入轨迹 track.append(round(move)) return track def move_to_grap(self,slider,track): ''' 拖动滑块到缺口处 :param slider: 滑块 :param track: 轨迹 :return: ''' # 调用ActionChains的click_and_hold()方法按住拖动底部滑块，遍历运动轨迹获取每小段位置距离，调用move_by_offset()方法移动此位移，最后调用release()方法松开鼠标 ActionChains(self.browser).click_and_hold(slider).perform() for x in track: ActionChains(self.browser).move_by_offset(xoffset=x,yoffset=0).perform() time.sleep(0.3) ActionChains(self.browser).release().perform() def login(self): """ 点击登陆 :return: """ submit = self.wait.until(expected_conditions.element_to_be_clickable((By.CLASS_NAME, 'login-btn'))) submit.click() time.sleep(10) print('登录成功') def crack(self): try: # 输入用户名 self.open() # 模拟点击按钮 button = self.get_geetest_button() button.click() # 获取验证码图片 image1 = self.get_unFull_image('unfull_captcha.png') # 获取带缺口的验证码图片 image2 = self.get_full_image('full_captcha.png') # 对比两张图片像素点，获取缺口位置，得到偏移距离 # 获取缺口位置 distance = self.get_gap(image1,image2) print("缺口位置",distance) # 获取移动轨迹 track = self.get_track(distance) print("滑动轨迹",track) # 模拟人的行为，拖动滑块，完成验证 slider = self.get_slider() # slider.click() # 拖动滑块 self.move_to_grap(slider,track) success = self.wait.until( expected_conditions.text_to_be_present_in_element((By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功')) print(success) self.login() except: self.crack() if __name__ == "__main__": crack = CrackGreetest() crack.crack() ```