这里的问题在于,页面源代码中的验证码 URL 并不是
实际的图像 URL,而是一个动态生成验证码的脚本。
因此,当您使用验证码求解器 API 时,您求解的图像
与浏览器加载的图像并不相同。为了解决这个问题,我们必须
保存浏览器加载的那一张图像。我跟踪了图像请求,
发现它使用的是浏览器加载页面时
生成的 Cookie。
使用 Selenium:
from selenium import webdriver
from python3_anticaptcha import ImageToTextTask, CallbackClient
from time import sleep
import requests
def GetImageCookies():
    """Build the Cookie header value used to request the captcha image.

    Reads the cookies of the module-level Selenium ``browser`` and keeps only
    the ``ssc`` and ``ghsdfkjlksssalk35bbr`` entries, each rendered as
    ``name=value;`` in the order the browser reports them.

    Returns:
        str: The concatenated ``name=value;`` pairs, or an empty string when
        neither cookie is present.
    """
    print('Extracting Browser Cookies')
    # Cookie name -> template for its "name=value;" fragment.
    templates = {
        'ssc': 'ssc={};',
        'ghsdfkjlksssalk35bbr': 'ghsdfkjlksssalk35bbr={};',
    }
    fragments = [
        templates[entry['name']].format(entry['value'])
        for entry in browser.get_cookies()
        if entry['name'] in templates
    ]
    return ''.join(fragments)
def SaveImage(captcha_file="master.jpg", timeout=30):
    """Download the captcha image for the current browser session and save it.

    The request carries the browser's cookies (see GetImageCookies) so that
    the server returns the same captcha the browser is displaying rather than
    a freshly generated one.

    Args:
        captcha_file: Path the downloaded image bytes are written to.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests.get`` can block indefinitely on an unresponsive host.

    Returns:
        bool: True when the image was written, False when the server answered
        with a status other than 200 (nothing is written in that case).
    """
    print('Saving the captcha image')
    header = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Cookie': GetImageCookies(),
        'Host': 'masked',
        'Referer': 'masked',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0'}
    # verify=False skips TLS certificate validation, exactly as before.
    pic = requests.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=header,
        timeout=timeout,
    )
    if pic.status_code != 200:
        # Report the failure instead of silently leaving a stale (or missing)
        # file for the solver to pick up.
        print('Captcha download failed with HTTP status', pic.status_code)
        return False
    with open(captcha_file, 'wb') as f:
        f.write(pic.content)
    return True
def SolveCapcha(captcha_file="master.jpg"):
    """Send a saved captcha image to the anti-captcha service and return its text.

    Args:
        captcha_file: Path of the image file handed to the solver.

    Returns:
        The value found at ``['solution']['text']`` in the solver's response.
    """
    print('Solving the captcha image')
    api_key = 'masked'
    solver = ImageToTextTask.ImageToTextTask(anticaptcha_key=api_key)
    response = solver.captcha_handler(captcha_file=captcha_file)
    solved_text = response['solution']['text']
    print('Captcha text is :', solved_text)
    return solved_text
# Start a Firefox session and open the login page. `browser` is the
# module-level driver that GetImageCookies() and Login() operate on, so the
# page must be loaded here before either of them is called.
browser = webdriver.Firefox()
url = 'https://masked/'
browser.get(url)
def Login():
    """Fill in and submit the login form, reporting whether the captcha passed.

    Saves the captcha image for the current session, types the credentials,
    solves the captcha, submits the form and then inspects the error element.

    Returns:
        bool: False when the page shows 'The verification code is incorrect.';
        True otherwise. Any other error message (or none at all) is also
        reported as True, as in the original logic.
    """
    # Local import: the find_element_by_* helpers were removed in Selenium 4.3,
    # while find_element(By.ID, ...) is available on both Selenium 3 and 4.
    from selenium.webdriver.common.by import By

    SaveImage()
    sleep(5)

    username = browser.find_element(By.ID, "masked_username")
    username.clear()
    username.send_keys("testuser")

    password = browser.find_element(By.ID, "masked")
    password.clear()
    password.send_keys("testpass")

    captcha = browser.find_element(By.ID, "masked")
    captcha.clear()
    captcha_text = SolveCapcha()
    captcha.send_keys(captcha_text)

    # click() returns None, so its result is not kept.
    browser.find_element(By.ID, "masked").click()
    sleep(5)

    # find_elements returns an empty list (instead of raising) when the error
    # element is absent.
    err_message = browser.find_elements(By.ID, 'masked')
    if err_message and err_message[0].text == 'The verification code is incorrect.':
        print(err_message[0].text)
        return False
    return True
# The captcha image is downloaded using the browser's cookies, but sometimes
# the letters are too hard to solve. Each download with the same cookies gives
# the same text drawn differently, so we keep trying until it is solved.
#
# Login() is called exactly once per attempt: calling it again in the loop
# body would discard that attempt's result and submit the form a second time.
while not Login():
    pass
使用 Requests 和 Beautiful Soup:
以下只是一个思路,我不确定它是否有效,您需要自行测试:
from bs4 import BeautifulSoup
def SaveImage(captcha_file="master.jpg", timeout=30):
    """Download the captcha image through the shared session and save it.

    Uses the module-level ``session`` so the request is sent with whatever
    cookies that session already holds; no explicit Cookie header is set.

    Args:
        captcha_file: Path the downloaded image bytes are written to.
        timeout: Seconds to wait for the server. Without a timeout,
            ``session.get`` can block indefinitely on an unresponsive host.

    Returns:
        bool: True when the image was written, False when the server answered
        with a status other than 200 (nothing is written in that case).
    """
    print('Saving the captcha image')
    header = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Host': 'masked',
        'Referer': 'https://masked/',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0'}
    # verify=False skips TLS certificate validation, exactly as before.
    pic = session.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=header,
        timeout=timeout,
    )
    if pic.status_code != 200:
        # Report the failure instead of silently leaving a stale (or missing)
        # file for the solver to pick up.
        print('Captcha download failed with HTTP status', pic.status_code)
        return False
    with open(captcha_file, 'wb') as f:
        f.write(pic.content)
    return True
# A single Session carries the cookies from the first page load into every
# later request made through it.
with requests.Session() as session:
    # Load the login page first to obtain the initial cookies and the hidden
    # form token.
    source = session.get('https://masked/', verify=False)
    soup = BeautifulSoup(source.text, 'html.parser')
    token = soup.find('input', attrs={'name': 'masked'}).get('value')

    # Fetch the captcha and solve it before building the form.
    SaveImage()
    captcha_text = SolveCapcha()

    # NOTE(review): the field names are masked. As written, the first and last
    # keys are both 'masked', so the last one wins and the token is dropped;
    # substitute the real field names before use.
    post_data = {
        "masked": token,
        'masked[username]': 'testuser',
        'masked[password]': 'testpass',
        'masked[captcha]': captcha_text,
        'masked': '',
    }
    session.post('https://masked/', data=post_data, verify=False)