"""CapMonster captcha provider (talks to https://api.capmonster.cloud)."""
from __future__ import absolute_import

import requests

try:
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse

from ..exceptions import (
    CaptchaServiceUnavailable,
    CaptchaAPIError,
    CaptchaTimeout,
    CaptchaParameter,
    CaptchaBadJobID
)

try:
    import polling2
except ImportError:
    raise ImportError("Please install the python module 'polling2' via pip")

from . import Captcha


class captchaSolver(Captcha):
    """Captcha provider that creates and polls tasks on the CapMonster API."""

    def __init__(self):
        super(captchaSolver, self).__init__('capmonster')
        self.host = 'https://api.capmonster.cloud'
        self.session = requests.Session()

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def checkErrorStatus(response):
        """Raise if the HTTP response or its JSON payload reports an error.

        Raises:
            CaptchaServiceUnavailable: the HTTP status code is 500 or 502.
            CaptchaAPIError: the JSON payload has ``errorId == 1``; the
                message is ``errorDescription`` when present, otherwise
                ``errorCode``.
        """
        if response.status_code in [500, 502]:
            raise CaptchaServiceUnavailable(
                f'CapMonster: Server Side Error {response.status_code}'
            )

        payload = response.json()
        if payload['errorId'] == 1:
            if 'errorDescription' in payload:
                raise CaptchaAPIError(
                    payload['errorDescription']
                )
            else:
                raise CaptchaAPIError(payload['errorCode'])

    # ------------------------------------------------------------------------------- #

    def requestJob(self, taskID):
        """Poll ``/getTaskResult`` until the task is ready and return the token.

        The endpoint is polled every 5 seconds for up to 180 seconds.

        Args:
            taskID: task id previously returned by ``requestSolve``.

        Returns:
            The ``solution.gRecaptchaResponse`` value from the final response.

        Raises:
            CaptchaBadJobID: ``taskID`` is falsy.
            CaptchaTimeout: the final polled response is falsy.
        """
        if not taskID:
            raise CaptchaBadJobID(
                'CapMonster: Error bad task id to request Captcha.'
            )

        def _checkRequest(response):
            # API-level errors abort the poll immediately by raising.
            self.checkErrorStatus(response)
            if response.ok and response.json()['status'] == 'ready':
                return True
            return None

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/getTaskResult',
                json={
                    'clientKey': self.clientKey,
                    'taskId': taskID
                },
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            return response.json()['solution']['gRecaptchaResponse']
        else:
            raise CaptchaTimeout(
                "CapMonster: Error failed to solve Captcha."
            )

    # ------------------------------------------------------------------------------- #

    def requestSolve(self, captchaType, url, siteKey):
        """Create a solving task via ``/createTask`` and return its task id.

        Args:
            captchaType: ``'reCaptcha'`` selects ``NoCaptchaTask``; any other
                value selects ``HCaptchaTask``.
            url: sent as the task's ``websiteURL``.
            siteKey: sent as the task's ``websiteKey``.

        Returns:
            The ``taskId`` from the API response.

        Raises:
            CaptchaBadJobID: the final polled response is falsy.
        """
        def _checkRequest(response):
            # API-level errors abort the poll immediately by raising.
            self.checkErrorStatus(response)
            if response.ok and response.json()['taskId']:
                return True
            return None

        data = {
            'clientKey': self.clientKey,
            'task': {
                'websiteURL': url,
                'websiteKey': siteKey,
                'softId': 37,
                'type': 'NoCaptchaTask' if captchaType == 'reCaptcha' else 'HCaptchaTask'
            }
        }

        if self.proxy:
            # Merge the proxy* fields prepared by getCaptchaAnswer into the task.
            data['task'].update(self.proxy)
        else:
            # Without a proxy the "...Proxyless" variant of the task type is used.
            data['task']['type'] = f"{data['task']['type']}Proxyless"

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/createTask',
                json=data,
                allow_redirects=False,
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            return response.json()['taskId']
        else:
            raise CaptchaBadJobID(
                'CapMonster: Error no task id was returned.'
            )

    # ------------------------------------------------------------------------------- #

    def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
        """Solve a captcha through CapMonster and return the answer token.

        Args:
            captchaType: forwarded to ``requestSolve``.
            url: forwarded to ``requestSolve``.
            siteKey: forwarded to ``requestSolve``.
            captchaParams: dict of provider settings. ``clientKey`` is
                required. When ``proxy`` is set and ``no_proxy`` is not, the
                ``https`` entry of ``proxy`` is parsed and sent with the task.

        Returns:
            The value returned by ``requestJob`` for the created task.

        Raises:
            CaptchaParameter: ``clientKey`` is missing, or the proxy URL has
                no scheme, no netloc, or no determinable port.
            CaptchaTimeout: creating or polling the task timed out.
        """
        taskID = None

        if not captchaParams.get('clientKey'):
            raise CaptchaParameter(
                "CapMonster: Missing clientKey parameter."
            )

        self.clientKey = captchaParams.get('clientKey')

        if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
            hostParsed = urlparse(captchaParams.get('proxy', {}).get('https'))

            if not hostParsed.scheme:
                raise CaptchaParameter('Cannot parse proxy correctly, bad scheme')

            if not hostParsed.netloc:
                raise CaptchaParameter('Cannot parse proxy correctly, bad netloc')

            ports = {
                'http': 80,
                'https': 443
            }

            # Fall back to the scheme's default port. The default must be
            # looked up from the parsed scheme: self.proxy is not assigned yet
            # at this point (or still holds a previous call's value).
            proxyPort = hostParsed.port or ports.get(hostParsed.scheme)
            if not proxyPort:
                raise CaptchaParameter('Cannot parse proxy correctly, bad port')

            self.proxy = {
                'proxyType': hostParsed.scheme,
                'proxyAddress': hostParsed.hostname,
                'proxyPort': proxyPort,
                'proxyLogin': hostParsed.username,
                'proxyPassword': hostParsed.password,
            }
        else:
            self.proxy = None

        try:
            taskID = self.requestSolve(captchaType, url, siteKey)
            return self.requestJob(taskID)
        except polling2.TimeoutException:
            # NOTE(review): this class defines no reportJob; it is only called
            # when the instance actually provides one, so a missing method
            # cannot mask the timeout with an AttributeError.
            reportJob = getattr(self, 'reportJob', None)
            try:
                if taskID and callable(reportJob):
                    reportJob(taskID)
            except polling2.TimeoutException:
                raise CaptchaTimeout(
                    "CapMonster: Captcha solve took to long and also failed "
                    f"reporting the task with task id {taskID}."
                )

            raise CaptchaTimeout(
                "CapMonster: Captcha solve took to long to execute "
                f"task id {taskID}, aborting."
            )

# ------------------------------------------------------------------------------- #


captchaSolver()