diff --git a/libs/cloudscraper-1.2.71.dist-info/INSTALLER b/libs/cloudscraper-1.2.58.dist-info/INSTALLER similarity index 100% rename from libs/cloudscraper-1.2.71.dist-info/INSTALLER rename to libs/cloudscraper-1.2.58.dist-info/INSTALLER diff --git a/libs/cloudscraper-1.2.71.dist-info/LICENSE b/libs/cloudscraper-1.2.58.dist-info/LICENSE similarity index 100% rename from libs/cloudscraper-1.2.71.dist-info/LICENSE rename to libs/cloudscraper-1.2.58.dist-info/LICENSE diff --git a/libs/cloudscraper-1.2.71.dist-info/METADATA b/libs/cloudscraper-1.2.58.dist-info/METADATA similarity index 92% rename from libs/cloudscraper-1.2.71.dist-info/METADATA rename to libs/cloudscraper-1.2.58.dist-info/METADATA index a248c8208..b75a59542 100644 --- a/libs/cloudscraper-1.2.71.dist-info/METADATA +++ b/libs/cloudscraper-1.2.58.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: cloudscraper -Version: 1.2.71 +Version: 1.2.58 Summary: A Python module to bypass Cloudflare's anti-bot page. Home-page: https://github.com/venomous/cloudscraper Author: VeNoMouS @@ -82,6 +82,31 @@ We support the following Javascript interpreters/engines. - **[Node.js](https://nodejs.org/)** - **[V8](https://github.com/sony/v8eval/):** We use Sony's [v8eval](https://v8.dev)() python module. +# Updates + +Cloudflare modifies their anti-bot protection page occasionally, So far it has changed maybe once per year on average. + +If you notice that the anti-bot page has changed, or if this module suddenly stops working, please create a GitHub issue so that I can update the code accordingly. + +- Many issues are a result of users not updating to the latest release of this project. Before filing an issue, please run the following command: + +``` +pip show cloudscraper +``` + +If the value of the version field is not the latest release, please run the following to update your package: + +``` +pip install cloudscraper -U +``` + +If you are still encountering a problem, open an issue and please include: + +- The full exception and stack trace. +- The URL of the Cloudflare-protected page which the script does not work on. +- A Pastebin or Gist containing the HTML source of the protected page. +- The version number from `pip show cloudscraper`. + # Usage The simplest way to use cloudscraper is by calling `create_scraper()`. @@ -104,26 +129,6 @@ Consult [Requests' documentation](http://docs.python-requests.org/en/latest/user ## Options -### Disable Cloudflare V1 -#### Description - -If you don't want to even attempt Cloudflare v1 (Deprecated) solving.. - -#### Parameters - - -|Parameter|Value|Default| -|-------------|:-------------:|:-----:| -|disableCloudflareV1|(boolean)|False| - -#### Example - -```python -scraper = cloudscraper.create_scraper(disableCloudflareV1=True) -``` - ------- - ### Brotli #### Description @@ -327,7 +332,6 @@ scraper = cloudscraper.create_scraper(interpreter='nodejs') - **[2captcha](https://www.2captcha.com/)** - **[anticaptcha](https://www.anti-captcha.com/)** -- **[CapSolver](https://capsolver.com/)** - **[CapMonster Cloud](https://capmonster.cloud/)** - **[deathbycaptcha](https://www.deathbycaptcha.com/)** - **[9kw](https://www.9kw.eu/)** @@ -365,6 +369,7 @@ if proxies are set you can disable sending the proxies to 2captcha by setting `n ```python scraper = cloudscraper.create_scraper( + interpreter='nodejs', captcha={ 'provider': '2captcha', 'api_key': 'your_2captcha_api_key' @@ -392,6 +397,7 @@ if proxies are set you can disable sending the proxies to anticaptcha by setting ```python scraper = cloudscraper.create_scraper( + interpreter='nodejs', captcha={ 'provider': 'anticaptcha', 'api_key': 'your_anticaptcha_api_key' @@ -401,29 +407,6 @@ scraper = cloudscraper.create_scraper( ------ -#### CapSolver - -##### Required `captcha` Parameters - -|Parameter|Value|Required|Default| -|-------------|:-------------:|:-----:|:-----:| -|provider|(string) `captchaai`|yes|| -|api_key|(string)|yes|| - - -##### Example - -```python -scraper = cloudscraper.create_scraper( - captcha={ - 'provider': 'capsolver', - 'api_key': 'your_captchaai_api_key' - } -) -``` - ------- - #### CapMonster Cloud ##### Required `captcha` Parameters @@ -442,6 +425,7 @@ if proxies are set you can disable sending the proxies to CapMonster by setting ```python scraper = cloudscraper.create_scraper( + interpreter='nodejs', captcha={ 'provider': 'capmonster', 'clientKey': 'your_capmonster_clientKey' @@ -465,6 +449,7 @@ scraper = cloudscraper.create_scraper( ```python scraper = cloudscraper.create_scraper( + interpreter='nodejs', captcha={ 'provider': 'deathbycaptcha', 'username': 'your_deathbycaptcha_username', @@ -489,6 +474,7 @@ scraper = cloudscraper.create_scraper( ```python scraper = cloudscraper.create_scraper( + interpreter='nodejs', captcha={ 'provider': '9kw', 'api_key': 'your_9kw_api_key', @@ -512,6 +498,7 @@ Use this if you want the requests response payload without solving the Captcha. ##### Example ```python scraper = cloudscraper.create_scraper( + interpreter='nodejs', captcha={'provider': 'return_response'} ) ``` @@ -637,36 +624,3 @@ print( ) ) ``` - -### Cryptography - -#### Description - -Control communication between client and server - -#### Parameters - -Can be passed as an argument to `create_scraper()`. - -|Parameter|Value|Default| -|-------------|:-------------:|:-----:| -|cipherSuite|(string)|None| -|ecdhCurve|(string)|prime256v1| -|server_hostname|(string)|None| - -#### Example - -```python -# Some servers require the use of a more complex ecdh curve than the default "prime256v1" -# It may can solve handshake failure -scraper = cloudscraper.create_scraper(ecdhCurve='secp384r1') -``` - -```python -# Manipulate server_hostname -scraper = cloudscraper.create_scraper(server_hostname='www.somesite.com') -scraper.get( - 'https://backend.hosting.com/', - headers={'Host': 'www.somesite.com'} -) -``` diff --git a/libs/cloudscraper-1.2.71.dist-info/RECORD b/libs/cloudscraper-1.2.58.dist-info/RECORD similarity index 53% rename from libs/cloudscraper-1.2.71.dist-info/RECORD rename to libs/cloudscraper-1.2.58.dist-info/RECORD index 733225bbd..3dcf225c2 100644 --- a/libs/cloudscraper-1.2.71.dist-info/RECORD +++ b/libs/cloudscraper-1.2.58.dist-info/RECORD @@ -1,19 +1,17 @@ -cloudscraper-1.2.71.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -cloudscraper-1.2.71.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091 -cloudscraper-1.2.71.dist-info/METADATA,sha256=ywzk5ZCEv-I8Y9gajnVCsiAR3DrdmeiRLam3EGTJ0UA,19942 -cloudscraper-1.2.71.dist-info/RECORD,, -cloudscraper-1.2.71.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -cloudscraper-1.2.71.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92 -cloudscraper-1.2.71.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13 -cloudscraper/__init__.py,sha256=Eg8AqKak2yYcraKqt7O3LJLNmppC2uL7dvAANiyxh5w,15960 -cloudscraper/captcha/2captcha.py,sha256=yyDWvL6HVK4pM69aRpOV9mwzbtPC0yGz_mWkQ7-mkmI,10643 -cloudscraper/captcha/9kw.py,sha256=5EAUyO_vBEuLKsr4sXYa25MSVOm3BXVAdcenF6ZPsgI,7701 +cloudscraper-1.2.58.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +cloudscraper-1.2.58.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091 +cloudscraper-1.2.58.dist-info/METADATA,sha256=q25vkvMHkAxmuZRwak56i4CLAFUuG5EwEzz1oEXOY3U,19537 +cloudscraper-1.2.58.dist-info/RECORD,, +cloudscraper-1.2.58.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cloudscraper-1.2.58.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92 +cloudscraper-1.2.58.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13 +cloudscraper/__init__.py,sha256=gsOMaKAKNfJUR4FkiEefAA2fAHVFuSwkblGgqxClsrw,32790 +cloudscraper/captcha/2captcha.py,sha256=CWF62VmLqb_KvSH-dqzo1XEwCBOQh1Aee-G18cX_7aw,10371 +cloudscraper/captcha/9kw.py,sha256=1dfhRHKeCx8yIE1opWyQ1Q7aHJlXDdkv1bV2Bfzbrf8,7387 cloudscraper/captcha/__init__.py,sha256=VORxm32xqLrEE-zxFWgEhSbtqfigjCfwodChg1VlQ6c,1511 -cloudscraper/captcha/anticaptcha.py,sha256=YUsLviq3ZtbjTUnAPq6zVEieHmeSgnmiXKcqXZeO5qA,6152 -cloudscraper/captcha/capmonster.py,sha256=_9AUr6vHG4c5XLc5XqvnnMqgcvuKnzz1ckJpSySjgKQ,6143 -cloudscraper/captcha/capsolver.py,sha256=x38fO0m_k2W8nO3IppXADZsfCYl0iyvRgajZ5s5iTSU,6060 -cloudscraper/captcha/deathbycaptcha.py,sha256=asUX_quUsjAyWVRc7_8o_ryHZFotN-NP60mQiuN-c1U,8673 -cloudscraper/cloudflare.py,sha256=i1jyJcY-aRy3IQ-7YUly8qGUovO4Nx99M_FKfz4eivQ,19993 +cloudscraper/captcha/anticaptcha.py,sha256=cK8LON8M-8MN1wx_rSMTTqxrpwbL65Z2svH-LtGiA40,3478 +cloudscraper/captcha/capmonster.py,sha256=oVXdv2Wrgh2nWFrYttUzbqW9xZU1j6A4cDDcZINIoVg,5695 +cloudscraper/captcha/deathbycaptcha.py,sha256=UJqkh35gcKVdIhwNqF7N_0ixpIPT2PHiMbT378wEM4w,8073 cloudscraper/exceptions.py,sha256=WSMgI8PRvU3g4KDFrjU-42p83lSAVOw8tN2NSqqIUfw,2397 cloudscraper/help.py,sha256=fNYNGFQjiCL1d-gCpDoulBk4iHOuzNhLBudi7NrOHSg,2100 cloudscraper/interpreters/__init__.py,sha256=mWY8LuzDRYWGGnKz5vYSdrOnoVaeWlixmAtZN8Pq6bY,1734 diff --git a/libs/cloudscraper-1.2.71.dist-info/REQUESTED b/libs/cloudscraper-1.2.58.dist-info/REQUESTED similarity index 100% rename from libs/cloudscraper-1.2.71.dist-info/REQUESTED rename to libs/cloudscraper-1.2.58.dist-info/REQUESTED diff --git a/libs/cloudscraper-1.2.71.dist-info/WHEEL b/libs/cloudscraper-1.2.58.dist-info/WHEEL similarity index 100% rename from libs/cloudscraper-1.2.71.dist-info/WHEEL rename to libs/cloudscraper-1.2.58.dist-info/WHEEL diff --git a/libs/cloudscraper-1.2.71.dist-info/top_level.txt b/libs/cloudscraper-1.2.58.dist-info/top_level.txt similarity index 100% rename from libs/cloudscraper-1.2.71.dist-info/top_level.txt rename to libs/cloudscraper-1.2.58.dist-info/top_level.txt diff --git a/libs/cloudscraper/__init__.py b/libs/cloudscraper/__init__.py index 67abd446f..077747034 100644 --- a/libs/cloudscraper/__init__.py +++ b/libs/cloudscraper/__init__.py @@ -1,14 +1,20 @@ # ------------------------------------------------------------------------------- # import logging +import re import requests import sys import ssl +from collections import OrderedDict +from copy import deepcopy + from requests.adapters import HTTPAdapter from requests.sessions import Session from requests_toolbelt.utils import dump +from time import sleep + # ------------------------------------------------------------------------------- # try: @@ -22,23 +28,37 @@ except ImportError: import copy_reg as copyreg try: - from urlparse import urlparse + from HTMLParser import HTMLParser except ImportError: - from urllib.parse import urlparse + if sys.version_info >= (3, 4): + import html + else: + from html.parser import HTMLParser + +try: + from urlparse import urlparse, urljoin +except ImportError: + from urllib.parse import urlparse, urljoin # ------------------------------------------------------------------------------- # from .exceptions import ( CloudflareLoopProtection, - CloudflareIUAMError + CloudflareCode1020, + CloudflareIUAMError, + CloudflareSolveError, + CloudflareChallengeError, + CloudflareCaptchaError, + CloudflareCaptchaProvider ) -from .cloudflare import Cloudflare +from .interpreters import JavaScriptInterpreter +from .captcha import Captcha from .user_agent import User_Agent # ------------------------------------------------------------------------------- # -__version__ = '1.2.71' +__version__ = '1.2.58' # ------------------------------------------------------------------------------- # @@ -59,8 +79,6 @@ class CipherSuiteAdapter(HTTPAdapter): self.ssl_context = kwargs.pop('ssl_context', None) self.cipherSuite = kwargs.pop('cipherSuite', None) self.source_address = kwargs.pop('source_address', None) - self.server_hostname = kwargs.pop('server_hostname', None) - self.ecdhCurve = kwargs.pop('ecdhCurve', 'prime256v1') if self.source_address: if isinstance(self.source_address, str): @@ -73,34 +91,14 @@ class CipherSuiteAdapter(HTTPAdapter): if not self.ssl_context: self.ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) - - self.ssl_context.orig_wrap_socket = self.ssl_context.wrap_socket - self.ssl_context.wrap_socket = self.wrap_socket - - if self.server_hostname: - self.ssl_context.server_hostname = self.server_hostname - self.ssl_context.set_ciphers(self.cipherSuite) - self.ssl_context.set_ecdh_curve(self.ecdhCurve) - - self.ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2 - self.ssl_context.maximum_version = ssl.TLSVersion.TLSv1_3 + self.ssl_context.set_ecdh_curve('prime256v1') + self.ssl_context.options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1) super(CipherSuiteAdapter, self).__init__(**kwargs) # ------------------------------------------------------------------------------- # - def wrap_socket(self, *args, **kwargs): - if hasattr(self.ssl_context, 'server_hostname') and self.ssl_context.server_hostname: - kwargs['server_hostname'] = self.ssl_context.server_hostname - self.ssl_context.check_hostname = False - else: - self.ssl_context.check_hostname = True - - return self.ssl_context.orig_wrap_socket(*args, **kwargs) - - # ------------------------------------------------------------------------------- # - def init_poolmanager(self, *args, **kwargs): kwargs['ssl_context'] = self.ssl_context kwargs['source_address'] = self.source_address @@ -120,21 +118,15 @@ class CloudScraper(Session): def __init__(self, *args, **kwargs): self.debug = kwargs.pop('debug', False) - - self.disableCloudflareV1 = kwargs.pop('disableCloudflareV1', False) self.delay = kwargs.pop('delay', None) - self.captcha = kwargs.pop('captcha', {}) - self.doubleDown = kwargs.pop('doubleDown', True) + self.cipherSuite = kwargs.pop('cipherSuite', None) + self.ssl_context = kwargs.pop('ssl_context', None) self.interpreter = kwargs.pop('interpreter', 'native') - + self.captcha = kwargs.pop('captcha', {}) self.requestPreHook = kwargs.pop('requestPreHook', None) self.requestPostHook = kwargs.pop('requestPostHook', None) - - self.cipherSuite = kwargs.pop('cipherSuite', None) - self.ecdhCurve = kwargs.pop('ecdhCurve', 'prime256v1') self.source_address = kwargs.pop('source_address', None) - self.server_hostname = kwargs.pop('server_hostname', None) - self.ssl_context = kwargs.pop('ssl_context', None) + self.doubleDown = kwargs.pop('doubleDown', True) self.allow_brotli = kwargs.pop( 'allow_brotli', @@ -167,10 +159,8 @@ class CloudScraper(Session): 'https://', CipherSuiteAdapter( cipherSuite=self.cipherSuite, - ecdhCurve=self.ecdhCurve, - server_hostname=self.server_hostname, - source_address=self.source_address, - ssl_context=self.ssl_context + ssl_context=self.ssl_context, + source_address=self.source_address ) ) @@ -211,6 +201,20 @@ class CloudScraper(Session): except ValueError as e: print(f"Debug Error: {getattr(e, 'message', e)}") + # ------------------------------------------------------------------------------- # + # Unescape / decode html entities + # ------------------------------------------------------------------------------- # + + @staticmethod + def unescape(html_text): + if sys.version_info >= (3, 0): + if sys.version_info >= (3, 4): + return html.unescape(html_text) + + return HTMLParser().unescape(html_text) + + return HTMLParser().unescape(html_text) + # ------------------------------------------------------------------------------- # # Decode Brotli on older versions of urllib3 manually # ------------------------------------------------------------------------------- # @@ -271,44 +275,480 @@ class CloudScraper(Session): # ------------------------------------------------------------------------------- # if self.requestPostHook: - newResponse = self.requestPostHook(self, response) + response = self.requestPostHook(self, response) - if response != newResponse: # Give me walrus in 3.7!!! - response = newResponse - if self.debug: - print('==== requestPostHook Debug ====') - self.debugRequest(response) - - # ------------------------------------------------------------------------------- # - - if not self.disableCloudflareV1: - cloudflareV1 = Cloudflare(self) + if self.debug: + self.debugRequest(response) + # Check if Cloudflare anti-bot is on + if self.is_Challenge_Request(response): # ------------------------------------------------------------------------------- # - # Check if Cloudflare v1 anti-bot is on + # Try to solve the challenge and send it back # ------------------------------------------------------------------------------- # - if cloudflareV1.is_Challenge_Request(response): - # ------------------------------------------------------------------------------- # - # Try to solve the challenge and send it back - # ------------------------------------------------------------------------------- # + if self._solveDepthCnt >= self.solveDepth: + _ = self._solveDepthCnt + self.simpleException( + CloudflareLoopProtection, + f"!!Loop Protection!! We have tried to solve {_} time(s) in a row." + ) - if self._solveDepthCnt >= self.solveDepth: - _ = self._solveDepthCnt - self.simpleException( - CloudflareLoopProtection, - f"!!Loop Protection!! We have tried to solve {_} time(s) in a row." - ) + self._solveDepthCnt += 1 - self._solveDepthCnt += 1 - - response = cloudflareV1.Challenge_Response(response, **kwargs) - else: - if not response.is_redirect and response.status_code not in [429, 503]: - self._solveDepthCnt = 0 + response = self.Challenge_Response(response, **kwargs) + else: + if not response.is_redirect and response.status_code not in [429, 503]: + self._solveDepthCnt = 0 return response + # ------------------------------------------------------------------------------- # + # check if the response contains a valid Cloudflare Bot Fight Mode challenge + # ------------------------------------------------------------------------------- # + + @staticmethod + def is_BFM_Challenge(resp): + try: + return ( + resp.headers.get('Server', '').startswith('cloudflare') + and re.search( + r"\/cdn-cgi\/bm\/cv\/\d+\/api\.js.*?" + r"window\['__CF\$cv\$params'\]\s*=\s*{", + resp.text, + re.M | re.S + ) + ) + except AttributeError: + pass + + return False + + # ------------------------------------------------------------------------------- # + # check if the response contains a valid Cloudflare challenge + # ------------------------------------------------------------------------------- # + + @staticmethod + def is_IUAM_Challenge(resp): + try: + return ( + resp.headers.get('Server', '').startswith('cloudflare') + and resp.status_code in [429, 503] + and re.search( + r'
)', + body, + re.M | re.DOTALL + ).groupdict() + + if not all(key in formPayload for key in ['form', 'challengeUUID']): + self.simpleException( + CloudflareIUAMError, + "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly." + ) + + payload = OrderedDict() + for challengeParam in re.findall(r'^\s*', formPayload['form'], re.M | re.S): + inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam)) + if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']: + payload.update({inputPayload['name']: inputPayload['value']}) + + except AttributeError: + self.simpleException( + CloudflareIUAMError, + "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly." + ) + + hostParsed = urlparse(url) + + try: + payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport( + interpreter + ).solveChallenge(body, hostParsed.netloc) + except Exception as e: + self.simpleException( + CloudflareIUAMError, + f"Unable to parse Cloudflare anti-bots page: {getattr(e, 'message', e)}" + ) + + return { + 'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}", + 'data': payload + } + + # ------------------------------------------------------------------------------- # + # Try to solve the Captcha challenge via 3rd party. + # ------------------------------------------------------------------------------- # + + def captcha_Challenge_Response(self, provider, provider_params, body, url): + try: + formPayload = re.search( + r')', + body, + re.M | re.DOTALL + ).groupdict() + + if not all(key in formPayload for key in ['form', 'challengeUUID']): + self.simpleException( + CloudflareCaptchaError, + "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly." + ) + + payload = OrderedDict( + re.findall( + r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"', + formPayload['form'] + ) + ) + + captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha' + + except (AttributeError, KeyError): + self.simpleException( + CloudflareCaptchaError, + "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly." + ) + + # ------------------------------------------------------------------------------- # + # Pass proxy parameter to provider to solve captcha. + # ------------------------------------------------------------------------------- # + + if self.proxies and self.proxies != self.captcha.get('proxy'): + self.captcha['proxy'] = self.proxies + + # ------------------------------------------------------------------------------- # + # Pass User-Agent if provider supports it to solve captcha. + # ------------------------------------------------------------------------------- # + + self.captcha['User-Agent'] = self.headers['User-Agent'] + + # ------------------------------------------------------------------------------- # + # Submit job to provider to request captcha solve. + # ------------------------------------------------------------------------------- # + + captchaResponse = Captcha.dynamicImport( + provider.lower() + ).solveCaptcha( + captchaType, + url, + payload['data-sitekey'], + provider_params + ) + + # ------------------------------------------------------------------------------- # + # Parse and handle the response of solved captcha. + # ------------------------------------------------------------------------------- # + + dataPayload = OrderedDict([ + ('r', payload.get('name="r" value', '')), + ('cf_captcha_kind', payload['name="cf_captcha_kind" value']), + ('id', payload.get('data-ray')), + ('g-recaptcha-response', captchaResponse) + ]) + + if captchaType == 'hCaptcha': + dataPayload.update({'h-captcha-response': captchaResponse}) + + hostParsed = urlparse(url) + + return { + 'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}", + 'data': dataPayload + } + + # ------------------------------------------------------------------------------- # + # Attempt to handle and send the challenge response back to cloudflare + # ------------------------------------------------------------------------------- # + + def Challenge_Response(self, resp, **kwargs): + if self.is_Captcha_Challenge(resp): + # ------------------------------------------------------------------------------- # + # double down on the request as some websites are only checking + # if cfuid is populated before issuing Captcha. + # ------------------------------------------------------------------------------- # + + if self.doubleDown: + resp = self.decodeBrotli( + self.perform_request(resp.request.method, resp.url, **kwargs) + ) + + if not self.is_Captcha_Challenge(resp): + return resp + + # ------------------------------------------------------------------------------- # + # if no captcha provider raise a runtime error. + # ------------------------------------------------------------------------------- # + + if not self.captcha or not isinstance(self.captcha, dict) or not self.captcha.get('provider'): + self.simpleException( + CloudflareCaptchaProvider, + "Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider " + "correctly via the 'captcha' parameter." + ) + + # ------------------------------------------------------------------------------- # + # if provider is return_response, return the response without doing anything. + # ------------------------------------------------------------------------------- # + + if self.captcha.get('provider') == 'return_response': + return resp + + # ------------------------------------------------------------------------------- # + # Submit request to parser wrapper to solve captcha + # ------------------------------------------------------------------------------- # + + submit_url = self.captcha_Challenge_Response( + self.captcha.get('provider'), + self.captcha, + resp.text, + resp.url + ) + else: + # ------------------------------------------------------------------------------- # + # Cloudflare requires a delay before solving the challenge + # ------------------------------------------------------------------------------- # + + if not self.delay: + try: + delay = float( + re.search( + r'submit\(\);\r?\n\s*},\s*([0-9]+)', + resp.text + ).group(1) + ) / float(1000) + if isinstance(delay, (int, float)): + self.delay = delay + except (AttributeError, ValueError): + self.simpleException( + CloudflareIUAMError, + "Cloudflare IUAM possibility malformed, issue extracing delay value." + ) + + sleep(self.delay) + + # ------------------------------------------------------------------------------- # + + submit_url = self.IUAM_Challenge_Response( + resp.text, + resp.url, + self.interpreter + ) + + # ------------------------------------------------------------------------------- # + # Send the Challenge Response back to Cloudflare + # ------------------------------------------------------------------------------- # + + if submit_url: + + def updateAttr(obj, name, newValue): + try: + obj[name].update(newValue) + return obj[name] + except (AttributeError, KeyError): + obj[name] = {} + obj[name].update(newValue) + return obj[name] + + cloudflare_kwargs = deepcopy(kwargs) + cloudflare_kwargs['allow_redirects'] = False + cloudflare_kwargs['data'] = updateAttr( + cloudflare_kwargs, + 'data', + submit_url['data'] + ) + + urlParsed = urlparse(resp.url) + cloudflare_kwargs['headers'] = updateAttr( + cloudflare_kwargs, + 'headers', + { + 'Origin': f'{urlParsed.scheme}://{urlParsed.netloc}', + 'Referer': resp.url + } + ) + + challengeSubmitResponse = self.request( + 'POST', + submit_url['url'], + **cloudflare_kwargs + ) + + if challengeSubmitResponse.status_code == 400: + self.simpleException( + CloudflareSolveError, + 'Invalid challenge answer detected, Cloudflare broken?' + ) + + # ------------------------------------------------------------------------------- # + # Return response if Cloudflare is doing content pass through instead of 3xx + # else request with redirect URL also handle protocol scheme change http -> https + # ------------------------------------------------------------------------------- # + + if not challengeSubmitResponse.is_redirect: + return challengeSubmitResponse + + else: + cloudflare_kwargs = deepcopy(kwargs) + cloudflare_kwargs['headers'] = updateAttr( + cloudflare_kwargs, + 'headers', + {'Referer': challengeSubmitResponse.url} + ) + + if not urlparse(challengeSubmitResponse.headers['Location']).netloc: + redirect_location = urljoin( + challengeSubmitResponse.url, + challengeSubmitResponse.headers['Location'] + ) + else: + redirect_location = challengeSubmitResponse.headers['Location'] + + return self.request( + resp.request.method, + redirect_location, + **cloudflare_kwargs + ) + + # ------------------------------------------------------------------------------- # + # We shouldn't be here... + # Re-request the original query and/or process again.... + # ------------------------------------------------------------------------------- # + + return self.request(resp.request.method, resp.url, **kwargs) + # ------------------------------------------------------------------------------- # @classmethod @@ -321,7 +761,7 @@ class CloudScraper(Session): if sess: for attr in ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']: val = getattr(sess, attr, None) - if val is not None: + if val: setattr(scraper, attr, val) return scraper @@ -342,7 +782,7 @@ class CloudScraper(Session): 'doubleDown', 'captcha', 'interpreter', - 'source_address', + 'source_address' 'requestPreHook', 'requestPostHook' ] if field in kwargs @@ -366,7 +806,6 @@ class CloudScraper(Session): break else: cls.simpleException( - cls, CloudflareIUAMError, "Unable to find Cloudflare cookies. Does the site actually " "have Cloudflare IUAM (I'm Under Attack Mode) enabled?" @@ -374,6 +813,7 @@ class CloudScraper(Session): return ( { + '__cfduid': scraper.cookies.get('__cfduid', '', domain=cookie_domain), 'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain) }, scraper.headers['User-Agent'] @@ -402,6 +842,5 @@ if ssl.OPENSSL_VERSION_INFO < (1, 1, 1): # ------------------------------------------------------------------------------- # create_scraper = CloudScraper.create_scraper -session = CloudScraper.create_scraper get_tokens = CloudScraper.get_tokens get_cookie_string = CloudScraper.get_cookie_string diff --git a/libs/cloudscraper/captcha/2captcha.py b/libs/cloudscraper/captcha/2captcha.py index 1052e0292..7fae7f306 100644 --- a/libs/cloudscraper/captcha/2captcha.py +++ b/libs/cloudscraper/captcha/2captcha.py @@ -29,11 +29,6 @@ class captchaSolver(Captcha): super(captchaSolver, self).__init__('2captcha') self.host = 'https://2captcha.com' self.session = requests.Session() - self.captchaType = { - 'reCaptcha': 'userrecaptcha', - 'hCaptcha': 'hcaptcha', - 'turnstile': 'turnstile' - } # ------------------------------------------------------------------------------- # @@ -180,16 +175,23 @@ class captchaSolver(Captcha): 'soft_id': 2905 } - data.update({ - 'method': self.captchaType[captchaType], - 'googlekey' if captchaType == 'reCaptcha' else 'sitekey': siteKey - }) + data.update( + { + 'method': 'userrcaptcha', + 'googlekey': siteKey + } if captchaType == 'reCaptcha' else { + 'method': 'hcaptcha', + 'sitekey': siteKey + } + ) if self.proxy: - data.update({ - 'proxy': self.proxy, - 'proxytype': self.proxyType - }) + data.update( + { + 'proxy': self.proxy, + 'proxytype': self.proxyType + } + ) response = polling2.poll( lambda: self.session.post( diff --git a/libs/cloudscraper/captcha/9kw.py b/libs/cloudscraper/captcha/9kw.py index df3589d72..143def818 100644 --- a/libs/cloudscraper/captcha/9kw.py +++ b/libs/cloudscraper/captcha/9kw.py @@ -12,35 +12,30 @@ except ImportError: ) from ..exceptions import ( - CaptchaException, - CaptchaServiceUnavailable, - CaptchaAPIError, - CaptchaTimeout, - CaptchaParameter, - CaptchaBadJobID + reCaptchaServiceUnavailable, + reCaptchaAPIError, + reCaptchaTimeout, + reCaptchaParameter, + reCaptchaBadJobID ) -from . import Captcha +from . import reCaptcha -class captchaSolver(Captcha): +class captchaSolver(reCaptcha): def __init__(self): super(captchaSolver, self).__init__('9kw') self.host = 'https://www.9kw.eu/index.cgi' self.maxtimeout = 180 self.session = requests.Session() - self.captchaType = { - 'reCaptcha': 'recaptchav2', - 'hCaptcha': 'hcaptcha' - } # ------------------------------------------------------------------------------- # @staticmethod def checkErrorStatus(response): if response.status_code in [500, 502]: - raise CaptchaServiceUnavailable( + raise reCaptchaServiceUnavailable( f'9kw: Server Side Error {response.status_code}' ) @@ -103,18 +98,18 @@ class captchaSolver(Captcha): if response.text.startswith('{'): if response.json().get('error'): - raise CaptchaAPIError(error_codes.get(int(response.json().get('error')))) + raise reCaptchaAPIError(error_codes.get(int(response.json().get('error')))) else: error_code = int(re.search(r'^00(?P