yt-dlc/youtube_dl/extractor/hotnewhiphop.py

from __future__ import unicode_literals

import base64

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import (
    ExtractorError,
    HEADRequest,
)


class HotNewHipHopIE(InfoExtractor):
    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
    _TEST = {
        'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
        'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
        'info_dict': {
            'id': '1435540',
            'ext': 'mp3',
            'title': 'Freddie Gibbs - Lay It Down'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_url_base64 = self._search_regex(
            r'data-path="(.*?)"', webpage, 'video URL', default=None)

        if video_url_base64 is None:
            video_url = self._search_regex(
                r'"contentUrl" content="(.*?)"', webpage, 'content URL')
            return self.url_result(video_url, ie='Youtube')

        reqdata = compat_urllib_parse.urlencode([
            ('mediaType', 's'),
            ('mediaId', video_id),
        ])
        r = compat_urllib_request.Request(
            'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
        r.add_header('Content-Type', 'application/x-www-form-urlencoded')
        mkd = self._download_json(
            r, video_id, note='Requesting media key',
            errnote='Could not download media key')
        if 'mediaKey' not in mkd:
            raise ExtractorError('Did not get a media key')

        redirect_url = base64.b64decode(video_url_base64).decode('utf-8')
        redirect_req = HEADRequest(redirect_url)
        req = self._request_webpage(
            redirect_req, video_id,
            note='Resolving final URL', errnote='Could not resolve final URL')
        video_url = req.geturl()
        if video_url.endswith('.html'):
            raise ExtractorError('Redirect failed')

        video_title = self._og_search_title(webpage).strip()

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`from __future__ import unicode_literals`

Added HotNewHipHop IE 2013-06-27 01:38:48 +00:00			`import base64`

			`from .common import InfoExtractor`
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`from ..compat import (`
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`compat_urllib_parse,`
			`compat_urllib_request,`
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`)`
			`from ..utils import (`
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`ExtractorError,`
			`HEADRequest,`
			`)`
Added HotNewHipHop IE 2013-06-27 01:38:48 +00:00

			`class HotNewHipHopIE(InfoExtractor):`
Properly escape ‘.’ in some _VALID_URL properties 2014-01-25 10:48:08 +00:00			`_VALID_URL = r'http://www\.hotnewhiphop\.com/.\.(?P<id>.)\.html'`
Move tests to the IE definitions 2013-06-27 18:46:46 +00:00			`_TEST = {`
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',`
			`'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',`
			`'info_dict': {`
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`'id': '1435540',`
			`'ext': 'mp3',`
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`'title': 'Freddie Gibbs - Lay It Down'`
Move tests to the IE definitions 2013-06-27 18:46:46 +00:00			`}`
			`}`
Added HotNewHipHop IE 2013-06-27 01:38:48 +00:00
			`def _real_extract(self, url):`
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`
Added HotNewHipHop IE 2013-06-27 01:38:48 +00:00
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`video_url_base64 = self._search_regex(`
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`r'data-path="(.*?)"', webpage, 'video URL', default=None)`
fix for detecting youtube embedded videos. 2013-06-27 15:39:32 +00:00
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`if video_url_base64 is None:`
			`video_url = self._search_regex(`
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`r'"contentUrl" content="(.*?)"', webpage, 'content URL')`
fix for detecting youtube embedded videos. 2013-06-27 15:39:32 +00:00			`return self.url_result(video_url, ie='Youtube')`
Added HotNewHipHop IE 2013-06-27 01:38:48 +00:00
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`reqdata = compat_urllib_parse.urlencode([`
			`('mediaType', 's'),`
			`('mediaId', video_id),`
			`])`
			`r = compat_urllib_request.Request(`
			`'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)`
			`r.add_header('Content-Type', 'application/x-www-form-urlencoded')`
			`mkd = self._download_json(`
			`r, video_id, note='Requesting media key',`
			`errnote='Could not download media key')`
			`if 'mediaKey' not in mkd:`
			`raise ExtractorError('Did not get a media key')`

			`redirect_url = base64.b64decode(video_url_base64).decode('utf-8')`
			`redirect_req = HEADRequest(redirect_url)`
			`req = self._request_webpage(`
			`redirect_req, video_id,`
			`note='Resolving final URL', errnote='Could not resolve final URL')`
			`video_url = req.geturl()`
			`if video_url.endswith('.html'):`
			`raise ExtractorError('Redirect failed')`
Added HotNewHipHop IE 2013-06-27 01:38:48 +00:00
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`video_title = self._og_search_title(webpage).strip()`
Added HotNewHipHop IE 2013-06-27 01:38:48 +00:00
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`return {`
			`'id': video_id,`
			`'url': video_url,`
			`'title': video_title,`
[hotnewhiphop] Modernize 2014-11-26 11:45:40 +00:00			`'thumbnail': self._og_search_thumbnail(webpage),`
[hotnewhiphop] Retrieve media key 2014-01-22 00:55:50 +00:00			`}`