Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Allan Zhou 2013-08-26 15:16:13 -07:00
commit 99859d436c
10 changed files with 127 additions and 18 deletions

View File

@ -26,9 +26,9 @@ tests = [
# 85 # 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
# 84 # 84 - vflh9ybst 2013/08/23 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
# 83 # 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),

View File

@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
from .generic import GenericIE from .generic import GenericIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .sina import SinaIE from .sina import SinaIE
@ -116,12 +118,14 @@ _ALL_CLASSES = [
] ]
_ALL_CLASSES.append(GenericIE) _ALL_CLASSES.append(GenericIE)
def gen_extractors(): def gen_extractors():
""" Return a list of an instance of every supported extractor. """ Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL. The order does matter; the first extractor matched is the one handling the URL.
""" """
return [klass() for klass in _ALL_CLASSES] return [klass() for klass in _ALL_CLASSES]
def get_info_extractor(ie_name): def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name""" """Returns the info extractor class with the given ie_name"""
return globals()[ie_name+'IE'] return globals()[ie_name+'IE']

View File

@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
_TEST ={ _TEST ={
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
u'file': u'93440716.mp4', u'file': u'93440716.flv',
u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e', u'md5': u'e59995ac63d0457783ea05f93f12a866',
u'info_dict': { u'info_dict': {
u'title': u'网事知多少 第32期车怒', u'title': u'网事知多少 第32期车怒',
}, },

View File

@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
u'file': u'x33vw9.mp4', u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb', u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': { u'info_dict': {
u"uploader": u"Alex and Van .", u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
} }
} }

View File

@ -7,12 +7,14 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_error, compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
ExtractorError, ExtractorError,
) )
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
IE_DESC = u'Generic downloader that works on some sites' IE_DESC = u'Generic downloader that works on some sites'
_VALID_URL = r'.*' _VALID_URL = r'.*'
@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
self.report_extraction(video_id) self.report_extraction(video_id)
# Look for BrigthCove: # Look for BrightCove:
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
if m_brightcove is not None: if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.') self.to_screen(u'Brightcove video detected.')
@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
video_url = compat_urllib_parse.unquote(mobj.group(1)) video_url = compat_urllib_parse.unquote(mobj.group(1))
if video_url.startswith('//'):
video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
if '://' not in video_url:
video_url = url + ('' if url.endswith('/') else '/') + video_url
video_id = os.path.basename(video_url) video_id = os.path.basename(video_url)
# here's a fun little line of code for you: # here's a fun little line of code for you:

View File

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
import re
from .common import InfoExtractor
from ..utils import determine_ext
class HarkIE(InfoExtractor):
_VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
_TEST = {
u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
u'file': u'mmbzyhkgny.mp3',
u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
u'info_dict': {
u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
webpage = self._download_webpage(embed_url, video_id)
final_url = self._search_regex(r'src="(.+?).mp3"',
webpage, 'video url')+'.mp3'
title = self._html_search_regex(r'<title>(.+?)</title>',
webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
'Sound Clip , Quote, MP3, and Ringtone - Hark','')
return {'id': video_id,
'url' : final_url,
'title': title,
'ext': determine_ext(final_url),
}

View File

@ -0,0 +1,42 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
compat_parse_qs,
)
class Ro220IE(InfoExtractor):
IE_NAME = '220.ro'
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
u'file': u'LYV6doKo7f.mp4',
u'md5': u'03af18b73a07b4088753930db7a34add',
u'info_dict': {
u"title": u"Luati-le Banii sez 4 ep 1",
u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
flashVars_str = self._search_regex(
r'<param name="flashVars" value="([^"]+)"',
webpage, u'flashVars')
flashVars = compat_parse_qs(flashVars_str)
info = {
'_type': 'video',
'id': video_id,
'ext': 'mp4',
'url': flashVars['videoURL'][0],
'title': flashVars['title'][0],
'description': clean_html(flashVars['desc'][0]),
'thumbnail': flashVars['preview'][0],
}
return info

View File

@ -8,8 +8,8 @@ from ..utils import (
) )
class RTLnowIE(InfoExtractor): class RTLnowIE(InfoExtractor):
"""Information Extractor for RTLnow, RTL2now and VOXnow""" """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{ _TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv', u'file': u'90419.flv',
@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
u'params': { u'params': {
u'skip_download': True, u'skip_download': True,
}, },
},
{
u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
u'file': u'99205.flv',
u'info_dict': {
u'upload_date': u'20080928',
u'title': u'Medicopter 117 - Angst!',
u'description': u'Angst!',
u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
},
u'params': {
u'skip_download': True,
},
}] }]
def _real_extract(self,url): def _real_extract(self,url):

View File

@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif len(s) == 85: elif len(s) == 85:
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
elif len(s) == 84: elif len(s) == 84:
return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
elif len(s) == 83: elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82: elif len(s) == 82:

View File

@ -476,7 +476,7 @@ def formatSeconds(secs):
def make_HTTPS_handler(opts): def make_HTTPS_handler(opts):
if sys.version_info < (3,2): if sys.version_info < (3,2):
# Python's 2.x handler is very simplistic # Python's 2.x handler is very simplistic
return compat_urllib_request.HTTPSHandler() return YoutubeDLHandlerHTTPS()
else: else:
import ssl import ssl
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
context.verify_mode = (ssl.CERT_NONE context.verify_mode = (ssl.CERT_NONE
if opts.no_check_certificate if opts.no_check_certificate
else ssl.CERT_REQUIRED) else ssl.CERT_REQUIRED)
return compat_urllib_request.HTTPSHandler(context=context) return YoutubeDLHandlerHTTPS(context=context)
class ExtractorError(Exception): class ExtractorError(Exception):
"""Error during info extraction.""" """Error during info extraction."""
@ -569,7 +569,8 @@ class ContentTooShortError(Exception):
self.downloaded = downloaded self.downloaded = downloaded
self.expected = expected self.expected = expected
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
"""Handler for HTTP requests and responses. """Handler for HTTP requests and responses.
This class, when installed with an OpenerDirector, automatically adds This class, when installed with an OpenerDirector, automatically adds
@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
ret.code = code ret.code = code
return ret return ret
def http_request(self, req): def _http_request(self, req):
for h,v in std_headers.items(): for h, v in std_headers.items():
if h in req.headers: if h in req.headers:
del req.headers[h] del req.headers[h]
req.add_header(h, v) req.add_header(h, v)
@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
del req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent']
return req return req
def http_response(self, req, resp): def _http_response(self, req, resp):
old_resp = resp old_resp = resp
# gzip # gzip
if resp.headers.get('Content-encoding', '') == 'gzip': if resp.headers.get('Content-encoding', '') == 'gzip':
@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
resp.msg = old_resp.msg resp.msg = old_resp.msg
return resp return resp
https_request = http_request
https_response = http_response class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
http_request = YoutubeDLHandler_Template._http_request
http_response = YoutubeDLHandler_Template._http_response
class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
https_request = YoutubeDLHandler_Template._http_request
https_response = YoutubeDLHandler_Template._http_response
def unified_strdate(date_str): def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD""" """Return a string with the date in the format YYYYMMDD"""