Add an extractor for 56.com (related #1039)

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-07-18 20:47:10 +02:00
parent 0932300e3a
commit e1f6e61e6a
2 changed files with 37 additions and 0 deletions

View File

@@ -71,6 +71,7 @@ from .veoh import VeohIE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .vine import VineIE
from .c56 import C56IE
from .wat import WatIE
from .wimp import WimpIE
from .worldstarhiphop import WorldStarHipHopIE

View File

@@ -0,0 +1,36 @@
# coding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import determine_ext
class C56IE(InfoExtractor):
    """Information extractor for videos hosted on 56.com.

    The textual video id is taken from the page URL, the video metadata is
    read from the ``vxml.56.com`` JSON endpoint, and the entry of ``rfiles``
    with the largest ``filesize`` is the one whose URL is returned.
    """

    IE_NAME = u'56.com'
    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'

    _TEST = {
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        u'file': u'93440716.mp4',
        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
        u'info_dict': {
            u'title': u'网事知多少 第32期车怒',
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the 56.com video at *url*.

        The result carries the keys ``id``, ``title``, ``url``, ``ext`` and
        ``thumbnail``.  *url* is expected to match ``_VALID_URL``; a
        non-matching URL makes the ``.group()`` call fail on ``None``.
        """
        # The pattern contains neither whitespace nor '#', so the VERBOSE
        # flag does not change what it matches.
        text_id = re.match(self._VALID_URL, url, flags=re.VERBOSE).group('textid')

        info_url = 'http://vxml.56.com/json/%s/' % text_id
        raw_info = self._download_webpage(
            info_url, text_id, u'Downloading video info')
        info = json.loads(raw_info)['info']

        # Stable ascending sort by numeric file size: the last element is the
        # largest file (and, among equally sized ones, the last one listed).
        candidates = list(info['rfiles'])
        candidates.sort(key=lambda entry: int(entry['filesize']))
        video_url = candidates[-1]['url']

        return {
            'id': info['vid'],
            'title': info['Subject'],
            'url': video_url,
            'ext': determine_ext(video_url),
            # Prefer the 'bimg' thumbnail, fall back to 'img' when it is
            # missing or empty.
            'thumbnail': info.get('bimg') or info.get('img'),
        }