[stitcher] Add extractor

Stitcher review updates

Removed re import

Stitcher review updates
This commit is contained in:
mjdubell 2015-10-19 03:36:07 +02:00 committed by Sergey M․
parent d01949dc89
commit 4211c83aa4
2 changed files with 38 additions and 0 deletions

View File

@@ -586,6 +586,7 @@ from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
from .spike import SpikeIE
from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import (
    SportBoxIE,

View File

@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
class StitcherIE(InfoExtractor):
    """Extractor for single podcast episodes hosted on stitcher.com."""

    # Episode URLs have the shape
    #   /podcast/<show>[/<sub-show>...]/e/[<episode-slug>-]<numeric id>
    # Path segments and the optional slug are matched with negated character
    # classes (not a letters-and-hyphens-only class) so digits, upper-case
    # letters, underscores etc. in them are accepted.  The id must be followed
    # by a delimiter or the end of the URL, so a digit that merely occurs
    # inside the slug (e.g. ``episode-2a-...-40876278``) is never taken for
    # the id.
    _VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:[^/#?&]+?-)?(?P<id>\d+)(?:[/#?&]|$)'
    _TEST = {
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': '391dd4e021e6edeb7b8e68fbf2e9e940',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters from Talking Machines',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the episode page at ``url``.

        Downloads the page, takes the title from its Open Graph metadata and
        reads the media URL, thumbnail and duration from the ``episodeURL``,
        ``episodeImage`` and ``duration`` fields found in the page source.

        The media URL is looked up with a fatal search; thumbnail and
        duration are looked up with ``fatal=False`` and are optional.
        """
        audio_id = self._match_id(url)
        webpage = self._download_webpage(url, audio_id)

        title = self._og_search_title(webpage)
        # Kept in its own local so the page URL passed in as ``url`` is not
        # overwritten by the media URL.
        audio_url = self._search_regex(
            r'episodeURL: "(.+?)"', webpage, 'url')
        episode_image = self._search_regex(
            r'episodeImage: "(.+?)"', webpage, 'episode_image', fatal=False)
        duration = int_or_none(self._search_regex(
            r'duration: (\d+?),', webpage, 'duration', fatal=False))

        return {
            'id': audio_id,
            'url': audio_url,
            'title': title,
            'duration': duration,
            'thumbnail': episode_image,
            'ext': 'mp3',
            'vcodec': 'none',
        }