mirror of
https://github.com/blackjack4494/yt-dlc.git
synced 2024-12-25 17:27:36 +00:00
[youtube:user] Extract in terms of load_more_widget_html
This commit is contained in:
parent
ed553379df
commit
eb0f3e7ec0
1 changed files with 4 additions and 45 deletions
|
@ -1358,6 +1358,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||||
class YoutubeChannelIE(InfoExtractor):
|
class YoutubeChannelIE(InfoExtractor):
|
||||||
IE_DESC = 'YouTube.com channels'
|
IE_DESC = 'YouTube.com channels'
|
||||||
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
|
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
|
||||||
|
_TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
|
||||||
IE_NAME = 'youtube:channel'
|
IE_NAME = 'youtube:channel'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'note': 'paginated channel',
|
'note': 'paginated channel',
|
||||||
|
@ -1386,7 +1387,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
|
url = self._TEMPLATE_URL % channel_id
|
||||||
channel_page = self._download_webpage(url, channel_id)
|
channel_page = self._download_webpage(url, channel_id)
|
||||||
autogenerated = re.search(r'''(?x)
|
autogenerated = re.search(r'''(?x)
|
||||||
class="[^"]*?(?:
|
class="[^"]*?(?:
|
||||||
|
@ -1429,12 +1430,10 @@ class YoutubeChannelIE(InfoExtractor):
|
||||||
return self.playlist_result(_entries(), channel_id)
|
return self.playlist_result(_entries(), channel_id)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(YoutubeChannelIE):
|
||||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
|
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
||||||
_GDATA_PAGE_SIZE = 50
|
|
||||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
|
||||||
IE_NAME = 'youtube:user'
|
IE_NAME = 'youtube:user'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -1458,46 +1457,6 @@ class YoutubeUserIE(InfoExtractor):
|
||||||
else:
|
else:
|
||||||
return super(YoutubeUserIE, cls).suitable(url)
|
return super(YoutubeUserIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
username = self._match_id(url)
|
|
||||||
|
|
||||||
# Download video ids using YouTube Data API. Result size per
|
|
||||||
# query is limited (currently to 50 videos) so we need to query
|
|
||||||
# page by page until there are no video ids - it means we got
|
|
||||||
# all of them.
|
|
||||||
|
|
||||||
def download_page(pagenum):
|
|
||||||
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
|
|
||||||
|
|
||||||
gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
|
|
||||||
page = self._download_webpage(
|
|
||||||
gdata_url, username,
|
|
||||||
'Downloading video ids from %d to %d' % (
|
|
||||||
start_index, start_index + self._GDATA_PAGE_SIZE))
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = json.loads(page)
|
|
||||||
except ValueError as err:
|
|
||||||
raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
|
|
||||||
if 'entry' not in response['feed']:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Extract video identifiers
|
|
||||||
entries = response['feed']['entry']
|
|
||||||
for entry in entries:
|
|
||||||
title = entry['title']['$t']
|
|
||||||
video_id = entry['id']['$t'].split('/')[-1]
|
|
||||||
yield {
|
|
||||||
'_type': 'url',
|
|
||||||
'url': video_id,
|
|
||||||
'ie_key': 'Youtube',
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
}
|
|
||||||
url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
|
|
||||||
|
|
||||||
return self.playlist_result(url_results, playlist_title=username)
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com searches'
|
IE_DESC = 'YouTube.com searches'
|
||||||
|
|
Loading…
Reference in a new issue