[udemy] Switch to api 2.0 (Closes #9035)

This commit is contained in:
Sergey M․ 2016-03-31 22:05:25 +06:00
parent 5299bc3f91
commit 81da8cbc45
1 changed file with 36 additions and 24 deletions

View File

@ -17,6 +17,7 @@ from ..utils import (
int_or_none, int_or_none,
sanitized_Request, sanitized_Request,
unescapeHTML, unescapeHTML,
update_url_query,
urlencode_postdata, urlencode_postdata,
) )
@ -54,6 +55,16 @@ class UdemyIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_course_info(self, webpage, video_id):
    """Return a ``(course_id, title)`` pair scraped from a course webpage.

    The page is searched for a ``course={...}`` JSON object inside an
    ``ng-init`` attribute; the match (or the literal ``'{}'`` passed as the
    search default) is HTML-unescaped and parsed non-fatally.  When the
    parsed object yields no truthy ``id``, the id is searched for directly
    in the page markup instead.  The title is taken from the parsed object
    only and is ``None`` when the object has no ``title`` key.
    """
    raw_course = self._search_regex(
        r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')
    course = self._parse_json(unescapeHTML(raw_course), video_id, fatal=False)
    if not course:
        # Parsing failed or produced an empty object: keep going with no data.
        course = {}

    course_id = course.get('id')
    if not course_id:
        # Fall back to scraping the id from the raw markup.
        course_id = self._search_regex(
            (r'"id"\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
            webpage, 'course id')

    return course_id, course.get('title')
def _enroll_course(self, base_url, webpage, course_id): def _enroll_course(self, base_url, webpage, course_id):
def combine_url(base_url, url): def combine_url(base_url, url):
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
@ -98,7 +109,7 @@ class UdemyIE(InfoExtractor):
error_str += ' - %s' % error_data.get('formErrors') error_str += ' - %s' % error_data.get('formErrors')
raise ExtractorError(error_str, expected=True) raise ExtractorError(error_str, expected=True)
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): def _download_json(self, url_or_request, *args, **kwargs):
headers = { headers = {
'X-Udemy-Snail-Case': 'true', 'X-Udemy-Snail-Case': 'true',
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
@ -116,7 +127,7 @@ class UdemyIE(InfoExtractor):
else: else:
url_or_request = sanitized_Request(url_or_request, headers=headers) url_or_request = sanitized_Request(url_or_request, headers=headers)
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note) response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
self._handle_error(response) self._handle_error(response)
return response return response
@ -166,9 +177,7 @@ class UdemyIE(InfoExtractor):
webpage = self._download_webpage(url, lecture_id) webpage = self._download_webpage(url, lecture_id)
course_id = self._search_regex( course_id, _ = self._extract_course_info(webpage, lecture_id)
(r'data-course-id=["\'](\d+)', r'"id"\s*:\s*(\d+)'),
webpage, 'course id')
try: try:
lecture = self._download_lecture(course_id, lecture_id) lecture = self._download_lecture(course_id, lecture_id)
@ -309,29 +318,32 @@ class UdemyCourseIE(UdemyIE):
webpage = self._download_webpage(url, course_path) webpage = self._download_webpage(url, course_path)
response = self._download_json( course_id, title = self._extract_course_info(webpage, course_path)
'https://www.udemy.com/api-1.1/courses/%s' % course_path,
course_path, 'Downloading course JSON')
course_id = response['id']
course_title = response.get('title')
self._enroll_course(url, webpage, course_id) self._enroll_course(url, webpage, course_id)
course_url = update_url_query(
'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
{
'fields[chapter]': 'title,object_index',
'fields[lecture]': 'title',
'page_size': '1000',
})
response = self._download_json( response = self._download_json(
'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, course_url, course_id, 'Downloading course curriculum')
course_id, 'Downloading course curriculum')
entries = [] entries = []
chapter, chapter_number = None, None chapter, chapter_number = [None] * 2
for asset in response: for entry in response['results']:
asset_type = asset.get('assetType') or asset.get('asset_type') clazz = entry.get('_class')
if asset_type == 'Video': if clazz == 'lecture':
asset_id = asset.get('id') lecture_id = entry.get('id')
if asset_id: if lecture_id:
entry = { entry = {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, entry['id']),
'title': entry.get('title'),
'ie_key': UdemyIE.ie_key(), 'ie_key': UdemyIE.ie_key(),
} }
if chapter_number: if chapter_number:
@ -339,8 +351,8 @@ class UdemyCourseIE(UdemyIE):
if chapter: if chapter:
entry['chapter'] = chapter entry['chapter'] = chapter
entries.append(entry) entries.append(entry)
elif asset.get('type') == 'chapter': elif clazz == 'chapter':
chapter_number = asset.get('index') or asset.get('object_index') chapter_number = entry.get('object_index')
chapter = asset.get('title') chapter = entry.get('title')
return self.playlist_result(entries, course_id, course_title) return self.playlist_result(entries, course_id, title)