Compare commits

...

5 Commits

Author SHA1 Message Date
Tom-Oliver Heidel 228385340e
Merge pull request #187 from pukkandan/break-on-existing
Stop download after encountering video in archive
2020-11-16 23:00:44 +01:00
Tom-Oliver Heidel 63dcccd07c
Merge pull request #181 from pukkandan/precheck-archive
Check IDs in the archive before downloading webpage
2020-11-16 22:59:24 +01:00
pukkandan ea6e0c2b0d Add --break-on-existing by @gergesh
Authored-by: Yoav Shai <gergesh@gmail.com>
2020-11-13 14:19:00 +05:30
pukkandan fe5caa2a7c Handle IndexError 2020-11-13 03:05:29 +05:30
pukkandan 9a68de1217 Pre-check video IDs in the archive before downloading 2020-11-13 02:44:47 +05:30
4 changed files with 26 additions and 3 deletions

View File

@ -217,6 +217,8 @@ I will add some memorable short links to the binaries so you can download them e
--download-archive FILE Download only videos not listed in the
archive file. Record the IDs of all
downloaded videos in it.
--break-on-existing Stop the download process after attempting
to download a file that's in the archive.
--include-ads Download advertisements as well
(experimental)

View File

@ -210,6 +210,8 @@ class YoutubeDL(object):
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
break_on_existing: Stop the download process after attempting to download a file that's
in the archive.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
@ -821,12 +823,22 @@ class YoutubeDL(object):
if not ie.suitable(url): if not ie.suitable(url):
continue continue
ie = self.get_info_extractor(ie.ie_key()) ie_key = ie.ie_key()
ie = self.get_info_extractor(ie_key)
if not ie.working(): if not ie.working():
self.report_warning('The program functionality for this site has been marked as broken, ' self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.') 'and will probably not work.')
try: try:
try:
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
except (AssertionError, IndexError):
temp_id = None
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break
ie_result = ie.extract(url) ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break break
@ -1038,8 +1050,12 @@ class YoutubeDL(object):
reason = self._match_entry(entry, incomplete=True) reason = self._match_entry(entry, incomplete=True)
if reason is not None: if reason is not None:
self.to_screen('[download] ' + reason) if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
continue print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
break
else:
self.to_screen('[download] ' + reason)
continue
entry_result = self.process_ie_result(entry, entry_result = self.process_ie_result(entry,
download=download, download=download,

View File

@ -405,6 +405,7 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code, 'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit, 'age_limit': opts.age_limit,
'download_archive': download_archive_fn, 'download_archive': download_archive_fn,
'break_on_existing': opts.break_on_existing,
'cookiefile': opts.cookiefile, 'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate, 'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure, 'prefer_insecure': opts.prefer_insecure,

View File

@ -344,6 +344,10 @@ def parseOpts(overrideArguments=None):
'--download-archive', metavar='FILE', '--download-archive', metavar='FILE',
dest='download_archive', dest='download_archive',
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
selection.add_option(
'--break-on-existing',
action='store_true', dest='break_on_existing', default=False,
help="Stop the download process after attempting to download a file that's in the archive.")
selection.add_option( selection.add_option(
'--include-ads', '--include-ads',
dest='include_ads', action='store_true', dest='include_ads', action='store_true',