Merge pull request #187 from pukkandan/break-on-existing

Stop download after encountering video in archive
Merge pull request #181 from pukkandan/precheck-archive
2020-11-16 23:00:44 +01:00 · 2020-11-16 22:59:24 +01:00 · 2020-11-13 14:19:00 +05:30 · 2020-11-13 03:05:29 +05:30 · 2020-11-13 02:44:47 +05:30
4 changed files with 26 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -217,6 +217,8 @@ I will add some memorable short links to the binaries so you can download them e
    --download-archive FILE          Download only videos not listed in the
                                     archive file. Record the IDs of all
                                     downloaded videos in it.
+    --break-on-existing              Stop the download process after attempting
+                                     to download a file that's in the archive.
    --include-ads                    Download advertisements as well
                                     (experimental)

--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -210,6 +210,8 @@ class YoutubeDL(object):
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
+    break_on_existing: Stop the download process after attempting to download a file that's
+                       in the archive.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
@@ -821,12 +823,22 @@ class YoutubeDL(object):
            if not ie.suitable(url):
                continue

-            ie = self.get_info_extractor(ie.ie_key())
+            ie_key = ie.ie_key()
+            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
+                try:
+                    temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
+                except (AssertionError, IndexError):
+                    temp_id = None
+                if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
+                    self.to_screen("[%s] %s: has already been recorded in archive" % (
+                                   ie_key, temp_id))
+                    break
+
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
@@ -1038,8 +1050,12 @@ class YoutubeDL(object):

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
-                    self.to_screen('[download] ' + reason)
-                    continue
+                    if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
+                        print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
+                        break
+                    else:
+                        self.to_screen('[download] ' + reason)
+                        continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
--- a/youtube_dlc/__init__.py
+++ b/youtube_dlc/__init__.py
@@ -405,6 +405,7 @@ def _real_main(argv=None):
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
+        'break_on_existing': opts.break_on_existing,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
--- a/youtube_dlc/options.py
+++ b/youtube_dlc/options.py
@@ -344,6 +344,10 @@ def parseOpts(overrideArguments=None):
        '--download-archive', metavar='FILE',
        dest='download_archive',
        help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+    selection.add_option(
+        '--break-on-existing',
+        action='store_true', dest='break_on_existing', default=False,
+        help="Stop the download process after attempting to download a file that's in the archive.")
    selection.add_option(
        '--include-ads',
        dest='include_ads', action='store_true',
Author	SHA1	Message	Date
Tom-Oliver Heidel	228385340e	Merge pull request #187 from pukkandan/break-on-existing Stop download after encountering video in archive	2020-11-16 23:00:44 +01:00
Tom-Oliver Heidel	63dcccd07c	Merge pull request #181 from pukkandan/precheck-archive Check IDs in the archive before downloading webpage	2020-11-16 22:59:24 +01:00
pukkandan	ea6e0c2b0d	Add --break-on-existing by @gergesh Authored-by: Yoav Shai <gergesh@gmail.com>	2020-11-13 14:19:00 +05:30
pukkandan	fe5caa2a7c	Handle IndexError	2020-11-13 03:05:29 +05:30
pukkandan	9a68de1217	Pre-check video IDs in the archive before downloading	2020-11-13 02:44:47 +05:30