Browse Source

Merge branch 'master' into rcs

pull/144/head
Tom-Oliver Heidel 1 year ago
committed by GitHub
parent
commit
9e4043faa9
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
100 changed files with 4534 additions and 2370 deletions
  1. +5
    -5
      .github/ISSUE_TEMPLATE/1_broken_site.md
  2. +4
    -4
      .github/ISSUE_TEMPLATE/2_site_support_request.md
  3. +3
    -3
      .github/ISSUE_TEMPLATE/3_site_feature_request.md
  4. +6
    -6
      .github/ISSUE_TEMPLATE/4_bug_report.md
  5. +3
    -3
      .github/ISSUE_TEMPLATE/5_feature_request.md
  6. +3
    -3
      .github/ISSUE_TEMPLATE/6_question.md
  7. +3
    -3
      .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md
  8. +3
    -3
      .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md
  9. +2
    -2
      .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md
  10. +4
    -4
      .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md
  11. +2
    -2
      .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md
  12. +27
    -15
      .github/workflows/build.yml
  13. +8
    -2
      README.md
  14. +1
    -1
      devscripts/make_lazy_extractors.py
  15. +25
    -15
      docs/supportedsites.md
  16. +1
    -1
      make_win.bat
  17. +1
    -1
      setup.py
  18. +1
    -1
      test/parameters.json
  19. +70
    -0
      test/test_YoutubeDL.py
  20. +21
    -19
      test/test_all_urls.py
  21. +28
    -0
      test/test_utils.py
  22. +64
    -29
      youtube_dlc/YoutubeDL.py
  23. +1
    -0
      youtube_dlc/__init__.py
  24. +1
    -1
      youtube_dlc/compat.py
  25. +20
    -11
      youtube_dlc/downloader/common.py
  26. +18
    -11
      youtube_dlc/downloader/external.py
  27. +12
    -2
      youtube_dlc/downloader/fragment.py
  28. +3
    -1
      youtube_dlc/downloader/http.py
  29. +4
    -1
      youtube_dlc/downloader/youtube_live_chat.py
  30. +7
    -0
      youtube_dlc/extractor/adobepass.py
  31. +1
    -1
      youtube_dlc/extractor/afreecatv.py
  32. +103
    -0
      youtube_dlc/extractor/amara.py
  33. +110
    -57
      youtube_dlc/extractor/arte.py
  34. +111
    -157
      youtube_dlc/extractor/bandcamp.py
  35. +53
    -4
      youtube_dlc/extractor/bbc.py
  36. +8
    -0
      youtube_dlc/extractor/bitchute.py
  37. +61
    -0
      youtube_dlc/extractor/bitwave.py
  38. +98
    -0
      youtube_dlc/extractor/box.py
  39. +15
    -6
      youtube_dlc/extractor/brightcove.py
  40. +32
    -3
      youtube_dlc/extractor/cda.py
  41. +12
    -7
      youtube_dlc/extractor/cnbc.py
  42. +31
    -3
      youtube_dlc/extractor/common.py
  43. +23
    -4
      youtube_dlc/extractor/condenast.py
  44. +4
    -1
      youtube_dlc/extractor/discoverynetworks.py
  45. +2
    -2
      youtube_dlc/extractor/europa.py
  46. +32
    -9
      youtube_dlc/extractor/extractors.py
  47. +3
    -0
      youtube_dlc/extractor/franceinter.py
  48. +36
    -11
      youtube_dlc/extractor/francetv.py
  49. +11
    -6
      youtube_dlc/extractor/generic.py
  50. +21
    -37
      youtube_dlc/extractor/googledrive.py
  51. +4
    -1
      youtube_dlc/extractor/ina.py
  52. +4
    -3
      youtube_dlc/extractor/infoq.py
  53. +1
    -1
      youtube_dlc/extractor/iqiyi.py
  54. +2
    -2
      youtube_dlc/extractor/kusi.py
  55. +3
    -0
      youtube_dlc/extractor/la7.py
  56. +91
    -0
      youtube_dlc/extractor/lbry.py
  57. +36
    -55
      youtube_dlc/extractor/lrt.py
  58. +11
    -2
      youtube_dlc/extractor/mailru.py
  59. +46
    -14
      youtube_dlc/extractor/malltv.py
  60. +131
    -0
      youtube_dlc/extractor/medaltv.py
  61. +7
    -3
      youtube_dlc/extractor/mgtv.py
  62. +16
    -3
      youtube_dlc/extractor/mtv.py
  63. +2
    -3
      youtube_dlc/extractor/nbc.py
  64. +38
    -0
      youtube_dlc/extractor/ndr.py
  65. +24
    -23
      youtube_dlc/extractor/netzkino.py
  66. +71
    -36
      youtube_dlc/extractor/newgrounds.py
  67. +167
    -0
      youtube_dlc/extractor/nitter.py
  68. +1
    -1
      youtube_dlc/extractor/npr.py
  69. +248
    -176
      youtube_dlc/extractor/nrk.py
  70. +38
    -0
      youtube_dlc/extractor/nytimes.py
  71. +1
    -1
      youtube_dlc/extractor/pbs.py
  72. +201
    -0
      youtube_dlc/extractor/pinterest.py
  73. +64
    -81
      youtube_dlc/extractor/rai.py
  74. +67
    -0
      youtube_dlc/extractor/rumble.py
  75. +95
    -16
      youtube_dlc/extractor/servus.py
  76. +123
    -0
      youtube_dlc/extractor/skyitalia.py
  77. +1
    -1
      youtube_dlc/extractor/soundcloud.py
  78. +1
    -1
      youtube_dlc/extractor/southpark.py
  79. +24
    -129
      youtube_dlc/extractor/spiegel.py
  80. +176
    -0
      youtube_dlc/extractor/spreaker.py
  81. +37
    -11
      youtube_dlc/extractor/svt.py
  82. +1
    -1
      youtube_dlc/extractor/tagesschau.py
  83. +1
    -1
      youtube_dlc/extractor/theplatform.py
  84. +97
    -0
      youtube_dlc/extractor/thisvid.py
  85. +3
    -3
      youtube_dlc/extractor/turner.py
  86. +7
    -4
      youtube_dlc/extractor/twentythreevideo.py
  87. +51
    -26
      youtube_dlc/extractor/urplay.py
  88. +16
    -66
      youtube_dlc/extractor/usanetwork.py
  89. +5
    -2
      youtube_dlc/extractor/ustream.py
  90. +117
    -88
      youtube_dlc/extractor/viki.py
  91. +9
    -6
      youtube_dlc/extractor/vimeo.py
  92. +216
    -235
      youtube_dlc/extractor/vlive.py
  93. +4
    -4
      youtube_dlc/extractor/xiami.py
  94. +1
    -17
      youtube_dlc/extractor/xtube.py
  95. +3
    -4
      youtube_dlc/extractor/youporn.py
  96. +1046
    -880
      youtube_dlc/extractor/youtube.py
  97. +82
    -0
      youtube_dlc/extractor/zoom.py
  98. +5
    -1
      youtube_dlc/options.py
  99. +7
    -4
      youtube_dlc/postprocessor/embedthumbnail.py
  100. +14
    -8
      youtube_dlc/postprocessor/ffmpeg.py

+ 5
- 5
.github/ISSUE_TEMPLATE/1_broken_site.md View File

@@ -21,15 +21,15 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->

- [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones
@@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run youtube-dlc with (`youtube-dlc -v
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dlc version 2020.10.26
[debug] youtube-dlc version 2020.10.31
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}


+ 4
- 4
.github/ISSUE_TEMPLATE/2_site_support_request.md View File

@@ -21,15 +21,15 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/blackjack4494/yt-dlc. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->

- [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dlcc version **2020.10.26**
- [ ] I've verified that I'm running youtube-dlcc version **2020.10.31**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones


+ 3
- 3
.github/ISSUE_TEMPLATE/3_site_feature_request.md View File

@@ -21,13 +21,13 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->

- [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones




+ 6
- 6
.github/ISSUE_TEMPLATE/4_bug_report.md View File

@@ -21,16 +21,16 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Read bugs section in FAQ: http://yt-dl.org/reporting
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Read bugs section in FAQ: https://github.com/blackjack4494/yt-dlc
- Finally, put x into all relevant boxes (like this [x])
-->

- [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -46,7 +46,7 @@ Add the `-v` flag to your command line you run youtube-dlc with (`youtube-dlc -v
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dlc version 2020.10.26
[debug] youtube-dlc version 2020.10.31
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}


+ 3
- 3
.github/ISSUE_TEMPLATE/5_feature_request.md View File

@@ -21,13 +21,13 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->

- [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
- [ ] I've searched the bugtracker for similar feature requests including closed ones




+ 3
- 3
.github/ISSUE_TEMPLATE/6_question.md View File

@@ -21,8 +21,8 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- Look through the README (http://yt-dl.org/readme) and FAQ (http://yt-dl.org/faq) for similar questions
- Search the bugtracker for similar questions: http://yt-dl.org/search-issues
- Look through the README (https://github.com/blackjack4494/yt-dlc) and FAQ (https://github.com/blackjack4494/yt-dlc) for similar questions
- Search the bugtracker for similar questions: https://github.com/blackjack4494/yt-dlc
- Finally, put x into all relevant boxes (like this [x])
-->

@@ -34,7 +34,7 @@ Carefully read and work through this check list in order to prevent the most com
## Question

<!--
Ask your question in an arbitrary form. Please make sure it's worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient.
Ask your question in an arbitrary form. Please make sure it's worded well enough to be understood, see https://github.com/blackjack4494/yt-dlc.
-->

WRITE QUESTION HERE

+ 3
- 3
.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md View File

@@ -18,10 +18,10 @@ title: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->



+ 3
- 3
.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md View File

@@ -19,10 +19,10 @@ labels: 'site-support-request'

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/blackjack4494/yt-dlc. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->



+ 2
- 2
.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md View File

@@ -18,8 +18,8 @@ title: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->



+ 4
- 4
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md View File

@@ -18,11 +18,11 @@ title: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Read bugs section in FAQ: http://yt-dl.org/reporting
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Read bugs section in FAQ: https://github.com/blackjack4494/yt-dlc
- Finally, put x into all relevant boxes (like this [x])
-->



+ 2
- 2
.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md View File

@@ -19,8 +19,8 @@ labels: 'request'

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->



+ 27
- 15
.github/workflows/build.yml View File

@@ -20,7 +20,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
python-version: '3.8'
- name: Install packages
run: sudo apt-get -y install zip pandoc man
- name: Bump version
@@ -57,7 +57,7 @@ jobs:
id: sha2_file
env:
SHA2: ${{ hashFiles('youtube-dlc') }}
run: echo "::set-output name=sha2_unix::${env:SHA2}"
run: echo "::set-output name=sha2_unix::$SHA2"
- name: Install dependencies for pypi
run: |
python -m pip install --upgrade pip
@@ -82,7 +82,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
python-version: '3.8'
- name: Install Requirements
run: pip install pyinstaller
- name: Bump version
@@ -98,25 +98,25 @@ jobs:
upload_url: ${{ needs.build_unix.outputs.upload_url }}
asset_path: ./dist/youtube-dlc.exe
asset_name: youtube-dlc.exe
asset_content_type: application/octet-stream
asset_content_type: application/vnd.microsoft.portable-executable
- name: Get SHA2-256SUMS for youtube-dlc.exe
id: sha2_file_win
env:
SHA2: ${{ hashFiles('dist/youtube-dlc.exe') }}
run: echo "::set-output name=sha2_windows::${env:SHA2}"
SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }}
run: echo "::set-output name=sha2_windows::$SHA2_win"

build_windows32:

runs-on: windows-latest

needs: build_unix
needs: [build_unix, build_windows]

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.5.4 32-Bit
- name: Set up Python 3.4.4 32-Bit
uses: actions/setup-python@v2
with:
python-version: '3.5.4'
python-version: '3.4.4'
architecture: 'x86'
- name: Install Requirements for 32 Bit
run: pip install pyinstaller==3.5
@@ -133,12 +133,12 @@ jobs:
upload_url: ${{ needs.build_unix.outputs.upload_url }}
asset_path: ./dist/youtube-dlc_x86.exe
asset_name: youtube-dlc_x86.exe
asset_content_type: application/octet-stream
asset_content_type: application/vnd.microsoft.portable-executable
- name: Get SHA2-256SUMS for youtube-dlc_x86.exe
id: sha2_file_win32
env:
SHA2: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
run: echo "::set-output name=sha2_windows32::${env:SHA2}"
SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
run: echo "::set-output name=sha2_windows32::$SHA2_win32"
- name: Make SHA2-256SUMS file
env:
SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }}
@@ -146,6 +146,18 @@ jobs:
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
run: |
echo "$SHA2_WINDOWS youtube-dlc.exe" > SHA2-256SUMS
echo "$SHA2_WINDOWS32 youtube-dlc32.exe" > SHA2-256SUMS
echo "$SHA2_UNIX youtube-dlc" >> SHA2-256SUMS
echo "version:${env:YTDLC_VERSION}" >> SHA2-256SUMS
echo "youtube-dlc.exe:${env:SHA2_WINDOWS}" >> SHA2-256SUMS
echo "youtube-dlc_x86.exe:${env:SHA2_WINDOWS32}" >> SHA2-256SUMS
echo "youtube-dlc:${env:SHA2_UNIX}" >> SHA2-256SUMS

- name: Upload 256SUMS file
id: upload-sums
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.build_unix.outputs.upload_url }}
asset_path: ./SHA2-256SUMS
asset_name: SHA2-256SUMS
asset_content_type: text/plain

+ 8
- 2
README.md View File

@@ -1,15 +1,15 @@
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)

[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/youtube-dlc/blob/master/LICENSE)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/yt-dlc/blob/master/LICENSE)

youtube-dlc - download videos from youtube.com or other video platforms.

youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)

- [INSTALLATION](#installation)
- [UPDATE](#update)
- [DESCRIPTION](#description)
- [OPTIONS](#options)
- [Network Options:](#network-options)
@@ -44,6 +44,10 @@ You may want to use `python3` instead of `python`

python -m pip install --upgrade youtube-dlc

If you want to install the current master branch

python -m pip install git+https://github.com/blackjack4494/yt-dlc

**UNIX** (Linux, macOS, etc.)
Using wget:

@@ -213,6 +217,8 @@ I will add some memorable short links to the binaries so you can download them e
--download-archive FILE Download only videos not listed in the
archive file. Record the IDs of all
downloaded videos in it.
--break-on-existing Stop the download process after attempting
to download a file that's in the archive.
--include-ads Download advertisements as well
(experimental)



+ 1
- 1
devscripts/make_lazy_extractors.py View File

@@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
return s


# find the correct sorting and add the required base classes so that sublcasses
# find the correct sorting and add the required base classes so that subclasses
# can be correctly created
classes = _ALL_CLASSES[:-1]
ordered_cls = []


+ 25
- 15
docs/supportedsites.md View File

@@ -59,9 +59,9 @@
- **ARD:mediathek**
- **ARDBetaMediathek**
- **Arkena**
- **arte.tv:+7**
- **arte.tv:embed**
- **arte.tv:playlist**
- **ArteTV**
- **ArteTVEmbed**
- **ArteTVPlaylist**
- **AsianCrush**
- **AsianCrushPlaylist**
- **AtresPlayer**
@@ -104,12 +104,14 @@
- **BIQLE**
- **BitChute**
- **BitChuteChannel**
- **bitwave.tv**
- **BleacherReport**
- **BleacherReportCMS**
- **blinkx**
- **Bloomberg**
- **BokeCC**
- **BostonGlobe**
- **Box**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk
- **BravoTV**
@@ -157,6 +159,7 @@
- **Chilloutzone**
- **chirbit**
- **chirbit:profile**
- **cielotv.it**
- **Cinchcast**
- **Cinemax**
- **CiscoLiveSearch**
@@ -424,6 +427,7 @@
- **la7.it**
- **laola1tv**
- **laola1tv:embed**
- **lbry.tv**
- **LCI**
- **Lcp**
- **LcpPlay**
@@ -474,6 +478,7 @@
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **MedalTV**
- **media.ccc.de**
- **media.ccc.de:lists**
- **Medialaan**
@@ -582,6 +587,7 @@
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **Nintendo**
- **Nitter**
- **njoy**: N-JOY
- **njoy:embed**
- **NJPWWorld**: 新日本プロレスワールド
@@ -616,6 +622,7 @@
- **Nuvid**
- **NYTimes**
- **NYTimesArticle**
- **NYTimesCooking**
- **NZZ**
- **ocw.mit.edu**
- **OdaTV**
@@ -668,6 +675,8 @@
- **PicartoVod**
- **Piksel**
- **Pinkbike**
- **Pinterest**
- **PinterestCollection**
- **Pladform**
- **Platzi**
- **PlatziCourse**
@@ -764,6 +773,7 @@
- **RTVNH**
- **RTVS**
- **RUHD**
- **RumbleEmbed**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channels
- **rutube:embed**: Rutube embedded videos
@@ -834,12 +844,14 @@
- **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- **SportBox**
- **SportDeutschland**
- **Spreaker**
- **SpreakerPage**
- **SpreakerShow**
- **SpreakerShowPage**
- **SpringboardPlatform**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk
@@ -943,6 +955,7 @@
- **TV2DKBornholmPlay**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **tv8.it**
- **TVA**
- **TVANouvelles**
- **TVANouvellesArticle**
@@ -1057,7 +1070,7 @@
- **vk:wallpost**
- **vlive**
- **vlive:channel**
- **vlive:playlist**
- **vlive:post**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
@@ -1146,20 +1159,17 @@
- **YourPorn**
- **YourUpload**
- **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:live**: YouTube.com live streams
- **youtube:playlist**: YouTube.com playlists
- **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search**: YouTube.com searches, "ytsearch" keyword
- **youtube:search:date**: YouTube.com searches, newest videos first, "ytsearchdate" keyword
- **youtube:search_url**: YouTube.com search URLs
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)
- **youtube:tab**: YouTube.com tab
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword
- **Zapiks**
- **Zaq1**
- **Zattoo**


+ 1
- 1
make_win.bat View File

@@ -1 +1 @@
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico --upx-exclude=vcruntime140.dll

+ 1
- 1
setup.py View File

@@ -66,7 +66,7 @@ setup(
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
# long_description_content_type="text/markdown",
url="https://github.com/blackjack4494/youtube-dlc",
url="https://github.com/blackjack4494/yt-dlc",
packages=find_packages(exclude=("youtube_dl","test",)),
#packages=[
# 'youtube_dlc',


+ 1
- 1
test/parameters.json View File

@@ -37,7 +37,7 @@
"writeinfojson": true,
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
"listsubtitles": false,
"socket_timeout": 20,
"fixup": "never"
}

+ 70
- 0
test/test_YoutubeDL.py View File

@@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx')

# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):

class _YDL(YDL):
def __init__(self, *args, **kwargs):
super(_YDL, self).__init__(*args, **kwargs)

def trouble(self, s, tb=None):
pass

ydl = _YDL({
'format': 'extra',
'ignoreerrors': True,
})

class VideoIE(InfoExtractor):
_VALID_URL = r'video:(?P<id>\d+)'

def _real_extract(self, url):
video_id = self._match_id(url)
formats = [{
'format_id': 'default',
'url': 'url:',
}]
if video_id == '0':
raise ExtractorError('foo')
if video_id == '2':
formats.append({
'format_id': 'extra',
'url': TEST_URL,
})
return {
'id': video_id,
'title': 'Video %s' % video_id,
'formats': formats,
}

class PlaylistIE(InfoExtractor):
_VALID_URL = r'playlist:'

def _entries(self):
for n in range(3):
video_id = compat_str(n)
yield {
'_type': 'url_transparent',
'ie_key': VideoIE.ie_key(),
'id': video_id,
'url': 'video:%s' % video_id,
'title': 'Video Transparent %s' % video_id,
}

def _real_extract(self, url):
return self.playlist_result(self._entries())

ydl.add_info_extractor(VideoIE(ydl))
ydl.add_info_extractor(PlaylistIE(ydl))
info = ydl.extract_info('playlist:')
entries = info['entries']
self.assertEqual(len(entries), 3)
self.assertTrue(entries[0] is None)
self.assertTrue(entries[1] is None)
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(entries[2], downloaded)
self.assertEqual(downloaded['url'], TEST_URL)
self.assertEqual(downloaded['title'], 'Video Transparent 2')
self.assertEqual(downloaded['id'], '2')
self.assertEqual(downloaded['extractor'], 'Video')
self.assertEqual(downloaded['extractor_key'], 'Video')


if __name__ == '__main__':
unittest.main()

+ 21
- 19
test/test_all_urls.py View File

@@ -31,45 +31,47 @@ class TestAllURLsMatching(unittest.TestCase):

def test_youtube_playlist_matching(self):
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('PL63F0C78739B09958')
assertTab('https://www.youtube.com/AsapSCIENCE')
assertTab('https://www.youtube.com/embedded')
assertTab('https://www.youtube.com/feed') # Own channel's home page
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
# Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')

def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
# self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])

def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')

def test_youtube_user_matching(self):
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
# def test_youtube_user_matching(self):
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])

def test_youtube_feeds(self):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])

def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])

def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])

# def test_youtube_search_matching(self):
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])

def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)


+ 28
- 0
test/test_utils.py View File

@@ -937,6 +937,28 @@ class TestUtil(unittest.TestCase):
self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a')

# Just drop ! prefix for now though this results in a wrong value
on = js_to_json('''{
a: !0,
b: !1,
c: !!0,
d: !!42.42,
e: !!![],
f: !"abc",
g: !"",
!42: 42
}''')
self.assertEqual(json.loads(on), {
'a': 0,
'b': 1,
'c': 0,
'd': 42.42,
'e': [],
'f': "abc",
'g': "",
'42': 42
})

on = js_to_json('["abc", "def",]')
self.assertEqual(json.loads(on), ['abc', 'def'])

@@ -994,6 +1016,12 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{42:4.2e1}')
self.assertEqual(json.loads(on), {'42': 42.0})

on = js_to_json('{ "0x40": "0x40" }')
self.assertEqual(json.loads(on), {'0x40': '0x40'})

on = js_to_json('{ "040": "040" }')
self.assertEqual(json.loads(on), {'040': '040'})

def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')


+ 64
- 29
youtube_dlc/YoutubeDL.py View File

@@ -210,6 +210,8 @@ class YoutubeDL(object):
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
break_on_existing: Stop the download process after attempting to download a file that's
in the archive.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
@@ -801,7 +803,7 @@ class YoutubeDL(object):
for key, value in extra_info.items():
info_dict.setdefault(key, value)

def extract_info(self, url, download=True, ie_key=None, extra_info={},
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
process=True, force_generic_extractor=False):
'''
Returns a list with a dictionary for each video we find.
@@ -821,26 +823,30 @@ class YoutubeDL(object):
if not ie.suitable(url):
continue

ie = self.get_info_extractor(ie.ie_key())
ie_key = ie.ie_key()
ie = self.get_info_extractor(ie_key)
if not ie.working():
self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.')

try:
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
except (AssertionError, IndexError, AttributeError):
temp_id = None
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break

return self.__extract_info(url, ie, download, extra_info, process, info_dict)

else:
self.report_error('no suitable InfoExtractor for URL %s' % url)

def __handle_extraction_exceptions(func):
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
@@ -848,20 +854,38 @@ class YoutubeDL(object):
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
break
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else:
raise
return wrapper

@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
return
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
if info_dict:
if info_dict.get('id'):
ie_result['id'] = info_dict['id']
if info_dict.get('title'):
ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
return ie_result

def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, {
@@ -898,7 +922,7 @@ class YoutubeDL(object):
# We have to add extra_info to the results because it may be
# contained in a playlist
return self.extract_info(ie_result['url'],
download,
download, info_dict=ie_result,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'url_transparent':
@@ -1033,12 +1057,15 @@ class YoutubeDL(object):

reason = self._match_entry(entry, incomplete=True)
if reason is not None:
self.to_screen('[download] ' + reason)
continue
if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
break
else:
self.to_screen('[download] ' + reason)
continue

entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
entry_result = self.__process_iterable_entry(entry, download, extra)
# TODO: skip failed (empty) entries?
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@@ -1067,6 +1094,11 @@ class YoutubeDL(object):
else:
raise Exception('Invalid result type: %s' % result_type)

@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)

def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec "

@@ -1852,13 +1884,13 @@ class YoutubeDL(object):
self.report_error('Cannot write annotations file: ' + annofn)
return

def dl(name, info):
def dl(name, info, subtitle=False):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
return fd.download(name, info, subtitle)

subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
@@ -1867,7 +1899,7 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
# ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@@ -1886,6 +1918,8 @@ class YoutubeDL(object):
return
else:
try:
dl(sub_filename, sub_info, subtitle=True)
'''
if self.params.get('sleep_interval_subtitles', False):
dl(sub_filename, sub_info)
else:
@@ -1893,6 +1927,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
'''
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))


+ 1
- 0
youtube_dlc/__init__.py View File

@@ -405,6 +405,7 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': download_archive_fn,
'break_on_existing': opts.break_on_existing,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure,


+ 1
- 1
youtube_dlc/compat.py View File

@@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4

# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exceptiong handling
# and uniform cross-version exception handling
class compat_HTMLParseError(Exception):
pass



+ 20
- 11
youtube_dlc/downloader/common.py View File

@@ -326,7 +326,7 @@ class FileDownloader(object):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')

def download(self, filename, info_dict):
def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
@@ -353,16 +353,25 @@ class FileDownloader(object):
})
return True

min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
self.to_screen(
'[download] Sleeping %s seconds...' % (
int(sleep_interval) if sleep_interval.is_integer()
else '%.2f' % sleep_interval))
time.sleep(sleep_interval)

if subtitle is False:
min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
self.to_screen(
'[download] Sleeping %s seconds...' % (
int(sleep_interval) if sleep_interval.is_integer()
else '%.2f' % sleep_interval))
time.sleep(sleep_interval)
else:
sleep_interval_sub = 0
if type(self.params.get('sleep_interval_subtitles')) is int:
sleep_interval_sub = self.params.get('sleep_interval_subtitles')
if sleep_interval_sub > 0:
self.to_screen(
'[download] Sleeping %s seconds...' % (
sleep_interval_sub))
time.sleep(sleep_interval_sub)
return self.real_download(filename, info_dict)

def real_download(self, filename, info_dict):


+ 18
- 11
youtube_dlc/downloader/external.py View File

@@ -115,8 +115,10 @@ class CurlFD(ExternalFD):

def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]

cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
@@ -150,8 +152,9 @@ class AxelFD(ExternalFD):

def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['-H', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['-H', '%s: %s' % (key, val)]
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -162,8 +165,9 @@ class WgetFD(ExternalFD):

def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
@@ -189,8 +193,9 @@ class Aria2cFD(ExternalFD):
if dn:
cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
@@ -206,8 +211,10 @@ class HttpieFD(ExternalFD):

def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)]

if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)]
return cmd


@@ -253,7 +260,7 @@ class FFmpegFD(ExternalFD):
# if end_time:
# args += ['-t', compat_str(end_time - start_time)]

if info_dict['http_headers'] and re.match(r'^https?://', url):
if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers'])


+ 12
- 2
youtube_dlc/downloader/fragment.py View File

@@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):

def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
success = ctx['dl'].download(fragment_filename, {
fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
})
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False, None
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
@@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
else:
self.try_rename(ctx['tmpfilename'], ctx['filename'])
if self.params.get('updatetime', True):
filetime = ctx.get('fragment_filetime')
if filetime:
try:
os.utime(ctx['filename'], (time.time(), filetime))
except Exception:
pass
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

self._hook_progress({


+ 3
- 1
youtube_dlc/downloader/http.py View File

@@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
try:
ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err:
if isinstance(err.reason, socket.timeout):
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err)
raise err
# When trying to resume, Content-Range HTTP header of response has to be checked


+ 4
- 1
youtube_dlc/downloader/youtube_live_chat.py View File

@@ -82,7 +82,10 @@ class YoutubeLiveChatReplayFD(FragmentFD):
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
try:
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
except KeyError:
continuation_id = None

self._append_fragment(ctx, processed_fragment)



+ 7
- 0
youtube_dlc/extractor/adobepass.py View File

@@ -1438,6 +1438,13 @@ class AdobePassIE(InfoExtractor):
provider_redirect_page, 'oauth redirect')
self._download_webpage(
oauth_redirect_url, video_id, 'Confirming auto login')
elif 'automatically signed in with' in provider_redirect_page:
# Seems like comcast is rolling up new way of automatically signing customers
oauth_redirect_url = self._html_search_regex(
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
'oauth redirect (signed)')
# Just need to process the request. No useful data comes back
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
else:
if '<form name="signin"' in provider_redirect_page:
provider_login_page_res = provider_redirect_page_res


+ 1
- 1
youtube_dlc/extractor/afreecatv.py View File

@@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
if video_element is None or video_element.text is None:
raise ExtractorError(
'Video %s video does not exist' % video_id, expected=True)
'Video %s does not exist' % video_id, expected=True)

video_url = video_element.text.strip()



+ 103
- 0
youtube_dlc/extractor/amara.py View File

@@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from ..utils import (
int_or_none,
parse_iso8601,
update_url_query,
)


class AmaraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
_TESTS = [{
# Youtube
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
'info_dict': {
'id': 'h6ZuVdvYnfE',
'ext': 'mp4',
'title': 'Why jury trials are becoming less common',
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'upload_date': '20160813',
'uploader': 'PBS NewsHour',
'uploader_id': 'PBSNewsHour',
'timestamp': 1549639570,
}
}, {
# Vimeo
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
'md5': '99392c75fa05d432a8f11df03612195e',
'info_dict': {
'id': '18622084',
'ext': 'mov',
'title': 'Vimeo at CES 2011!',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'timestamp': 1294763658,
'upload_date': '20110111',
'uploader': 'Sam Morrill',
'uploader_id': 'sammorrill'
}
}, {
# Direct Link
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
'md5': 'd3970f08512738ee60c5807311ff5d3f',
'info_dict': {
'id': 's8KL7I3jLmh6',
'ext': 'mp4',
'title': 'The danger of a single story',
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'upload_date': '20091007',
'timestamp': 1254942511,
}
}]

def _real_extract(self, url):
video_id = self._match_id(url)
meta = self._download_json(
'https://amara.org/api/videos/%s/' % video_id,
video_id, query={'format': 'json'})
title = meta['title']
video_url = meta['all_urls'][0]

subtitles = {}
for language in (meta.get('languages') or []):
subtitles_uri = language.get('subtitles_uri')
if not (subtitles_uri and language.get('published')):
continue
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
for f in ('json', 'srt', 'vtt'):
subtitle.append({
'ext': f,
'url': update_url_query(subtitles_uri, {'format': f}),
})

info = {
'url': video_url,
'id': video_id,
'subtitles': subtitles,
'title': title,
'description': meta.get('description'),
'thumbnail': meta.get('thumbnail'),
'duration': int_or_none(meta.get('duration')),
'timestamp': parse_iso8601(meta.get('created')),
}

for ie in (YoutubeIE, VimeoIE):
if ie.suitable(video_url):
info.update({
'_type': 'url_transparent',
'ie_key': ie.ie_key(),
})
break

return info

+ 110
- 57
youtube_dlc/extractor/arte.py View File

@@ -4,23 +4,57 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
qualities,
try_get,
unified_strdate,
url_or_none,
)

# There are different sources of video in arte.tv, the extraction process
# is different for each one. The videos usually expire in 7 days, so we can't
# add tests.


class ArteTVBaseIE(InfoExtractor):
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
info = self._download_json(json_url, video_id)
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
_API_BASE = 'https://api.arte.tv/api/player/v1'


class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
)
/(?P<id>\d{6}-\d{3}-[AF])
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'info_dict': {
'id': '088501-000-A',
'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive',
'upload_date': '20190628',
},
}, {
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'only_matching': True,
}, {
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')

info = self._download_json(
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
player_info = info['videoJsonPlayer']

vsr = try_get(player_info, lambda x: x['VSR'], dict)
@@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

title = (player_info.get('VTI') or title or player_info['VID']).strip()
title = (player_info.get('VTI') or player_info['VID']).strip()
subtitle = player_info.get('VSU', '').strip()
if subtitle:
title += ' - %s' % subtitle

info_dict = {
'id': player_info['VID'],
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])

LANGS = {
@@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
formats = []
for format_id, format_dict in vsr.items():
f = dict(format_dict)
format_url = url_or_none(f.get('url'))
streamer = f.get('streamer')
if not format_url and not streamer:
continue
versionCode = f.get('versionCode')
l = re.escape(langcode)

@@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
else:
lang_pref = -1

media_type = f.get('mediaType')
if media_type == 'hls':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for m3u8_format in m3u8_formats:
m3u8_format['language_preference'] = lang_pref
formats.extend(m3u8_formats)
continue

format = {
'format_id': format_id,
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
'quality': qfunc(f.get('quality')),
}

if f.get('mediaType') == 'rtmp':
if media_type == 'rtmp':
format['url'] = f['streamer']
format['play_path'] = 'mp4:' + f['url']
format['ext'] = 'flv'
@@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):

formats.append(format)

self._check_formats(formats, video_id)
self._sort_formats(formats)

info_dict['formats'] = formats
return info_dict

return {
'id': player_info.get('VID') or video_id,
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
'formats': formats,
}

class ArteTVPlus7IE(ArteTVBaseIE):
IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'

class ArteTVEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
'info_dict': {
'id': '088501-000-A',
'id': '100605-013-A',
'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive',
'upload_date': '20190628',
'title': 'United we Stream November Lockdown Edition #13',
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116',
},
}, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]

def _real_extract(self, url):
lang, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_from_json_url(
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
video_id, lang)


class ArteTVEmbedIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:embed'
_VALID_URL = r'''(?x)
https://www\.arte\.tv
/player/v3/index\.php\?json_url=
(?P<json_url>
https?://api\.arte\.tv/api/player/v1/config/
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
)
'''

_TESTS = []
@staticmethod
def _extract_urls(webpage):
return [url for _, url in re.findall(
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
webpage)]

def _real_extract(self, url):
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_from_json_url(json_url, video_id, lang)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
json_url = qs['json_url'][0]
video_id = ArteTVIE._match_id(json_url)
return self.url_result(
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)


class ArteTVPlaylistIE(ArteTVBaseIE):
IE_NAME = 'arte.tv:playlist'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'

_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'info_dict': {
@@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
},
'playlist_mincount': 6,
}, {
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
'only_matching': True,
}]

def _real_extract(self, url):
lang, playlist_id = re.match(self._VALID_URL, url).groups()
collection = self._download_json(
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
% (lang, playlist_id), playlist_id)
'%s/collectionData/%s/%s?source=videos'
% (self._API_BASE, lang, playlist_id), playlist_id)
entries = []
for video in collection['videos']:
if not isinstance(video, dict):
continue
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
if not video_url:
continue
video_id = video.get('programId')
entries.append({
'_type': 'url_transparent',
'url': video_url,
'id': video_id,
'title': video.get('title'),
'alt_title': video.get('subtitle'),
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
'duration': int_or_none(video.get('durationSeconds')),
'view_count': int_or_none(video.get('views')),
'ie_key': ArteTVIE.ie_key(),
})
title = collection.get('title')
description = collection.get('shortDescription') or collection.get('teaserText')
entries = [
self._extract_from_json_url(
video['jsonUrl'], video.get('programId') or playlist_id, lang)
for video in collection['videos'] if video.get('jsonUrl')]
return self.playlist_result(entries, playlist_id, title, description)

+ 111
- 157
youtube_dlc/extractor/bandcamp.py View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals

import random
@@ -5,10 +6,7 @@ import re
import time

from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urlparse,
)
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
@@ -17,71 +15,32 @@ from ..utils import (
parse_filesize,
str_or_none,
try_get,
unescapeHTML,
update_url_query,
unified_strdate,
unified_timestamp,
url_or_none,
urljoin,
)


class BandcampBaseIE(InfoExtractor):
"""Provide base functions for Bandcamp extractors"""

def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id):
json_string = self._html_search_regex(
r' data-%s="([^"]*)' % suffix,
webpage, '%s json' % suffix, default='{}')

return self._parse_json(json_string, video_id)

def _parse_json_track(self, json):
formats = []
file_ = json.get('file')
if isinstance(file_, dict):
for format_id, format_url in file_.items():
if not url_or_none(format_url):
continue
ext, abr_str = format_id.split('-', 1)
formats.append({
'format_id': format_id,
'url': self._proto_relative_url(format_url, 'http:'),
'ext': ext,
'vcodec': 'none',
'acodec': ext,
'abr': int_or_none(abr_str),
})

return {
'duration': float_or_none(json.get('duration')),
'id': str_or_none(json.get('track_id') or json.get('id')),
'title': json.get('title'),
'title_link': json.get('title_link'),
'number': int_or_none(json.get('track_num')),
'formats': formats
}


class BandcampIE(BandcampBaseIE):
IE_NAME = "Bandcamp:track"
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
class BandcampIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
'info_dict': {
'id': '1812978515',
'ext': 'mp3',
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
'duration': 9.8485,
'uploader': "youtube-dl \"'/\\\u00e4\u21ad",
'timestamp': 1354224127,
'uploader': 'youtube-dl "\'/\\ä↭',
'upload_date': '20121129',
'timestamp': 1354224127,
},
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
# free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'md5': '5d92af55811e47f38962a54c30b07ef0',
'info_dict': {
'id': '2650410135',
'ext': 'aiff',
@@ -120,52 +79,59 @@ class BandcampIE(BandcampBaseIE):
},
}]

def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
return self._parse_json(self._html_search_regex(
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
attr + ' data', group=2), video_id, fatal=fatal)

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
url_track_title = title
title = self._match_id(url)
webpage = self._download_webpage(url, title)
thumbnail = self._html_search_meta('og:image', webpage, default=None)

json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title)
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title)

json_tracks = json_tralbum.get('trackinfo')
if not json_tracks:
raise ExtractorError('Could not extract track')

track = self._parse_json_track(json_tracks[0])
artist = json_tralbum.get('artist')
album_title = json_embed.get('album_title')

json_album = json_tralbum.get('packages')
if json_album:
json_album = json_album[0]
album_publish_date = json_album.get('album_publish_date')
album_release_date = json_album.get('album_release_date')
else:
album_publish_date = None
album_release_date = json_tralbum.get('album_release_date')

timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date)
release_date = unified_strdate(album_release_date)

download_link = self._search_regex(
r'freeDownloadPage(?:["\']|&quot;):\s*(["\']|&quot;)(?P<url>(?:(?!\1).)+)\1', webpage,
'download link', default=None, group='url')
tralbum = self._extract_data_attr(webpage, title)
thumbnail = self._og_search_thumbnail(webpage)

track_id = None
track = None
track_number = None
duration = None

formats = []
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
if track_info:
file_ = track_info.get('file')
if isinstance(file_, dict):
for format_id, format_url in file_.items():
if not url_or_none(format_url):
continue
ext, abr_str = format_id.split('-', 1)
formats.append({
'format_id': format_id,
'url': self._proto_relative_url(format_url, 'http:'),
'ext': ext,
'vcodec': 'none',
'acodec': ext,
'abr': int_or_none(abr_str),
})
track = track_info.get('title')
track_id = str_or_none(
track_info.get('track_id') or track_info.get('id'))
track_number = int_or_none(track_info.get('track_num'))
duration = float_or_none(track_info.get('duration'))

embed = self._extract_data_attr(webpage, title, 'embed', False)
current = tralbum.get('current') or {}
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
timestamp = unified_timestamp(
current.get('publish_date') or tralbum.get('album_publish_date'))

download_link = tralbum.get('freeDownloadPage')
if download_link:
track_id = self._search_regex(
r'\?id=(?P<id>\d+)&',
download_link, 'track id')
track_id = compat_str(tralbum['id'])

download_webpage = self._download_webpage(
download_link, track_id, 'Downloading free downloads page')

blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
'blob', group='blob'),
track_id, transform_source=unescapeHTML)
blob = self._extract_data_attr(download_webpage, track_id, 'blob')

info = try_get(
blob, (lambda x: x['digital_items'][0],
@@ -173,6 +139,8 @@ class BandcampIE(BandcampBaseIE):
if info:
downloads = info.get('downloads')
if isinstance(downloads, dict):
if not track:
track = info.get('title')
if not artist:
artist = info.get('artist')
if not thumbnail:
@@ -206,7 +174,7 @@ class BandcampIE(BandcampBaseIE):
retry_url = url_or_none(stat.get('retry_url'))
if not retry_url:
continue
track['formats'].append({
formats.append({
'url': self._proto_relative_url(retry_url, 'http:'),
'ext': download_formats.get(format_id),
'format_id': format_id,
@@ -215,30 +183,34 @@ class BandcampIE(BandcampBaseIE):
'vcodec': 'none',
})

self._sort_formats(track['formats'])
self._sort_formats(formats)

title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title')
title = '%s - %s' % (artist, track) if artist else track

if not duration:
duration = float_or_none(self._html_search_meta(
'duration', webpage, default=None))

return {
'album': album_title,
'artist': artist,
'duration': track['duration'],
'formats': track['formats'],
'id': track['id'],
'release_date': release_date,
'id': track_id,
'title': title,
'thumbnail': thumbnail,
'uploader': artist,
'timestamp': timestamp,
'title': title,
'track': track['title'],
'track_id': track['id'],
'track_number': track['number'],
'uploader': artist
'release_date': unified_strdate(tralbum.get('album_release_date')),
'duration': duration,
'track': track,
'track_number': track_number,
'track_id': track_id,
'artist': artist,
'album': embed.get('album_title'),
'formats': formats,
}


class BandcampAlbumIE(BandcampBaseIE):
class BandcampAlbumIE(BandcampIE):
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'

_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -248,7 +220,10 @@ class BandcampAlbumIE(BandcampBaseIE):
'info_dict': {
'id': '1353101989',
'ext': 'mp3',
'title': 'Intro',
'title': 'Blazo - Intro',
'timestamp': 1311756226,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
{