Compare commits
156 Commits
2020.10.31
...
master
Author | SHA1 | Date |
---|---|---|
Tom-Oliver Heidel | f9401f2a91 | |
Heidel | a915526e08 | |
Tom-Oliver Heidel | db6926618f | |
Tom-Oliver Heidel | 19a6fa72eb | |
Tom-Oliver Heidel | 98e248faa4 | |
Unknown | 40ec740f7b | |
Tom-Oliver Heidel | 8662875551 | |
nixxo | 4f618e64f5 | |
Remita Amine | 12300fa45a | |
Tom-Oliver Heidel | e8d46fc979 | |
Tom-Oliver Heidel | b662fc8d20 | |
Tom-Oliver Heidel | 929576bb9e | |
Tom-Oliver Heidel | 7dde463e86 | |
Tom-Oliver Heidel | e29288d667 | |
Tom-Oliver Heidel | 9e4043faa9 | |
Tom-Oliver Heidel | 94c29091d0 | |
Tom-Oliver Heidel | 7b400ac40b | |
Tom-Oliver Heidel | e8dfaa0fd7 | |
Tom-Oliver Heidel | 9693a34773 | |
Tom-Oliver Heidel | 6a03f4f2a8 | |
Tom-Oliver Heidel | 6248b34ad2 | |
Tom-Oliver Heidel | 8e423ae86a | |
Tom-Oliver Heidel | 284ec6f48a | |
Tom-Oliver Heidel | bccdb02e93 | |
Tom-Oliver Heidel | ef5a4db06c | |
bopol | 9b664dc420 | |
Tom-Oliver Heidel | 93201d50aa | |
lorpus | ae7c01431d | |
pukkandan | c78b936af4 | |
pukkandan | 2fa90513e5 | |
pukkandan | f0c532a430 | |
pukkandan | a62cf34298 | |
pukkandan | 38d7028407 | |
pukkandan | 02ced43cbf | |
Tom-Oliver Heidel | 17fbbff940 | |
pukkandan | 3d3dddc948 | |
pukkandan | 70d5c17b08 | |
pukkandan | 70c5802b5d | |
pukkandan | a93f71ee5e | |
xypwn | f8fb3b8a78 | |
pukkandan | 036fcf3aa1 | |
pukkandan | 434406a982 | |
pukkandan | 386e1dd908 | |
pukkandan | 7bd4a9b611 | |
pukkandan | ef2f3c7f58 | |
pukkandan | a0566bbf5c | |
pukkandan | 3462ffa892 | |
pukkandan | d3260f40cb | |
pukkandan | 097f1663a9 | |
pukkandan | 8bdd16b499 | |
Diego Fernando Rodríguez Varón | d71eb83b05 | |
Tom-Oliver Heidel | 228385340e | |
Tom-Oliver Heidel | 63dcccd07c | |
Kyu Yeun Kim | d02f12107f | |
lorpus | d9c2b0a6de | |
lorpus | 2b547dd782 | |
pukkandan | ec57f903c9 | |
renalid | 711bd5d362 | |
Matthew | 9da76d30de | |
pukkandan | 958804ad4e | |
pukkandan | 55faba7ed7 | |
Matthew | 0366ae8756 | |
Diego Fernando Rodríguez Varón | a2044d57ca | |
pukkandan | ea6e0c2b0d | |
Jody Bruchon | 63c00011d4 | |
pukkandan | fe5caa2a7c | |
pukkandan | 9a68de1217 | |
Tom-Oliver Heidel | d052b9a112 | |
Unknown | 5e6cdcecdd | |
Tom-Oliver Heidel | c297a6c661 | |
Unknown | 6bd79800c3 | |
nao20010128nao | a1d6041497 | |
Tom-Oliver Heidel | b28e751688 | |
Tom-Oliver Heidel | 7ee5015a34 | |
Tom-Oliver Heidel | 00c38ef28d | |
Tom-Oliver Heidel | 34861f1c96 | |
Unknown | 104bfdd24d | |
Luc Ritchie | 73ac856785 | |
Tom-Oliver Heidel | d91fdaff03 | |
Tom-Oliver Heidel | c54f4aada5 | |
Unknown | 0f8566e90b | |
Tom-Oliver Heidel | 0e0cffb8fe | |
rigstot | d7aec208f2 | |
Tom-Oliver Heidel | 69e3c6df5c | |
pukkandan | 002ea8fe17 | |
Tom-Oliver Heidel | c924a219ea | |
Roman Karwacik | 8f109ad4ad | |
Luc Ritchie | 9833e7a015 | |
Tom-Oliver Heidel | da8fb75df5 | |
Robin Dunn | 142f2c8e99 | |
Ali Sherief | 876f1c17ff | |
nixxo | 5867a16789 | |
nixxo | 8924ddc3ee | |
nixxo | 595188ec71 | |
nixxo | 6c1c3e5b85 | |
nixxo | 902784a2a9 | |
Diego Fernando Rodríguez Varón | fff5071112 | |
WolfganP | 85da4055c0 | |
WolfganP | 6857df609b | |
Nicolas SAPA | 8263104fe4 | |
Nicolas SAPA | b860e4cc2f | |
Tom-Oliver Heidel | 651bae3d23 | |
Tom-Oliver Heidel | 5943bb6214 | |
Tom-Oliver Heidel | 4a82c025da | |
Unknown | 7d94c06743 | |
Unknown | 5db4014b23 | |
The Hatsune Daishi | 987d2e079a | |
nixxo | 8abd647c59 | |
Tom-Oliver Heidel | adb118da26 | |
Tom-Oliver Heidel | 06a8be981b | |
Tom-Oliver Heidel | f406ab6a14 | |
Tom-Oliver Heidel | 206de9b233 | |
Tom-Oliver Heidel | 123049d1ce | |
Tom-Oliver Heidel | f8ddb38977 | |
pukkandan | 503d4a44f6 | |
insaneracist | 366a7a4753 | |
insaneracist | 7f4f0b21c2 | |
insaneracist | 659ddd7f70 | |
nixxo | ab36800b1f | |
exwm | 9c8bc84fd2 | |
exwm | c434e9f504 | |
exwm | be5d6c213c | |
insaneracist | 15f6397c19 | |
Tom-Oliver Heidel | 7166f47b18 | |
Tom-Oliver Heidel | 471115dbee | |
Tom-Oliver Heidel | 8934f61717 | |
Tom-Oliver Heidel | 4481cfb570 | |
Roman Sebastian Karwacik | b11a88fc24 | |
Roman Sebastian Karwacik | aa13f124a5 | |
Roman Sebastian Karwacik | 81acad1279 | |
Roman Sebastian Karwacik | abd273e17b | |
Roman Sebastian Karwacik | 55cd2999ed | |
Roman Sebastian Karwacik | ef6be42014 | |
Roman Sebastian Karwacik | 3f0852e35f | |
exwm | 130599af94 | |
exwm | 73cc1b9125 | |
exwm | 3417362556 | |
exwm | 8ba3ad0a48 | |
exwm | 1923b146b3 | |
exwm | 5dcfd2508a | |
exwm | 0536e60b48 | |
insaneracist | 5c15c1a0d7 | |
Tom-Oliver Heidel | 167c108f70 | |
Diego Fernando Rodríguez Varón | 60351178a5 | |
Tom-Oliver Heidel | 764876a01f | |
Unknown | 31108ce946 | |
Unknown | ae306df7e0 | |
nixxo | e61f360157 | |
insaneracist | 712799bd30 | |
Peter Oettig | 59c5fa91c1 | |
insaneracist | 5b0a6a8010 | |
nixxo | 508649e6f5 | |
nixxo | a85e131b48 | |
nixxo | 165ce9f773 | |
nixxo | d4ca287459 | |
nixxo | a916af123c |
|
@ -20,7 +20,7 @@ jobs:
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v2
|
||||||
with:
|
with:
|
||||||
python-version: '3.x'
|
python-version: '3.8'
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: sudo apt-get -y install zip pandoc man
|
run: sudo apt-get -y install zip pandoc man
|
||||||
- name: Bump version
|
- name: Bump version
|
||||||
|
@ -55,9 +55,7 @@ jobs:
|
||||||
asset_content_type: application/octet-stream
|
asset_content_type: application/octet-stream
|
||||||
- name: Get SHA2-256SUMS for youtube-dlc
|
- name: Get SHA2-256SUMS for youtube-dlc
|
||||||
id: sha2_file
|
id: sha2_file
|
||||||
env:
|
run: echo "::set-output name=sha2_unix::$(sha256sum youtube-dlc | awk '{print $1}')"
|
||||||
SHA2: ${{ hashFiles('youtube-dlc') }}
|
|
||||||
run: echo "::set-output name=sha2_unix::$SHA2"
|
|
||||||
- name: Install dependencies for pypi
|
- name: Install dependencies for pypi
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
|
@ -76,13 +74,16 @@ jobs:
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
|
|
||||||
needs: build_unix
|
needs: build_unix
|
||||||
|
|
||||||
|
outputs:
|
||||||
|
sha2_windows: ${{ steps.sha2_file_win.outputs.sha2_windows }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v2
|
||||||
with:
|
with:
|
||||||
python-version: '3.x'
|
python-version: '3.8'
|
||||||
- name: Install Requirements
|
- name: Install Requirements
|
||||||
run: pip install pyinstaller
|
run: pip install pyinstaller
|
||||||
- name: Bump version
|
- name: Bump version
|
||||||
|
@ -101,22 +102,23 @@ jobs:
|
||||||
asset_content_type: application/vnd.microsoft.portable-executable
|
asset_content_type: application/vnd.microsoft.portable-executable
|
||||||
- name: Get SHA2-256SUMS for youtube-dlc.exe
|
- name: Get SHA2-256SUMS for youtube-dlc.exe
|
||||||
id: sha2_file_win
|
id: sha2_file_win
|
||||||
env:
|
run: echo "::set-output name=sha2_windows::$((Get-FileHash dist\youtube-dlc.exe -Algorithm SHA256).Hash.ToLower())"
|
||||||
SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }}
|
|
||||||
run: echo "::set-output name=sha2_windows::$SHA2_win"
|
|
||||||
|
|
||||||
build_windows32:
|
build_windows32:
|
||||||
|
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
|
|
||||||
needs: build_unix
|
needs: [build_unix, build_windows]
|
||||||
|
|
||||||
|
outputs:
|
||||||
|
sha2_windows32: ${{ steps.sha2_file_win.outputs.sha2_windows32 }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Set up Python 3.5.4 32-Bit
|
- name: Set up Python 3.4.4 32-Bit
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v2
|
||||||
with:
|
with:
|
||||||
python-version: '3.5.4'
|
python-version: '3.4.4'
|
||||||
architecture: 'x86'
|
architecture: 'x86'
|
||||||
- name: Install Requirements for 32 Bit
|
- name: Install Requirements for 32 Bit
|
||||||
run: pip install pyinstaller==3.5
|
run: pip install pyinstaller==3.5
|
||||||
|
@ -136,9 +138,15 @@ jobs:
|
||||||
asset_content_type: application/vnd.microsoft.portable-executable
|
asset_content_type: application/vnd.microsoft.portable-executable
|
||||||
- name: Get SHA2-256SUMS for youtube-dlc_x86.exe
|
- name: Get SHA2-256SUMS for youtube-dlc_x86.exe
|
||||||
id: sha2_file_win32
|
id: sha2_file_win32
|
||||||
env:
|
run: echo "::set-output name=sha256_windows32::$((Get-FileHash dist\youtube-dlc_x86.exe -Algorithm SHA256).Hash.ToLower())"
|
||||||
SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
|
|
||||||
run: echo "::set-output name=sha2_windows32::$SHA2_win32"
|
checksums:
|
||||||
|
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
needs: [build_unix, build_windows, build_windows32]
|
||||||
|
|
||||||
|
steps:
|
||||||
- name: Make SHA2-256SUMS file
|
- name: Make SHA2-256SUMS file
|
||||||
env:
|
env:
|
||||||
SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }}
|
SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }}
|
||||||
|
@ -146,10 +154,10 @@ jobs:
|
||||||
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
|
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
|
||||||
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
|
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
|
||||||
run: |
|
run: |
|
||||||
echo "version:$YTDLC_VERSION" >> SHA2-256SUMS
|
echo "version:${env:YTDLC_VERSION}" >> SHA2-256SUMS
|
||||||
echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS
|
echo "youtube-dlc.exe:${env:SHA2_WINDOWS}" >> SHA2-256SUMS
|
||||||
echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS
|
echo "youtube-dlc_x86.exe:${env:SHA2_WINDOWS32}" >> SHA2-256SUMS
|
||||||
echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS
|
echo "youtube-dlc:${env:SHA2_UNIX}" >> SHA2-256SUMS
|
||||||
|
|
||||||
- name: Upload 256SUMS file
|
- name: Upload 256SUMS file
|
||||||
id: upload-sums
|
id: upload-sums
|
||||||
|
|
19
README.md
19
README.md
|
@ -1,15 +1,15 @@
|
||||||
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
|
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
|
||||||
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
|
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
|
||||||
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
|
|
||||||
|
|
||||||
[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
|
[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
|
||||||
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/youtube-dlc/blob/master/LICENSE)
|
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/yt-dlc/blob/master/LICENSE)
|
||||||
|
|
||||||
youtube-dlc - download videos from youtube.com or other video platforms.
|
youtube-dlc - download videos from youtube.com or other video platforms.
|
||||||
|
|
||||||
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
|
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
|
||||||
|
|
||||||
- [INSTALLATION](#installation)
|
- [INSTALLATION](#installation)
|
||||||
|
- [UPDATE](#update)
|
||||||
- [DESCRIPTION](#description)
|
- [DESCRIPTION](#description)
|
||||||
- [OPTIONS](#options)
|
- [OPTIONS](#options)
|
||||||
- [Network Options:](#network-options)
|
- [Network Options:](#network-options)
|
||||||
|
@ -44,6 +44,14 @@ You may want to use `python3` instead of `python`
|
||||||
|
|
||||||
python -m pip install --upgrade youtube-dlc
|
python -m pip install --upgrade youtube-dlc
|
||||||
|
|
||||||
|
To install the latest release directly from git using pip
|
||||||
|
|
||||||
|
python -m pip install --upgrade git+https://github.com/blackjack4494/yt-dlc@release
|
||||||
|
|
||||||
|
If you want to install the current master branch
|
||||||
|
|
||||||
|
python -m pip install --upgrade git+https://github.com/blackjack4494/yt-dlc@master
|
||||||
|
|
||||||
**UNIX** (Linux, macOS, etc.)
|
**UNIX** (Linux, macOS, etc.)
|
||||||
Using wget:
|
Using wget:
|
||||||
|
|
||||||
|
@ -57,6 +65,7 @@ Using curl:
|
||||||
|
|
||||||
|
|
||||||
**Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!).
|
**Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!).
|
||||||
|
Or get the 32 Bit version [youtube-dlc_x86.exe](https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc_x86.exe) and rename it if needed.
|
||||||
|
|
||||||
**Compile**
|
**Compile**
|
||||||
To build the Windows executable yourself (without version info!)
|
To build the Windows executable yourself (without version info!)
|
||||||
|
@ -68,7 +77,7 @@ Or simply execute the `make_win.bat` if pyinstaller is installed.
|
||||||
There will be a `youtube-dlc.exe` in `/dist`
|
There will be a `youtube-dlc.exe` in `/dist`
|
||||||
|
|
||||||
The new way to build for Windows is to use `python pyinst.py` (please use python3 64Bit)
|
The new way to build for Windows is to use `python pyinst.py` (please use python3 64Bit)
|
||||||
For 32Bit Version use a 32Bit Version of python (3 preferred here as well) and run `python pyinst32.py`
|
For 32 Bit Version use a 32 Bit Version of python (3 preferred here as well) and run `python pyinst32.py`
|
||||||
|
|
||||||
For Unix:
|
For Unix:
|
||||||
You will need the required build tools
|
You will need the required build tools
|
||||||
|
@ -80,7 +89,7 @@ Then simply type this
|
||||||
|
|
||||||
# UPDATE
|
# UPDATE
|
||||||
**DO NOT UPDATE using `-U`!** Instead, download the binaries again or, if installed with pip, upgrade as described above in the installation section.
|
**DO NOT UPDATE using `-U`!** Instead, download the binaries again or, if installed with pip, upgrade as described above in the installation section.
|
||||||
I will add some memorable short links to the binaries so you can download them easier.
|
_The next release will include a built-in updater._
|
||||||
|
|
||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
**youtube-dlc** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
**youtube-dlc** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||||
|
@ -213,6 +222,8 @@ I will add some memorable short links to the binaries so you can download them e
|
||||||
--download-archive FILE Download only videos not listed in the
|
--download-archive FILE Download only videos not listed in the
|
||||||
archive file. Record the IDs of all
|
archive file. Record the IDs of all
|
||||||
downloaded videos in it.
|
downloaded videos in it.
|
||||||
|
--break-on-existing Stop the download process after attempting
|
||||||
|
to download a file that's in the archive.
|
||||||
--include-ads Download advertisements as well
|
--include-ads Download advertisements as well
|
||||||
(experimental)
|
(experimental)
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
# find the correct sorting and add the required base classes so that sublcasses
|
# find the correct sorting and add the required base classes so that subclasses
|
||||||
# can be correctly created
|
# can be correctly created
|
||||||
classes = _ALL_CLASSES[:-1]
|
classes = _ALL_CLASSES[:-1]
|
||||||
ordered_cls = []
|
ordered_cls = []
|
||||||
|
|
|
@ -59,9 +59,9 @@
|
||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
- **ARDBetaMediathek**
|
- **ARDBetaMediathek**
|
||||||
- **Arkena**
|
- **Arkena**
|
||||||
- **arte.tv:+7**
|
- **ArteTV**
|
||||||
- **arte.tv:embed**
|
- **ArteTVEmbed**
|
||||||
- **arte.tv:playlist**
|
- **ArteTVPlaylist**
|
||||||
- **AsianCrush**
|
- **AsianCrush**
|
||||||
- **AsianCrushPlaylist**
|
- **AsianCrushPlaylist**
|
||||||
- **AtresPlayer**
|
- **AtresPlayer**
|
||||||
|
@ -104,12 +104,14 @@
|
||||||
- **BIQLE**
|
- **BIQLE**
|
||||||
- **BitChute**
|
- **BitChute**
|
||||||
- **BitChuteChannel**
|
- **BitChuteChannel**
|
||||||
|
- **bitwave.tv**
|
||||||
- **BleacherReport**
|
- **BleacherReport**
|
||||||
- **BleacherReportCMS**
|
- **BleacherReportCMS**
|
||||||
- **blinkx**
|
- **blinkx**
|
||||||
- **Bloomberg**
|
- **Bloomberg**
|
||||||
- **BokeCC**
|
- **BokeCC**
|
||||||
- **BostonGlobe**
|
- **BostonGlobe**
|
||||||
|
- **Box**
|
||||||
- **Bpb**: Bundeszentrale für politische Bildung
|
- **Bpb**: Bundeszentrale für politische Bildung
|
||||||
- **BR**: Bayerischer Rundfunk
|
- **BR**: Bayerischer Rundfunk
|
||||||
- **BravoTV**
|
- **BravoTV**
|
||||||
|
@ -157,6 +159,7 @@
|
||||||
- **Chilloutzone**
|
- **Chilloutzone**
|
||||||
- **chirbit**
|
- **chirbit**
|
||||||
- **chirbit:profile**
|
- **chirbit:profile**
|
||||||
|
- **cielotv.it**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
- **Cinemax**
|
- **Cinemax**
|
||||||
- **CiscoLiveSearch**
|
- **CiscoLiveSearch**
|
||||||
|
@ -424,6 +427,7 @@
|
||||||
- **la7.it**
|
- **la7.it**
|
||||||
- **laola1tv**
|
- **laola1tv**
|
||||||
- **laola1tv:embed**
|
- **laola1tv:embed**
|
||||||
|
- **lbry.tv**
|
||||||
- **LCI**
|
- **LCI**
|
||||||
- **Lcp**
|
- **Lcp**
|
||||||
- **LcpPlay**
|
- **LcpPlay**
|
||||||
|
@ -474,6 +478,7 @@
|
||||||
- **massengeschmack.tv**
|
- **massengeschmack.tv**
|
||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
|
- **MedalTV**
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
- **media.ccc.de:lists**
|
- **media.ccc.de:lists**
|
||||||
- **Medialaan**
|
- **Medialaan**
|
||||||
|
@ -617,6 +622,7 @@
|
||||||
- **Nuvid**
|
- **Nuvid**
|
||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
- **NYTimesArticle**
|
- **NYTimesArticle**
|
||||||
|
- **NYTimesCooking**
|
||||||
- **NZZ**
|
- **NZZ**
|
||||||
- **ocw.mit.edu**
|
- **ocw.mit.edu**
|
||||||
- **OdaTV**
|
- **OdaTV**
|
||||||
|
@ -669,6 +675,8 @@
|
||||||
- **PicartoVod**
|
- **PicartoVod**
|
||||||
- **Piksel**
|
- **Piksel**
|
||||||
- **Pinkbike**
|
- **Pinkbike**
|
||||||
|
- **Pinterest**
|
||||||
|
- **PinterestCollection**
|
||||||
- **Pladform**
|
- **Pladform**
|
||||||
- **Platzi**
|
- **Platzi**
|
||||||
- **PlatziCourse**
|
- **PlatziCourse**
|
||||||
|
@ -765,6 +773,7 @@
|
||||||
- **RTVNH**
|
- **RTVNH**
|
||||||
- **RTVS**
|
- **RTVS**
|
||||||
- **RUHD**
|
- **RUHD**
|
||||||
|
- **RumbleEmbed**
|
||||||
- **rutube**: Rutube videos
|
- **rutube**: Rutube videos
|
||||||
- **rutube:channel**: Rutube channels
|
- **rutube:channel**: Rutube channels
|
||||||
- **rutube:embed**: Rutube embedded videos
|
- **rutube:embed**: Rutube embedded videos
|
||||||
|
@ -835,12 +844,14 @@
|
||||||
- **SpankBangPlaylist**
|
- **SpankBangPlaylist**
|
||||||
- **Spankwire**
|
- **Spankwire**
|
||||||
- **Spiegel**
|
- **Spiegel**
|
||||||
- **Spiegel:Article**: Articles on spiegel.de
|
|
||||||
- **Spiegeltv**
|
|
||||||
- **sport.francetvinfo.fr**
|
- **sport.francetvinfo.fr**
|
||||||
- **Sport5**
|
- **Sport5**
|
||||||
- **SportBox**
|
- **SportBox**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
|
- **Spreaker**
|
||||||
|
- **SpreakerPage**
|
||||||
|
- **SpreakerShow**
|
||||||
|
- **SpreakerShowPage**
|
||||||
- **SpringboardPlatform**
|
- **SpringboardPlatform**
|
||||||
- **Sprout**
|
- **Sprout**
|
||||||
- **sr:mediathek**: Saarländischer Rundfunk
|
- **sr:mediathek**: Saarländischer Rundfunk
|
||||||
|
@ -944,6 +955,7 @@
|
||||||
- **TV2DKBornholmPlay**
|
- **TV2DKBornholmPlay**
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **TV5MondePlus**: TV5MONDE+
|
- **TV5MondePlus**: TV5MONDE+
|
||||||
|
- **tv8.it**
|
||||||
- **TVA**
|
- **TVA**
|
||||||
- **TVANouvelles**
|
- **TVANouvelles**
|
||||||
- **TVANouvellesArticle**
|
- **TVANouvellesArticle**
|
||||||
|
@ -1058,7 +1070,7 @@
|
||||||
- **vk:wallpost**
|
- **vk:wallpost**
|
||||||
- **vlive**
|
- **vlive**
|
||||||
- **vlive:channel**
|
- **vlive:channel**
|
||||||
- **vlive:playlist**
|
- **vlive:post**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VODPl**
|
- **VODPl**
|
||||||
- **VODPlatform**
|
- **VODPlatform**
|
||||||
|
@ -1147,20 +1159,17 @@
|
||||||
- **YourPorn**
|
- **YourPorn**
|
||||||
- **YourUpload**
|
- **YourUpload**
|
||||||
- **youtube**: YouTube.com
|
- **youtube**: YouTube.com
|
||||||
- **youtube:channel**: YouTube.com channels
|
- **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication)
|
||||||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
|
||||||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||||
- **youtube:live**: YouTube.com live streams
|
|
||||||
- **youtube:playlist**: YouTube.com playlists
|
- **youtube:playlist**: YouTube.com playlists
|
||||||
- **youtube:playlists**: YouTube.com user/channel playlists
|
|
||||||
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
||||||
- **youtube:search**: YouTube.com searches
|
- **youtube:search**: YouTube.com searches, "ytsearch" keyword
|
||||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
- **youtube:search:date**: YouTube.com searches, newest videos first, "ytsearchdate" keyword
|
||||||
- **youtube:search_url**: YouTube.com search URLs
|
- **youtube:search_url**: YouTube.com search URLs
|
||||||
- **youtube:show**: YouTube.com (multi-season) shows
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)
|
||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:tab**: YouTube.com tab
|
||||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
|
||||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||||
|
- **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword
|
||||||
- **Zapiks**
|
- **Zapiks**
|
||||||
- **Zaq1**
|
- **Zaq1**
|
||||||
- **Zattoo**
|
- **Zattoo**
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico
|
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico --upx-exclude=vcruntime140.dll
|
2
setup.py
2
setup.py
|
@ -66,7 +66,7 @@ setup(
|
||||||
description=DESCRIPTION,
|
description=DESCRIPTION,
|
||||||
long_description=LONG_DESCRIPTION,
|
long_description=LONG_DESCRIPTION,
|
||||||
# long_description_content_type="text/markdown",
|
# long_description_content_type="text/markdown",
|
||||||
url="https://github.com/blackjack4494/youtube-dlc",
|
url="https://github.com/blackjack4494/yt-dlc",
|
||||||
packages=find_packages(exclude=("youtube_dl","test",)),
|
packages=find_packages(exclude=("youtube_dl","test",)),
|
||||||
#packages=[
|
#packages=[
|
||||||
# 'youtube_dlc',
|
# 'youtube_dlc',
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
"writeinfojson": true,
|
"writeinfojson": true,
|
||||||
"writesubtitles": false,
|
"writesubtitles": false,
|
||||||
"allsubtitles": false,
|
"allsubtitles": false,
|
||||||
"listssubtitles": false,
|
"listsubtitles": false,
|
||||||
"socket_timeout": 20,
|
"socket_timeout": 20,
|
||||||
"fixup": "never"
|
"fixup": "never"
|
||||||
}
|
}
|
||||||
|
|
|
@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
self.assertEqual(downloaded['extractor'], 'testex')
|
self.assertEqual(downloaded['extractor'], 'testex')
|
||||||
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||||
|
|
||||||
|
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||||
|
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
||||||
|
|
||||||
|
class _YDL(YDL):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(_YDL, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def trouble(self, s, tb=None):
|
||||||
|
pass
|
||||||
|
|
||||||
|
ydl = _YDL({
|
||||||
|
'format': 'extra',
|
||||||
|
'ignoreerrors': True,
|
||||||
|
})
|
||||||
|
|
||||||
|
class VideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'video:(?P<id>\d+)'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'default',
|
||||||
|
'url': 'url:',
|
||||||
|
}]
|
||||||
|
if video_id == '0':
|
||||||
|
raise ExtractorError('foo')
|
||||||
|
if video_id == '2':
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'extra',
|
||||||
|
'url': TEST_URL,
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': 'Video %s' % video_id,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
class PlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'playlist:'
|
||||||
|
|
||||||
|
def _entries(self):
|
||||||
|
for n in range(3):
|
||||||
|
video_id = compat_str(n)
|
||||||
|
yield {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': VideoIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'url': 'video:%s' % video_id,
|
||||||
|
'title': 'Video Transparent %s' % video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self.playlist_result(self._entries())
|
||||||
|
|
||||||
|
ydl.add_info_extractor(VideoIE(ydl))
|
||||||
|
ydl.add_info_extractor(PlaylistIE(ydl))
|
||||||
|
info = ydl.extract_info('playlist:')
|
||||||
|
entries = info['entries']
|
||||||
|
self.assertEqual(len(entries), 3)
|
||||||
|
self.assertTrue(entries[0] is None)
|
||||||
|
self.assertTrue(entries[1] is None)
|
||||||
|
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(entries[2], downloaded)
|
||||||
|
self.assertEqual(downloaded['url'], TEST_URL)
|
||||||
|
self.assertEqual(downloaded['title'], 'Video Transparent 2')
|
||||||
|
self.assertEqual(downloaded['id'], '2')
|
||||||
|
self.assertEqual(downloaded['extractor'], 'Video')
|
||||||
|
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -31,45 +31,47 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||||
|
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
assertPlaylist('PL63F0C78739B09958')
|
||||||
|
assertTab('https://www.youtube.com/AsapSCIENCE')
|
||||||
|
assertTab('https://www.youtube.com/embedded')
|
||||||
|
assertTab('https://www.youtube.com/feed') # Own channel's home page
|
||||||
|
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||||
# Top tracks
|
# Top tracks
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||||
|
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||||
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
|
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
|
||||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
# self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid
|
||||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
||||||
|
|
||||||
def test_youtube_channel_matching(self):
|
def test_youtube_channel_matching(self):
|
||||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
|
|
||||||
def test_youtube_user_matching(self):
|
# def test_youtube_user_matching(self):
|
||||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||||
|
|
||||||
def test_youtube_feeds(self):
|
def test_youtube_feeds(self):
|
||||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
|
||||||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
|
||||||
|
|
||||||
def test_youtube_show_matching(self):
|
# def test_youtube_search_matching(self):
|
||||||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
|
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
def test_youtube_search_matching(self):
|
|
||||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
|
||||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
|
|
|
@ -937,6 +937,28 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(d['x'], 1)
|
self.assertEqual(d['x'], 1)
|
||||||
self.assertEqual(d['y'], 'a')
|
self.assertEqual(d['y'], 'a')
|
||||||
|
|
||||||
|
# Just drop ! prefix for now though this results in a wrong value
|
||||||
|
on = js_to_json('''{
|
||||||
|
a: !0,
|
||||||
|
b: !1,
|
||||||
|
c: !!0,
|
||||||
|
d: !!42.42,
|
||||||
|
e: !!![],
|
||||||
|
f: !"abc",
|
||||||
|
g: !"",
|
||||||
|
!42: 42
|
||||||
|
}''')
|
||||||
|
self.assertEqual(json.loads(on), {
|
||||||
|
'a': 0,
|
||||||
|
'b': 1,
|
||||||
|
'c': 0,
|
||||||
|
'd': 42.42,
|
||||||
|
'e': [],
|
||||||
|
'f': "abc",
|
||||||
|
'g': "",
|
||||||
|
'42': 42
|
||||||
|
})
|
||||||
|
|
||||||
on = js_to_json('["abc", "def",]')
|
on = js_to_json('["abc", "def",]')
|
||||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||||
|
|
||||||
|
@ -994,6 +1016,12 @@ class TestUtil(unittest.TestCase):
|
||||||
on = js_to_json('{42:4.2e1}')
|
on = js_to_json('{42:4.2e1}')
|
||||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||||
|
|
||||||
|
on = js_to_json('{ "0x40": "0x40" }')
|
||||||
|
self.assertEqual(json.loads(on), {'0x40': '0x40'})
|
||||||
|
|
||||||
|
on = js_to_json('{ "040": "040" }')
|
||||||
|
self.assertEqual(json.loads(on), {'040': '040'})
|
||||||
|
|
||||||
def test_js_to_json_malformed(self):
|
def test_js_to_json_malformed(self):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
|
@ -210,6 +210,8 @@ class YoutubeDL(object):
|
||||||
download_archive: File name of a file where all downloads are recorded.
|
download_archive: File name of a file where all downloads are recorded.
|
||||||
Videos already present in the file are not downloaded
|
Videos already present in the file are not downloaded
|
||||||
again.
|
again.
|
||||||
|
break_on_existing: Stop the download process after attempting to download a file that's
|
||||||
|
in the archive.
|
||||||
cookiefile: File name where cookies should be read from and dumped to.
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
nocheckcertificate:Do not verify SSL certificates
|
nocheckcertificate:Do not verify SSL certificates
|
||||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||||
|
@ -821,31 +823,30 @@ class YoutubeDL(object):
|
||||||
if not ie.suitable(url):
|
if not ie.suitable(url):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ie = self.get_info_extractor(ie.ie_key())
|
ie_key = ie.ie_key()
|
||||||
|
ie = self.get_info_extractor(ie_key)
|
||||||
if not ie.working():
|
if not ie.working():
|
||||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||||
'and will probably not work.')
|
'and will probably not work.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ie_result = ie.extract(url)
|
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
|
||||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
except (AssertionError, IndexError, AttributeError):
|
||||||
break
|
temp_id = None
|
||||||
if isinstance(ie_result, list):
|
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
|
||||||
# Backwards compatibility: old IE result format
|
self.to_screen("[%s] %s: has already been recorded in archive" % (
|
||||||
ie_result = {
|
ie_key, temp_id))
|
||||||
'_type': 'compat_list',
|
break
|
||||||
'entries': ie_result,
|
|
||||||
}
|
return self.__extract_info(url, ie, download, extra_info, process, info_dict)
|
||||||
if info_dict:
|
|
||||||
if info_dict.get('id'):
|
else:
|
||||||
ie_result['id'] = info_dict['id']
|
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||||
if info_dict.get('title'):
|
|
||||||
ie_result['title'] = info_dict['title']
|
def __handle_extraction_exceptions(func):
|
||||||
self.add_default_extra_info(ie_result, ie, url)
|
def wrapper(self, *args, **kwargs):
|
||||||
if process:
|
try:
|
||||||
return self.process_ie_result(ie_result, download, extra_info)
|
return func(self, *args, **kwargs)
|
||||||
else:
|
|
||||||
return ie_result
|
|
||||||
except GeoRestrictedError as e:
|
except GeoRestrictedError as e:
|
||||||
msg = e.msg
|
msg = e.msg
|
||||||
if e.countries:
|
if e.countries:
|
||||||
|
@ -853,20 +854,38 @@ class YoutubeDL(object):
|
||||||
map(ISO3166Utils.short2full, e.countries))
|
map(ISO3166Utils.short2full, e.countries))
|
||||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||||
self.report_error(msg)
|
self.report_error(msg)
|
||||||
break
|
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
self.report_error(compat_str(e), e.format_traceback())
|
||||||
break
|
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.params.get('ignoreerrors', False):
|
if self.params.get('ignoreerrors', False):
|
||||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
@__handle_extraction_exceptions
|
||||||
|
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
|
||||||
|
ie_result = ie.extract(url)
|
||||||
|
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||||
|
return
|
||||||
|
if isinstance(ie_result, list):
|
||||||
|
# Backwards compatibility: old IE result format
|
||||||
|
ie_result = {
|
||||||
|
'_type': 'compat_list',
|
||||||
|
'entries': ie_result,
|
||||||
|
}
|
||||||
|
if info_dict:
|
||||||
|
if info_dict.get('id'):
|
||||||
|
ie_result['id'] = info_dict['id']
|
||||||
|
if info_dict.get('title'):
|
||||||
|
ie_result['title'] = info_dict['title']
|
||||||
|
self.add_default_extra_info(ie_result, ie, url)
|
||||||
|
if process:
|
||||||
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
return ie_result
|
||||||
|
|
||||||
def add_default_extra_info(self, ie_result, ie, url):
|
def add_default_extra_info(self, ie_result, ie, url):
|
||||||
self.add_extra_info(ie_result, {
|
self.add_extra_info(ie_result, {
|
||||||
|
@ -1038,12 +1057,15 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
reason = self._match_entry(entry, incomplete=True)
|
reason = self._match_entry(entry, incomplete=True)
|
||||||
if reason is not None:
|
if reason is not None:
|
||||||
self.to_screen('[download] ' + reason)
|
if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
|
||||||
continue
|
print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self.to_screen('[download] ' + reason)
|
||||||
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||||
download=download,
|
# TODO: skip failed (empty) entries?
|
||||||
extra_info=extra)
|
|
||||||
playlist_results.append(entry_result)
|
playlist_results.append(entry_result)
|
||||||
ie_result['entries'] = playlist_results
|
ie_result['entries'] = playlist_results
|
||||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||||
|
@ -1072,6 +1094,11 @@ class YoutubeDL(object):
|
||||||
else:
|
else:
|
||||||
raise Exception('Invalid result type: %s' % result_type)
|
raise Exception('Invalid result type: %s' % result_type)
|
||||||
|
|
||||||
|
@__handle_extraction_exceptions
|
||||||
|
def __process_iterable_entry(self, entry, download, extra_info):
|
||||||
|
return self.process_ie_result(
|
||||||
|
entry, download=download, extra_info=extra_info)
|
||||||
|
|
||||||
def _build_format_filter(self, filter_spec):
|
def _build_format_filter(self, filter_spec):
|
||||||
" Returns a function to filter the formats according to the filter_spec "
|
" Returns a function to filter the formats according to the filter_spec "
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ from .utils import (
|
||||||
write_string,
|
write_string,
|
||||||
render_table,
|
render_table,
|
||||||
)
|
)
|
||||||
from .update import update_self
|
from .update import update_binary
|
||||||
from .downloader import (
|
from .downloader import (
|
||||||
FileDownloader,
|
FileDownloader,
|
||||||
)
|
)
|
||||||
|
@ -405,6 +405,7 @@ def _real_main(argv=None):
|
||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
'age_limit': opts.age_limit,
|
'age_limit': opts.age_limit,
|
||||||
'download_archive': download_archive_fn,
|
'download_archive': download_archive_fn,
|
||||||
|
'break_on_existing': opts.break_on_existing,
|
||||||
'cookiefile': opts.cookiefile,
|
'cookiefile': opts.cookiefile,
|
||||||
'nocheckcertificate': opts.no_check_certificate,
|
'nocheckcertificate': opts.no_check_certificate,
|
||||||
'prefer_insecure': opts.prefer_insecure,
|
'prefer_insecure': opts.prefer_insecure,
|
||||||
|
@ -454,7 +455,8 @@ def _real_main(argv=None):
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
# Update version
|
# Update version
|
||||||
if opts.update_self:
|
if opts.update_self:
|
||||||
update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
# update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
||||||
|
update_binary(ydl)
|
||||||
|
|
||||||
# Remove cache dir
|
# Remove cache dir
|
||||||
if opts.rm_cachedir:
|
if opts.rm_cachedir:
|
||||||
|
|
|
@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4
|
||||||
|
|
||||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||||
# and uniform cross-version exceptiong handling
|
# and uniform cross-version exception handling
|
||||||
class compat_HTMLParseError(Exception):
|
class compat_HTMLParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -364,11 +364,14 @@ class FileDownloader(object):
|
||||||
else '%.2f' % sleep_interval))
|
else '%.2f' % sleep_interval))
|
||||||
time.sleep(sleep_interval)
|
time.sleep(sleep_interval)
|
||||||
else:
|
else:
|
||||||
sleep_interval_sub = self.params.get('sleep_interval_subtitles')
|
sleep_interval_sub = 0
|
||||||
self.to_screen(
|
if type(self.params.get('sleep_interval_subtitles')) is int:
|
||||||
'[download] Sleeping %s seconds...' % (
|
sleep_interval_sub = self.params.get('sleep_interval_subtitles')
|
||||||
int(sleep_interval_sub)))
|
if sleep_interval_sub > 0:
|
||||||
time.sleep(sleep_interval_sub)
|
self.to_screen(
|
||||||
|
'[download] Sleeping %s seconds...' % (
|
||||||
|
sleep_interval_sub))
|
||||||
|
time.sleep(sleep_interval_sub)
|
||||||
return self.real_download(filename, info_dict)
|
return self.real_download(filename, info_dict)
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
|
|
|
@ -115,8 +115,10 @@ class CurlFD(ExternalFD):
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
if info_dict.get('http_headers') is not None:
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
|
||||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||||
cmd += self._valueless_option('--silent', 'noprogress')
|
cmd += self._valueless_option('--silent', 'noprogress')
|
||||||
cmd += self._valueless_option('--verbose', 'verbose')
|
cmd += self._valueless_option('--verbose', 'verbose')
|
||||||
|
@ -150,8 +152,9 @@ class AxelFD(ExternalFD):
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-o', tmpfilename]
|
cmd = [self.exe, '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
if info_dict.get('http_headers') is not None:
|
||||||
cmd += ['-H', '%s: %s' % (key, val)]
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['-H', '%s: %s' % (key, val)]
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
@ -162,8 +165,9 @@ class WgetFD(ExternalFD):
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||||
for key, val in info_dict['http_headers'].items():
|
if info_dict.get('http_headers') is not None:
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--limit-rate', 'ratelimit')
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
retry = self._option('--tries', 'retries')
|
retry = self._option('--tries', 'retries')
|
||||||
if len(retry) == 2:
|
if len(retry) == 2:
|
||||||
|
@ -189,8 +193,9 @@ class Aria2cFD(ExternalFD):
|
||||||
if dn:
|
if dn:
|
||||||
cmd += ['--dir', dn]
|
cmd += ['--dir', dn]
|
||||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||||
for key, val in info_dict['http_headers'].items():
|
if info_dict.get('http_headers') is not None:
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--all-proxy', 'proxy')
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||||
|
@ -206,8 +211,10 @@ class HttpieFD(ExternalFD):
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||||
for key, val in info_dict['http_headers'].items():
|
|
||||||
cmd += ['%s:%s' % (key, val)]
|
if info_dict.get('http_headers') is not None:
|
||||||
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['%s:%s' % (key, val)]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
@ -253,7 +260,7 @@ class FFmpegFD(ExternalFD):
|
||||||
# if end_time:
|
# if end_time:
|
||||||
# args += ['-t', compat_str(end_time - start_time)]
|
# args += ['-t', compat_str(end_time - start_time)]
|
||||||
|
|
||||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
|
||||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||||
|
|
|
@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):
|
||||||
|
|
||||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
||||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||||
success = ctx['dl'].download(fragment_filename, {
|
fragment_info_dict = {
|
||||||
'url': frag_url,
|
'url': frag_url,
|
||||||
'http_headers': headers or info_dict.get('http_headers'),
|
'http_headers': headers or info_dict.get('http_headers'),
|
||||||
})
|
}
|
||||||
|
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||||
if not success:
|
if not success:
|
||||||
return False, None
|
return False, None
|
||||||
|
if fragment_info_dict.get('filetime'):
|
||||||
|
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
|
||||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||||
frag_content = down.read()
|
frag_content = down.read()
|
||||||
|
@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
|
||||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||||
else:
|
else:
|
||||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||||
|
if self.params.get('updatetime', True):
|
||||||
|
filetime = ctx.get('fragment_filetime')
|
||||||
|
if filetime:
|
||||||
|
try:
|
||||||
|
os.utime(ctx['filename'], (time.time(), filetime))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
|
|
@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
|
||||||
try:
|
try:
|
||||||
ctx.data = self.ydl.urlopen(request)
|
ctx.data = self.ydl.urlopen(request)
|
||||||
except (compat_urllib_error.URLError, ) as err:
|
except (compat_urllib_error.URLError, ) as err:
|
||||||
if isinstance(err.reason, socket.timeout):
|
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||||
|
reason = getattr(err, 'reason', None)
|
||||||
|
if isinstance(reason, socket.timeout):
|
||||||
raise RetryDownload(err)
|
raise RetryDownload(err)
|
||||||
raise err
|
raise err
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
|
|
|
@ -82,7 +82,10 @@ class YoutubeLiveChatReplayFD(FragmentFD):
|
||||||
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
||||||
processed_fragment.extend(
|
processed_fragment.extend(
|
||||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||||
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
|
try:
|
||||||
|
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
|
||||||
|
except KeyError:
|
||||||
|
continuation_id = None
|
||||||
|
|
||||||
self._append_fragment(ctx, processed_fragment)
|
self._append_fragment(ctx, processed_fragment)
|
||||||
|
|
||||||
|
|
|
@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||||
if video_element is None or video_element.text is None:
|
if video_element is None or video_element.text is None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s video does not exist' % video_id, expected=True)
|
'Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
video_url = video_element.text.strip()
|
video_url = video_element.text.strip()
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,103 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AmaraIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# Youtube
|
||||||
|
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
||||||
|
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'h6ZuVdvYnfE',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Why jury trials are becoming less common',
|
||||||
|
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20160813',
|
||||||
|
'uploader': 'PBS NewsHour',
|
||||||
|
'uploader_id': 'PBSNewsHour',
|
||||||
|
'timestamp': 1549639570,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Vimeo
|
||||||
|
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||||
|
'md5': '99392c75fa05d432a8f11df03612195e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '18622084',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Vimeo at CES 2011!',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'timestamp': 1294763658,
|
||||||
|
'upload_date': '20110111',
|
||||||
|
'uploader': 'Sam Morrill',
|
||||||
|
'uploader_id': 'sammorrill'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Direct Link
|
||||||
|
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||||
|
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's8KL7I3jLmh6',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The danger of a single story',
|
||||||
|
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20091007',
|
||||||
|
'timestamp': 1254942511,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
meta = self._download_json(
|
||||||
|
'https://amara.org/api/videos/%s/' % video_id,
|
||||||
|
video_id, query={'format': 'json'})
|
||||||
|
title = meta['title']
|
||||||
|
video_url = meta['all_urls'][0]
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for language in (meta.get('languages') or []):
|
||||||
|
subtitles_uri = language.get('subtitles_uri')
|
||||||
|
if not (subtitles_uri and language.get('published')):
|
||||||
|
continue
|
||||||
|
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
|
||||||
|
for f in ('json', 'srt', 'vtt'):
|
||||||
|
subtitle.append({
|
||||||
|
'ext': f,
|
||||||
|
'url': update_url_query(subtitles_uri, {'format': f}),
|
||||||
|
})
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'title': title,
|
||||||
|
'description': meta.get('description'),
|
||||||
|
'thumbnail': meta.get('thumbnail'),
|
||||||
|
'duration': int_or_none(meta.get('duration')),
|
||||||
|
'timestamp': parse_iso8601(meta.get('created')),
|
||||||
|
}
|
||||||
|
|
||||||
|
for ie in (YoutubeIE, VimeoIE):
|
||||||
|
if ie.suitable(video_url):
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': ie.ie_key(),
|
||||||
|
})
|
||||||
|
break
|
||||||
|
|
||||||
|
return info
|
|
@ -4,23 +4,57 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
|
||||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
|
||||||
# add tests.
|
|
||||||
|
|
||||||
|
|
||||||
class ArteTVBaseIE(InfoExtractor):
|
class ArteTVBaseIE(InfoExtractor):
|
||||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
|
||||||
info = self._download_json(json_url, video_id)
|
_API_BASE = 'https://api.arte.tv/api/player/v1'
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVIE(ArteTVBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||||
|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||||
|
)
|
||||||
|
/(?P<id>\d{6}-\d{3}-[AF])
|
||||||
|
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '088501-000-A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Mexico: Stealing Petrol to Survive',
|
||||||
|
'upload_date': '20190628',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
|
||||||
player_info = info['videoJsonPlayer']
|
player_info = info['videoJsonPlayer']
|
||||||
|
|
||||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||||
|
@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
if not upload_date_str:
|
if not upload_date_str:
|
||||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||||
|
|
||||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
title = (player_info.get('VTI') or player_info['VID']).strip()
|
||||||
subtitle = player_info.get('VSU', '').strip()
|
subtitle = player_info.get('VSU', '').strip()
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
info_dict = {
|
|
||||||
'id': player_info['VID'],
|
|
||||||
'title': title,
|
|
||||||
'description': player_info.get('VDE'),
|
|
||||||
'upload_date': unified_strdate(upload_date_str),
|
|
||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
|
||||||
}
|
|
||||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||||
|
|
||||||
LANGS = {
|
LANGS = {
|
||||||
|
@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_dict in vsr.items():
|
for format_id, format_dict in vsr.items():
|
||||||
f = dict(format_dict)
|
f = dict(format_dict)
|
||||||
|
format_url = url_or_none(f.get('url'))
|
||||||
|
streamer = f.get('streamer')
|
||||||
|
if not format_url and not streamer:
|
||||||
|
continue
|
||||||
versionCode = f.get('versionCode')
|
versionCode = f.get('versionCode')
|
||||||
l = re.escape(langcode)
|
l = re.escape(langcode)
|
||||||
|
|
||||||
|
@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
else:
|
else:
|
||||||
lang_pref = -1
|
lang_pref = -1
|
||||||
|
|
||||||
|
media_type = f.get('mediaType')
|
||||||
|
if media_type == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
for m3u8_format in m3u8_formats:
|
||||||
|
m3u8_format['language_preference'] = lang_pref
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
continue
|
||||||
|
|
||||||
format = {
|
format = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||||
|
@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
'quality': qfunc(f.get('quality')),
|
'quality': qfunc(f.get('quality')),
|
||||||
}
|
}
|
||||||
|
|
||||||
if f.get('mediaType') == 'rtmp':
|
if media_type == 'rtmp':
|
||||||
format['url'] = f['streamer']
|
format['url'] = f['streamer']
|
||||||
format['play_path'] = 'mp4:' + f['url']
|
format['play_path'] = 'mp4:' + f['url']
|
||||||
format['ext'] = 'flv'
|
format['ext'] = 'flv'
|
||||||
|
@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
|
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
|
|
||||||
self._check_formats(formats, video_id)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info_dict['formats'] = formats
|
return {
|
||||||
return info_dict
|
'id': player_info.get('VID') or video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': player_info.get('VDE'),
|
||||||
|
'upload_date': unified_strdate(upload_date_str),
|
||||||
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
class ArteTVEmbedIE(InfoExtractor):
|
||||||
IE_NAME = 'arte.tv:+7'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '088501-000-A',
|
'id': '100605-013-A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Mexico: Stealing Petrol to Survive',
|
'title': 'United we Stream November Lockdown Edition #13',
|
||||||
'upload_date': '20190628',
|
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||||
|
'upload_date': '20201116',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
@staticmethod
|
||||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
def _extract_urls(webpage):
|
||||||
return self._extract_from_json_url(
|
return [url for _, url in re.findall(
|
||||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
|
||||||
video_id, lang)
|
webpage)]
|
||||||
|
|
||||||
|
|
||||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
|
||||||
IE_NAME = 'arte.tv:embed'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https://www\.arte\.tv
|
|
||||||
/player/v3/index\.php\?json_url=
|
|
||||||
(?P<json_url>
|
|
||||||
https?://api\.arte\.tv/api/player/v1/config/
|
|
||||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
|
||||||
)
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
json_url = qs['json_url'][0]
|
||||||
|
video_id = ArteTVIE._match_id(json_url)
|
||||||
|
return self.url_result(
|
||||||
|
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
IE_NAME = 'arte.tv:playlist'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
collection = self._download_json(
|
collection = self._download_json(
|
||||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
'%s/collectionData/%s/%s?source=videos'
|
||||||
% (lang, playlist_id), playlist_id)
|
% (self._API_BASE, lang, playlist_id), playlist_id)
|
||||||
|
entries = []
|
||||||
|
for video in collection['videos']:
|
||||||
|
if not isinstance(video, dict):
|
||||||
|
continue
|
||||||
|
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
video_id = video.get('programId')
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'alt_title': video.get('subtitle'),
|
||||||
|
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
|
||||||
|
'duration': int_or_none(video.get('durationSeconds')),
|
||||||
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
'ie_key': ArteTVIE.ie_key(),
|
||||||
|
})
|
||||||
title = collection.get('title')
|
title = collection.get('title')
|
||||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||||
entries = [
|
|
||||||
self._extract_from_json_url(
|
|
||||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
|
||||||
for video in collection['videos'] if video.get('jsonUrl')]
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import random
|
import random
|
||||||
|
@ -5,10 +6,7 @@ import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
@ -17,71 +15,32 @@ from ..utils import (
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampBaseIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
"""Provide base functions for Bandcamp extractors"""
|
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id):
|
|
||||||
json_string = self._html_search_regex(
|
|
||||||
r' data-%s="([^"]*)' % suffix,
|
|
||||||
webpage, '%s json' % suffix, default='{}')
|
|
||||||
|
|
||||||
return self._parse_json(json_string, video_id)
|
|
||||||
|
|
||||||
def _parse_json_track(self, json):
|
|
||||||
formats = []
|
|
||||||
file_ = json.get('file')
|
|
||||||
if isinstance(file_, dict):
|
|
||||||
for format_id, format_url in file_.items():
|
|
||||||
if not url_or_none(format_url):
|
|
||||||
continue
|
|
||||||
ext, abr_str = format_id.split('-', 1)
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': self._proto_relative_url(format_url, 'http:'),
|
|
||||||
'ext': ext,
|
|
||||||
'vcodec': 'none',
|
|
||||||
'acodec': ext,
|
|
||||||
'abr': int_or_none(abr_str),
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'duration': float_or_none(json.get('duration')),
|
|
||||||
'id': str_or_none(json.get('track_id') or json.get('id')),
|
|
||||||
'title': json.get('title'),
|
|
||||||
'title_link': json.get('title_link'),
|
|
||||||
'number': int_or_none(json.get('track_num')),
|
|
||||||
'formats': formats
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(BandcampBaseIE):
|
|
||||||
IE_NAME = "Bandcamp:track"
|
|
||||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1812978515',
|
'id': '1812978515',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
|
||||||
'duration': 9.8485,
|
'duration': 9.8485,
|
||||||
'uploader': "youtube-dl \"'/\\\u00e4\u21ad",
|
'uploader': 'youtube-dl "\'/\\ä↭',
|
||||||
'timestamp': 1354224127,
|
|
||||||
'upload_date': '20121129',
|
'upload_date': '20121129',
|
||||||
|
'timestamp': 1354224127,
|
||||||
},
|
},
|
||||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||||
}, {
|
}, {
|
||||||
# free download
|
# free download
|
||||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||||
'md5': '5d92af55811e47f38962a54c30b07ef0',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2650410135',
|
'id': '2650410135',
|
||||||
'ext': 'aiff',
|
'ext': 'aiff',
|
||||||
|
@ -120,52 +79,59 @@ class BandcampIE(BandcampBaseIE):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
|
||||||
|
return self._parse_json(self._html_search_regex(
|
||||||
|
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
|
||||||
|
attr + ' data', group=2), video_id, fatal=fatal)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
title = self._match_id(url)
|
||||||
title = mobj.group('title')
|
|
||||||
url_track_title = title
|
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
tralbum = self._extract_data_attr(webpage, title)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title)
|
track_id = None
|
||||||
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title)
|
track = None
|
||||||
|
track_number = None
|
||||||
|
duration = None
|
||||||
|
|
||||||
json_tracks = json_tralbum.get('trackinfo')
|
formats = []
|
||||||
if not json_tracks:
|
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
|
||||||
raise ExtractorError('Could not extract track')
|
if track_info:
|
||||||
|
file_ = track_info.get('file')
|
||||||
|
if isinstance(file_, dict):
|
||||||
|
for format_id, format_url in file_.items():
|
||||||
|
if not url_or_none(format_url):
|
||||||
|
continue
|
||||||
|
ext, abr_str = format_id.split('-', 1)
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': self._proto_relative_url(format_url, 'http:'),
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
'acodec': ext,
|
||||||
|
'abr': int_or_none(abr_str),
|
||||||
|
})
|
||||||
|
track = track_info.get('title')
|
||||||
|
track_id = str_or_none(
|
||||||
|
track_info.get('track_id') or track_info.get('id'))
|
||||||
|
track_number = int_or_none(track_info.get('track_num'))
|
||||||
|
duration = float_or_none(track_info.get('duration'))
|
||||||
|
|
||||||
track = self._parse_json_track(json_tracks[0])
|
embed = self._extract_data_attr(webpage, title, 'embed', False)
|
||||||
artist = json_tralbum.get('artist')
|
current = tralbum.get('current') or {}
|
||||||
album_title = json_embed.get('album_title')
|
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
|
||||||
|
timestamp = unified_timestamp(
|
||||||
|
current.get('publish_date') or tralbum.get('album_publish_date'))
|
||||||
|
|
||||||
json_album = json_tralbum.get('packages')
|
download_link = tralbum.get('freeDownloadPage')
|
||||||
if json_album:
|
|
||||||
json_album = json_album[0]
|
|
||||||
album_publish_date = json_album.get('album_publish_date')
|
|
||||||
album_release_date = json_album.get('album_release_date')
|
|
||||||
else:
|
|
||||||
album_publish_date = None
|
|
||||||
album_release_date = json_tralbum.get('album_release_date')
|
|
||||||
|
|
||||||
timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date)
|
|
||||||
release_date = unified_strdate(album_release_date)
|
|
||||||
|
|
||||||
download_link = self._search_regex(
|
|
||||||
r'freeDownloadPage(?:["\']|"):\s*(["\']|")(?P<url>(?:(?!\1).)+)\1', webpage,
|
|
||||||
'download link', default=None, group='url')
|
|
||||||
if download_link:
|
if download_link:
|
||||||
track_id = self._search_regex(
|
track_id = compat_str(tralbum['id'])
|
||||||
r'\?id=(?P<id>\d+)&',
|
|
||||||
download_link, 'track id')
|
|
||||||
|
|
||||||
download_webpage = self._download_webpage(
|
download_webpage = self._download_webpage(
|
||||||
download_link, track_id, 'Downloading free downloads page')
|
download_link, track_id, 'Downloading free downloads page')
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(download_webpage, track_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
track_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
info = try_get(
|
info = try_get(
|
||||||
blob, (lambda x: x['digital_items'][0],
|
blob, (lambda x: x['digital_items'][0],
|
||||||
|
@ -173,6 +139,8 @@ class BandcampIE(BandcampBaseIE):
|
||||||
if info:
|
if info:
|
||||||
downloads = info.get('downloads')
|
downloads = info.get('downloads')
|
||||||
if isinstance(downloads, dict):
|
if isinstance(downloads, dict):
|
||||||
|
if not track:
|
||||||
|
track = info.get('title')
|
||||||
if not artist:
|
if not artist:
|
||||||
artist = info.get('artist')
|
artist = info.get('artist')
|
||||||
if not thumbnail:
|
if not thumbnail:
|
||||||
|
@ -206,7 +174,7 @@ class BandcampIE(BandcampBaseIE):
|
||||||
retry_url = url_or_none(stat.get('retry_url'))
|
retry_url = url_or_none(stat.get('retry_url'))
|
||||||
if not retry_url:
|
if not retry_url:
|
||||||
continue
|
continue
|
||||||
track['formats'].append({
|
formats.append({
|
||||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||||
'ext': download_formats.get(format_id),
|
'ext': download_formats.get(format_id),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@ -215,30 +183,34 @@ class BandcampIE(BandcampBaseIE):
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(track['formats'])
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title')
|
title = '%s - %s' % (artist, track) if artist else track
|
||||||
|
|
||||||
|
if not duration:
|
||||||
|
duration = float_or_none(self._html_search_meta(
|
||||||
|
'duration', webpage, default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'album': album_title,
|
'id': track_id,
|
||||||
'artist': artist,
|
|
||||||
'duration': track['duration'],
|
|
||||||
'formats': track['formats'],
|
|
||||||
'id': track['id'],
|
|
||||||
'release_date': release_date,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'track': track['title'],
|
'thumbnail': thumbnail,
|
||||||
'track_id': track['id'],
|
'uploader': artist,
|
||||||
'track_number': track['number'],
|
'timestamp': timestamp,
|
||||||
'uploader': artist
|
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||||
|
'duration': duration,
|
||||||
|
'track': track,
|
||||||
|
'track_number': track_number,
|
||||||
|
'track_id': track_id,
|
||||||
|
'artist': artist,
|
||||||
|
'album': embed.get('album_title'),
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampAlbumIE(BandcampBaseIE):
|
class BandcampAlbumIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
@ -248,7 +220,10 @@ class BandcampAlbumIE(BandcampBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1353101989',
|
'id': '1353101989',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Intro',
|
'title': 'Blazo - Intro',
|
||||||
|
'timestamp': 1311756226,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -256,7 +231,10 @@ class BandcampAlbumIE(BandcampBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '38097443',
|
'id': '38097443',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
'timestamp': 1311757238,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -292,6 +270,7 @@ class BandcampAlbumIE(BandcampBaseIE):
|
||||||
'title': '"Entropy" EP',
|
'title': '"Entropy" EP',
|
||||||
'uploader_id': 'jstrecords',
|
'uploader_id': 'jstrecords',
|
||||||
'id': 'entropy-ep',
|
'id': 'entropy-ep',
|
||||||
|
'description': 'md5:0ff22959c943622972596062f2f366a5',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
}, {
|
}, {
|
||||||
|
@ -301,6 +280,7 @@ class BandcampAlbumIE(BandcampBaseIE):
|
||||||
'id': 'we-are-the-plague',
|
'id': 'we-are-the-plague',
|
||||||
'title': 'WE ARE THE PLAGUE',
|
'title': 'WE ARE THE PLAGUE',
|
||||||
'uploader_id': 'insulters',
|
'uploader_id': 'insulters',
|
||||||
|
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
@ -312,41 +292,34 @@ class BandcampAlbumIE(BandcampBaseIE):
|
||||||
else super(BandcampAlbumIE, cls).suitable(url))
|
else super(BandcampAlbumIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
|
||||||
uploader_id = mobj.group('subdomain')
|
|
||||||
album_id = mobj.group('album_id')
|
|
||||||
playlist_id = album_id or uploader_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
tralbum = self._extract_data_attr(webpage, playlist_id)
|
||||||
json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id)
|
track_info = tralbum.get('trackinfo')
|
||||||
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id)
|
if not track_info:
|
||||||
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
json_tracks = json_tralbum.get('trackinfo')
|
|
||||||
if not json_tracks:
|
|
||||||
raise ExtractorError('Could not extract album tracks')
|
|
||||||
|
|
||||||
album_title = json_embed.get('album_title')
|
|
||||||
|
|
||||||
# Only tracks with duration info have songs
|
# Only tracks with duration info have songs
|
||||||
tracks = [self._parse_json_track(track) for track in json_tracks]
|
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
compat_urlparse.urljoin(url, track['title_link']),
|
urljoin(url, t['title_link']), BandcampIE.ie_key(),
|
||||||
ie=BandcampIE.ie_key(), video_id=track['id'],
|
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
|
||||||
video_title=track['title'])
|
for t in track_info
|
||||||
for track in tracks
|
if t.get('duration')]
|
||||||
if track.get('duration')]
|
|
||||||
|
current = tralbum.get('current') or {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'title': album_title,
|
'title': current.get('title'),
|
||||||
'entries': entries
|
'description': current.get('about'),
|
||||||
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampWeeklyIE(InfoExtractor):
|
class BandcampWeeklyIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:weekly'
|
IE_NAME = 'Bandcamp:weekly'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -361,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
'release_date': '20170404',
|
'release_date': '20170404',
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': 'Magic Moments',
|
'episode': 'Magic Moments',
|
||||||
'episode_number': 208,
|
|
||||||
'episode_id': '224',
|
'episode_id': '224',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'opus-lo',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, show_id)
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(webpage, show_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
video_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
show = blob['bcw_show']
|
show = blob['bcw_data'][show_id]
|
||||||
|
|
||||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
|
||||||
# which happens to expose the latest Bandcamp Weekly episode.
|
|
||||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in show['audio_stream'].items():
|
for format_id, format_url in show['audio_stream'].items():
|
||||||
|
@ -408,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
episode_number = None
|
|
||||||
seq = blob.get('bcw_seq')
|
|
||||||
|
|
||||||
if seq and isinstance(seq, list):
|
|
||||||
try:
|
|
||||||
episode_number = next(
|
|
||||||
int_or_none(e.get('episode_number'))
|
|
||||||
for e in seq
|
|
||||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
|
||||||
except StopIteration:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': show_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': show.get('desc') or show.get('short_desc'),
|
'description': show.get('desc') or show.get('short_desc'),
|
||||||
'duration': float_or_none(show.get('audio_duration')),
|
'duration': float_or_none(show.get('audio_duration')),
|
||||||
|
@ -429,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
'release_date': unified_strdate(show.get('published_date')),
|
'release_date': unified_strdate(show.get('published_date')),
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': show.get('subtitle'),
|
'episode': show.get('subtitle'),
|
||||||
'episode_number': episode_number,
|
'episode_id': show_id,
|
||||||
'episode_id': compat_str(video_id),
|
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
|
@ -981,7 +981,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
group_id = self._search_regex(
|
group_id = self._search_regex(
|
||||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||||
webpage, 'group id', default=None)
|
webpage, 'group id', default=None)
|
||||||
if playlist_id:
|
if group_id:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||||
ie=BBCCoUkIE.ie_key())
|
ie=BBCCoUkIE.ie_key())
|
||||||
|
@ -1092,10 +1092,26 @@ class BBCIE(BBCCoUkIE):
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||||
'bbcthree config', default='{}'),
|
'bbcthree config', default='{}'),
|
||||||
playlist_id, transform_source=js_to_json, fatal=False)
|
playlist_id, transform_source=js_to_json, fatal=False) or {}
|
||||||
if bbc3_config:
|
payload = bbc3_config.get('payload') or {}
|
||||||
|
if payload:
|
||||||
|
clip = payload.get('currentClip') or {}
|
||||||
|
clip_vpid = clip.get('vpid')
|
||||||
|
clip_title = clip.get('title')
|
||||||
|
if clip_vpid and clip_title:
|
||||||
|
formats, subtitles = self._download_media_selector(clip_vpid)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return {
|
||||||
|
'id': clip_vpid,
|
||||||
|
'title': clip_title,
|
||||||
|
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
|
||||||
|
'description': clip.get('description'),
|
||||||
|
'duration': parse_duration(clip.get('duration')),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
bbc3_playlist = try_get(
|
bbc3_playlist = try_get(
|
||||||
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
payload, lambda x: x['content']['bbcMedia']['playlist'],
|
||||||
dict)
|
dict)
|
||||||
if bbc3_playlist:
|
if bbc3_playlist:
|
||||||
playlist_title = bbc3_playlist.get('title') or playlist_title
|
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||||
|
@ -1118,6 +1134,39 @@ class BBCIE(BBCCoUkIE):
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
initial_data = self._parse_json(self._search_regex(
|
||||||
|
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||||
|
'preload state', default='{}'), playlist_id, fatal=False)
|
||||||
|
if initial_data:
|
||||||
|
def parse_media(media):
|
||||||
|
if not media:
|
||||||
|
return
|
||||||
|
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
|
||||||
|
item_id = item.get('id')
|
||||||
|
item_title = item.get('title')
|
||||||
|
if not (item_id and item_title):
|
||||||
|
continue
|
||||||
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
entries.append({
|
||||||
|
'id': item_id,
|
||||||
|
'title': item_title,
|
||||||
|
'thumbnail': item.get('holdingImageUrl'),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
for resp in (initial_data.get('data') or {}).values():
|
||||||
|
name = resp.get('name')
|
||||||
|
if name == 'media-experience':
|
||||||
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
|
elif name == 'article':
|
||||||
|
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||||
|
if block.get('type') != 'media':
|
||||||
|
continue
|
||||||
|
parse_media(block.get('model'))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
|
|
|
@ -36,6 +36,14 @@ class BitChuteIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [
|
||||||
|
mobj.group('url')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL,
|
||||||
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class BitwaveReplayIE(InfoExtractor):
    """Extractor for Bitwave replay pages (``bitwave.tv/<user>/replay/<id>``)."""

    IE_NAME = 'bitwave:replay'
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr',
        'only_matching': True
    }

    def _real_extract(self, url):
        """Fetch the replay record from the Bitwave API and build the info dict.

        ``title`` and ``url`` are required and are indexed directly, so a
        missing key still raises. ``name`` and ``thumbnails`` are optional
        metadata and are read tolerantly, so their absence no longer aborts
        the whole extraction.
        """
        replay_id = self._match_id(url)
        replay = self._download_json(
            'https://api.bitwave.tv/v1/replays/' + replay_id,
            replay_id)
        data = replay['data']

        # The API exposes a single ``name``; it is used for both uploader fields.
        uploader = data.get('name')

        return {
            'id': replay_id,
            'title': data['title'],
            'uploader': uploader,
            'uploader_id': uploader,
            'url': data['url'],
            'thumbnails': [
                {'url': thumbnail_url}
                for thumbnail_url in data.get('thumbnails') or []
                if thumbnail_url
            ],
        }
|
||||||
|
|
||||||
|
|
||||||
|
class BitwaveStreamIE(InfoExtractor):
    """Extractor for Bitwave channel pages (``bitwave.tv/<user>``), returned as live streams."""

    IE_NAME = 'bitwave:stream'
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/doomtube',
        'only_matching': True
    }

    def _real_extract(self, url):
        """Fetch the channel record from the Bitwave API and build the info dict.

        The channel's ``url`` is handed to the m3u8 format extractor, and
        ``title`` is required. ``thumbnail`` and ``viewCount`` are optional
        metadata and are read tolerantly, so their absence no longer aborts
        the whole extraction.
        """
        username = self._match_id(url)
        channel = self._download_json(
            'https://api.bitwave.tv/v1/channels/' + username,
            username)
        data = channel['data']

        formats = self._extract_m3u8_formats(data['url'], username, 'mp4')
        self._sort_formats(formats)

        return {
            'id': username,
            'title': self._live_title(data['title']),
            'uploader': username,
            'uploader_id': username,
            'formats': formats,
            'thumbnail': data.get('thumbnail'),
            'is_live': True,
            'view_count': data.get('viewCount'),
        }
|
|
@ -0,0 +1,98 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_iso8601,
|
||||||
|
# try_get,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BoxIE(InfoExtractor):
    """Extractor for files shared through ``app.box.com/s/<shared_name>/file/<id>`` links."""

    _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
    _TEST = {
        'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
        'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
        'info_dict': {
            'id': '510727257538',
            'ext': 'mp4',
            'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
            'uploader': 'MLS Video',
            'timestamp': 1566320259,
            'upload_date': '20190820',
            'uploader_id': '235196876',
        }
    }

    def _real_extract(self, url):
        """Resolve a shared Box file into an info dict.

        Steps: read the request token from the page's ``Box.config`` object,
        exchange it for a per-file read token, fetch the file metadata with
        that token, and expose the authenticated download URL as the single
        format when the file reports itself as downloadable.
        """
        shared_name, file_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, file_id)

        box_config = self._parse_json(
            self._search_regex(
                r'Box\.config\s*=\s*({.+?});', webpage, 'Box config'),
            file_id)
        request_token = box_config['requestToken']

        token_headers = {
            'Content-Type': 'application/json',
            'X-Request-Token': request_token,
            'X-Box-EndUser-API': 'sharedName=' + shared_name,
        }
        tokens = self._download_json(
            'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
            'Downloading token JSON metadata',
            data=json.dumps({'fileIDs': [file_id]}).encode(),
            headers=token_headers)
        access_token = tokens[file_id]['read']

        shared_link = 'https://app.box.com/s/' + shared_name
        metadata_headers = {
            'Authorization': 'Bearer ' + access_token,
            'BoxApi': 'shared_link=' + shared_link,
            'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
        }
        file_info = self._download_json(
            'https://api.box.com/2.0/files/' + file_id, file_id,
            'Downloading file JSON metadata',
            headers=metadata_headers,
            query={
                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
            })
        title = file_info['name']

        # Appended to the download URL below.
        query = {
            'access_token': access_token,
            'shared_link': shared_link
        }

        formats = []

        # DASH extraction is disabled until the query can be appended to
        # every fragment URL:
        # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
        #     entry_url_template = try_get(
        #         entry, lambda x: x['content']['url_template'])
        #     if not entry_url_template:
        #         continue
        #     representation = entry.get('representation')
        #     if representation == 'dash':
        #         TODO: append query to every fragment URL
        #         formats.extend(self._extract_mpd_formats(
        #             entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
        #             file_id, query=query))

        download_url = file_info.get('authenticated_download_url')
        if download_url and file_info.get('is_download_available'):
            formats.append({
                'ext': file_info.get('extension') or determine_ext(title),
                'filesize': file_info.get('size'),
                'format_id': 'download',
                'url': update_url_query(download_url, query),
            })

        self._sort_formats(formats)

        creator = file_info.get('created_by') or {}

        return {
            'id': file_id,
            'title': title,
            'formats': formats,
            'description': file_info.get('description') or None,
            'uploader': creator.get('name'),
            'timestamp': parse_iso8601(file_info.get('created_at')),
            'uploader_id': creator.get('id'),
        }
|
|
@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url(cls, object_str):
|
def _build_brightcove_url(cls, object_str):
|
||||||
"""
|
"""
|
||||||
Build a Brightcove url from a xml string containing
|
Build a Brightcove url from a xml string containing
|
||||||
<object class="BrightcoveExperience">{params}</object>
|
<object class="BrightcoveExperience">{params}</object>
|
||||||
|
@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
return cls._make_brightcove_url(params)
|
return cls._make_brightcove_url(params)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url_from_js(cls, object_js):
|
def _build_brightcove_url_from_js(cls, object_js):
|
||||||
# The layout of JS is as follows:
|
# The layout of JS is as follows:
|
||||||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||||
# // build Brightcove <object /> XML
|
# // build Brightcove <object /> XML
|
||||||
|
@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
).+?>\s*</object>''',
|
).+?>\s*</object>''',
|
||||||
webpage)
|
webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
|
||||||
|
|
||||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return list(filter(None, [
|
return list(filter(None, [
|
||||||
cls._build_brighcove_url_from_js(custom_bc)
|
cls._build_brightcove_url_from_js(custom_bc)
|
||||||
for custom_bc in matches]))
|
for custom_bc in matches]))
|
||||||
return [src for _, src in re.findall(
|
return [src for _, src in re.findall(
|
||||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||||
|
|
|
@ -5,10 +5,16 @@ import codecs
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_chr,
|
||||||
|
compat_ord,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
multipart_encode,
|
multipart_encode,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
random_birthday,
|
random_birthday,
|
||||||
|
@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
|
||||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||||
'view_count', default=None)
|
'view_count', default=None)
|
||||||
average_rating = self._search_regex(
|
average_rating = self._search_regex(
|
||||||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||||
webpage, 'rating', fatal=False, group='rating_value')
|
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||||
|
group='rating_value')
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
|
||||||
'age_limit': 18 if need_confirm_age else 0,
|
'age_limit': 18 if need_confirm_age else 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
||||||
|
def decrypt_file(a):
    """Decode an obfuscated CDA ``file`` value into an HTTPS media URL.

    The steps are: drop the filler markers, percent-decode, shift every
    character whose code is in 33..126 by 47 places within that range
    (other characters pass through unchanged), remove the ``.cda.mp4``
    suffix and rewrite ``.2cda.pl`` / ``.3cda.pl`` hosts to ``.cda.pl``.

    Returns ``https://`` + the decoded value, with ``.mp4`` inserted before
    ``/upstream`` when that segment is present and appended otherwise.
    """
    def rotate(char):
        # Only codes 33..126 are shifted; everything else is kept as is.
        code = compat_ord(char)
        if 33 <= code <= 126:
            return compat_chr(33 + (code + 14) % 94)
        return compat_chr(code)

    for marker in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
        a = a.replace(marker, '')
    a = compat_urllib_parse_unquote(a)
    a = ''.join(rotate(char) for char in a)
    a = a.replace('.cda.mp4', '')
    for host in ('.2cda.pl', '.3cda.pl'):
        a = a.replace(host, '.cda.pl')
    if '/upstream' in a:
        return 'https://' + a.replace('/upstream', '.mp4/upstream')
    return 'https://' + a + '.mp4'
|
||||||
|
|
||||||
def extract_format(page, version):
|
def extract_format(page, version):
|
||||||
json_str = self._html_search_regex(
|
json_str = self._html_search_regex(
|
||||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||||
|
@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
|
||||||
video['file'] = codecs.decode(video['file'], 'rot_13')
|
video['file'] = codecs.decode(video['file'], 'rot_13')
|
||||||
if video['file'].endswith('adc.mp4'):
|
if video['file'].endswith('adc.mp4'):
|
||||||
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
||||||
|
elif not video['file'].startswith('http'):
|
||||||
|
video['file'] = decrypt_file(video['file'])
|
||||||
f = {
|
f = {
|
||||||
'url': video['file'],
|
'url': video['file'],
|
||||||
}
|
}
|
||||||
|
@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return info_dict
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
|
return merge_dicts(info_dict, info)
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import smuggle_url
|
||||||
|
@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class CNBCVideoIE(InfoExtractor):
|
class CNBCVideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
video_id = self._download_json(
|
||||||
video_id = self._search_regex(
|
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
'query': '''{
|
||||||
'video id')
|
page(path: "%s") {
|
||||||
|
vcpsId
|
||||||
|
}
|
||||||
|
}''' % path,
|
||||||
|
})['data']['page']['vcpsId']
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||||
CNBCIE.ie_key())
|
CNBCIE.ie_key())
|
||||||
|
|
|
@ -1456,9 +1456,10 @@ class InfoExtractor(object):
|
||||||
try:
|
try:
|
||||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||||
return True
|
return True
|
||||||
except ExtractorError:
|
except ExtractorError as e:
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
'%s: %s URL is invalid, skipping: %s'
|
||||||
|
% (video_id, item, error_to_compat_str(e.cause)))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def http_scheme(self):
|
def http_scheme(self):
|
||||||
|
@ -1663,7 +1664,7 @@ class InfoExtractor(object):
|
||||||
# just the media without qualities renditions.
|
# just the media without qualities renditions.
|
||||||
# Fortunately, master playlist can be easily distinguished from media
|
# Fortunately, master playlist can be easily distinguished from media
|
||||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||||
# master playlist tags MUST NOT appear in a media playist and vice versa.
|
# master playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||||
# media playlist and MUST NOT appear in master playlist thus we can
|
# media playlist and MUST NOT appear in master playlist thus we can
|
||||||
# clearly detect media playlist with this criterion.
|
# clearly detect media playlist with this criterion.
|
||||||
|
@ -2596,6 +2597,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
hdcore_sign = 'hdcore=3.7.0'
|
hdcore_sign = 'hdcore=3.7.0'
|
||||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||||
hds_host = hosts.get('hds')
|
hds_host = hosts.get('hds')
|
||||||
|
@ -2608,6 +2610,7 @@ class InfoExtractor(object):
|
||||||
for entry in f4m_formats:
|
for entry in f4m_formats:
|
||||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||||
formats.extend(f4m_formats)
|
formats.extend(f4m_formats)
|
||||||
|
|
||||||
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||||
hls_host = hosts.get('hls')
|
hls_host = hosts.get('hls')
|
||||||
if hls_host:
|
if hls_host:
|
||||||
|
@ -2615,6 +2618,31 @@ class InfoExtractor(object):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
http_host = hosts.get('http')
|
||||||
|
if http_host and 'hdnea=' not in manifest_url:
|
||||||
|
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
|
||||||
|
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||||
|
qualities_length = len(qualities)
|
||||||
|
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
|
||||||
|
i = 0
|
||||||
|
http_formats = []
|
||||||
|
for f in formats:
|
||||||
|
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
|
||||||
|
for protocol in ('http', 'https'):
|
||||||
|
http_f = f.copy()
|
||||||
|
del http_f['manifest_url']
|
||||||
|
http_url = re.sub(
|
||||||
|
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
|
||||||
|
http_f.update({
|
||||||
|
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||||
|
'url': http_url,
|
||||||
|
'protocol': protocol,
|
||||||
|
})
|
||||||
|
http_formats.append(http_f)
|
||||||
|
i += 1
|
||||||
|
formats.extend(http_formats)
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||||
|
|
|
@ -16,6 +16,8 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'uploader': 'gq',
|
'uploader': 'gq',
|
||||||
'upload_date': '20170321',
|
'upload_date': '20170321',
|
||||||
'timestamp': 1490126427,
|
'timestamp': 1490126427,
|
||||||
|
'description': 'How much grimmer would things be if these people were competent?',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# JS embed
|
# JS embed
|
||||||
|
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||||
'uploader': 'arstechnica',
|
'uploader': 'arstechnica',
|
||||||
'upload_date': '20150916',
|
'upload_date': '20150916',
|
||||||
'timestamp': 1442434955,
|
'timestamp': 1442434920,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||||
|
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for t, caption in video_info.get('captions', {}).items():
|
||||||
|
caption_url = caption.get('src')
|
||||||
|
if not (t in ('vtt', 'srt', 'tml') and caption_url):
|
||||||
|
continue
|
||||||
|
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'season': video_info.get('season_title'),
|
'season': video_info.get('season_title'),
|
||||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||||
'categories': video_info.get('categories'),
|
'categories': video_info.get('categories'),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
|
||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
return self._extract_series(url, webpage)
|
return self._extract_series(url, webpage)
|
||||||
else:
|
else:
|
||||||
params = self._extract_video_params(webpage, display_id)
|
video = try_get(self._parse_json(self._search_regex(
|
||||||
info = self._search_json_ld(
|
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||||
webpage, display_id, fatal=False)
|
'preload state', '{}'), display_id),
|
||||||
|
lambda x: x['transformed']['video'])
|
||||||
|
if video:
|
||||||
|
params = {'videoId': video['id']}
|
||||||
|
info = {'description': strip_or_none(video.get('description'))}
|
||||||
|
else:
|
||||||
|
params = self._extract_video_params(webpage, display_id)
|
||||||
|
info = self._search_json_ld(
|
||||||
|
webpage, display_id, fatal=False)
|
||||||
info.update(self._extract_video(params))
|
info.update(self._extract_video(params))
|
||||||
return info
|
return info
|
||||||
|
|
|
@ -7,7 +7,7 @@ from .dplay import DPlayIE
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryNetworksDeIE(DPlayIE):
|
class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||||
|
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
|
||||||
|
|
||||||
title = get_item('title', preferred_langs) or video_id
|
title = get_item('title', preferred_langs) or video_id
|
||||||
description = get_item('description', preferred_langs)
|
description = get_item('description', preferred_langs)
|
||||||
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||||
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
||||||
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
||||||
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
||||||
|
@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnmail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
|
|
@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
|
||||||
from .airmozilla import AirMozillaIE
|
from .airmozilla import AirMozillaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
|
from .amara import AmaraIE
|
||||||
from .alura import (
|
from .alura import (
|
||||||
AluraIE,
|
AluraIE,
|
||||||
AluraCourseIE
|
AluraCourseIE
|
||||||
|
@ -62,7 +63,7 @@ from .ard import (
|
||||||
ARDMediathekIE,
|
ARDMediathekIE,
|
||||||
)
|
)
|
||||||
from .arte import (
|
from .arte import (
|
||||||
ArteTVPlus7IE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
|
@ -116,6 +117,10 @@ from .bitchute import (
|
||||||
BitChuteIE,
|
BitChuteIE,
|
||||||
BitChuteChannelIE,
|
BitChuteChannelIE,
|
||||||
)
|
)
|
||||||
|
from .bitwave import (
|
||||||
|
BitwaveReplayIE,
|
||||||
|
BitwaveStreamIE,
|
||||||
|
)
|
||||||
from .biqle import BIQLEIE
|
from .biqle import BIQLEIE
|
||||||
from .bleacherreport import (
|
from .bleacherreport import (
|
||||||
BleacherReportIE,
|
BleacherReportIE,
|
||||||
|
@ -125,6 +130,7 @@ from .blinkx import BlinkxIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bostonglobe import BostonGlobeIE
|
from .bostonglobe import BostonGlobeIE
|
||||||
|
from .box import BoxIE
|
||||||
from .bpb import BpbIE
|
from .bpb import BpbIE
|
||||||
from .br import (
|
from .br import (
|
||||||
BRIE,
|
BRIE,
|
||||||
|
@ -414,6 +420,10 @@ from .gamestar import GameStarIE
|
||||||
from .gaskrank import GaskrankIE
|
from .gaskrank import GaskrankIE
|
||||||
from .gazeta import GazetaIE
|
from .gazeta import GazetaIE
|
||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
|
from .gedi import (
|
||||||
|
GediIE,
|
||||||
|
GediEmbedsIE,
|
||||||
|
)
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .gfycat import GfycatIE
|
from .gfycat import GfycatIE
|
||||||
from .giantbomb import GiantBombIE
|
from .giantbomb import GiantBombIE
|
||||||
|
@ -542,6 +552,7 @@ from .laola1tv import (
|
||||||
EHFTVIE,
|
EHFTVIE,
|
||||||
ITTFIE,
|
ITTFIE,
|
||||||
)
|
)
|
||||||
|
from .lbry import LBRYIE
|
||||||
from .lci import LCIIE
|
from .lci import LCIIE
|
||||||
from .lcp import (
|
from .lcp import (
|
||||||
LcpPlayIE,
|
LcpPlayIE,
|
||||||
|
@ -617,6 +628,7 @@ from .markiza import (
|
||||||
from .massengeschmacktv import MassengeschmackTVIE
|
from .massengeschmacktv import MassengeschmackTVIE
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
|
from .medaltv import MedalTVIE
|
||||||
from .mediaset import MediasetIE
|
from .mediaset import MediasetIE
|
||||||
from .mediasite import (
|
from .mediasite import (
|
||||||
MediasiteIE,
|
MediasiteIE,
|
||||||
|
@ -799,6 +811,7 @@ from .ntvru import NTVRuIE
|
||||||
from .nytimes import (
|
from .nytimes import (
|
||||||
NYTimesIE,
|
NYTimesIE,
|
||||||
NYTimesArticleIE,
|
NYTimesArticleIE,
|
||||||
|
NYTimesCookingIE,
|
||||||
)
|
)
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .nzz import NZZIE
|
from .nzz import NZZIE
|
||||||
|
@ -861,6 +874,10 @@ from .picarto import (
|
||||||
)
|
)
|
||||||
from .piksel import PikselIE
|
from .piksel import PikselIE
|
||||||
from .pinkbike import PinkbikeIE
|
from .pinkbike import PinkbikeIE
|
||||||
|
from .pinterest import (
|
||||||
|
PinterestIE,
|
||||||
|
PinterestCollectionIE,
|
||||||
|
)
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .platzi import (
|
from .platzi import (
|
||||||
PlatziIE,
|
PlatziIE,
|
||||||
|
@ -937,6 +954,11 @@ from .raywenderlich import (
|
||||||
RayWenderlichCourseIE,
|
RayWenderlichCourseIE,
|
||||||
)
|
)
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
|
from .rcs import (
|
||||||
|
RCSIE,
|
||||||
|
RCSEmbedsIE,
|
||||||
|
RCSVariousIE,
|
||||||
|
)
|
||||||
from .rds import RDSIE
|
from .rds import RDSIE
|
||||||
from .redbulltv import (
|
from .redbulltv import (
|
||||||
RedBullTVIE,
|
RedBullTVIE,
|
||||||
|
@ -979,6 +1001,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
|
||||||
from .rtvnh import RTVNHIE
|
from .rtvnh import RTVNHIE
|
||||||
from .rtvs import RTVSIE
|
from .rtvs import RTVSIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
|
from .rumble import RumbleEmbedIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
@ -1029,6 +1052,16 @@ from .shared import (
|
||||||
from .showroomlive import ShowRoomLiveIE
|
from .showroomlive import ShowRoomLiveIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .sixplay import SixPlayIE
|
from .sixplay import SixPlayIE
|
||||||
|
from .skyit import (
|
||||||
|
SkyItPlayerIE,
|
||||||
|
SkyItVideoIE,
|
||||||
|
SkyItVideoLiveIE,
|
||||||
|
SkyItIE,
|
||||||
|
SkyItAcademyIE,
|
||||||
|
SkyItArteIE,
|
||||||
|
CieloTVItIE,
|
||||||
|
TV8ItIE,
|
||||||
|
)
|
||||||
from .skylinewebcams import SkylineWebcamsIE
|
from .skylinewebcams import SkylineWebcamsIE
|
||||||
from .skynewsarabia import (
|
from .skynewsarabia import (
|
||||||
SkyNewsArabiaIE,
|
SkyNewsArabiaIE,
|
||||||
|
@ -1038,10 +1071,6 @@ from .sky import (
|
||||||
SkyNewsIE,
|
SkyNewsIE,
|
||||||
SkySportsIE,
|
SkySportsIE,
|
||||||
)
|
)
|
||||||
from .skyitalia import (
|
|
||||||
SkyArteItaliaIE,
|
|
||||||
SkyItaliaIE,
|
|
||||||
)
|
|
||||||
from .slideshare import SlideshareIE
|
from .slideshare import SlideshareIE
|
||||||
from .slideslive import SlidesLiveIE
|
from .slideslive import SlidesLiveIE
|
||||||
from .slutload import SlutloadIE
|
from .slutload import SlutloadIE
|
||||||
|
@ -1079,8 +1108,7 @@ from .spankbang import (
|
||||||
SpankBangPlaylistIE,
|
SpankBangPlaylistIE,
|
||||||
)
|
)
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
|
||||||
from .spike import (
|
from .spike import (
|
||||||
BellatorIE,
|
BellatorIE,
|
||||||
ParamountNetworkIE,
|
ParamountNetworkIE,
|
||||||
|
@ -1094,6 +1122,12 @@ from .stitcher import StitcherIE
|
||||||
from .sport5 import Sport5IE
|
from .sport5 import Sport5IE
|
||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
|
from .spreaker import (
|
||||||
|
SpreakerIE,
|
||||||
|
SpreakerPageIE,
|
||||||
|
SpreakerShowIE,
|
||||||
|
SpreakerShowPageIE,
|
||||||
|
)
|
||||||
from .springboardplatform import SpringboardPlatformIE
|
from .springboardplatform import SpringboardPlatformIE
|
||||||
from .sprout import SproutIE
|
from .sprout import SproutIE
|
||||||
from .srgssr import (
|
from .srgssr import (
|
||||||
|
@ -1175,13 +1209,11 @@ from .theweatherchannel import TheWeatherChannelIE
|
||||||
from .thisamericanlife import ThisAmericanLifeIE
|
from .thisamericanlife import ThisAmericanLifeIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .thisoldhouse import ThisOldHouseIE
|
from .thisoldhouse import ThisOldHouseIE
|
||||||
|
from .thisvid import ThisVidIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
from .tiktok import TikTokIE
|
from .tiktok import TikTokIE
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tmz import (
|
from .tmz import TMZIE
|
||||||
TMZIE,
|
|
||||||
TMZArticleIE,
|
|
||||||
)
|
|
||||||
from .tnaflix import (
|
from .tnaflix import (
|
||||||
TNAFlixNetworkEmbedIE,
|
TNAFlixNetworkEmbedIE,
|
||||||
TNAFlixIE,
|
TNAFlixIE,
|
||||||
|
@ -1386,8 +1418,8 @@ from .vk import (
|
||||||
)
|
)
|
||||||
from .vlive import (
|
from .vlive import (
|
||||||
VLiveIE,
|
VLiveIE,
|
||||||
|
VLivePostIE,
|
||||||
VLiveChannelIE,
|
VLiveChannelIE,
|
||||||
VLivePlaylistIE
|
|
||||||
)
|
)
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
from .vodpl import VODPlIE
|
from .vodpl import VODPlIE
|
||||||
|
@ -1504,21 +1536,18 @@ from .yourporn import YourPornIE
|
||||||
from .yourupload import YourUploadIE
|
from .yourupload import YourUploadIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
YoutubeHistoryIE,
|
YoutubeHistoryIE,
|
||||||
YoutubeLiveIE,
|
YoutubeTabIE,
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubePlaylistsIE,
|
|
||||||
YoutubeRecommendedIE,
|
YoutubeRecommendedIE,
|
||||||
YoutubeSearchDateIE,
|
YoutubeSearchDateIE,
|
||||||
YoutubeSearchIE,
|
YoutubeSearchIE,
|
||||||
YoutubeSearchURLIE,
|
YoutubeSearchURLIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTruncatedIDIE,
|
YoutubeTruncatedIDIE,
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeUserIE,
|
YoutubeYtUserIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .zapiks import ZapiksIE
|
from .zapiks import ZapiksIE
|
||||||
|
@ -1544,4 +1573,5 @@ from .zattoo import (
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
from .zingmp3 import ZingMp3IE
|
from .zingmp3 import ZingMp3IE
|
||||||
|
from .zoom import ZoomIE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
|
|
|
@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'upload_date': '20160907',
|
'upload_date': '20160907',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||||
|
|
||||||
upload_date_str = self._search_regex(
|
upload_date_str = self._search_regex(
|
||||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||||
|
@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'formats': [{
|
'formats': [{
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
|
|
||||||
|
@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
|
||||||
|
|
||||||
is_live = None
|
is_live = None
|
||||||
|
|
||||||
formats = []
|
videos = []
|
||||||
for video in info['videos']:
|
|
||||||
if video['statut'] != 'ONLINE':
|
for video in (info.get('videos') or []):
|
||||||
|
if video.get('statut') != 'ONLINE':
|
||||||
continue
|
continue
|
||||||
video_url = video['url']
|
if not video.get('url'):
|
||||||
|
continue
|
||||||
|
videos.append(video)
|
||||||
|
|
||||||
|
if not videos:
|
||||||
|
for device_type in ['desktop', 'mobile']:
|
||||||
|
fallback_info = self._download_json(
|
||||||
|
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||||
|
video_id, 'Downloading fallback %s video JSON' % device_type, query={
|
||||||
|
'device_type': device_type,
|
||||||
|
'browser': 'chrome',
|
||||||
|
}, fatal=False)
|
||||||
|
|
||||||
|
if fallback_info and fallback_info.get('video'):
|
||||||
|
videos.append(fallback_info['video'])
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video in videos:
|
||||||
|
video_url = video.get('url')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
if is_live is None:
|
if is_live is None:
|
||||||
is_live = (try_get(
|
is_live = (try_get(
|
||||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
|
||||||
bool) is True) or '/live.francetv.fr/' in video_url
|
or video.get('is_live') is True
|
||||||
format_id = video['format']
|
or '/live.francetv.fr/' in video_url)
|
||||||
|
format_id = video.get('format')
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
if georestricted:
|
if georestricted:
|
||||||
|
@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
|
||||||
sign(video_url, format_id), video_id, 'mp4',
|
sign(video_url, format_id), video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||||
fatal=False))
|
fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
||||||
elif video_url.startswith('rtmp'):
|
elif video_url.startswith('rtmp'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = info['titre']
|
title = info['titre']
|
||||||
|
@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(title) if is_live else title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'description': clean_html(info['synopsis']),
|
'description': clean_html(info.get('synopsis')),
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
||||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
||||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
|
|
@ -0,0 +1,266 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
base_url,
|
||||||
|
url_basename,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GediBaseIE(InfoExtractor):
    """Common extraction logic for the GEDI video player pages.

    The page is scanned for ``PlayerFactory.setParam('<type>', '<name>',
    '<value>');`` calls.  ``format`` entries describe the available audio /
    video renditions, ``param`` entries carry the title and the thumbnail.
    """

    @staticmethod
    def _clean_audio_fmts(formats):
        """Remove, in place, every format dict that has no 'acodec' key."""
        formats[:] = [f for f in formats if 'acodec' in f]

    @staticmethod
    def _parse_int(value, default=None):
        """Return ``int(value)``; *default* if value is None or not an integer."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def _real_extract(self, url):
        """Build the info dict (id, title, description, thumbnail, formats)."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        player_data = re.findall(
            r'PlayerFactory\.setParam\(\'(?P<type>.+?)\',\s*\'(?P<name>.+?)\',\s*\'(?P<val>.+?)\'\);',
            webpage)

        formats = []
        audio_fmts = []
        hls_fmts = []
        http_fmts = []
        title = ''
        thumb = ''

        # <kind>-<protocol>-<height label>[-<bitrate>]
        fmt_reg = r'(?P<t>video|audio)-(?P<p>rrtv|hls)-(?P<h>[\w\d]+)(?:-(?P<br>[\w\d]+))?$'
        # bitrate as embedded in the direct mp4 URL
        br_reg = r'video-rrtv-(?P<br>\d+)-'

        for t, n, v in player_data:
            if t == 'format':
                m = re.match(fmt_reg, n)
                if not m:
                    continue

                kind, proto = m.group('t'), m.group('p')

                # audio formats
                if kind == 'audio':
                    if proto == 'hls':
                        audio_fmts.extend(self._extract_m3u8_formats(
                            v, video_id, 'm4a', m3u8_id='hls', fatal=False))
                    elif proto == 'rrtv':
                        audio_fmts.append({
                            'format_id': 'mp3',
                            'url': v,
                            'tbr': 128,
                            'ext': 'mp3',
                            'vcodec': 'none',
                            'acodec': 'mp3',
                        })

                # video formats
                elif kind == 'video':
                    # hls manifest video
                    if proto == 'hls':
                        hls_fmts.extend(self._extract_m3u8_formats(
                            v, video_id, 'mp4', m3u8_id='hls', fatal=False))
                    # direct mp4 video
                    elif proto == 'rrtv':
                        # Prefer the bitrate from the format name; otherwise
                        # look it up in the URL.  The URL may not carry one
                        # either, so the search result must be checked before
                        # use (it is None when nothing matched).
                        tbr = self._parse_int(m.group('br'))
                        if tbr is None:
                            mm = re.search(br_reg, v)
                            tbr = int(mm.group('br')) if mm else 0
                        http_fmts.append({
                            'format_id': 'https-' + m.group('h'),
                            'protocol': 'https',
                            'url': v,
                            'tbr': tbr,
                            # the height label is matched with [\w\d]+ and is
                            # therefore not guaranteed to be numeric
                            'height': self._parse_int(m.group('h')),
                        })

            elif t == 'param':
                if n == 'videotitle':
                    title = v
                if n == 'image_full_play':
                    thumb = v

        title = self._og_search_title(webpage) if title == '' else title

        # clean weird char
        title = compat_str(title).encode('utf8', 'replace').replace(b'\xc3\x82', b'').decode('utf8', 'replace')

        if audio_fmts:
            self._clean_audio_fmts(audio_fmts)
            self._sort_formats(audio_fmts)
        if hls_fmts:
            self._sort_formats(hls_fmts)
        if http_fmts:
            self._sort_formats(http_fmts)

        # each group is sorted on its own, then concatenated in this order
        formats.extend(audio_fmts)
        formats.extend(hls_fmts)
        formats.extend(http_fmts)

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta('twitter:description', webpage),
            'thumbnail': thumb,
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class GediIE(GediBaseIE):
    """Regular (non-embed) video pages on the ``video.*`` GEDI sites.

    Extraction itself is inherited from GediBaseIE; this class only supplies
    the URL pattern (which rejects ``/embed/`` paths) and the test cases.
    """

    _VALID_URL = r'''(?x)https?://video\.
        (?:
            (?:espresso\.)?repubblica
            |lastampa
            |huffingtonpost
            |ilsecoloxix
            |iltirreno
            |messaggeroveneto
            |ilpiccolo
            |gazzettadimantova
            |mattinopadova
            |laprovinciapavese
            |tribunatreviso
            |nuovavenezia
            |gazzettadimodena
            |lanuovaferrara
            |corrierealpi
            |lasentinella
        )
        (?:\.gelocal)?\.it/(?!embed/).+?/(?P<id>[\d/]+)(?:\?|\&|$)'''

    _TESTS = [
        {
            'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
            'md5': '84658d7fb9e55a6e57ecc77b73137494',
            'info_dict': {
                'id': '121559/121683',
                'ext': 'mp4',
                'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
                'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
            'md5': 'e763b94b7920799a0e0e23ffefa2d157',
            'info_dict': {
                'id': '367415/367963',
                'ext': 'mp4',
                'title': 'Record della pista a Spa Francorchamps, la Pagani Huayra Roadster BC stupisce',
                'description': 'md5:5deb503cefe734a3eb3f07ed74303920',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
            'md5': 'e48108e97b1af137d22a8469f2019057',
            'info_dict': {
                'id': '66184/66267',
                'ext': 'mp4',
                'title': 'Cassani e i brividi azzurri ai Mondiali di Imola: \\"Qui mi sono innamorato del ciclismo da ragazzino, incredibile tornarci da ct\\"',
                'description': 'md5:fc9c50894f70a2469bb9b54d3d0a3d3b',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
            'md5': 'a6e39f3bdc1842bbd92abbbbef230817',
            'info_dict': {
                'id': '141059/142723',
                'ext': 'mp4',
                'title': 'Dentro la notizia - Ferrari, cosa succede a Maranello',
                'description': 'md5:9907d65b53765681fa3a0b3122617c1f',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class GediEmbedsIE(GediBaseIE):
    """Embedded (``/embed/``) players of the ``video.*`` GEDI sites.

    Besides the URL pattern and tests, this class offers static helpers that
    find such embed URLs inside an arbitrary web page.
    """

    _VALID_URL = r'''(?x)https?://video\.
        (?:
            (?:espresso\.)?repubblica
            |lastampa
            |huffingtonpost
            |ilsecoloxix
            |iltirreno
            |messaggeroveneto
            |ilpiccolo
            |gazzettadimantova
            |mattinopadova
            |laprovinciapavese
            |tribunatreviso
            |nuovavenezia
            |gazzettadimodena
            |lanuovaferrara
            |corrierealpi
            |lasentinella
        )
        (?:\.gelocal)?\.it/embed/.+?/(?P<id>[\d/]+)(?:\?|\&|$)'''

    _TESTS = [
        {
            'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
            'md5': 'f4ac23cadfea7fef89bea536583fa7ed',
            'info_dict': {
                'id': '29312/29276',
                'ext': 'mp4',
                'title': 'Cotticelli: \\"Non so cosa mi sia successo. Sto cercando di capire se ho avuto un malore\\"',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
        {
            'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
            'md5': '0391c2c83c6506581003aaf0255889c0',
            'info_dict': {
                'id': '14772/14870',
                'ext': 'mp4',
                'title': 'Festival EMERGENCY, Villa: «La buona informazione aiuta la salute» (14772-14870)',
                'description': 'md5:2bce954d278248f3c950be355b7c2226',
                'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
            },
        },
    ]

    @staticmethod
    def _sanitize_urls(urls):
        """Normalise *urls* in place and return the same list.

        Each entry gets an ``https:`` scheme when it starts with ``//`` and
        is then rebuilt from its base URL and basename.
        """
        for idx, raw in enumerate(urls):
            # add protocol if missing
            absolute = 'https:%s' % raw if raw.startswith('//') else raw
            # clean iframes urls
            urls[idx] = urljoin(base_url(absolute), url_basename(absolute))
        return urls

    @staticmethod
    def _extract_urls(webpage):
        """Return the sanitized embed URLs found in *webpage* (may be empty)."""
        embed_matches = re.finditer(r'''(?x)
            (?:
                data-frame-src=|
                <iframe[^\n]+src=
            )
            (["'])
            (?P<url>https?://video\.
                (?:
                    (?:espresso\.)?repubblica
                    |lastampa
                    |huffingtonpost
                    |ilsecoloxix
                    |iltirreno
                    |messaggeroveneto
                    |ilpiccolo
                    |gazzettadimantova
                    |mattinopadova
                    |laprovinciapavese
                    |tribunatreviso
                    |nuovavenezia
                    |gazzettadimodena
                    |lanuovaferrara
                    |corrierealpi
                    |lasentinella
                )
                (?:\.gelocal)?\.it/embed/.+?)
            \1''', webpage)

        found = []
        for embed in embed_matches:
            found.append(embed.group('url'))
        return GediEmbedsIE._sanitize_urls(found)

    @staticmethod
    def _extract_url(webpage):
        """Return the first embed URL found in *webpage*, or None."""
        candidates = GediEmbedsIE._extract_urls(webpage)
        if not candidates:
            return None
        return candidates[0]
|
|
@ -91,6 +91,7 @@ from .piksel import PikselIE
|
||||||
from .videa import VideaIE
|
from .videa import VideaIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
|
from .arte import ArteTVEmbedIE
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
from .rutube import RutubeIE
|
from .rutube import RutubeIE
|
||||||
from .limelight import LimelightBaseIE
|
from .limelight import LimelightBaseIE
|
||||||
|
@ -119,6 +120,9 @@ from .expressen import ExpressenIE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
from .odnoklassniki import OdnoklassnikiIE
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
from .kinja import KinjaEmbedIE
|
from .kinja import KinjaEmbedIE
|
||||||
|
from .gedi import GediEmbedsIE
|
||||||
|
from .rcs import RCSEmbedsIE
|
||||||
|
from .bitchute import BitChuteIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -841,7 +845,7 @@ class GenericIE(InfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# MTVSercices embed
|
# MTVServices embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||||
|
@ -2760,11 +2764,9 @@ class GenericIE(InfoExtractor):
|
||||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded arte.tv player
|
# Look for embedded arte.tv player
|
||||||
mobj = re.search(
|
arte_urls = ArteTVEmbedIE._extract_urls(webpage)
|
||||||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
|
if arte_urls:
|
||||||
webpage)
|
return self.playlist_from_matches(arte_urls, video_id, video_title)
|
||||||
if mobj is not None:
|
|
||||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
|
||||||
|
|
||||||
# Look for embedded francetv player
|
# Look for embedded francetv player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
|
@ -3213,6 +3215,22 @@ class GenericIE(InfoExtractor):
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||||
|
|
||||||
|
# Look for GEDI and RCS media group embeds
|
||||||
|
gedi_urls = GediEmbedsIE._extract_urls(webpage)
|
||||||
|
if gedi_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
gedi_urls, video_id, video_title, ie=GediEmbedsIE.ie_key())
|
||||||
|
|
||||||
|
rcs_urls = RCSEmbedsIE._extract_urls(webpage)
|
||||||
|
if rcs_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
|
||||||
|
|
||||||
|
bitchute_urls = BitChuteIE._extract_urls(webpage)
|
||||||
|
if bitchute_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
bitchute_urls, video_id, video_title, ie=BitChuteIE.ie_key())
|
||||||
|
|
||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
if entries:
|
if entries:
|
||||||
|
|
|
@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_parse_qs
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
try_get,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
# video can't be watched anonymously due to view count limit reached,
|
# video can't be watched anonymously due to view count limit reached,
|
||||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||||
'info_dict': {
|
|
||||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
|
||||||
'duration': 189,
|
|
||||||
},
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||||
|
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
video_info = compat_parse_qs(self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
'https://drive.google.com/get_video_info',
|
||||||
|
video_id, query={'docid': video_id}))
|
||||||
|
|
||||||
title = self._search_regex(
|
def get_value(key):
|
||||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
return try_get(video_info, lambda x: x[key][0])
|
||||||
default=None) or self._og_search_title(webpage)
|
|
||||||
duration = int_or_none(self._search_regex(
|
reason = get_value('reason')
|
||||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
title = get_value('title')
|
||||||
default=None))
|
if not title and reason:
|
||||||
|
raise ExtractorError(reason, expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
fmt_stream_map = self._search_regex(
|
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||||
'fmt stream map', default='').split(',')
|
|
||||||
fmt_list = self._search_regex(
|
|
||||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
|
||||||
'fmt_list', default='').split(',')
|
|
||||||
if fmt_stream_map and fmt_list:
|
if fmt_stream_map and fmt_list:
|
||||||
resolutions = {}
|
resolutions = {}
|
||||||
for fmt in fmt_list:
|
for fmt in fmt_list:
|
||||||
|
@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
if urlh and urlh.headers.get('Content-Disposition'):
|
if urlh and urlh.headers.get('Content-Disposition'):
|
||||||
add_source_format(urlh)
|
add_source_format(urlh)
|
||||||
|
|
||||||
if not formats:
|
if not formats and reason:
|
||||||
reason = self._search_regex(
|
raise ExtractorError(reason, expected=True)
|
||||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
|
||||||
if reason:
|
|
||||||
raise ExtractorError(reason, expected=True)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
hl = self._search_regex(
|
hl = get_value('hl')
|
||||||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
|
||||||
subtitles_id = None
|
subtitles_id = None
|
||||||
ttsurl = self._search_regex(
|
ttsurl = get_value('ttsurl')
|
||||||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
|
||||||
if ttsurl:
|
if ttsurl:
|
||||||
# the video Id for subtitles will be the last value in the ttsurl
|
# the video Id for subtitles will be the last value in the ttsurl
|
||||||
# query string
|
# query string
|
||||||
|
@ -281,8 +265,8 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||||
'duration': duration,
|
'duration': int_or_none(get_value('length_seconds')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||||
'automatic_captions': self.extract_automatic_captions(
|
'automatic_captions': self.extract_automatic_captions(
|
||||||
|
|
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class InaIE(InfoExtractor):
|
class InaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
_VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||||
|
@ -31,6 +31,9 @@ class InaIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.ina.fr/video/I12055569',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||||
|
|
||||||
def _extract_rtmp_video(self, webpage):
|
def _extract_rtmp_video(self, webpage):
|
||||||
# The server URL is hardcoded
|
# The server URL is hardcoded
|
||||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
video_url = 'rtmpe://videof.infoq.com/cfx/st/'
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(
|
encoded_id = self._search_regex(
|
||||||
|
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
|
||||||
return [{
|
return [{
|
||||||
'format_id': 'http_video',
|
'format_id': 'http_video',
|
||||||
'url': http_video_url,
|
'url': http_video_url,
|
||||||
|
'http_headers': {'Referer': 'https://www.infoq.com/'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_http_audio(self, webpage, video_id):
|
def _extract_http_audio(self, webpage, video_id):
|
||||||
fields = self._hidden_inputs(webpage)
|
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||||
http_audio_url = fields.get('filename')
|
http_audio_url = fields.get('filename')
|
||||||
if not http_audio_url:
|
if not http_audio_url:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# base URL is found in the Location header in the response returned by
|
# base URL is found in the Location header in the response returned by
|
||||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||||
|
|
||||||
# audio file seem to be missing some times even if there is a download link
|
# audio file seem to be missing some times even if there is a download link
|
||||||
|
|
|
@ -126,16 +126,23 @@ class InstagramIE(InfoExtractor):
|
||||||
uploader_id, like_count, comment_count, comments, height,
|
uploader_id, like_count, comment_count, comments, height,
|
||||||
width) = [None] * 11
|
width) = [None] * 11
|
||||||
|
|
||||||
shared_data = self._parse_json(
|
shared_data = try_get(webpage,
|
||||||
self._search_regex(
|
(lambda x: self._parse_json(
|
||||||
r'window\._sharedData\s*=\s*({.+?});',
|
self._search_regex(
|
||||||
webpage, 'shared data', default='{}'),
|
r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);',
|
||||||
video_id, fatal=False)
|
x, 'additional data', default='{}'),
|
||||||
|
video_id, fatal=False),
|
||||||
|
lambda x: self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\._sharedData\s*=\s*({.+?});',
|
||||||
|
x, 'shared data', default='{}'),
|
||||||
|
video_id, fatal=False)['entry_data']['PostPage'][0]),
|
||||||
|
None)
|
||||||
if shared_data:
|
if shared_data:
|
||||||
media = try_get(
|
media = try_get(
|
||||||
shared_data,
|
shared_data,
|
||||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
(lambda x: x['graphql']['shortcode_media'],
|
||||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
lambda x: x['media']),
|
||||||
dict)
|
dict)
|
||||||
if media:
|
if media:
|
||||||
video_url = media.get('video_url')
|
video_url = media.get('video_url')
|
||||||
|
@ -144,7 +151,7 @@ class InstagramIE(InfoExtractor):
|
||||||
description = try_get(
|
description = try_get(
|
||||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||||
compat_str) or media.get('caption')
|
compat_str) or media.get('caption')
|
||||||
thumbnail = media.get('display_src')
|
thumbnail = media.get('display_src') or media.get('thumbnail_src')
|
||||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||||
uploader = media.get('owner', {}).get('full_name')
|
uploader = media.get('owner', {}).get('full_name')
|
||||||
uploader_id = media.get('owner', {}).get('username')
|
uploader_id = media.get('owner', {}).get('username')
|
||||||
|
|
|
@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object):
|
||||||
elif function in other_functions:
|
elif function in other_functions:
|
||||||
other_functions[function]()
|
other_functions[function]()
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unknown funcion %s' % function)
|
raise ExtractorError('Unknown function %s' % function)
|
||||||
|
|
||||||
return sdk.target
|
return sdk.target
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
|
@ -280,12 +281,12 @@ class ITVIE(InfoExtractor):
|
||||||
class ITVBTCCIE(InfoExtractor):
|
class ITVBTCCIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
|
'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'btcc-2018-all-the-action-from-brands-hatch',
|
'id': 'btcc-2019-brands-hatch-gp-race-action',
|
||||||
'title': 'BTCC 2018: All the action from Brands Hatch',
|
'title': 'BTCC 2019: Brands Hatch GP race action',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_count': 12,
|
||||||
}
|
}
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
|
||||||
|
|
||||||
|
@ -294,6 +295,16 @@ class ITVBTCCIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
json_map = try_get(self._parse_json(self._html_search_regex(
|
||||||
|
'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
|
||||||
|
lambda x: x['props']['pageProps']['article']['body']['content']) or []
|
||||||
|
|
||||||
|
# Discard empty objects
|
||||||
|
video_ids = []
|
||||||
|
for video in json_map:
|
||||||
|
if video['data'].get('id'):
|
||||||
|
video_ids.append(video['data']['id'])
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
|
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
|
||||||
|
@ -305,7 +316,7 @@ class ITVBTCCIE(InfoExtractor):
|
||||||
'referrer': url,
|
'referrer': url,
|
||||||
}),
|
}),
|
||||||
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||||
for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
|
for video_id in video_ids]
|
||||||
|
|
||||||
title = self._og_search_title(webpage, fatal=False)
|
title = self._og_search_title(webpage, fatal=False)
|
||||||
|
|
||||||
|
|
|
@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
|
||||||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||||
description = xpath_text(doc, 'ABSTRACT')
|
description = xpath_text(doc, 'ABSTRACT')
|
||||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||||
|
|
||||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'timestamp': createtion_time,
|
'timestamp': creation_time,
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,9 @@ class LA7IE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
if not url.startswith('http'):
|
||||||
|
url = '%s//%s' % (self.http_scheme(), url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
player_data = self._search_regex(
|
player_data = self._search_regex(
|
||||||
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LBRYIE(InfoExtractor):
|
||||||
|
IE_NAME = 'lbry.tv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])'
|
||||||
|
_TESTS = [{
|
||||||
|
# Video
|
||||||
|
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||||
|
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'First day in LBRY? Start HERE!',
|
||||||
|
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||||
|
'timestamp': 1595694354,
|
||||||
|
'upload_date': '20200725',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Audio
|
||||||
|
'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
|
||||||
|
'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
|
||||||
|
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||||
|
'timestamp': 1591312601,
|
||||||
|
'upload_date': '20200604',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _call_api_proxy(self, method, display_id, params):
|
||||||
|
return self._download_json(
|
||||||
|
'https://api.lbry.tv/api/v1/proxy', display_id,
|
||||||
|
headers={'Content-Type': 'application/json-rpc'},
|
||||||
|
data=json.dumps({
|
||||||
|
'method': method,
|
||||||
|
'params': params,
|
||||||
|
}).encode())['result']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url).replace(':', '#')
|
||||||
|
uri = 'lbry://' + display_id
|
||||||
|
result = self._call_api_proxy(
|
||||||
|
'resolve', display_id, {'urls': [uri]})[uri]
|
||||||
|
result_value = result['value']
|
||||||
|
if result_value.get('stream_type') not in ('video', 'audio'):
|
||||||
|
raise ExtractorError('Unsupported URL', expected=True)
|
||||||
|
streaming_url = self._call_api_proxy(
|
||||||
|
'get', display_id, {'uri': uri})['streaming_url']
|
||||||
|
source = result_value.get('source') or {}
|
||||||
|
media = result_value.get('video') or result_value.get('audio') or {}
|
||||||
|
signing_channel = result_value.get('signing_channel') or {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': result['claim_id'],
|
||||||
|
'title': result_value['title'],
|
||||||
|
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||||
|
'description': result_value.get('description'),
|
||||||
|
'license': result_value.get('license'),
|
||||||
|
'timestamp': int_or_none(result.get('timestamp')),
|
||||||
|
'tags': result_value.get('tags'),
|
||||||
|
'width': int_or_none(media.get('width')),
|
||||||
|
'height': int_or_none(media.get('height')),
|
||||||
|
'duration': int_or_none(media.get('duration')),
|
||||||
|
'channel': signing_channel.get('name'),
|
||||||
|
'channel_id': signing_channel.get('claim_id'),
|
||||||
|
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||||
|
'filesize': int_or_none(source.get('size')),
|
||||||
|
'url': streaming_url,
|
||||||
|
}
|
|
@ -5,28 +5,26 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
clean_html,
|
||||||
int_or_none,
|
merge_dicts,
|
||||||
parse_duration,
|
|
||||||
remove_end,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LRTIE(InfoExtractor):
|
class LRTIE(InfoExtractor):
|
||||||
IE_NAME = 'lrt.lt'
|
IE_NAME = 'lrt.lt'
|
||||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
|
||||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
'md5': '85cb2bb530f31d91a9c65b479516ade4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '54391',
|
'id': '2000127261',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Septynios Kauno dienos',
|
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
|
||||||
'description': 'md5:24d84534c7dc76581e59f5689462411a',
|
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
|
||||||
'duration': 1783,
|
'duration': 3035,
|
||||||
'view_count': int,
|
'timestamp': 1604079000,
|
||||||
'like_count': int,
|
'upload_date': '20201030',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# direct mp3 download
|
# direct mp3 download
|
||||||
|
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_js_var(self, webpage, var_name, default):
|
||||||
|
return self._search_regex(
|
||||||
|
r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
|
||||||
|
webpage, var_name.replace('_', ' '), default, group=2)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||||
|
media = self._download_json(self._extract_js_var(
|
||||||
|
webpage, 'media_info_url',
|
||||||
|
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
|
||||||
|
video_id, query={'url': media_url})
|
||||||
|
jw_data = self._parse_jwplayer_data(
|
||||||
|
media['playlist_item'], video_id, base_url=url)
|
||||||
|
|
||||||
formats = []
|
json_ld_data = self._search_json_ld(webpage, video_id)
|
||||||
for _, file_url in re.findall(
|
|
||||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
tags = []
|
||||||
ext = determine_ext(file_url)
|
for tag in (media.get('tags') or []):
|
||||||
if ext not in ('m3u8', 'mp3'):
|
tag_name = tag.get('name')
|
||||||
|
if not tag_name:
|
||||||
continue
|
continue
|
||||||
# mp3 served as m3u8 produces stuttered media file
|
tags.append(tag_name)
|
||||||
if ext == 'm3u8' and '.mp3' in file_url:
|
|
||||||
continue
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
fatal=False))
|
|
||||||
elif ext == 'mp3':
|
|
||||||
formats.append({
|
|
||||||
'url': file_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
clean_info = {
|
||||||
description = self._og_search_description(webpage)
|
'description': clean_html(media.get('content')),
|
||||||
duration = parse_duration(self._search_regex(
|
'tags': tags,
|
||||||
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
|
|
||||||
webpage, 'duration', default=None, group='duration'))
|
|
||||||
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
|
||||||
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
|
|
||||||
webpage, 'view count', fatal=False, group='count'))
|
|
||||||
like_count = int_or_none(self._search_regex(
|
|
||||||
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
|
|
||||||
webpage, 'like count', fatal=False, group='count'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'like_count': like_count,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||||
|
|
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
remove_end,
|
remove_end,
|
||||||
try_get,
|
try_get,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,6 +94,14 @@ class MailRuIE(InfoExtractor):
|
||||||
{
|
{
|
||||||
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
|
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -110,7 +119,7 @@ class MailRuIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
page_config = self._parse_json(self._search_regex([
|
page_config = self._parse_json(self._search_regex([
|
||||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||||
r'(?s)"video":\s*(\{.+?\}),'],
|
r'(?s)"video":\s*({.+?}),'],
|
||||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||||
if page_config:
|
if page_config:
|
||||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
|
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
|
||||||
|
@ -121,7 +130,7 @@ class MailRuIE(InfoExtractor):
|
||||||
|
|
||||||
# fix meta_url if missing the host address
|
# fix meta_url if missing the host address
|
||||||
if re.match(r'^\/\+\/', meta_url):
|
if re.match(r'^\/\+\/', meta_url):
|
||||||
meta_url = 'https://my.mail.ru' + meta_url
|
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||||
|
|
||||||
if meta_url:
|
if meta_url:
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
|
|
|
@ -1,10 +1,16 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import merge_dicts
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
dict_get,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MallTVIE(InfoExtractor):
|
class MallTVIE(InfoExtractor):
|
||||||
|
@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
|
||||||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
|
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
|
||||||
'duration': 216,
|
'duration': 216,
|
||||||
'timestamp': 1538870400,
|
'timestamp': 1538870400,
|
||||||
'upload_date': '20181007',
|
'upload_date': '20181007',
|
||||||
|
@ -37,20 +43,46 @@ class MallTVIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, display_id, headers=self.geo_verification_headers())
|
url, display_id, headers=self.geo_verification_headers())
|
||||||
|
|
||||||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
|
video = self._parse_json(self._search_regex(
|
||||||
|
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
|
||||||
|
webpage, 'video object'), display_id)
|
||||||
|
video_source = video['VideoSource']
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
SOURCE_RE, webpage, 'video id', group='id')
|
r'/([\da-z]+)/index\b', video_source, 'video id')
|
||||||
|
|
||||||
media = self._parse_html5_media_entries(
|
formats = self._extract_m3u8_formats(
|
||||||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
|
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
|
||||||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for s in (video.get('Subtitles') or {}):
|
||||||
|
s_url = s.get('Url')
|
||||||
|
if not s_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(s.get('Language') or 'cz', []).append({
|
||||||
|
'url': s_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
entity_counts = video.get('EntityCounts') or {}
|
||||||
|
|
||||||
|
def get_count(k):
|
||||||
|
v = entity_counts.get(k + 's') or {}
|
||||||
|
return int_or_none(dict_get(v, ('Count', 'StrCount')))
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
return merge_dicts(media, info, {
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': self._og_search_title(webpage, default=None) or display_id,
|
'title': video.get('Title'),
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': clean_html(video.get('Description')),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': video.get('ThumbnailUrl'),
|
||||||
})
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
|
||||||
|
'view_count': get_count('View'),
|
||||||
|
'like_count': get_count('Like'),
|
||||||
|
'dislike_count': get_count('Dislike'),
|
||||||
|
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
|
||||||
|
'comment_count': get_count('Comment'),
|
||||||
|
}, info)
|
||||||
|
|
|
@ -0,0 +1,131 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MedalTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
|
||||||
|
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '34934644',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Quad Cold',
|
||||||
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
|
'uploader': 'MowgliSB',
|
||||||
|
'timestamp': 1603165266,
|
||||||
|
'upload_date': '20201020',
|
||||||
|
'uploader_id': 10619174,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://medal.tv/clips/36787208',
|
||||||
|
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '36787208',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'u tk me i tk u bigger',
|
||||||
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
|
'uploader': 'Mimicc',
|
||||||
|
'timestamp': 1605580939,
|
||||||
|
'upload_date': '20201117',
|
||||||
|
'uploader_id': 5156321,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
hydration_data = self._parse_json(self._search_regex(
|
||||||
|
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
|
||||||
|
webpage, 'hydration data', default='{}'), video_id)
|
||||||
|
|
||||||
|
clip = try_get(
|
||||||
|
hydration_data, lambda x: x['clips'][video_id], dict) or {}
|
||||||
|
if not clip:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Could not find video information.', video_id=video_id)
|
||||||
|
|
||||||
|
title = clip['contentTitle']
|
||||||
|
|
||||||
|
source_width = int_or_none(clip.get('sourceWidth'))
|
||||||
|
source_height = int_or_none(clip.get('sourceHeight'))
|
||||||
|
|
||||||
|
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
|
||||||
|
|
||||||
|
def add_item(container, item_url, height, id_key='format_id', item_id=None):
|
||||||
|
item_id = item_id or '%dp' % height
|
||||||
|
if item_id not in item_url:
|
||||||
|
return
|
||||||
|
width = int(round(aspect_ratio * height))
|
||||||
|
container.append({
|
||||||
|
'url': item_url,
|
||||||
|
id_key: item_id,
|
||||||
|
'width': width,
|
||||||
|
'height': height
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
thumbnails = []
|
||||||
|
for k, v in clip.items():
|
||||||
|
if not (v and isinstance(v, compat_str)):
|
||||||
|
continue
|
||||||
|
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
prefix = mobj.group(1)
|
||||||
|
height = int_or_none(mobj.group(2))
|
||||||
|
if prefix == 'contentUrl':
|
||||||
|
add_item(
|
||||||
|
formats, v, height or source_height,
|
||||||
|
item_id=None if height else 'source')
|
||||||
|
elif prefix == 'thumbnail':
|
||||||
|
add_item(thumbnails, v, height, 'id')
|
||||||
|
|
||||||
|
error = clip.get('error')
|
||||||
|
if not formats and error:
|
||||||
|
if error == 404:
|
||||||
|
raise ExtractorError(
|
||||||
|
'That clip does not exist.',
|
||||||
|
expected=True, video_id=video_id)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(
|
||||||
|
'An unknown error occurred ({0}).'.format(error),
|
||||||
|
video_id=video_id)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
# Necessary because the id of the author is not known in advance.
|
||||||
|
# Won't raise an issue if no profile can be found as this is optional.
|
||||||
|
author = try_get(
|
||||||
|
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
|
||||||
|
author_id = str_or_none(author.get('id'))
|
||||||
|
author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'description': clip.get('contentDescription'),
|
||||||
|
'uploader': author.get('displayName'),
|
||||||
|
'timestamp': float_or_none(clip.get('created'), 1000),
|
||||||
|
'uploader_id': author_id,
|
||||||
|
'uploader_url': author_url,
|
||||||
|
'duration': int_or_none(clip.get('videoLengthSeconds')),
|
||||||
|
'view_count': int_or_none(clip.get('views')),
|
||||||
|
'like_count': int_or_none(clip.get('likes')),
|
||||||
|
'comment_count': int_or_none(clip.get('comments')),
|
||||||
|
}
|
|
@ -17,9 +17,8 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class MGTVIE(InfoExtractor):
|
class MGTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
IE_DESC = '芒果TV'
|
IE_DESC = '芒果TV'
|
||||||
_GEO_COUNTRIES = ['CN']
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||||
|
@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://w.mgtv.com/b/301817/3826653.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
|
||||||
try:
|
try:
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
||||||
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor):
|
||||||
stream_data = self._download_json(
|
stream_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
||||||
'pm2': api_data['atc']['pm2'],
|
'pm2': api_data['atc']['pm2'],
|
||||||
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
stream_domain = stream_data['stream_domain'][0]
|
stream_domain = stream_data['stream_domain'][0]
|
||||||
|
|
|
@ -403,6 +403,18 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def extract_child_with_type(parent, t):
|
||||||
|
children = parent['children']
|
||||||
|
return next(c for c in children if c.get('type') == t)
|
||||||
|
|
||||||
|
def _extract_mgid(self, webpage):
|
||||||
|
data = self._parse_json(self._search_regex(
|
||||||
|
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||||
|
main_container = self.extract_child_with_type(data, 'MainContainer')
|
||||||
|
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
|
||||||
|
return video_player['props']['media']['video']['config']['uri']
|
||||||
|
|
||||||
|
|
||||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'mtvjapan'
|
IE_NAME = 'mtvjapan'
|
||||||
|
|
|
@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -394,8 +393,8 @@ class NBCNewsIE(ThePlatformIE):
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data = self._parse_json(self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'window\.__data\s*=\s*({.+});', webpage,
|
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||||
'bootstrap json'), video_id, js_to_json)
|
webpage, 'bootstrap json'), video_id)['props']['initialState']
|
||||||
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
||||||
if not video_data:
|
if not video_data:
|
||||||
video_data = data['article']['content'][0]['primaryMedia']['video']
|
video_data = data['article']['content'][0]['primaryMedia']['video']
|
||||||
|
|
|
@ -82,6 +82,29 @@ class NDRIE(NDRBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# with subtitles
|
||||||
|
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'extra18674',
|
||||||
|
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||||
|
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'upload_date': '20201113',
|
||||||
|
'duration': 1749,
|
||||||
|
'subtitles': {
|
||||||
|
'de': [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
'url': r're:^https://www\.ndr\.de.+',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -242,6 +265,20 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||||
'preference': quality_key(thumbnail.get('quality')),
|
'preference': quality_key(thumbnail.get('quality')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = config.get('tracks')
|
||||||
|
if tracks and isinstance(tracks, list):
|
||||||
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
|
track_url = urljoin(url, track.get('src'))
|
||||||
|
if not track_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(track.get('srclang') or 'de', []).append({
|
||||||
|
'url': track_url,
|
||||||
|
'ext': 'ttml',
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -251,6 +288,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
# mutlimedia, not media title
|
# multimedia, not media title
|
||||||
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '533198237',
|
'id': '533198237',
|
||||||
|
|
|
@ -9,6 +9,7 @@ from ..compat import (
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
@ -16,17 +17,269 @@ from ..utils import (
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
try_get,
|
try_get,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NRKBaseIE(InfoExtractor):
|
class NRKBaseIE(InfoExtractor):
|
||||||
_GEO_COUNTRIES = ['NO']
|
_GEO_COUNTRIES = ['NO']
|
||||||
|
|
||||||
_api_host = None
|
|
||||||
|
class NRKIE(NRKBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
nrk:|
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
|
||||||
|
v8[-.]psapi\.nrk\.no/mediaelement/
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(?P<id>[^?\#&]+)
|
||||||
|
'''
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# video
|
||||||
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
|
'md5': '706f34cdf1322577589e369e522b50ef',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '150533',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dompap og andre fugler i Piip-Show',
|
||||||
|
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||||
|
'duration': 262,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# audio
|
||||||
|
'url': 'http://www.nrk.no/video/PS*154915',
|
||||||
|
# MD5 is unstable
|
||||||
|
'info_dict': {
|
||||||
|
'id': '154915',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Slik høres internett ut når du er blind',
|
||||||
|
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||||
|
'duration': 20,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_from_playback(self, video_id):
|
||||||
|
manifest = self._download_json(
|
||||||
|
'http://psapi.nrk.no/playback/manifest/%s' % video_id,
|
||||||
|
video_id, 'Downloading manifest JSON')
|
||||||
|
|
||||||
|
playable = manifest['playable']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for asset in playable['assets']:
|
||||||
|
if not isinstance(asset, dict):
|
||||||
|
continue
|
||||||
|
if asset.get('encrypted'):
|
||||||
|
continue
|
||||||
|
format_url = url_or_none(asset.get('url'))
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
'http://psapi.nrk.no/playback/metadata/%s' % video_id,
|
||||||
|
video_id, 'Downloading metadata JSON')
|
||||||
|
|
||||||
|
preplay = data['preplay']
|
||||||
|
titles = preplay['titles']
|
||||||
|
title = titles['title']
|
||||||
|
alt_title = titles.get('subtitle')
|
||||||
|
|
||||||
|
description = preplay.get('description')
|
||||||
|
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for image in try_get(
|
||||||
|
preplay, lambda x: x['poster']['images'], list) or []:
|
||||||
|
if not isinstance(image, dict):
|
||||||
|
continue
|
||||||
|
image_url = url_or_none(image.get('url'))
|
||||||
|
if not image_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': image_url,
|
||||||
|
'width': int_or_none(image.get('pixelWidth')),
|
||||||
|
'height': int_or_none(image.get('pixelHeight')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'alt_title': alt_title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
return self._extract_from_playback(video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class NRKTVIE(NRKBaseIE):
|
||||||
|
IE_DESC = 'NRK TV and NRK Radio'
|
||||||
|
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:tv|radio)\.nrk(?:super)?\.no/
|
||||||
|
(?:serie(?:/[^/]+){1,2}|program)/
|
||||||
|
(?![Ee]pisodes)%s
|
||||||
|
(?:/\d{2}-\d{2}-\d{4})?
|
||||||
|
(?:\#del=(?P<part_id>\d+))?
|
||||||
|
''' % _EPISODE_RE
|
||||||
|
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||||
|
'md5': '8270824df46ec629b66aeaa5796b36fb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MDDP12000117AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Alarm Trolltunga',
|
||||||
|
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||||
|
'duration': 2223,
|
||||||
|
'age_limit': 6,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
|
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MUHH48000314AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '20 spørsmål 23.05.2014',
|
||||||
|
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||||
|
'duration': 1741,
|
||||||
|
'series': '20 spørsmål',
|
||||||
|
'episode': '23.05.2014',
|
||||||
|
},
|
||||||
|
'skip': 'NoProgramRights',
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MDFP15000514CA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||||
|
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||||
|
'duration': 4605,
|
||||||
|
'series': 'Kunnskapskanalen',
|
||||||
|
'episode': '24.05.2014',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# single playlist video
|
||||||
|
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515-part2',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||||
|
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Video is geo restricted'],
|
||||||
|
'skip': 'particular part is not supported currently',
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515AH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||||
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
|
'duration': 772,
|
||||||
|
'series': 'Tour de Ski',
|
||||||
|
'episode': '06.01.2015',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515BH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||||
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
|
'duration': 6175,
|
||||||
|
'series': 'Tour de Ski',
|
||||||
|
'episode': '06.01.2015',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515',
|
||||||
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||||
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Video is geo restricted'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'KMTE50001317AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Anno 13:30',
|
||||||
|
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||||
|
'duration': 2340,
|
||||||
|
'series': 'Anno',
|
||||||
|
'episode': '13:30',
|
||||||
|
'season_number': 3,
|
||||||
|
'episode_number': 13,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MUHH46000317AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nytt på Nytt 27.01.2017',
|
||||||
|
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||||
|
'duration': 1796,
|
||||||
|
'series': 'Nytt på nytt',
|
||||||
|
'episode': '27.01.2017',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_api_host = None
|
||||||
|
|
||||||
|
def _extract_from_mediaelement(self, video_id):
|
||||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||||
|
|
||||||
for api_host in api_hosts:
|
for api_host in api_hosts:
|
||||||
|
@ -195,190 +448,9 @@ class NRKBaseIE(InfoExtractor):
|
||||||
|
|
||||||
return self.playlist_result(entries, video_id, title, description)
|
return self.playlist_result(entries, video_id, title, description)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
class NRKIE(NRKBaseIE):
|
video_id = self._match_id(url)
|
||||||
_VALID_URL = r'''(?x)
|
return self._extract_from_mediaelement(video_id)
|
||||||
(?:
|
|
||||||
nrk:|
|
|
||||||
https?://
|
|
||||||
(?:
|
|
||||||
(?:www\.)?nrk\.no/video/PS\*|
|
|
||||||
v8[-.]psapi\.nrk\.no/mediaelement/
|
|
||||||
)
|
|
||||||
)
|
|
||||||
(?P<id>[^?#&]+)
|
|
||||||
'''
|
|
||||||
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
|
|
||||||
_TESTS = [{
|
|
||||||
# video
|
|
||||||
'url': 'http://www.nrk.no/video/PS*150533',
|
|
||||||
'md5': '706f34cdf1322577589e369e522b50ef',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '150533',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Dompap og andre fugler i Piip-Show',
|
|
||||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
|
||||||
'duration': 262,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# audio
|
|
||||||
'url': 'http://www.nrk.no/video/PS*154915',
|
|
||||||
# MD5 is unstable
|
|
||||||
'info_dict': {
|
|
||||||
'id': '154915',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Slik høres internett ut når du er blind',
|
|
||||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
|
||||||
'duration': 20,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
|
|
||||||
class NRKTVIE(NRKBaseIE):
|
|
||||||
IE_DESC = 'NRK TV and NRK Radio'
|
|
||||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https?://
|
|
||||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
|
||||||
(?:serie(?:/[^/]+){1,2}|program)/
|
|
||||||
(?![Ee]pisodes)%s
|
|
||||||
(?:/\d{2}-\d{2}-\d{4})?
|
|
||||||
(?:\#del=(?P<part_id>\d+))?
|
|
||||||
''' % _EPISODE_RE
|
|
||||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
|
||||||
'md5': '8270824df46ec629b66aeaa5796b36fb',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MDDP12000117AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Alarm Trolltunga',
|
|
||||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
|
||||||
'duration': 2223,
|
|
||||||
'age_limit': 6,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
|
||||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MUHH48000314AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '20 spørsmål 23.05.2014',
|
|
||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
|
||||||
'duration': 1741,
|
|
||||||
'series': '20 spørsmål',
|
|
||||||
'episode': '23.05.2014',
|
|
||||||
},
|
|
||||||
'skip': 'NoProgramRights',
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MDFP15000514CA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
|
||||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
|
||||||
'duration': 4605,
|
|
||||||
'series': 'Kunnskapskanalen',
|
|
||||||
'episode': '24.05.2014',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# single playlist video
|
|
||||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515-part2',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
|
||||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'expected_warnings': ['Video is geo restricted'],
|
|
||||||
'skip': 'particular part is not supported currently',
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
|
||||||
'playlist': [{
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515AH',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
|
||||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
|
||||||
'duration': 772,
|
|
||||||
'series': 'Tour de Ski',
|
|
||||||
'episode': '06.01.2015',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515BH',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
|
||||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
|
||||||
'duration': 6175,
|
|
||||||
'series': 'Tour de Ski',
|
|
||||||
'episode': '06.01.2015',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}],
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515',
|
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
|
||||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
|
||||||
},
|
|
||||||
'expected_warnings': ['Video is geo restricted'],
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'KMTE50001317AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Anno 13:30',
|
|
||||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
|
||||||
'duration': 2340,
|
|
||||||
'series': 'Anno',
|
|
||||||
'episode': '13:30',
|
|
||||||
'season_number': 3,
|
|
||||||
'episode_number': 13,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MUHH46000317AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Nytt på Nytt 27.01.2017',
|
|
||||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
|
||||||
'duration': 1796,
|
|
||||||
'series': 'Nytt på nytt',
|
|
||||||
'episode': '27.01.2017',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
|
|
||||||
class NRKTVEpisodeIE(InfoExtractor):
|
class NRKTVEpisodeIE(InfoExtractor):
|
||||||
|
|
|
@ -221,3 +221,41 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||||
webpage, 'podcast data')
|
webpage, 'podcast data')
|
||||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||||
|
|
||||||
|
|
||||||
|
class NYTimesCookingIE(NYTimesBaseIE):
|
||||||
|
_VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
|
||||||
|
'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100000004756089',
|
||||||
|
'ext': 'mov',
|
||||||
|
'timestamp': 1479383008,
|
||||||
|
'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
|
||||||
|
'title': 'Cranberry Tart',
|
||||||
|
'upload_date': '20161117',
|
||||||
|
'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||||
|
'md5': '4b2e8c70530a89b8d905a2b572316eb8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100000003951728',
|
||||||
|
'ext': 'mov',
|
||||||
|
'timestamp': 1445509539,
|
||||||
|
'description': 'Turkey guide',
|
||||||
|
'upload_date': '20151022',
|
||||||
|
'title': 'Turkey',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
page_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||||
|
|
||||||
|
return self._extract_video_from_id(video_id)
|
||||||
|
|
|
@ -477,7 +477,7 @@ class PBSIE(InfoExtractor):
|
||||||
if media_id:
|
if media_id:
|
||||||
return media_id, presumptive_id, upload_date, description
|
return media_id, presumptive_id, upload_date, description
|
||||||
|
|
||||||
# Fronline video embedded via flp
|
# Frontline video embedded via flp
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
||||||
if video_id:
|
if video_id:
|
||||||
|
|
|
@ -0,0 +1,201 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestBaseIE(InfoExtractor):
|
||||||
|
_VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'
|
||||||
|
|
||||||
|
def _call_api(self, resource, video_id, options):
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.pinterest.com/resource/%sResource/get/' % resource,
|
||||||
|
video_id, 'Download %s JSON metadata' % resource, query={
|
||||||
|
'data': json.dumps({'options': options})
|
||||||
|
})['resource_response']
|
||||||
|
|
||||||
|
def _extract_video(self, data, extract_formats=True):
|
||||||
|
video_id = data['id']
|
||||||
|
|
||||||
|
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
duration = None
|
||||||
|
if extract_formats:
|
||||||
|
for format_id, format_dict in data['videos']['video_list'].items():
|
||||||
|
if not isinstance(format_dict, dict):
|
||||||
|
continue
|
||||||
|
format_url = url_or_none(format_dict.get('url'))
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if 'hls' in format_id.lower() or ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'width': int_or_none(format_dict.get('width')),
|
||||||
|
'height': int_or_none(format_dict.get('height')),
|
||||||
|
'duration': duration,
|
||||||
|
})
|
||||||
|
self._sort_formats(
|
||||||
|
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||||
|
|
||||||
|
description = data.get('description') or data.get('description_html') or data.get('seo_description')
|
||||||
|
timestamp = unified_timestamp(data.get('created_at'))
|
||||||
|
|
||||||
|
def _u(field):
|
||||||
|
return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)
|
||||||
|
|
||||||
|
uploader = _u('full_name')
|
||||||
|
uploader_id = _u('id')
|
||||||
|
|
||||||
|
repost_count = int_or_none(data.get('repin_count'))
|
||||||
|
comment_count = int_or_none(data.get('comment_count'))
|
||||||
|
categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
|
||||||
|
tags = data.get('hashtags')
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
images = data.get('images')
|
||||||
|
if isinstance(images, dict):
|
||||||
|
for thumbnail_id, thumbnail in images.items():
|
||||||
|
if not isinstance(thumbnail, dict):
|
||||||
|
continue
|
||||||
|
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int_or_none(thumbnail.get('width')),
|
||||||
|
'height': int_or_none(thumbnail.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'repost_count': repost_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'categories': categories,
|
||||||
|
'tags': tags,
|
||||||
|
'formats': formats,
|
||||||
|
'extractor_key': PinterestIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestIE(PinterestBaseIE):
|
||||||
|
_VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.pinterest.com/pin/664281013778109217/',
|
||||||
|
'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '664281013778109217',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Origami',
|
||||||
|
'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
|
||||||
|
'duration': 57.7,
|
||||||
|
'timestamp': 1593073622,
|
||||||
|
'upload_date': '20200625',
|
||||||
|
'uploader': 'Love origami -I am Dafei',
|
||||||
|
'uploader_id': '586523688879454212',
|
||||||
|
'repost_count': 50,
|
||||||
|
'comment_count': 0,
|
||||||
|
'categories': list,
|
||||||
|
'tags': list,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://co.pinterest.com/pin/824721750502199491/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
data = self._call_api(
|
||||||
|
'Pin', video_id, {
|
||||||
|
'field_set_key': 'unauth_react_main_pin',
|
||||||
|
'id': video_id,
|
||||||
|
})['data']
|
||||||
|
return self._extract_video(data)
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestCollectionIE(PinterestBaseIE):
|
||||||
|
_VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '585890301462791043',
|
||||||
|
'title': 'cool diys',
|
||||||
|
},
|
||||||
|
'playlist_count': 8,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pinterest.ca/fudohub/videos/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '682858430939307450',
|
||||||
|
'title': 'VIDEOS',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 365,
|
||||||
|
'skip': 'Test with extract_formats=False',
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if PinterestIE.suitable(url) else super(
|
||||||
|
PinterestCollectionIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
username, slug = re.match(self._VALID_URL, url).groups()
|
||||||
|
board = self._call_api(
|
||||||
|
'Board', slug, {
|
||||||
|
'slug': slug,
|
||||||
|
'username': username
|
||||||
|
})['data']
|
||||||
|
board_id = board['id']
|
||||||
|
options = {
|
||||||
|
'board_id': board_id,
|
||||||
|
'page_size': 250,
|
||||||
|
}
|
||||||
|
bookmark = None
|
||||||
|
entries = []
|
||||||
|
while True:
|
||||||
|
if bookmark:
|
||||||
|
options['bookmarks'] = [bookmark]
|
||||||
|
board_feed = self._call_api('BoardFeed', board_id, options)
|
||||||
|
for item in (board_feed.get('data') or []):
|
||||||
|
if not isinstance(item, dict) or item.get('type') != 'pin':
|
||||||
|
continue
|
||||||
|
video_id = item.get('id')
|
||||||
|
if video_id:
|
||||||
|
# Some pins may not be available anonymously via pin URL
|
||||||
|
# video = self._extract_video(item, extract_formats=False)
|
||||||
|
# video.update({
|
||||||
|
# '_type': 'url_transparent',
|
||||||
|
# 'url': 'https://www.pinterest.com/pin/%s/' % video_id,
|
||||||
|
# })
|
||||||
|
# entries.append(video)
|
||||||
|
entries.append(self._extract_video(item))
|
||||||
|
bookmark = board_feed.get('bookmark')
|
||||||
|
if not bookmark:
|
||||||
|
break
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id=board_id, playlist_title=board.get('name'))
|
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
remove_start,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
@ -30,7 +31,6 @@ class RaiBaseIE(InfoExtractor):
|
||||||
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||||
_GEO_COUNTRIES = ['IT']
|
_GEO_COUNTRIES = ['IT']
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_BASE_URL = 'https://www.raiplay.it'
|
|
||||||
|
|
||||||
def _extract_relinker_info(self, relinker_url, video_id):
|
def _extract_relinker_info(self, relinker_url, video_id):
|
||||||
if not re.match(r'https?://', relinker_url):
|
if not re.match(r'https?://', relinker_url):
|
||||||
|
@ -68,7 +68,7 @@ class RaiBaseIE(InfoExtractor):
|
||||||
|
|
||||||
# This does not imply geo restriction (e.g.
|
# This does not imply geo restriction (e.g.
|
||||||
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
||||||
if media_url == 'http://download.rai.it/video_no_available.mp4':
|
if '/video_no_available.mp4' in media_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ext = determine_ext(media_url)
|
ext = determine_ext(media_url)
|
||||||
|
@ -123,7 +123,7 @@ class RaiBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayIE(RaiBaseIE):
|
class RaiPlayIE(RaiBaseIE):
|
||||||
_VALID_URL = r'(?P<url>(?P<base>https?://(?:www\.)?raiplay\.it/.+?-)(?P<id>%s)(?P<ext>\.(?:html|json)))' % RaiBaseIE._UUID_RE
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||||
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
||||||
|
@ -131,11 +131,13 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Report del 07/04/2014',
|
'title': 'Report del 07/04/2014',
|
||||||
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014 ',
|
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
|
||||||
'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
|
'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Rai Gulp',
|
'uploader': 'Rai Gulp',
|
||||||
'duration': 6160,
|
'duration': 6160,
|
||||||
|
'series': 'Report',
|
||||||
|
'season': '2013/14',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -146,11 +148,10 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
base, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
url, base, video_id, ext = mobj.group('url', 'base', 'id', 'ext')
|
|
||||||
|
|
||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
'%s%s.json' % (base, video_id), video_id, 'Downloading video JSON')
|
base + '.json', video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
title = media['name']
|
title = media['name']
|
||||||
video = media['video']
|
video = media['video']
|
||||||
|
@ -159,34 +160,39 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
self._sort_formats(relinker_info['formats'])
|
self._sort_formats(relinker_info['formats'])
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
if 'images' in media:
|
for _, value in media.get('images', {}).items():
|
||||||
for _, value in media.get('images').items():
|
if value:
|
||||||
if value:
|
thumbnails.append({
|
||||||
thumbnails.append({
|
'url': urljoin(url, value),
|
||||||
'url': urljoin(RaiBaseIE._BASE_URL, value.replace('[RESOLUTION]', '600x400'))
|
})
|
||||||
})
|
|
||||||
|
|
||||||
timestamp = unified_timestamp(try_get(
|
date_published = media.get('date_published')
|
||||||
media, lambda x: x['availabilities'][0]['start'], compat_str))
|
time_published = media.get('time_published')
|
||||||
|
if date_published and time_published:
|
||||||
|
date_published += ' ' + time_published
|
||||||
|
|
||||||
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
||||||
|
|
||||||
|
program_info = media.get('program_info') or {}
|
||||||
|
season = media.get('season')
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
|
||||||
|
'display_id': video_id,
|
||||||
'title': self._live_title(title) if relinker_info.get(
|
'title': self._live_title(title) if relinker_info.get(
|
||||||
'is_live') else title,
|
'is_live') else title,
|
||||||
'alt_title': media.get('subtitle'),
|
'alt_title': strip_or_none(media.get('subtitle')),
|
||||||
'description': media.get('description'),
|
'description': media.get('description'),
|
||||||
'uploader': strip_or_none(media.get('channel')),
|
'uploader': strip_or_none(media.get('channel')),
|
||||||
'creator': strip_or_none(media.get('editor')),
|
'creator': strip_or_none(media.get('editor') or None),
|
||||||
'duration': parse_duration(video.get('duration')),
|
'duration': parse_duration(video.get('duration')),
|
||||||
'timestamp': timestamp,
|
'timestamp': unified_timestamp(date_published),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'series': try_get(
|
'series': program_info.get('name'),
|
||||||
media, lambda x: x['isPartOf']['name'], compat_str),
|
'season_number': int_or_none(season),
|
||||||
'season_number': int_or_none(try_get(
|
'season': season if (season and not season.isdigit()) else None,
|
||||||
media, lambda x: x['isPartOf']['numeroStagioni'])),
|
'episode': media.get('episode_title'),
|
||||||
'season': media.get('stagione') or None,
|
'episode_number': int_or_none(media.get('episode')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -194,9 +200,9 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayLiveIE(RaiBaseIE):
|
class RaiPlayLiveIE(RaiPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/dirette/rainews24',
|
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||||
|
@ -211,40 +217,11 @@ class RaiPlayLiveIE(RaiBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
media = self._download_json(
|
|
||||||
'%s.json' % urljoin(RaiBaseIE._BASE_URL, 'dirette/' + display_id),
|
|
||||||
display_id, 'Downloading channel JSON')
|
|
||||||
|
|
||||||
title = media['name']
|
|
||||||
video = media['video']
|
|
||||||
video_id = media['id'].replace('ContentItem-', '')
|
|
||||||
|
|
||||||
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
|
|
||||||
self._sort_formats(relinker_info['formats'])
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': self._live_title(title) if relinker_info.get(
|
|
||||||
'is_live') else title,
|
|
||||||
'alt_title': media.get('subtitle'),
|
|
||||||
'description': media.get('description'),
|
|
||||||
'uploader': strip_or_none(media.get('channel')),
|
|
||||||
'creator': strip_or_none(media.get('editor')),
|
|
||||||
'duration': parse_duration(video.get('duration')),
|
|
||||||
}
|
|
||||||
|
|
||||||
info.update(relinker_info)
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayPlaylistIE(InfoExtractor):
|
class RaiPlayPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -256,29 +233,34 @@ class RaiPlayPlaylistIE(InfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
base, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
media = self._download_json(
|
program = self._download_json(
|
||||||
'%s.json' % urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id),
|
base + '.json', playlist_id, 'Downloading program JSON')
|
||||||
playlist_id, 'Downloading program JSON')
|
|
||||||
|
|
||||||
title = media['name']
|
|
||||||
description = media['program_info']['description']
|
|
||||||
|
|
||||||
content_sets = [s['id'] for b in media['blocks'] for s in b['sets']]
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for cs in content_sets:
|
for b in (program.get('blocks') or []):
|
||||||
medias = self._download_json(
|
for s in (b.get('sets') or []):
|
||||||
'%s/%s.json' % (urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), cs),
|
s_id = s.get('id')
|
||||||
cs, 'Downloading content set JSON')
|
if not s_id:
|
||||||
for m in medias['items']:
|
continue
|
||||||
video_url = urljoin(url, m['path_id'])
|
medias = self._download_json(
|
||||||
entries.append(self.url_result(
|
'%s/%s.json' % (base, s_id), s_id,
|
||||||
video_url, ie=RaiPlayIE.ie_key(),
|
'Downloading content set JSON', fatal=False)
|
||||||
video_id=RaiPlayIE._match_id(video_url)))
|
if not medias:
|
||||||
|
continue
|
||||||
|
for m in (medias.get('items') or []):
|
||||||
|
path_id = m.get('path_id')
|
||||||
|
if not path_id:
|
||||||
|
continue
|
||||||
|
video_url = urljoin(url, path_id)
|
||||||
|
entries.append(self.url_result(
|
||||||
|
video_url, ie=RaiPlayIE.ie_key(),
|
||||||
|
video_id=RaiPlayIE._match_id(video_url)))
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, program.get('name'),
|
||||||
|
try_get(program, lambda x: x['program_info']['description']))
|
||||||
|
|
||||||
|
|
||||||
class RaiIE(RaiBaseIE):
|
class RaiIE(RaiBaseIE):
|
||||||
|
@ -294,7 +276,8 @@ class RaiIE(RaiBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 1758,
|
'duration': 1758,
|
||||||
'upload_date': '20140612',
|
'upload_date': '20140612',
|
||||||
}
|
},
|
||||||
|
'skip': 'This content is available only in Italy',
|
||||||
}, {
|
}, {
|
||||||
# with ContentItem in many metas
|
# with ContentItem in many metas
|
||||||
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
||||||
|
@ -440,7 +423,7 @@ class RaiIE(RaiBaseIE):
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
relinker_url = self._search_regex(
|
relinker_url = self._proto_relative_url(self._search_regex(
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
var\s+videoURL|
|
var\s+videoURL|
|
||||||
|
@ -452,7 +435,7 @@ class RaiIE(RaiBaseIE):
|
||||||
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
||||||
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
||||||
''',
|
''',
|
||||||
webpage, 'relinker URL', group='url')
|
webpage, 'relinker URL', group='url'))
|
||||||
|
|
||||||
relinker_info = self._extract_relinker_info(
|
relinker_info = self._extract_relinker_info(
|
||||||
urljoin(url, relinker_url), video_id)
|
urljoin(url, relinker_url), video_id)
|
||||||
|
|
|
@ -0,0 +1,413 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
|
base_url,
|
||||||
|
url_basename,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RCSBaseIE(InfoExtractor):
|
||||||
|
_ALL_REPLACE = {
|
||||||
|
'media2vam.corriere.it.edgesuite.net':
|
||||||
|
'media2vam-corriere-it.akamaized.net',
|
||||||
|
'media.youreporter.it.edgesuite.net':
|
||||||
|
'media-youreporter-it.akamaized.net',
|
||||||
|
'corrierepmd.corriere.it.edgesuite.net':
|
||||||
|
'corrierepmd-corriere-it.akamaized.net',
|
||||||
|
'media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/':
|
||||||
|
'video.corriere.it/vr360/videos/',
|
||||||
|
'.net//': '.net/',
|
||||||
|
}
|
||||||
|
_MP4_REPLACE = {
|
||||||
|
'media2vam.corbologna.corriere.it.edgesuite.net':
|
||||||
|
'media2vam-bologna-corriere-it.akamaized.net',
|
||||||
|
'media2vam.corfiorentino.corriere.it.edgesuite.net':
|
||||||
|
'media2vam-fiorentino-corriere-it.akamaized.net',
|
||||||
|
'media2vam.cormezzogiorno.corriere.it.edgesuite.net':
|
||||||
|
'media2vam-mezzogiorno-corriere-it.akamaized.net',
|
||||||
|
'media2vam.corveneto.corriere.it.edgesuite.net':
|
||||||
|
'media2vam-veneto-corriere-it.akamaized.net',
|
||||||
|
'media2.oggi.it.edgesuite.net':
|
||||||
|
'media2-oggi-it.akamaized.net',
|
||||||
|
'media2.quimamme.it.edgesuite.net':
|
||||||
|
'media2-quimamme-it.akamaized.net',
|
||||||
|
'media2.amica.it.edgesuite.net':
|
||||||
|
'media2-amica-it.akamaized.net',
|
||||||
|
'media2.living.corriere.it.edgesuite.net':
|
||||||
|
'media2-living-corriere-it.akamaized.net',
|
||||||
|
'media2.style.corriere.it.edgesuite.net':
|
||||||
|
'media2-style-corriere-it.akamaized.net',
|
||||||
|
'media2.iodonna.it.edgesuite.net':
|
||||||
|
'media2-iodonna-it.akamaized.net',
|
||||||
|
'media2.leitv.it.edgesuite.net':
|
||||||
|
'media2-leitv-it.akamaized.net',
|
||||||
|
}
|
||||||
|
_MIGRATION_MAP = {
|
||||||
|
'videoamica-vh.akamaihd': 'amica',
|
||||||
|
'media2-amica-it.akamaized': 'amica',
|
||||||
|
'corrierevam-vh.akamaihd': 'corriere',
|
||||||
|
'media2vam-corriere-it.akamaized': 'corriere',
|
||||||
|
'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno',
|
||||||
|
'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno',
|
||||||
|
'corveneto-vh.akamaihd': 'corrieredelveneto',
|
||||||
|
'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto',
|
||||||
|
'corbologna-vh.akamaihd': 'corrieredibologna',
|
||||||
|
'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna',
|
||||||
|
'corfiorentino-vh.akamaihd': 'corrierefiorentino',
|
||||||
|
'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino',
|
||||||
|
'corinnovazione-vh.akamaihd': 'corriereinnovazione',
|
||||||
|
'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet',
|
||||||
|
'videogazzanet-vh.akamaihd': 'gazzanet',
|
||||||
|
'videogazzaworld-vh.akamaihd': 'gazzaworld',
|
||||||
|
'gazzettavam-vh.akamaihd': 'gazzetta',
|
||||||
|
'media2vam-gazzetta-it.akamaized': 'gazzetta',
|
||||||
|
'videoiodonna-vh.akamaihd': 'iodonna',
|
||||||
|
'media2-leitv-it.akamaized': 'leitv',
|
||||||
|
'videoleitv-vh.akamaihd': 'leitv',
|
||||||
|
'videoliving-vh.akamaihd': 'living',
|
||||||
|
'media2-living-corriere-it.akamaized': 'living',
|
||||||
|
'media2-oggi-it.akamaized': 'oggi',
|
||||||
|
'videooggi-vh.akamaihd': 'oggi',
|
||||||
|
'media2-quimamme-it.akamaized': 'quimamme',
|
||||||
|
'quimamme-vh.akamaihd': 'quimamme',
|
||||||
|
'videorunning-vh.akamaihd': 'running',
|
||||||
|
'media2-style-corriere-it.akamaized': 'style',
|
||||||
|
'style-vh.akamaihd': 'style',
|
||||||
|
'videostyle-vh.akamaihd': 'style',
|
||||||
|
'media2-stylepiccoli-it.akamaized': 'stylepiccoli',
|
||||||
|
'stylepiccoli-vh.akamaihd': 'stylepiccoli',
|
||||||
|
'doveviaggi-vh.akamaihd': 'viaggi',
|
||||||
|
'media2-doveviaggi-it.akamaized': 'viaggi',
|
||||||
|
'media2-vivimilano-corriere-it.akamaized': 'vivimilano',
|
||||||
|
'vivimilano-vh.akamaihd': 'vivimilano',
|
||||||
|
'media2-youreporter-it.akamaized': 'youreporter'
|
||||||
|
}
|
||||||
|
_MIGRATION_MEDIA = {
|
||||||
|
'advrcs-vh.akamaihd': '',
|
||||||
|
'corriere-f.akamaihd': '',
|
||||||
|
'corrierepmd-corriere-it.akamaized': '',
|
||||||
|
'corrprotetto-vh.akamaihd': '',
|
||||||
|
'gazzetta-f.akamaihd': '',
|
||||||
|
'gazzettapmd-gazzetta-it.akamaized': '',
|
||||||
|
'gazzprotetto-vh.akamaihd': '',
|
||||||
|
'periodici-f.akamaihd': '',
|
||||||
|
'periodicisecure-vh.akamaihd': '',
|
||||||
|
'videocoracademy-vh.akamaihd': ''
|
||||||
|
}
|
||||||
|
|
||||||
|
def _get_video_src(self, video):
|
||||||
|
mediaFiles = video.get('mediaProfile').get('mediaFile')
|
||||||
|
src = {}
|
||||||
|
# audio
|
||||||
|
if video.get('mediaType') == 'AUDIO':
|
||||||
|
for aud in mediaFiles:
|
||||||
|
# todo: check
|
||||||
|
src['mp3'] = aud.get('value')
|
||||||
|
# video
|
||||||
|
else:
|
||||||
|
for vid in mediaFiles:
|
||||||
|
if vid.get('mimeType') == 'application/vnd.apple.mpegurl':
|
||||||
|
src['m3u8'] = vid.get('value')
|
||||||
|
if vid.get('mimeType') == 'video/mp4':
|
||||||
|
src['mp4'] = vid.get('value')
|
||||||
|
|
||||||
|
# replace host
|
||||||
|
for t in src:
|
||||||
|
for s, r in self._ALL_REPLACE.items():
|
||||||
|
src[t] = src[t].replace(s, r)
|
||||||
|
for s, r in self._MP4_REPLACE.items():
|
||||||
|
src[t] = src[t].replace(s, r)
|
||||||
|
|
||||||
|
# switch cdn
|
||||||
|
if 'mp4' in src and 'm3u8' in src:
|
||||||
|
if ('-lh.akamaihd' not in src.get('m3u8')
|
||||||
|
and 'akamai' in src.get('mp4')):
|
||||||
|
if 'm3u8' in src:
|
||||||
|
matches = re.search(r'(?:https*:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$', src.get('m3u8'))
|
||||||
|
src['m3u8'] = 'https://vod.rcsobjects.it/hls/%s%s' % (
|
||||||
|
self._MIGRATION_MAP[matches.group('host')],
|
||||||
|
matches.group('path').replace(
|
||||||
|
'///', '/').replace(
|
||||||
|
'//', '/').replace(
|
||||||
|
'.csmil', '.urlset'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if 'mp4' in src:
|
||||||
|
matches = re.search(r'(?:https*:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$', src.get('mp4'))
|
||||||
|
if matches:
|
||||||
|
if matches.group('host') in self._MIGRATION_MEDIA:
|
||||||
|
vh_stream = 'https://media2.corriereobjects.it'
|
||||||
|
if src.get('mp4').find('fcs.quotidiani_!'):
|
||||||
|
vh_stream = 'https://media2-it.corriereobjects.it'
|
||||||
|
src['mp4'] = '%s%s' % (
|
||||||
|
vh_stream,
|
||||||
|
matches.group('path').replace(
|
||||||
|
'///', '/').replace(
|
||||||
|
'//', '/').replace(
|
||||||
|
'/fcs.quotidiani/mediacenter', '').replace(
|
||||||
|
'/fcs.quotidiani_!/mediacenter', '').replace(
|
||||||
|
'corriere/content/mediacenter/', '').replace(
|
||||||
|
'gazzetta/content/mediacenter/', '')
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
src['mp4'] = 'https://vod.rcsobjects.it/%s%s' % (
|
||||||
|
self._MIGRATION_MAP[matches.group('host')],
|
||||||
|
matches.group('path').replace('///', '/').replace('//', '/')
|
||||||
|
)
|
||||||
|
|
||||||
|
if 'mp3' in src:
|
||||||
|
src['mp3'] = src.get('mp3').replace(
|
||||||
|
'media2vam-corriere-it.akamaized.net',
|
||||||
|
'vod.rcsobjects.it/corriere')
|
||||||
|
if 'mp4' in src:
|
||||||
|
if src.get('mp4').find('fcs.quotidiani_!'):
|
||||||
|
src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects')
|
||||||
|
if 'm3u8' in src:
|
||||||
|
if src.get('m3u8').find('fcs.quotidiani_!'):
|
||||||
|
src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects')
|
||||||
|
|
||||||
|
if 'geoblocking' in video.get('mediaProfile'):
|
||||||
|
if 'm3u8' in src:
|
||||||
|
src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects')
|
||||||
|
if 'mp4' in src:
|
||||||
|
src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects')
|
||||||
|
if 'm3u8' in src:
|
||||||
|
if src.get('m3u8').find('csmil') and src.get('m3u8').find('vod'):
|
||||||
|
src['m3u8'] = src.get('m3u8').replace('.csmil', '.urlset')
|
||||||
|
|
||||||
|
return src
|
||||||
|
|
||||||
|
def _create_formats(self, urls, video_id):
|
||||||
|
formats = []
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
urls.get('m3u8'), video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False)
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'http-mp4',
|
||||||
|
'url': urls.get('mp4')
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
mobj = re.search(self._VALID_URL, url)
|
||||||
|
|
||||||
|
if 'cdn' not in mobj.groupdict():
|
||||||
|
raise ExtractorError('CDN not found in url: %s' % url)
|
||||||
|
|
||||||
|
# for leitv/youreporter/viaggi don't use the embed page
|
||||||
|
if ((mobj.group('cdn') not in ['leitv.it', 'youreporter.it'])
|
||||||
|
and (mobj.group('vid') == 'video')):
|
||||||
|
url = 'https://video.%s/video-embed/%s' % (mobj.group('cdn'), video_id)
|
||||||
|
|
||||||
|
page = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_data = None
|
||||||
|
# look for json video data url
|
||||||
|
json = self._search_regex(
|
||||||
|
r'''(?x)var url\s*=\s*["']((?:https?:)?
|
||||||
|
//video\.rcs\.it
|
||||||
|
/fragment-includes/video-includes/.+?\.json)["'];''',
|
||||||
|
page, video_id, default=None)
|
||||||
|
if json:
|
||||||
|
if json.startswith('//'):
|
||||||
|
json = 'https:%s' % json
|
||||||
|
video_data = self._download_json(json, video_id)
|
||||||
|
|
||||||
|
# if json url not found, look for json video data directly in the page
|
||||||
|
else:
|
||||||
|
json = self._search_regex(
|
||||||
|
r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
|
||||||
|
page, video_id, default=None)
|
||||||
|
if json:
|
||||||
|
video_data = self._parse_json(
|
||||||
|
json, video_id, transform_source=js_to_json)
|
||||||
|
else:
|
||||||
|
# if no video data found try search for iframes
|
||||||
|
emb = RCSEmbedsIE._extract_url(page)
|
||||||
|
if emb:
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': emb,
|
||||||
|
'ie_key': RCSEmbedsIE.ie_key()
|
||||||
|
}
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
|
raise ExtractorError('Video data not found in the page')
|
||||||
|
|
||||||
|
formats = self._create_formats(
|
||||||
|
self._get_video_src(video_data), video_id)
|
||||||
|
|
||||||
|
description = (video_data.get('description')
|
||||||
|
or clean_html(video_data.get('htmlDescription')))
|
||||||
|
uploader = video_data.get('provider') or mobj.group('cdn')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_data.get('title'),
|
||||||
|
'description': description,
|
||||||
|
'uploader': uploader,
|
||||||
|
'formats': formats
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RCSEmbedsIE(RCSBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?P<vid>video)\.
|
||||||
|
(?P<cdn>
|
||||||
|
(?:
|
||||||
|
rcs|
|
||||||
|
(?:corriere\w+\.)?corriere|
|
||||||
|
(?:gazzanet\.)?gazzetta
|
||||||
|
)\.it)
|
||||||
|
/video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://video.rcs.it/video-embed/iodonna-0001585037',
|
||||||
|
'md5': '623ecc8ffe7299b2d0c1046d8331a9df',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'iodonna-0001585037',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"',
|
||||||
|
'description': 'md5:65b09633df9ffee57f48b39e34c9e067',
|
||||||
|
'uploader': 'rcs.it',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
|
||||||
|
'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gazzanet-mo05-0000260789',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Valentino Rossi e papà Graziano si divertono col drifting',
|
||||||
|
'description': 'md5:a8bf90d6adafd9815f70fc74c0fc370a',
|
||||||
|
'uploader': 'rcd',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player',
|
||||||
|
'match_only': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140',
|
||||||
|
'match_only': True
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _sanitize_urls(urls):
|
||||||
|
# add protocol if missing
|
||||||
|
for i, e in enumerate(urls):
|
||||||
|
if e.startswith('//'):
|
||||||
|
urls[i] = 'https:%s' % e
|
||||||
|
# clean iframes urls
|
||||||
|
for i, e in enumerate(urls):
|
||||||
|
urls[i] = urljoin(base_url(e), url_basename(e))
|
||||||
|
return urls
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
entries = [
|
||||||
|
mobj.group('url')
|
||||||
|
for mobj in re.finditer(r'''(?x)
|
||||||
|
(?:
|
||||||
|
data-frame-src=|
|
||||||
|
<iframe[^\n]+src=
|
||||||
|
)
|
||||||
|
(["'])
|
||||||
|
(?P<url>(?:https?:)?//video\.
|
||||||
|
(?:
|
||||||
|
rcs|
|
||||||
|
(?:corriere\w+\.)?corriere|
|
||||||
|
(?:gazzanet\.)?gazzetta
|
||||||
|
)
|
||||||
|
\.it/video-embed/.+?)
|
||||||
|
\1''', webpage)]
|
||||||
|
return RCSEmbedsIE._sanitize_urls(entries)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
urls = RCSEmbedsIE._extract_urls(webpage)
|
||||||
|
return urls[0] if urls else None
|
||||||
|
|
||||||
|
|
||||||
|
class RCSIE(RCSBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\.
|
||||||
|
(?P<cdn>
|
||||||
|
(?:
|
||||||
|
corrieredelmezzogiorno\.
|
||||||
|
|corrieredelveneto\.
|
||||||
|
|corrieredibologna\.
|
||||||
|
|corrierefiorentino\.
|
||||||
|
)?corriere\.it
|
||||||
|
|(?:gazzanet\.)?gazzetta\.it)
|
||||||
|
/(?!video-embed/).+?/(?P<id>[^/\?]+)(?=\?|/$|$)'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb',
|
||||||
|
'md5': '0f4ededc202b0f00b6e509d831e2dcda',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante',
|
||||||
|
'description': 'md5:93b51c9161ac8a64fb2f997b054d0152',
|
||||||
|
'uploader': 'Corriere Tv',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
|
||||||
|
'md5': 'da378e4918d2afbf7d61c35abb948d4c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen',
|
||||||
|
'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8',
|
||||||
|
'uploader': 'DOVE Viaggi',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.gazzetta.it/video-motogp-catalogna-cadute-dovizioso-vale-rossi/49612410-00ca-11eb-bcd8-30d4253e0140?vclk=Videobar',
|
||||||
|
'md5': 'eedc1b5defd18e67383afef51ff7bdf9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '49612410-00ca-11eb-bcd8-30d4253e0140',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dovizioso, il contatto con Zarco e la caduta. E anche Vale finisce a terra',
|
||||||
|
'description': 'md5:8c6e905dc3b9413218beca11ebd69778',
|
||||||
|
'uploader': 'AMorici',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945',
|
||||||
|
'match_only': True
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class RCSVariousIE(RCSBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)https?://www\.
|
||||||
|
(?P<cdn>
|
||||||
|
leitv\.it|
|
||||||
|
youreporter\.it
|
||||||
|
)/(?:video/)?(?P<id>[^/]+?)(?:$|\?|/)'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.leitv.it/video/marmellata-di-ciliegie-fatta-in-casa/',
|
||||||
|
'md5': '618aaabac32152199c1af86784d4d554',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'marmellata-di-ciliegie-fatta-in-casa',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Marmellata di ciliegie fatta in casa',
|
||||||
|
'description': 'md5:89133864d6aad456dbcf6e7a29f86263',
|
||||||
|
'uploader': 'leitv.it',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/',
|
||||||
|
'md5': '8dccd436b47a830bab5b4a88232f391a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'fiume-sesia-3-ottobre-2020',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Fiume Sesia 3 ottobre 2020',
|
||||||
|
'description': 'md5:0070eef1cc884d13c970a4125063de55',
|
||||||
|
'uploader': 'youreporter.it',
|
||||||
|
}
|
||||||
|
}]
|
|
@ -0,0 +1,67 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RumbleEmbedIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://rumble.com/embed/v5pv5f',
|
||||||
|
'md5': '36a18a049856720189f30977ccbb2c34',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v5pv5f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
|
||||||
|
'timestamp': 1571611968,
|
||||||
|
'upload_date': '20191020',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
video = self._download_json(
|
||||||
|
'https://rumble.com/embedJS/', video_id,
|
||||||
|
query={'request': 'video', 'v': video_id})
|
||||||
|
title = video['title']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for height, ua in (video.get('ua') or {}).items():
|
||||||
|
for i in range(2):
|
||||||
|
f_url = try_get(ua, lambda x: x[i], compat_str)
|
||||||
|
if f_url:
|
||||||
|
ext = determine_ext(f_url)
|
||||||
|
f = {
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': '%s-%sp' % (ext, height),
|
||||||
|
'height': int_or_none(height),
|
||||||
|
'url': f_url,
|
||||||
|
}
|
||||||
|
bitrate = try_get(ua, lambda x: x[i + 2]['bitrate'])
|
||||||
|
if bitrate:
|
||||||
|
f['tbr'] = int_or_none(bitrate)
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
author = video.get('author') or {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': video.get('i'),
|
||||||
|
'timestamp': parse_iso8601(video.get('pubDate')),
|
||||||
|
'channel': author.get('name'),
|
||||||
|
'channel_url': author.get('url'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
}
|
|
@ -1,9 +1,15 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
urlencode_postdata,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ServusIE(InfoExtractor):
|
class ServusIE(InfoExtractor):
|
||||||
|
@ -12,20 +18,29 @@ class ServusIE(InfoExtractor):
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
||||||
servustv\.com/videos
|
(?:servustv|pm-wissen)\.com/videos
|
||||||
)
|
)
|
||||||
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# new URL schema
|
# new URL schema
|
||||||
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
||||||
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
'md5': '60474d4c21f3eb148838f215c37f02b9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'AA-1T6VBU5PW1W12',
|
'id': 'AA-1T6VBU5PW1W12',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Die Grünen aus Sicht des Volkes',
|
'title': 'Die Grünen aus Sicht des Volkes',
|
||||||
|
'alt_title': 'Talk im Hangar-7 Voxpops Gruene',
|
||||||
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 62.442,
|
||||||
|
'timestamp': 1605193976,
|
||||||
|
'upload_date': '20201112',
|
||||||
|
'series': 'Talk im Hangar-7',
|
||||||
|
'season': 'Season 9',
|
||||||
|
'season_number': 9,
|
||||||
|
'episode': 'Episode 31 - September 14',
|
||||||
|
'episode_number': 31,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# old URL schema
|
# old URL schema
|
||||||
|
@ -40,30 +55,94 @@ class ServusIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).upper()
|
video_id = self._match_id(url).upper()
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._search_regex(
|
token = self._download_json(
|
||||||
(r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
'https://auth.redbullmediahouse.com/token', video_id,
|
||||||
r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
|
'Downloading token', data=urlencode_postdata({
|
||||||
webpage, 'title', default=None,
|
'grant_type': 'client_credentials',
|
||||||
group='title') or self._og_search_title(webpage)
|
}), headers={
|
||||||
title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
|
'Authorization': 'Basic SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==',
|
||||||
description = self._og_search_description(webpage)
|
})
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
access_token = token['access_token']
|
||||||
|
token_type = token.get('token_type', 'Bearer')
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
video = self._download_json(
|
||||||
'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
|
'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id,
|
||||||
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
video_id, 'Downloading video JSON', headers={
|
||||||
|
'Authorization': '%s %s' % (token_type, access_token),
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
thumbnail = None
|
||||||
|
for resource in video['resources']:
|
||||||
|
if not isinstance(resource, dict):
|
||||||
|
continue
|
||||||
|
format_url = url_or_none(resource.get('url'))
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
extension = resource.get('extension')
|
||||||
|
type_ = resource.get('type')
|
||||||
|
if extension == 'jpg' or type_ == 'reference_keyframe':
|
||||||
|
thumbnail = format_url
|
||||||
|
continue
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if type_ == 'dash' or ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
elif type_ == 'hls' or ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif extension == 'mp4' or ext == 'mp4':
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': type_,
|
||||||
|
'width': int_or_none(resource.get('width')),
|
||||||
|
'height': int_or_none(resource.get('height')),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
attrs = {}
|
||||||
|
for attribute in video['attributes']:
|
||||||
|
if not isinstance(attribute, dict):
|
||||||
|
continue
|
||||||
|
key = attribute.get('fieldKey')
|
||||||
|
value = attribute.get('fieldValue')
|
||||||
|
if not key or not value:
|
||||||
|
continue
|
||||||
|
attrs[key] = value
|
||||||
|
|
||||||
|
title = attrs.get('title_stv') or video_id
|
||||||
|
alt_title = attrs.get('title')
|
||||||
|
description = attrs.get('long_description') or attrs.get('short_description')
|
||||||
|
series = attrs.get('label')
|
||||||
|
season = attrs.get('season')
|
||||||
|
episode = attrs.get('chapter')
|
||||||
|
duration = float_or_none(attrs.get('duration'), scale=1000)
|
||||||
|
season_number = int_or_none(self._search_regex(
|
||||||
|
r'Season (\d+)', season or '', 'season number', default=None))
|
||||||
|
episode_number = int_or_none(self._search_regex(
|
||||||
|
r'Episode (\d+)', episode or '', 'episode number', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'alt_title': alt_title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': unified_timestamp(video.get('lastPublished')),
|
||||||
|
'series': series,
|
||||||
|
'season': season,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': episode_number,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,239 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItPlayerIE(InfoExtractor):
|
||||||
|
IE_NAME = 'player.sky.it'
|
||||||
|
_VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
_DOMAIN = 'sky'
|
||||||
|
_PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
|
||||||
|
# http://static.sky.it/static/skyplayer/conf.json
|
||||||
|
_TOKEN_MAP = {
|
||||||
|
'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
|
||||||
|
'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
|
||||||
|
'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
|
||||||
|
'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
|
||||||
|
'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
|
||||||
|
'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
|
||||||
|
'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
|
||||||
|
'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
|
||||||
|
'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _player_url_result(self, video_id):
|
||||||
|
return self.url_result(
|
||||||
|
self._PLAYER_TMPL % (video_id, self._DOMAIN),
|
||||||
|
SkyItPlayerIE.ie_key(), video_id)
|
||||||
|
|
||||||
|
def _parse_video(self, video, video_id):
|
||||||
|
title = video['title']
|
||||||
|
is_live = video.get('type') == 'live'
|
||||||
|
hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
|
||||||
|
if not hls_url and video.get('geoblock' if is_live else 'geob'):
|
||||||
|
self.raise_geo_restricted(countries=['IT'])
|
||||||
|
|
||||||
|
if is_live:
|
||||||
|
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
|
||||||
|
else:
|
||||||
|
formats = self._extract_akamai_formats(
|
||||||
|
hls_url, video_id, {'http': 'videoplatform.sky.it'})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._live_title(title) if is_live else title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
|
||||||
|
'description': video.get('short_desc') or None,
|
||||||
|
'timestamp': unified_timestamp(video.get('create_date')),
|
||||||
|
'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
|
||||||
|
'is_live': is_live,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
domain = compat_parse_qs(compat_urllib_parse_urlparse(
|
||||||
|
url).query).get('domain', [None])[0]
|
||||||
|
token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
|
||||||
|
video = self._download_json(
|
||||||
|
'https://apid.sky.it/vdp/v1/getVideoData',
|
||||||
|
video_id, query={
|
||||||
|
'caller': 'sky',
|
||||||
|
'id': video_id,
|
||||||
|
'token': token
|
||||||
|
}, headers=self.geo_verification_headers())
|
||||||
|
return self._parse_video(video, video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItVideoIE(SkyItPlayerIE):
|
||||||
|
IE_NAME = 'video.sky.it'
|
||||||
|
_VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
|
||||||
|
'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '631227',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Uomo ucciso da uno squalo in Australia',
|
||||||
|
'timestamp': 1606036192,
|
||||||
|
'upload_date': '20201122',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self._player_url_result(video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItVideoLiveIE(SkyItPlayerIE):
|
||||||
|
IE_NAME = 'video.sky.it:live'
|
||||||
|
_VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://video.sky.it/diretta/tg24',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||||
|
'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
asset_id = compat_str(self._parse_json(self._search_regex(
|
||||||
|
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||||
|
webpage, 'next data'), display_id)['props']['initialState']['livePage']['content']['asset_id'])
|
||||||
|
livestream = self._download_json(
|
||||||
|
'https://apid.sky.it/vdp/v1/getLivestream',
|
||||||
|
asset_id, query={'id': asset_id})
|
||||||
|
return self._parse_video(livestream, asset_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItIE(SkyItPlayerIE):
|
||||||
|
IE_NAME = 'sky.it'
|
||||||
|
_VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '631201',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
|
||||||
|
'upload_date': '20201121',
|
||||||
|
'timestamp': 1605995753,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
|
||||||
|
'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '631227',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Uomo ucciso da uno squalo in Australia',
|
||||||
|
'timestamp': 1606036192,
|
||||||
|
'upload_date': '20201122',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
_VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._search_regex(
|
||||||
|
self._VIDEO_ID_REGEX, webpage, 'video id')
|
||||||
|
return self._player_url_result(video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItAcademyIE(SkyItIE):
|
||||||
|
IE_NAME = 'skyacademy.it'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
|
||||||
|
'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '523458',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sky Academy "The Best CineCamp 2019"',
|
||||||
|
'timestamp': 1562843784,
|
||||||
|
'upload_date': '20190711',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
_DOMAIN = 'skyacademy'
|
||||||
|
_VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItArteIE(SkyItIE):
|
||||||
|
IE_NAME = 'arte.sky.it'
|
||||||
|
_VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
|
||||||
|
'md5': '515aee97b87d7a018b6c80727d3e7e17',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '627926',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
|
||||||
|
'upload_date': '20201106',
|
||||||
|
'timestamp': 1604664493,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
_DOMAIN = 'skyarte'
|
||||||
|
_VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
|
||||||
|
|
||||||
|
|
||||||
|
class CieloTVItIE(SkyItIE):
|
||||||
|
IE_NAME = 'cielotv.it'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
|
||||||
|
'md5': 'c4deed77552ba901c2a0d9258320304b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '499240',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Il lunedì è sempre un dramma',
|
||||||
|
'upload_date': '20190329',
|
||||||
|
'timestamp': 1553862178,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
_DOMAIN = 'cielo'
|
||||||
|
_VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
|
||||||
|
|
||||||
|
|
||||||
|
class TV8ItIE(SkyItVideoIE):
|
||||||
|
IE_NAME = 'tv8.it'
|
||||||
|
_VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
|
||||||
|
'md5': '9ab906a3f75ea342ed928442f9dabd21',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '630529',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
|
||||||
|
'timestamp': 1605721374,
|
||||||
|
'upload_date': '20201118',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
_DOMAIN = 'mtv8'
|
|
@ -1,119 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
class SkyItaliaBaseIE(InfoExtractor):
|
|
||||||
_GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}'
|
|
||||||
_RES = {
|
|
||||||
'low': [426, 240],
|
|
||||||
'med': [640, 360],
|
|
||||||
'high': [854, 480],
|
|
||||||
'hd': [1280, 720]
|
|
||||||
}
|
|
||||||
|
|
||||||
def _extract_video_id(self, url):
|
|
||||||
webpage = self._download_webpage(url, 'skyitalia')
|
|
||||||
video_id = self._html_search_regex(
|
|
||||||
[r'data-videoid=\"(\d+)\"',
|
|
||||||
r'http://player\.sky\.it/social\?id=(\d+)\&'],
|
|
||||||
webpage, 'video_id')
|
|
||||||
if video_id:
|
|
||||||
return video_id
|
|
||||||
raise ExtractorError('Video ID not found.')
|
|
||||||
|
|
||||||
def _get_formats(self, video_id, token):
|
|
||||||
data_url = self._GET_VIDEO_DATA.replace('{id}', video_id)
|
|
||||||
data_url = data_url.replace('{token}', token)
|
|
||||||
video_data = self._parse_json(
|
|
||||||
self._download_webpage(data_url, video_id),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for q, r in self._RES.items():
|
|
||||||
key = 'web_%s_url' % q
|
|
||||||
if key not in video_data:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'url': video_data.get(key),
|
|
||||||
'format_id': q,
|
|
||||||
'width': r[0],
|
|
||||||
'height': r[1]
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
title = video_data.get('title')
|
|
||||||
thumb = video_data.get('thumb')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumb,
|
|
||||||
'formats': formats
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
if video_id == 'None':
|
|
||||||
video_id = self._extract_video_id(url)
|
|
||||||
return self._get_formats(video_id, self._TOKEN)
|
|
||||||
|
|
||||||
|
|
||||||
class SkyItaliaIE(SkyItaliaBaseIE):
|
|
||||||
IE_NAME = 'sky.it'
|
|
||||||
_VALID_URL = r'''(?x)https?://
|
|
||||||
(?P<ie>sport|tg24|video)
|
|
||||||
\.sky\.it/(?:.+?)
|
|
||||||
(?P<id>[0-9]{6})?
|
|
||||||
(?:$|\?)'''
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162',
|
|
||||||
'md5': '9c03b590b06e5952d8051f0e02b0feca',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '616162',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
|
|
||||||
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta',
|
|
||||||
'md5': '9c03b590b06e5952d8051f0e02b0feca',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '616162',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
|
|
||||||
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi',
|
|
||||||
'md5': 'caa25e62dadb529bc5e0b078da99f854',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '615904',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti',
|
|
||||||
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
_TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk'
|
|
||||||
|
|
||||||
|
|
||||||
class SkyArteItaliaIE(SkyItaliaBaseIE):
|
|
||||||
IE_NAME = 'arte.sky.it'
|
|
||||||
_VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/',
|
|
||||||
'md5': '2f22513a89f45142f2746f878d690647',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '612888',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'I maestri del cinema Federico Felini',
|
|
||||||
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'
|
|
|
@ -649,7 +649,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
||||||
|
|
||||||
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
|
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
|
||||||
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
||||||
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200.
|
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
|
||||||
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
|
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
|
||||||
COMMON_QUERY = {
|
COMMON_QUERY = {
|
||||||
'limit': 200,
|
'limit': 200,
|
||||||
|
|
|
@ -44,7 +44,7 @@ class SouthParkEsIE(SouthParkIE):
|
||||||
|
|
||||||
class SouthParkDeIE(SouthParkIE):
|
class SouthParkDeIE(SouthParkIE):
|
||||||
IE_NAME = 'southpark.de'
|
IE_NAME = 'southpark.de'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:videoclip|collections|folgen)/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:(en/(videoclip|collections|episodes))|(videoclip|collections|folgen))/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
|
||||||
# _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
# _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
|
|
@ -1,159 +1,54 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .nexx import (
|
from .jwplatform import JWPlatformIE
|
||||||
NexxIE,
|
|
||||||
NexxEmbedIE,
|
|
||||||
)
|
|
||||||
from .spiegeltv import SpiegeltvIE
|
|
||||||
from ..compat import compat_urlparse
|
|
||||||
from ..utils import (
|
|
||||||
parse_duration,
|
|
||||||
strip_or_none,
|
|
||||||
unified_timestamp,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SpiegelIE(InfoExtractor):
|
class SpiegelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
|
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||||
'md5': 'b57399839d055fccfeb9a0455c439868',
|
'md5': '50c7948883ec85a3e431a0a44b7ad1d6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '563747',
|
'id': 'II0BUyxY',
|
||||||
|
'display_id': '1259285',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',
|
||||||
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
|
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
|
||||||
'duration': 49,
|
'duration': 48.0,
|
||||||
'upload_date': '20130311',
|
'upload_date': '20130311',
|
||||||
'timestamp': 1362994320,
|
'timestamp': 1362997920,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||||
'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '580988',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
|
||||||
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
|
|
||||||
'duration': 983,
|
|
||||||
'upload_date': '20131115',
|
|
||||||
'timestamp': 1384546642,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
|
|
||||||
'md5': '97b91083a672d72976faa8433430afb9',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '601883',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
|
|
||||||
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
|
|
||||||
'upload_date': '20140904',
|
|
||||||
'timestamp': 1409834160,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# nexx video
|
'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
|
'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}, {
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
|
|
||||||
handle = self._request_webpage(metadata_url, video_id)
|
|
||||||
|
|
||||||
# 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
|
|
||||||
if SpiegeltvIE.suitable(handle.geturl()):
|
|
||||||
return self.url_result(handle.geturl(), 'Spiegeltv')
|
|
||||||
|
|
||||||
video_data = self._parse_json(self._webpage_read_content(
|
|
||||||
handle, metadata_url, video_id), video_id)
|
|
||||||
title = video_data['title']
|
|
||||||
nexx_id = video_data['nexxOmniaId']
|
|
||||||
domain_id = video_data.get('nexxOmniaDomain') or '748'
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
'url': 'nexx:%s:%s' % (domain_id, nexx_id),
|
|
||||||
'title': title,
|
|
||||||
'description': strip_or_none(video_data.get('teaser')),
|
|
||||||
'duration': parse_duration(video_data.get('duration')),
|
|
||||||
'timestamp': unified_timestamp(video_data.get('datum')),
|
|
||||||
'ie_key': NexxIE.ie_key(),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class SpiegelArticleIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
|
|
||||||
IE_NAME = 'Spiegel:Article'
|
|
||||||
IE_DESC = 'Articles on spiegel.de'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
|
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '1516455',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
|
|
||||||
'description': 're:^Patrick Kämnitz gehört.{100,}',
|
|
||||||
'upload_date': '20140825',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
|
|
||||||
'info_dict': {
|
|
||||||
|
|
||||||
},
|
|
||||||
'playlist_count': 6,
|
|
||||||
}, {
|
|
||||||
# Nexx iFrame embed
|
|
||||||
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '161464',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Nervenkitzel Achterbahn',
|
|
||||||
'alt_title': 'Karussellbauer in Deutschland',
|
|
||||||
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
|
|
||||||
'release_year': 2005,
|
|
||||||
'creator': 'SPIEGEL TV',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 2761,
|
|
||||||
'timestamp': 1394021479,
|
|
||||||
'upload_date': '20140305',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'format': 'bestvideo',
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
media_id = self._html_search_regex(
|
||||||
# Single video on top of the page
|
r'("|["\'])mediaId\1\s*:\s*("|["\'])(?P<id>(?:(?!\2).)+)\2',
|
||||||
video_link = self._search_regex(
|
webpage, 'media id', group='id')
|
||||||
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
|
return {
|
||||||
'video page URL', default=None)
|
'_type': 'url_transparent',
|
||||||
if video_link:
|
'id': video_id,
|
||||||
video_url = compat_urlparse.urljoin(
|
'display_id': video_id,
|
||||||
self.http_scheme() + '//spiegel.de/', video_link)
|
'url': 'jwplatform:%s' % media_id,
|
||||||
return self.url_result(video_url)
|
'title': self._og_search_title(webpage, default=None),
|
||||||
|
'ie_key': JWPlatformIE.ie_key(),
|
||||||
# Multiple embedded videos
|
}
|
||||||
embeds = re.findall(
|
|
||||||
r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
|
|
||||||
webpage)
|
|
||||||
entries = [
|
|
||||||
self.url_result(compat_urlparse.urljoin(
|
|
||||||
self.http_scheme() + '//spiegel.de/', embed_path))
|
|
||||||
for embed_path in embeds]
|
|
||||||
if embeds:
|
|
||||||
return self.playlist_result(entries)
|
|
||||||
|
|
||||||
return self.playlist_from_matches(
|
|
||||||
NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())
|
|
||||||
|
|
|
@ -0,0 +1,176 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_episode(data, episode_id=None):
|
||||||
|
title = data['title']
|
||||||
|
download_url = data['download_url']
|
||||||
|
|
||||||
|
series = try_get(data, lambda x: x['show']['title'], compat_str)
|
||||||
|
uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for image in ('image_original', 'image_medium', 'image'):
|
||||||
|
image_url = url_or_none(data.get('%s_url' % image))
|
||||||
|
if image_url:
|
||||||
|
thumbnails.append({'url': image_url})
|
||||||
|
|
||||||
|
def stats(key):
|
||||||
|
return int_or_none(try_get(
|
||||||
|
data,
|
||||||
|
(lambda x: x['%ss_count' % key],
|
||||||
|
lambda x: x['stats']['%ss' % key])))
|
||||||
|
|
||||||
|
def duration(key):
|
||||||
|
return float_or_none(data.get(key), scale=1000)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': compat_str(episode_id or data['episode_id']),
|
||||||
|
'url': download_url,
|
||||||
|
'display_id': data.get('permalink'),
|
||||||
|
'title': title,
|
||||||
|
'description': data.get('description'),
|
||||||
|
'timestamp': unified_timestamp(data.get('published_at')),
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': str_or_none(data.get('author_id')),
|
||||||
|
'creator': uploader,
|
||||||
|
'duration': duration('duration') or duration('length'),
|
||||||
|
'view_count': stats('play'),
|
||||||
|
'like_count': stats('like'),
|
||||||
|
'comment_count': stats('message'),
|
||||||
|
'format': 'MPEG Layer 3',
|
||||||
|
'format_id': 'mp3',
|
||||||
|
'container': 'mp3',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'series': series,
|
||||||
|
'extractor_key': SpreakerIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SpreakerIE(InfoExtractor):
    """Extractor for a single episode addressed through api.spreaker.com."""

    # Accepts /episode/<id>, /download/episode/<id> and /v2/episodes/<id>.
    _VALID_URL = r'''(?x)
                    https?://
                        api\.spreaker\.com/
                        (?:
                            (?:download/)?episode|
                            v2/episodes
                        )/
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://api.spreaker.com/episode/12534508',
        'info_dict': {
            'id': '12534508',
            'display_id': 'swm-ep15-how-to-market-your-music-part-2',
            'ext': 'mp3',
            'title': 'EP:15 | Music Marketing (Likes) - Part 2',
            'description': 'md5:0588c43e27be46423e183076fa071177',
            'timestamp': 1502250336,
            'upload_date': '20170809',
            'uploader': 'SWM',
            'uploader_id': '9780658',
            'duration': 1063.42,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': 'Success With Music (SWM)',
        },
    }, {
        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
        'only_matching': True,
    }, {
        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the episode JSON from the v2 API and convert it to an info dict."""
        episode_id = self._match_id(url)
        api_url = 'https://api.spreaker.com/v2/episodes/%s' % episode_id
        payload = self._download_json(api_url, episode_id)
        # The episode mapping sits under response -> episode.
        return _extract_episode(payload['response']['episode'], episode_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SpreakerPageIE(InfoExtractor):
    """Extractor for www.spreaker.com episode pages; delegates to SpreakerIE."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the numeric episode id in the page and hand off to the API extractor."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The id is searched for as an HTML data attribute first, then as an
        # `episode_id: <n>` assignment.
        id_patterns = (
            r'data-episode_id=["\'](?P<id>\d+)',
            r'episode_id\s*:\s*(?P<id>\d+)',
        )
        episode_id = self._search_regex(id_patterns, webpage, 'episode id')

        api_url = 'https://api.spreaker.com/episode/%s' % episode_id
        return self.url_result(
            api_url, ie=SpreakerIE.ie_key(), video_id=episode_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SpreakerShowIE(InfoExtractor):
    """Extractor for a whole show (playlist of episodes) on api.spreaker.com."""

    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        """Yield an info dict per episode, walking the paginated episode list.

        Iteration stops when a page has no usable pager or results, or when
        the current page number equals the pager's 'last_page'.
        """
        episodes_url = 'https://api.spreaker.com/show/%s/episodes' % show_id
        page_num = 0
        while True:
            page_num += 1
            page = self._download_json(
                episodes_url, show_id,
                note='Downloading JSON page %d' % page_num,
                query={
                    'page': page_num,
                    'max_per_page': 100,
                })
            pager = try_get(page, lambda x: x['response']['pager'], dict)
            if not pager:
                return
            results = pager.get('results')
            if not (results and isinstance(results, list)):
                return
            for episode in results:
                # Skip anything that is not an episode mapping.
                if isinstance(episode, dict):
                    yield _extract_episode(episode)
            if page_num == pager.get('last_page'):
                return

    def _real_extract(self, url):
        """Return a playlist result whose entries are produced lazily."""
        show_id = self._match_id(url)
        entries = self._entries(show_id)
        return self.playlist_result(entries, playlist_id=show_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SpreakerShowPageIE(InfoExtractor):
    """Extractor for www.spreaker.com show pages; delegates to SpreakerShowIE."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the numeric show id in the page and hand off to the API extractor."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page carries the id as a `show_id: <n>` assignment.
        show_id = self._search_regex(
            r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')

        api_url = 'https://api.spreaker.com/show/%s' % show_id
        return self.url_result(
            api_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)
|
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -44,7 +45,8 @@ class SVTBaseIE(InfoExtractor):
|
||||||
'format_id': player_type,
|
'format_id': player_type,
|
||||||
'url': vurl,
|
'url': vurl,
|
||||||
})
|
})
|
||||||
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
|
rights = try_get(video_info, lambda x: x['rights'], dict) or {}
|
||||||
|
if not formats and rights.get('geoBlockedSweden'):
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
'This video is only available in Sweden',
|
'This video is only available in Sweden',
|
||||||
countries=self._GEO_COUNTRIES)
|
countries=self._GEO_COUNTRIES)
|
||||||
|
@ -70,6 +72,7 @@ class SVTBaseIE(InfoExtractor):
|
||||||
episode = video_info.get('episodeTitle')
|
episode = video_info.get('episodeTitle')
|
||||||
episode_number = int_or_none(video_info.get('episodeNumber'))
|
episode_number = int_or_none(video_info.get('episodeNumber'))
|
||||||
|
|
||||||
|
timestamp = unified_timestamp(rights.get('validFrom'))
|
||||||
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
|
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
|
||||||
age_limit = None
|
age_limit = None
|
||||||
adult = dict_get(
|
adult = dict_get(
|
||||||
|
@ -84,6 +87,7 @@ class SVTBaseIE(InfoExtractor):
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'series': series,
|
'series': series,
|
||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
|
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
svt:(?P<svt_id>[^/?#&]+)|
|
(?:
|
||||||
|
svt:|
|
||||||
|
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
|
||||||
|
)
|
||||||
|
(?P<svt_id>[^/?#&]+)|
|
||||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
|
||||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
'md5': '2382036fd6f8c994856c323fe51c426e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5996901',
|
'id': 'jNwpV9P',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Flygplan till Haile Selassie',
|
'title': 'Det här är himlen',
|
||||||
'duration': 3527,
|
'timestamp': 1586044800,
|
||||||
'thumbnail': r're:^https?://.*[\.-]jpg$',
|
'upload_date': '20200405',
|
||||||
|
'duration': 3515,
|
||||||
|
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'subtitles': {
|
'subtitles': {
|
||||||
'sv': [{
|
'sv': [{
|
||||||
'ext': 'wsrt',
|
'ext': 'vtt',
|
||||||
}]
|
}]
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
|
||||||
|
# init segments that are smaller
|
||||||
|
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# geo restricted to Sweden
|
# geo restricted to Sweden
|
||||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||||
|
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'svt:14278044',
|
'url': 'svt:14278044',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'svt:eWv5MLX',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _adjust_title(self, info):
|
def _adjust_title(self, info):
|
||||||
|
@ -236,7 +259,10 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
return self._extract_by_video_id(svt_id, webpage)
|
info_dict = self._extract_by_video_id(svt_id, webpage)
|
||||||
|
info_dict['thumbnail'] = thumbnail
|
||||||
|
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
class SVTSeriesIE(SVTPlayBaseIE):
|
class SVTSeriesIE(SVTPlayBaseIE):
|
||||||
|
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
|
@ -86,7 +86,7 @@ class TagesschauPlayerIE(InfoExtractor):
|
||||||
# return self._extract_via_api(kind, video_id)
|
# return self._extract_via_api(kind, video_id)
|
||||||
|
|
||||||
# JSON api does not provide some audio formats (e.g. ogg) thus
|
# JSON api does not provide some audio formats (e.g. ogg) thus
|
||||||
# extractiong audio via webpage
|
# extracting audio via webpage
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
|
|
@ -208,7 +208,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||||
if m:
|
if m:
|
||||||
return [m.group('url')]
|
return [m.group('url')]
|
||||||
|
|
||||||
# Are whitesapces ignored in URLs?
|
# Are whitespaces ignored in URLs?
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/12044
|
# https://github.com/ytdl-org/youtube-dl/issues/12044
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ThisVidIE(InfoExtractor):
    """Extractor for thisvid.com video pages and their embed variants."""

    # The optional trailing slash is kept outside the `id` group so that the
    # captured slug / numeric id never ends with '/'.
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)/?'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/french-boy-pantsed/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/2400174/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Scrape the player settings from the page and return the info dict.

        The numeric video id, the obfuscated video URL and the license code
        are mandatory (extraction fails without them); the player version,
        the thumbnail and, for embed pages, the display id are optional.
        """
        mobj = re.match(self._VALID_URL, url)
        main_id = mobj.group('id')
        webpage = self._download_webpage(url, main_id)

        # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
        kvs_version = self._html_search_regex(r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">', webpage, 'kvs_version', fatal=False)
        # The lookup is non-fatal, so the version may be None; only warn when
        # a version was actually found and it is not a 5.x release.
        if kvs_version and not kvs_version.startswith("5."):
            self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")

        title = self._html_search_regex(r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>', webpage, 'title')
        # video_id, video_url and license_code from the 'flashvars' JSON object:
        video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
        video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
        license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')
        thumbnail = self._html_search_regex(r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',", webpage, 'thumbnail', fatal=False)
        # The thumbnail is optional; protocol-relative URLs get an explicit scheme.
        if thumbnail and thumbnail.startswith("//"):
            thumbnail = "https:" + thumbnail

        if mobj.group('type') == "videos":
            display_id = main_id
        else:
            # Embed URLs only carry the numeric id; take the slug from the
            # canonical link.  _VALID_URL has several groups, so the wanted
            # one must be named explicitly.
            display_id = self._search_regex(
                r'<link rel="canonical" href="' + self._VALID_URL + r'">',
                webpage, 'display_id', fatal=False, group='id')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'url': getrealurl(video_url, license_code),
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def getrealurl(video_url, license_code):
    """Return the usable media URL for an obfuscated player URL.

    The first two '/'-separated components of `video_url` are dropped.  The
    first 32 characters of the component at index 5 of what remains are
    then unscrambled with a token derived from `license_code`; anything
    after those 32 characters is kept unchanged.
    """
    parts = video_url.split('/')[2:]
    token = getlicensetoken(license_code)
    scrambled = list(parts[5][:32])

    # Walk the positions from last to first, swapping each character with
    # the one at an index derived from the remaining token digits.
    for pos in reversed(range(len(scrambled))):
        other = (pos + sum(int(digit) for digit in token[pos:])) % 32
        scrambled[pos], scrambled[other] = scrambled[other], scrambled[pos]

    parts[5] = "".join(scrambled) + parts[5][32:]
    return "/".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def getlicensetoken(license):
    """Derive the digit token used for URL unscrambling from a license code.

    `license` is the raw code as found in the page ('$' characters and
    digits).  The result is a string of decimal digits, four per position
    up to and including the middle of the cleaned code.
    """
    # Cleaned code: '$' removed and every '0' replaced by '1'.
    digits = license.replace("$", "").replace("0", "1")
    center = len(digits) // 2

    # The two halves overlap on the middle digit.
    front = int(digits[:center + 1])
    back = int(digits[center:])
    key = str(4 * abs(front - back))

    # The raw `license` (not the cleaned code) is indexed here, offset by
    # one to four positions from the current key digit.
    return "".join(
        str((int(license[pos + step]) + int(key[pos])) % 10)
        for pos in range(center + 1)
        for step in range(1, 5))
|
|
@ -1,56 +1,157 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
get_element_by_attribute,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TMZIE(InfoExtractor):
|
class TMZIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)'
|
_VALID_URL = r"https?://(?:www\.)?tmz\.com/.*"
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
'url': 'http://www.tmz.com/videos/0_okj015ty/',
|
{
|
||||||
'md5': '4d22a51ef205b6c06395d8394f72d560',
|
"url": "http://www.tmz.com/videos/0-cegprt2p/",
|
||||||
'info_dict': {
|
"info_dict": {
|
||||||
'id': '0_okj015ty',
|
"id": "http://www.tmz.com/videos/0-cegprt2p/",
|
||||||
'ext': 'mp4',
|
"ext": "mp4",
|
||||||
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
|
"title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
|
||||||
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
|
"description": "Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.",
|
||||||
'timestamp': 1394747163,
|
"timestamp": 1467831837,
|
||||||
'uploader_id': 'batchUser',
|
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
|
||||||
'upload_date': '20140313',
|
"upload_date": "20160706",
|
||||||
}
|
},
|
||||||
}, {
|
},
|
||||||
'url': 'http://www.tmz.com/videos/0-cegprt2p/',
|
{
|
||||||
'only_matching': True,
|
"url": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
|
||||||
}]
|
"info_dict": {
|
||||||
|
"id": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Angry Bagel Shop Guy Says He Doesn't Trust Women",
|
||||||
|
"description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.",
|
||||||
|
"timestamp": 1562889485,
|
||||||
|
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
|
||||||
|
"upload_date": "20190711",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
|
||||||
|
"md5": "5429c85db8bde39a473a56ca8c4c5602",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake",
|
||||||
|
"description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
|
||||||
|
"timestamp": 1429467813,
|
||||||
|
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
|
||||||
|
"upload_date": "20150419",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Patti LaBelle -- Goes Nuclear On Stripping Fan",
|
||||||
|
"description": "Patti LaBelle made it known loud and clear last night ... NO "
|
||||||
|
"ONE gets on her stage and strips down.",
|
||||||
|
"timestamp": 1442683746,
|
||||||
|
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
|
||||||
|
"upload_date": "20150919",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This",
|
||||||
|
"description": "Two pretty parts of this video with NBA Commish Adam Silver.",
|
||||||
|
"timestamp": 1454010989,
|
||||||
|
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
|
||||||
|
"upload_date": "20160128",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!",
|
||||||
|
"description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.",
|
||||||
|
"timestamp": 1477500095,
|
||||||
|
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
|
||||||
|
"upload_date": "20161026",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist "
|
||||||
|
"Demonstrators",
|
||||||
|
"description": "Beverly Hills may be an omen of what's coming next week, "
|
||||||
|
"because things got crazy on the streets and cops started "
|
||||||
|
"swinging their billy clubs at both Anti-Fascist and Pro-Trump "
|
||||||
|
"demonstrators.",
|
||||||
|
"timestamp": 1604182772,
|
||||||
|
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
|
||||||
|
"upload_date": "20201031",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "Dddb6IGe-ws",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing",
|
||||||
|
"uploader": "ESNEWS",
|
||||||
|
"description": "md5:49675bc58883ccf80474b8aa701e1064",
|
||||||
|
"upload_date": "20201101",
|
||||||
|
"uploader_id": "ESNEWS",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "1329450007125225473",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.",
|
||||||
|
"uploader": "TheMacLife",
|
||||||
|
"description": "md5:56e6009bbc3d12498e10d08a8e1f1c69",
|
||||||
|
"upload_date": "20201119",
|
||||||
|
"uploader_id": "Maclifeofficial",
|
||||||
|
"timestamp": 1605800556,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).replace('-', '_')
|
webpage = self._download_webpage(url, url)
|
||||||
return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id)
|
jsonld = self._search_json_ld(webpage, url)
|
||||||
|
if not jsonld or "url" not in jsonld:
|
||||||
|
# try to extract from YouTube Player API
|
||||||
class TMZArticleIE(InfoExtractor):
|
# see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions
|
||||||
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
|
match_obj = re.search(r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)', webpage)
|
||||||
_TEST = {
|
if match_obj:
|
||||||
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
|
res = self.url_result(match_obj.group("id"))
|
||||||
'md5': '3316ff838ae5bb7f642537825e1e90d2',
|
return res
|
||||||
'info_dict': {
|
# try to extract from twitter
|
||||||
'id': '0_6snoelag',
|
blockquote_el = get_element_by_attribute("class", "twitter-tweet", webpage)
|
||||||
'ext': 'mov',
|
if blockquote_el:
|
||||||
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
|
matches = re.findall(
|
||||||
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
|
r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)',
|
||||||
'timestamp': 1429467813,
|
blockquote_el)
|
||||||
'upload_date': '20150419',
|
if matches:
|
||||||
'uploader_id': 'batchUser',
|
for _, match in matches:
|
||||||
}
|
if "/status/" in match:
|
||||||
}
|
res = self.url_result(match)
|
||||||
|
return res
|
||||||
def _real_extract(self, url):
|
raise ExtractorError("No video found!")
|
||||||
video_id = self._match_id(url)
|
if id not in jsonld:
|
||||||
|
jsonld["id"] = url
|
||||||
webpage = self._download_webpage(url, video_id)
|
return jsonld
|
||||||
embedded_video_info = self._parse_json(self._html_search_regex(
|
|
||||||
r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
return self.url_result(
|
|
||||||
'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
|
|
||||||
|
|
|
@ -56,9 +56,9 @@ class TurnerBaseIE(AdobePassIE):
|
||||||
content_id = xpath_text(video_data, 'contentId') or video_id
|
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||||
# rtmp_src = xpath_text(video_data, 'akamai/src')
|
# rtmp_src = xpath_text(video_data, 'akamai/src')
|
||||||
# if rtmp_src:
|
# if rtmp_src:
|
||||||
# splited_rtmp_src = rtmp_src.split(',')
|
# split_rtmp_src = rtmp_src.split(',')
|
||||||
# if len(splited_rtmp_src) == 2:
|
# if len(split_rtmp_src) == 2:
|
||||||
# rtmp_src = splited_rtmp_src[1]
|
# rtmp_src = split_rtmp_src[1]
|
||||||
# aifp = xpath_text(video_data, 'akamai/aifp', default='')
|
# aifp = xpath_text(video_data, 'akamai/aifp', default='')
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
|
|
|
@ -8,8 +8,8 @@ from ..utils import int_or_none
|
||||||
|
|
||||||
class TwentyThreeVideoIE(InfoExtractor):
|
class TwentyThreeVideoIE(InfoExtractor):
|
||||||
IE_NAME = '23video'
|
IE_NAME = '23video'
|
||||||
_VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
|
_VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
|
'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
|
||||||
'md5': '75fcf216303eb1dae9920d651f85ced4',
|
'md5': '75fcf216303eb1dae9920d651f85ced4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -21,11 +21,14 @@ class TwentyThreeVideoIE(InfoExtractor):
|
||||||
'uploader_id': '12258964',
|
'uploader_id': '12258964',
|
||||||
'uploader': 'Rasmus Bysted',
|
'uploader': 'Rasmus Bysted',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, query, photo_id = re.match(self._VALID_URL, url).groups()
|
domain, query, photo_id = re.match(self._VALID_URL, url).groups()
|
||||||
base_url = 'https://video.%s' % domain
|
base_url = 'https://%s' % domain
|
||||||
photo_data = self._download_json(
|
photo_data = self._download_json(
|
||||||
base_url + '/api/photo/list?' + query, photo_id, query={
|
base_url + '/api/photo/list?' + query, photo_id, query={
|
||||||
'format': 'json',
|
'format': 'json',
|
||||||
|
|
|
@ -2,8 +2,11 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_timestamp
|
from ..utils import (
|
||||||
import re
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class URPlayIE(InfoExtractor):
|
class URPlayIE(InfoExtractor):
|
||||||
|
@ -14,7 +17,7 @@ class URPlayIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '203704',
|
'id': '203704',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Om vetenskap, kritiskt tänkande och motstånd',
|
'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
|
||||||
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
|
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
|
||||||
'timestamp': 1513292400,
|
'timestamp': 1513292400,
|
||||||
'upload_date': '20171214',
|
'upload_date': '20171214',
|
||||||
|
@ -26,7 +29,7 @@ class URPlayIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Tripp, Trapp, Träd : Sovkudde',
|
'title': 'Tripp, Trapp, Träd : Sovkudde',
|
||||||
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
|
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
|
||||||
'timestamp': 1440093600,
|
'timestamp': 1440086400,
|
||||||
'upload_date': '20150820',
|
'upload_date': '20150820',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
@ -36,28 +39,27 @@ class URPlayIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
urplayer_data = re.sub(""", "\"", self._search_regex(
|
urplayer_data = self._parse_json(self._html_search_regex(
|
||||||
r'components\/Player\/Player\" data-react-props=\"({.+?})\"',
|
r'data-react-class="components/Player/Player"[^>]+data-react-props="({.+?})"',
|
||||||
webpage, 'urplayer data'))
|
webpage, 'urplayer data'), video_id)['currentProduct']
|
||||||
urplayer_data = self._parse_json(urplayer_data, video_id)
|
episode = urplayer_data['title']
|
||||||
for i in range(len(urplayer_data['accessibleEpisodes'])):
|
|
||||||
if urplayer_data.get('accessibleEpisodes', {})[i].get('id') == int(video_id):
|
|
||||||
urplayer_data = urplayer_data['accessibleEpisodes'][i]
|
|
||||||
break
|
|
||||||
|
|
||||||
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
||||||
formats = []
|
formats = []
|
||||||
urplayer_streams = urplayer_data.get("streamingInfo")
|
urplayer_streams = urplayer_data.get('streamingInfo', {})
|
||||||
for quality in ('sd'), ('hd'):
|
|
||||||
location = (urplayer_streams.get("raw", {}).get(quality, {}).get("location")
|
for k, v in urplayer_streams.get('raw', {}).items():
|
||||||
or urplayer_streams.get("sweComplete", {}).get(quality, {}).get("location"))
|
if not (k in ('sd', 'hd') and isinstance(v, dict)):
|
||||||
if location:
|
continue
|
||||||
|
file_http = v.get('location')
|
||||||
|
if file_http:
|
||||||
formats.extend(self._extract_wowza_formats(
|
formats.extend(self._extract_wowza_formats(
|
||||||
'http://%s/%s/playlist.m3u8' % (host, location), video_id,
|
'http://%s/%splaylist.m3u8' % (host, file_http),
|
||||||
skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location")
|
subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location")
|
||||||
if subs:
|
if subs:
|
||||||
|
@ -65,14 +67,37 @@ class URPlayIE(InfoExtractor):
|
||||||
'url': subs,
|
'url': subs,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
image = urplayer_data.get('image') or {}
|
||||||
|
thumbnails = []
|
||||||
|
for k, v in image.items():
|
||||||
|
t = {
|
||||||
|
'id': k,
|
||||||
|
'url': v,
|
||||||
|
}
|
||||||
|
wh = k.split('x')
|
||||||
|
if len(wh) == 2:
|
||||||
|
t.update({
|
||||||
|
'width': int_or_none(wh[0]),
|
||||||
|
'height': int_or_none(wh[1]),
|
||||||
|
})
|
||||||
|
thumbnails.append(t)
|
||||||
|
|
||||||
|
series = urplayer_data.get('series') or {}
|
||||||
|
series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': urplayer_data['title'],
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': urplayer_data.get('image', {}).get('1280x720'),
|
|
||||||
'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'),
|
|
||||||
webpage, 'timestamp')),
|
|
||||||
'series': urplayer_data.get('seriesTitle'),
|
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'title': '%s : %s' % (series_title, episode) if series_title else episode,
|
||||||
|
'description': urplayer_data.get('description'),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': unified_timestamp(urplayer_data.get('publishedAt')),
|
||||||
|
'series': series_title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'duration': int_or_none(urplayer_data.get('duration')),
|
||||||
|
'categories': urplayer_data.get('categories'),
|
||||||
|
'tags': urplayer_data.get('keywords'),
|
||||||
|
'season': series.get('label'),
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,74 +1,24 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .nbc import NBCIE
|
||||||
from ..utils import (
|
|
||||||
NO_DEFAULT,
|
|
||||||
smuggle_url,
|
|
||||||
update_url_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class USANetworkIE(AdobePassIE):
|
class USANetworkIE(NBCIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P<id>\d+))'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity',
|
'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
|
||||||
'md5': '33c0d2ba381571b414024440d08d57fd',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3086229',
|
'id': '4185302',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'HPE Cybersecurity',
|
'title': 'Intelligence (Trailer)',
|
||||||
'description': 'The more we digitize our world, the more vulnerable we are.',
|
'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
|
||||||
'upload_date': '20160818',
|
'upload_date': '20200715',
|
||||||
'timestamp': 1471535460,
|
'timestamp': 1594785600,
|
||||||
'uploader': 'NBCU-USA',
|
'uploader': 'NBCU-MPAT',
|
||||||
},
|
},
|
||||||
}
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
def _real_extract(self, url):
|
'skip_download': True,
|
||||||
display_id = self._match_id(url)
|
},
|
||||||
webpage = self._download_webpage(url, display_id)
|
}]
|
||||||
|
|
||||||
def _x(name, default=NO_DEFAULT):
|
|
||||||
return self._search_regex(
|
|
||||||
r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
|
||||||
webpage, name, default=default, group='value')
|
|
||||||
|
|
||||||
video_id = _x('mpx-guid')
|
|
||||||
title = _x('episode-title')
|
|
||||||
mpx_account_id = _x('mpx-account-id', '2304992029')
|
|
||||||
|
|
||||||
query = {
|
|
||||||
'mbr': 'true',
|
|
||||||
}
|
|
||||||
if _x('is-full-episode', None) == '1':
|
|
||||||
query['manifest'] = 'm3u'
|
|
||||||
|
|
||||||
if _x('is-entitlement', None) == '1':
|
|
||||||
adobe_pass = {}
|
|
||||||
drupal_settings = self._search_regex(
|
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
|
||||||
webpage, 'drupal settings', fatal=False)
|
|
||||||
if drupal_settings:
|
|
||||||
drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False)
|
|
||||||
if drupal_settings:
|
|
||||||
adobe_pass = drupal_settings.get('adobePass', {})
|
|
||||||
resource = self._get_mvpd_resource(
|
|
||||||
adobe_pass.get('adobePassResourceId', 'usa'),
|
|
||||||
title, video_id, _x('episode-rating', 'TV-14'))
|
|
||||||
query['auth'] = self._extract_mvpd_auth(
|
|
||||||
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
|
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': smuggle_url(update_url_query(
|
|
||||||
'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
|
|
||||||
query), {'force_smil_url': True}),
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'series': _x('show-title', None),
|
|
||||||
'episode': title,
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class UstreamIE(InfoExtractor):
|
class UstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
|
||||||
IE_NAME = 'ustream'
|
IE_NAME = 'ustream'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ustream.tv/recorded/20274954',
|
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||||
|
@ -67,12 +67,15 @@ class UstreamIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # m3u8 download
|
'skip_download': True, # m3u8 download
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.ibm.com/embed/recorded/128240221?&autoplay=true&controls=true&volume=100',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import itertools
|
import itertools
|
||||||
|
@ -9,6 +10,10 @@ import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -16,6 +21,7 @@ from ..utils import (
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
std_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,19 +172,20 @@ class VikiIE(VikiBaseIE):
|
||||||
}, {
|
}, {
|
||||||
# episode
|
# episode
|
||||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||||
'md5': '5fa476a902e902783ac7a4d615cdbc7a',
|
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '44699v',
|
'id': '44699v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Boys Over Flowers - Episode 1',
|
'title': 'Boys Over Flowers - Episode 1',
|
||||||
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
||||||
'duration': 4204,
|
'duration': 4172,
|
||||||
'timestamp': 1270496524,
|
'timestamp': 1270496524,
|
||||||
'upload_date': '20100405',
|
'upload_date': '20100405',
|
||||||
'uploader': 'group8',
|
'uploader': 'group8',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
}, {
|
}, {
|
||||||
# youtube external
|
# youtube external
|
||||||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
||||||
|
@ -195,14 +202,15 @@ class VikiIE(VikiBaseIE):
|
||||||
'uploader_id': 'ad14065n',
|
'uploader_id': 'ad14065n',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
}
|
},
|
||||||
|
'skip': 'Page not found!',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viki.com/player/44699v',
|
'url': 'http://www.viki.com/player/44699v',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# non-English description
|
# non-English description
|
||||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '158036v',
|
'id': '158036v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -218,71 +226,13 @@ class VikiIE(VikiBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._call_api(
|
resp = self._download_json(
|
||||||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
|
'https://www.viki.com/api/videos/' + video_id,
|
||||||
|
video_id, 'Downloading video JSON', headers={
|
||||||
streams = self._call_api(
|
'x-client-user-agent': std_headers['User-Agent'],
|
||||||
'videos/%s/streams.json' % video_id, video_id,
|
'x-viki-app-ver': '4.0.57',
|
||||||
'Downloading video streams JSON')
|
})
|
||||||
|
video = resp['video']
|
||||||
formats = []
|
|
||||||
for format_id, stream_dict in streams.items():
|
|
||||||
height = int_or_none(self._search_regex(
|
|
||||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
|
||||||
for protocol, format_dict in stream_dict.items():
|
|
||||||
# rtmps URLs does not seem to work
|
|
||||||
if protocol == 'rtmps':
|
|
||||||
continue
|
|
||||||
format_url = format_dict.get('url')
|
|
||||||
format_drms = format_dict.get('drms')
|
|
||||||
format_stream_id = format_dict.get('id')
|
|
||||||
if format_id == 'm3u8':
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
|
||||||
format_url, video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
|
||||||
# Despite CODECS metadata in m3u8 all video-only formats
|
|
||||||
# are actually video+audio
|
|
||||||
for f in m3u8_formats:
|
|
||||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
|
||||||
f['acodec'] = None
|
|
||||||
formats.extend(m3u8_formats)
|
|
||||||
elif format_id == 'mpd':
|
|
||||||
mpd_formats = self._extract_mpd_formats(
|
|
||||||
format_url, video_id,
|
|
||||||
mpd_id='mpd-%s' % protocol, fatal=False)
|
|
||||||
formats.extend(mpd_formats)
|
|
||||||
elif format_id == 'mpd':
|
|
||||||
|
|
||||||
formats.extend(mpd_formats)
|
|
||||||
elif format_url.startswith('rtmp'):
|
|
||||||
mobj = re.search(
|
|
||||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
|
||||||
format_url)
|
|
||||||
if not mobj:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'rtmp-%s' % format_id,
|
|
||||||
'ext': 'flv',
|
|
||||||
'url': mobj.group('url'),
|
|
||||||
'play_path': mobj.group('playpath'),
|
|
||||||
'app': mobj.group('app'),
|
|
||||||
'page_url': url,
|
|
||||||
'drms': format_drms,
|
|
||||||
'stream_id': format_stream_id,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
urlh = self._request_webpage(
|
|
||||||
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': '%s-%s' % (format_id, protocol),
|
|
||||||
'height': height,
|
|
||||||
'drms': format_drms,
|
|
||||||
'stream_id': format_stream_id,
|
|
||||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
self._check_errors(video)
|
self._check_errors(video)
|
||||||
|
|
||||||
|
@ -308,19 +258,26 @@ class VikiIE(VikiBaseIE):
|
||||||
'url': thumbnail.get('url'),
|
'url': thumbnail.get('url'),
|
||||||
})
|
})
|
||||||
|
|
||||||
stream_ids = []
|
|
||||||
for f in formats:
|
|
||||||
s_id = f.get('stream_id')
|
|
||||||
if s_id is not None:
|
|
||||||
stream_ids.append(s_id)
|
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
|
try:
|
||||||
subtitles[subtitle_lang] = [{
|
# New way to fetch subtitles
|
||||||
'ext': subtitles_format,
|
new_video = self._download_json(
|
||||||
'url': self._prepare_call(
|
'https://www.viki.com/api/videos/%s' % video_id, video_id,
|
||||||
'videos/%s/subtitles/%s.%s?stream_id=%s' % (video_id, subtitle_lang, subtitles_format, stream_ids[0])),
|
'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
|
||||||
} for subtitles_format in ('srt', 'vtt')]
|
for sub in new_video.get('streamSubtitles').get('dash'):
|
||||||
|
subtitles[sub.get('srclang')] = [{
|
||||||
|
'ext': 'vtt',
|
||||||
|
'url': sub.get('src'),
|
||||||
|
'completion': sub.get('percentage'),
|
||||||
|
}]
|
||||||
|
except AttributeError:
|
||||||
|
# fall-back to the old way if there isn't a streamSubtitles attribute
|
||||||
|
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
|
||||||
|
subtitles[subtitle_lang] = [{
|
||||||
|
'ext': subtitles_format,
|
||||||
|
'url': self._prepare_call(
|
||||||
|
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
|
||||||
|
} for subtitles_format in ('srt', 'vtt')]
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -335,12 +292,84 @@ class VikiIE(VikiBaseIE):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
if 'external' in streams:
|
formats = []
|
||||||
result.update({
|
|
||||||
'_type': 'url_transparent',
|
def add_format(format_id, format_dict, protocol='http'):
|
||||||
'url': streams['external']['url'],
|
# rtmps URLs does not seem to work
|
||||||
})
|
if protocol == 'rtmps':
|
||||||
return result
|
return
|
||||||
|
format_url = format_dict.get('url')
|
||||||
|
if not format_url:
|
||||||
|
return
|
||||||
|
format_drms = format_dict.get('drms')
|
||||||
|
format_stream_id = format_dict.get('id')
|
||||||
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
|
||||||
|
stream = qs.get('stream', [None])[0]
|
||||||
|
if stream:
|
||||||
|
format_url = base64.b64decode(stream).decode()
|
||||||
|
if format_id in ('m3u8', 'hls'):
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||||
|
# Despite CODECS metadata in m3u8 all video-only formats
|
||||||
|
# are actually video+audio
|
||||||
|
for f in m3u8_formats:
|
||||||
|
if '_drm/index_' in f['url']:
|
||||||
|
continue
|
||||||
|
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||||
|
f['acodec'] = None
|
||||||
|
formats.append(f)
|
||||||
|
elif format_id in ('mpd', 'dash'):
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
|
||||||
|
elif format_url.startswith('rtmp'):
|
||||||
|
mobj = re.search(
|
||||||
|
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||||
|
format_url)
|
||||||
|
if not mobj:
|
||||||
|
return
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'rtmp-%s' % format_id,
|
||||||
|
'ext': 'flv',
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
'play_path': mobj.group('playpath'),
|
||||||
|
'app': mobj.group('app'),
|
||||||
|
'page_url': url,
|
||||||
|
'drms': format_drms,
|
||||||
|
'stream_id': format_stream_id,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
urlh = self._request_webpage(
|
||||||
|
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%s-%s' % (format_id, protocol),
|
||||||
|
'height': int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)[pP]$', format_id, 'height', default=None)),
|
||||||
|
'drms': format_drms,
|
||||||
|
'stream_id': format_stream_id,
|
||||||
|
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||||
|
})
|
||||||
|
|
||||||
|
for format_id, format_dict in (resp.get('streams') or {}).items():
|
||||||
|
add_format(format_id, format_dict)
|
||||||
|
if not formats:
|
||||||
|
streams = self._call_api(
|
||||||
|
'videos/%s/streams.json' % video_id, video_id,
|
||||||
|
'Downloading video streams JSON')
|
||||||
|
|
||||||
|
if 'external' in streams:
|
||||||
|
result.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': streams['external']['url'],
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
|
||||||
|
for format_id, stream_dict in streams.items():
|
||||||
|
for protocol, format_dict in stream_dict.items():
|
||||||
|
add_format(format_id, format_dict, protocol)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
result['formats'] = formats
|
result['formats'] = formats
|
||||||
return result
|
return result
|
||||||
|
|
|
@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 100
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
def _fetch_page(self, album_id, authorization, hashed_pass, page):
|
||||||
api_page = page + 1
|
api_page = page + 1
|
||||||
query = {
|
query = {
|
||||||
'fields': 'link,uri',
|
'fields': 'link,uri',
|
||||||
|
@ -934,7 +934,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||||
videos = self._download_json(
|
videos = self._download_json(
|
||||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||||
'Authorization': 'jwt ' + authorizaion,
|
'Authorization': 'jwt ' + authorization,
|
||||||
})['data']
|
})['data']
|
||||||
for video in videos:
|
for video in videos:
|
||||||
link = video.get('link')
|
link = video.get('link')
|
||||||
|
@ -946,10 +946,13 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
album_id = self._match_id(url)
|
album_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, album_id)
|
viewer = self._download_json(
|
||||||
viewer = self._parse_json(self._search_regex(
|
'https://vimeo.com/_rv/viewer', album_id, fatal=False)
|
||||||
r'bootstrap_data\s*=\s*({.+?})</script>',
|
if not viewer:
|
||||||
webpage, 'bootstrap data'), album_id)['viewer']
|
webpage = self._download_webpage(url, album_id)
|
||||||
|
viewer = self._parse_json(self._search_regex(
|
||||||
|
r'bootstrap_data\s*=\s*({.+?})</script>',
|
||||||
|
webpage, 'bootstrap data'), album_id)['viewer']
|
||||||
jwt = viewer['jwt']
|
jwt = viewer['jwt']
|
||||||
album = self._download_json(
|
album = self._download_json(
|
||||||
'https://api.vimeo.com/albums/' + album_id,
|
'https://api.vimeo.com/albums/' + album_id,
|
||||||
|
|
|
@ -1,25 +1,32 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .naver import NaverBaseIE
|
from .naver import NaverBaseIE
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
remove_start,
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class VLiveIE(NaverBaseIE):
|
class VLiveBaseIE(NaverBaseIE):
|
||||||
|
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
|
||||||
|
|
||||||
|
|
||||||
|
class VLiveIE(VLiveBaseIE):
|
||||||
IE_NAME = 'vlive'
|
IE_NAME = 'vlive'
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
|
||||||
_NETRC_MACHINE = 'vlive'
|
_NETRC_MACHINE = 'vlive'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vlive.tv/video/1326',
|
'url': 'http://www.vlive.tv/video/1326',
|
||||||
|
@ -27,7 +34,7 @@ class VLiveIE(NaverBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1326',
|
'id': '1326',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "[V LIVE] Girl's Day's Broadcast",
|
'title': "Girl's Day's Broadcast",
|
||||||
'creator': "Girl's Day",
|
'creator': "Girl's Day",
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'uploader_id': 'muploader_a',
|
'uploader_id': 'muploader_a',
|
||||||
|
@ -37,7 +44,7 @@ class VLiveIE(NaverBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '16937',
|
'id': '16937',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '[V LIVE] 첸백시 걍방',
|
'title': '첸백시 걍방',
|
||||||
'creator': 'EXO',
|
'creator': 'EXO',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'subtitles': 'mincount:12',
|
'subtitles': 'mincount:12',
|
||||||
|
@ -58,12 +65,15 @@ class VLiveIE(NaverBaseIE):
|
||||||
'subtitles': 'mincount:10',
|
'subtitles': 'mincount:10',
|
||||||
},
|
},
|
||||||
'skip': 'This video is only available for CH+ subscribers',
|
'skip': 'This video is only available for CH+ subscribers',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.vlive.tv/embed/1326',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# works only with gcc=KR
|
||||||
|
'url': 'https://www.vlive.tv/video/225019',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
|
@ -95,173 +105,199 @@ class VLiveIE(NaverBaseIE):
|
||||||
if not is_logged_in():
|
if not is_logged_in():
|
||||||
raise ExtractorError('Unable to log in', expected=True)
|
raise ExtractorError('Unable to log in', expected=True)
|
||||||
|
|
||||||
|
def _call_api(self, path_template, video_id, fields=None):
|
||||||
|
query = {'appId': self._APP_ID, 'gcc': 'KR'}
|
||||||
|
if fields:
|
||||||
|
query['fields'] = fields
|
||||||
|
try:
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
|
||||||
|
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
|
||||||
|
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
|
||||||
|
raise
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
post = self._call_api(
|
||||||
'https://www.vlive.tv/video/%s' % video_id, video_id)
|
'post/v1.0/officialVideoPost-%s', video_id,
|
||||||
|
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
|
||||||
|
|
||||||
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
|
video = post['officialVideo']
|
||||||
VIDEO_PARAMS_FIELD = 'video params'
|
|
||||||
|
|
||||||
params = self._parse_json(self._search_regex(
|
def get_common_fields():
|
||||||
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
|
channel = post.get('channel') or {}
|
||||||
transform_source=lambda s: '[' + s + ']', fatal=False)
|
return {
|
||||||
|
'title': video.get('title'),
|
||||||
|
'creator': post.get('author', {}).get('nickname'),
|
||||||
|
'channel': channel.get('channelName'),
|
||||||
|
'channel_id': channel.get('channelCode'),
|
||||||
|
'duration': int_or_none(video.get('playTime')),
|
||||||
|
'view_count': int_or_none(video.get('playCount')),
|
||||||
|
'like_count': int_or_none(video.get('likeCount')),
|
||||||
|
'comment_count': int_or_none(video.get('commentCount')),
|
||||||
|
}
|
||||||
|
|
||||||
if not params or len(params) < 7:
|
video_type = video.get('type')
|
||||||
params = self._search_regex(
|
if video_type == 'VOD':
|
||||||
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
|
inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
|
||||||
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
|
vod_id = video['vodId']
|
||||||
|
return merge_dicts(
|
||||||
status, long_video_id, key = params[2], params[5], params[6]
|
get_common_fields(),
|
||||||
status = remove_start(status, 'PRODUCT_')
|
self._extract_video_info(video_id, vod_id, inkey))
|
||||||
|
elif video_type == 'LIVE':
|
||||||
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
|
status = video.get('status')
|
||||||
return self._live(video_id, webpage)
|
if status == 'ON_AIR':
|
||||||
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
|
stream_url = self._call_api(
|
||||||
return self._replay(video_id, webpage, long_video_id, key)
|
'old/v3/live/%s/playInfo',
|
||||||
|
video_id)['result']['adaptiveStreamUrl']
|
||||||
if status == 'LIVE_END':
|
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
|
||||||
raise ExtractorError('Uploading for replay. Please wait...',
|
info = get_common_fields()
|
||||||
expected=True)
|
info.update({
|
||||||
elif status == 'COMING_SOON':
|
'title': self._live_title(video['title']),
|
||||||
raise ExtractorError('Coming soon!', expected=True)
|
'id': video_id,
|
||||||
elif status == 'CANCELED':
|
'formats': formats,
|
||||||
raise ExtractorError('We are sorry, '
|
'is_live': True,
|
||||||
'but the live broadcast has been canceled.',
|
})
|
||||||
expected=True)
|
return info
|
||||||
elif status == 'ONLY_APP':
|
elif status == 'ENDED':
|
||||||
raise ExtractorError('Unsupported video type', expected=True)
|
raise ExtractorError(
|
||||||
else:
|
'Uploading for replay. Please wait...', expected=True)
|
||||||
raise ExtractorError('Unknown status %s' % status)
|
elif status == 'RESERVED':
|
||||||
|
raise ExtractorError('Coming soon!', expected=True)
|
||||||
def _get_common_fields(self, webpage):
|
elif video.get('exposeStatus') == 'CANCEL':
|
||||||
title = self._og_search_title(webpage)
|
raise ExtractorError(
|
||||||
creator = self._html_search_regex(
|
'We are sorry, but the live broadcast has been canceled.',
|
||||||
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
|
expected=True)
|
||||||
webpage, 'creator', fatal=False)
|
else:
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
raise ExtractorError('Unknown status ' + status)
|
||||||
return {
|
|
||||||
'title': title,
|
|
||||||
'creator': creator,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _live(self, video_id, webpage):
|
|
||||||
init_page = self._download_init_page(video_id)
|
|
||||||
|
|
||||||
live_params = self._search_regex(
|
|
||||||
r'"liveStreamInfo"\s*:\s*(".*"),',
|
|
||||||
init_page, 'live stream info')
|
|
||||||
live_params = self._parse_json(live_params, video_id)
|
|
||||||
live_params = self._parse_json(live_params, video_id)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for vid in live_params.get('resolutions', []):
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
vid['cdnUrl'], video_id, 'mp4',
|
|
||||||
m3u8_id=vid.get('name'),
|
|
||||||
fatal=False, live=True))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = self._get_common_fields(webpage)
|
|
||||||
info.update({
|
|
||||||
'title': self._live_title(info['title']),
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'is_live': True,
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
||||||
def _replay(self, video_id, webpage, long_video_id, key):
|
|
||||||
if '' in (long_video_id, key):
|
|
||||||
init_page = self._download_init_page(video_id)
|
|
||||||
video_info = self._parse_json(self._search_regex(
|
|
||||||
(r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
|
|
||||||
r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
|
|
||||||
video_id)
|
|
||||||
if video_info.get('status') == 'NEED_CHANNEL_PLUS':
|
|
||||||
self.raise_login_required(
|
|
||||||
'This video is only available for CH+ subscribers')
|
|
||||||
long_video_id, key = video_info['vid'], video_info['inkey']
|
|
||||||
|
|
||||||
return merge_dicts(
|
|
||||||
self._get_common_fields(webpage),
|
|
||||||
self._extract_video_info(video_id, long_video_id, key))
|
|
||||||
|
|
||||||
def _download_init_page(self, video_id):
|
|
||||||
return self._download_webpage(
|
|
||||||
'https://www.vlive.tv/video/init/view',
|
|
||||||
video_id, note='Downloading live webpage',
|
|
||||||
data=urlencode_postdata({'videoSeq': video_id}),
|
|
||||||
headers={
|
|
||||||
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded'
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class VLiveChannelIE(InfoExtractor):
|
class VLivePostIE(VLiveIE):
    # Extractor for vlive.tv posts. A post either points at an official
    # video (delegated to VLiveIE) or carries attached videos, which are
    # returned as a playlist.
    IE_NAME = 'vlive:post'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
    _TESTS = [{
        # uploadType = SOS
        'url': 'https://www.vlive.tv/post/1-20088044',
        'info_dict': {
            'id': '1-20088044',
            'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
            'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
        },
        'playlist_count': 3,
    }, {
        # uploadType = V
        'url': 'https://www.vlive.tv/post/1-20087926',
        'info_dict': {
            'id': '1-20087926',
            'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
        },
        'playlist_count': 1,
    }]
    # API path templates; the remaining %s is filled with the attachment's video id
    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'

    def _build_attachment_entry(self, video_id, attachment):
        """Return the info dict for one attached video, or None if unsupported."""
        kind = attachment.get('uploadType')
        info = attachment.get('uploadInfo') or {}

        if kind == 'SOS':
            # Direct downloads: mapping of format id -> URL
            download = self._call_api(
                self._SOS_TMPL, video_id)['videoUrl']['download']
            formats = [{
                'format_id': f_id,
                'url': f_url,
                # the format id minus its last character is parsed as the height
                'height': int_or_none(f_id[:-1]),
            } for f_id, f_url in download.items()]
            self._sort_formats(formats)
            return {
                'formats': formats,
                'id': video_id,
                'thumbnail': info.get('imageUrl'),
            }

        if kind == 'V':
            vod_id = info.get('videoId')
            if not vod_id:
                return None
            inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
            return self._extract_video_info(video_id, vod_id, inkey)

        return None

    def _real_extract(self, url):
        post_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/post-%s', post_id,
            'attachments{video},officialVideo{videoSeq},plainBody,title')

        # A post that wraps an official video is handed over to VLiveIE
        official_seq = str_or_none(try_get(
            post, lambda x: x['officialVideo']['videoSeq']))
        if official_seq:
            return self.url_result(
                'http://www.vlive.tv/video/' + official_seq,
                VLiveIE.ie_key(), official_seq)

        title = post['title']
        entries = []
        attachments = post['attachments']['video'].values()
        for position, attachment in enumerate(attachments):
            attachment_id = attachment.get('videoId')
            if not attachment_id:
                continue
            entry = self._build_attachment_entry(attachment_id, attachment)
            if not entry:
                continue
            # The part number is the attachment's position, skipped ones included
            entry['title'] = '%s_part%s' % (title, position)
            entries.append(entry)

        description = strip_or_none(post.get('plainBody'))
        return self.playlist_result(entries, post_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class VLiveChannelIE(VLiveBaseIE):
    # Extractor for vlive.tv channels: walks the paginated channel video
    # list and returns every video (including videos nested in playlist
    # items) as url results handled by VLiveIE.
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
    _TESTS = [{
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'only_matching': True,
    }]

    def _call_api(self, path, channel_key_suffix, channel_value, note, query):
        """Query a channelplus API endpoint and return its ``result`` object.

        ``channel_key_suffix`` selects the channel parameter name
        (``channel`` + suffix, e.g. ``channelCode`` or ``channelSeq``) and
        ``channel_value`` is its value. Entries of ``query`` are merged on
        top of the base parameters.
        """
        q = {
            'app_id': self._APP_ID,
            'channel' + channel_key_suffix: channel_value,
        }
        q.update(query)
        return self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
            channel_value, note='Downloading ' + note, query=q)['result']

    def _real_extract(self, url):
        channel_code = self._match_id(url)

        channel_seq = self._call_api(
            'decodeChannelCode', 'Code', channel_code,
            'decode channel code', {})['channelSeq']

        channel_name = None
        entries = []

        for page_num in itertools.count(1):
            video_list = self._call_api(
                'getChannelVideoList', 'Seq', channel_seq,
                'channel list page #%d' % page_num, {
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    'pageNo': page_num
                }
            )

            if not channel_name:
                channel_name = try_get(
                    video_list,
                    lambda x: x['channelInfo']['channelName'],
                    compat_str)

            videos = try_get(
                video_list, lambda x: x['videoList'], list)
            # An empty or missing page ends the pagination
            if not videos:
                break

            for video in videos:
                video_id = video.get('videoSeq')
                video_type = video.get('videoType')

                if not video_id or not video_type:
                    continue

                # Compare for equality: ``in ('PLAYLIST')`` is a substring
                # test against a plain string, not a tuple membership test.
                if video_type == 'PLAYLIST':
                    playlist_videos = try_get(
                        video,
                        lambda x: x['videoPlaylist']['videoList'], list)
                    if not playlist_videos:
                        continue
                    video_seqs = [
                        playlist_video.get('videoSeq')
                        for playlist_video in playlist_videos]
                else:
                    video_seqs = [video_id]

                for video_seq in video_seqs:
                    if not video_seq:
                        continue
                    video_seq = compat_str(video_seq)
                    entries.append(
                        self.url_result(
                            'http://www.vlive.tv/video/%s' % video_seq,
                            ie=VLiveIE.ie_key(), video_id=video_seq))

        return self.playlist_result(
            entries, channel_code, channel_name)
|
||||||
|
|
||||||
|
|
||||||
class VLivePlaylistIE(InfoExtractor):
    # Extractor for vlive.tv playlist URLs of the form
    # /video/<video_id>/playlist/<playlist_id>. Falls back to the single
    # video when --no-playlist is given or the page lists no playlist items.
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print *message* and return a url result for the single video."""
        self.to_screen(message)
        single_video_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_video_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        video_id = match.group('video_id')
        playlist_id = match.group('id')

        if self._downloader.params.get('noplaylist'):
            notice = (
                'Downloading just video %s because of --no-playlist'
                % video_id)
            return self._build_video_result(video_id, notice)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_url = (
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id))
        webpage = self._download_webpage(playlist_url, playlist_id)

        # JSON array literal of video sequence numbers embedded in the page
        raw_item_ids = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)

        if not raw_item_ids:
            notice = (
                'Downloading just video %s because no playlist was found'
                % video_id)
            return self._build_video_result(video_id, notice)

        entries = []
        for item_id in self._parse_json(raw_item_ids, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)
|
|
||||||
|
|
|
@ -54,17 +54,17 @@ class XiamiBaseIE(InfoExtractor):
|
||||||
def _decrypt(origin):
|
def _decrypt(origin):
|
||||||
n = int(origin[0])
|
n = int(origin[0])
|
||||||
origin = origin[1:]
|
origin = origin[1:]
|
||||||
short_lenth = len(origin) // n
|
short_length = len(origin) // n
|
||||||
long_num = len(origin) - short_lenth * n
|
long_num = len(origin) - short_length * n
|
||||||
l = tuple()
|
l = tuple()
|
||||||
for i in range(0, n):
|
for i in range(0, n):
|
||||||
length = short_lenth
|
length = short_length
|
||||||
if i < long_num:
|
if i < long_num:
|
||||||
length += 1
|
length += 1
|
||||||
l += (origin[0:length], )
|
l += (origin[0:length], )
|
||||||
origin = origin[length:]
|
origin = origin[length:]
|
||||||
ans = ''
|
ans = ''
|
||||||
for i in range(0, short_lenth + 1):
|
for i in range(0, short_length + 1):
|
||||||
for j in range(0, n):
|
for j in range(0, n):
|
||||||
if len(l[j]) > i:
|
if len(l[j]) > i:
|
||||||
ans += l[j][i]
|
ans += l[j][i]
|
||||||
|
|
|
@ -5,7 +5,6 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -34,7 +33,7 @@ class XTubeIE(InfoExtractor):
|
||||||
'title': 'strange erotica',
|
'title': 'strange erotica',
|
||||||
'description': 'contains:an ET kind of thing',
|
'description': 'contains:an ET kind of thing',
|
||||||
'uploader': 'greenshowers',
|
'uploader': 'greenshowers',
|
||||||
'duration': 449,
|
'duration': 450,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -74,24 +73,16 @@ class XTubeIE(InfoExtractor):
|
||||||
|
|
||||||
title, thumbnail, duration = [None] * 3
|
title, thumbnail, duration = [None] * 3
|
||||||
|
|
||||||
json_config_string = self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'playerConf=({.+?}),loaderConf',
|
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
|
||||||
webpage, 'config', default=None)
|
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
||||||
if not json_config_string:
|
if config:
|
||||||
raise ExtractorError("Could not extract video player data")
|
config = config.get('mainRoll')
|
||||||
|
if isinstance(config, dict):
|
||||||
json_config_string = json_config_string.replace("!0", "true").replace("!1", "false")
|
title = config.get('title')
|
||||||
|
thumbnail = config.get('poster')
|
||||||
config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False)
|
duration = int_or_none(config.get('duration'))
|
||||||
if not config:
|
sources = config.get('sources') or config.get('format')
|
||||||
raise ExtractorError("Could not extract video player data")
|
|
||||||
|
|
||||||
config = config.get('mainRoll')
|
|
||||||
if isinstance(config, dict):
|
|
||||||
title = config.get('title')
|
|
||||||
thumbnail = config.get('poster')
|
|
||||||
duration = int_or_none(config.get('duration'))
|
|
||||||
sources = config.get('sources') or config.get('format')
|
|
||||||
|
|
||||||
if not isinstance(sources, dict):
|
if not isinstance(sources, dict):
|
||||||
sources = self._parse_json(self._search_regex(
|
sources = self._parse_json(self._search_regex(
|
||||||
|
|
|
@ -29,7 +29,6 @@ class YouPornIE(InfoExtractor):
|
||||||
'upload_date': '20101217',
|
'upload_date': '20101217',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -48,7 +47,6 @@ class YouPornIE(InfoExtractor):
|
||||||
'upload_date': '20110418',
|
'upload_date': '20110418',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -156,7 +154,8 @@ class YouPornIE(InfoExtractor):
|
||||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
[r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
[r'UPLOADED:\s*<span>([^<]+)',
|
||||||
|
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
||||||
webpage, 'upload date', fatal=False))
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
|
@ -171,7 +170,7 @@ class YouPornIE(InfoExtractor):
|
||||||
webpage, 'view count', fatal=False, group='count'))
|
webpage, 'view count', fatal=False, group='count'))
|
||||||
comment_count = str_to_int(self._search_regex(
|
comment_count = str_to_int(self._search_regex(
|
||||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||||
webpage, 'comment count', fatal=False))
|
webpage, 'comment count', default=None))
|
||||||
|
|
||||||
def extract_tag_box(regex, title):
|
def extract_tag_box(regex, title):
|
||||||
tag_box = self._search_regex(regex, webpage, title, default=None)
|
tag_box = self._search_regex(regex, webpage, title, default=None)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,82 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
url_or_none,
|
||||||
|
parse_filesize,
|
||||||
|
urlencode_postdata
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ZoomIE(InfoExtractor):
    # Extractor for Zoom recordings served under /rec/... or /recording/...
    # (play and share links). Password-protected recordings are unlocked
    # with the value of --video-password.
    IE_NAME = 'zoom'
    # Dots are escaped and the optional subdomain is confined to the host
    # part, so only zoom.us hosts match.
    _VALID_URL = r'https://(?:[^/]+\.)?zoom\.us/rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9\-_.]+)'

    _TEST = {
        'url': 'https://zoom.us/recording/play/SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK',
        # File checksum belongs to the test case, not to info_dict
        'md5': '031a5b379f1547a8b29c5c4c837dccf2',
        'info_dict': {
            'title': "GAZ Transformational Tuesdays W/ Landon & Stapes",
            'id': "SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK",
            'ext': "mp4"
        }
    }

    @staticmethod
    def _site_root(url):
        """Return the scheme-and-host prefix of *url*, ending in ``zoom.us/``."""
        return url.split("zoom.us")[0] + "zoom.us/"

    def _real_extract(self, url):
        """Extract the single mp4 format of the recording at *url*.

        Raises ExtractorError when the page is password-protected and no
        (valid) password was supplied, or when no usable video URL is found.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        password_form = self._search_regex(
            r'<form[^>]+?id="(password_form)"', webpage, 'password field',
            default=None)
        if password_form is not None:
            self._verify_video_password(url, display_id, webpage)
            # Fetch the page again once the password has been validated
            webpage = self._download_webpage(url, display_id)

        video_url = url_or_none(self._search_regex(
            r"viewMp4Url: \'(.*)\'", webpage, 'video url'))
        if not video_url:
            # Fail here instead of passing a None url on to the downloader
            raise ExtractorError('Unable to extract a valid video URL')

        title = self._html_search_regex(
            [r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"],
            webpage, 'title')
        width = self._search_regex(
            r"viewResolvtionsWidth: (\d*)", webpage, 'res width', fatal=False)
        height = self._search_regex(
            r"viewResolvtionsHeight: (\d*)", webpage, 'res height', fatal=False)
        filesize = parse_filesize(self._search_regex(
            r"fileSize: \'(.+)\'", webpage, 'fileSize', fatal=False))

        formats = [{
            'url': video_url,
            'width': int_or_none(width),
            'height': int_or_none(height),
            'http_headers': {
                'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
                'Referer': self._site_root(url),
            },
            'ext': "mp4",
            'filesize_approx': int_or_none(filesize)
        }]
        self._sort_formats(formats)

        return {
            'id': display_id,
            'title': title,
            'formats': formats
        }

    def _verify_video_password(self, url, video_id, webpage):
        """Submit the --video-password value for the recording on *webpage*.

        Raises ExtractorError (marked as expected) when no password was
        given or when the server does not report ``errorCode`` 0.
        """
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option',
                expected=True)

        meet_id = self._search_regex(
            r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
        data = urlencode_postdata({
            'id': meet_id,
            'passwd': password,
            'action': "viewdetailedpage",
            'recaptcha': ""
        })
        validation_url = self._site_root(url) + "rec/validate_meet_passwd"
        validation_response = self._download_json(
            validation_url, video_id,
            note='Validating Password...',
            errnote='Wrong password?',
            data=data)

        # Anything other than an explicit errorCode of 0 counts as a failure;
        # a rejected password is a user error, hence expected=True.
        if validation_response.get('errorCode') != 0:
            raise ExtractorError(
                'Login failed, %s said: %r' % (
                    self.IE_NAME, validation_response.get('errorMessage')),
                expected=True)
|
|
@ -344,6 +344,10 @@ def parseOpts(overrideArguments=None):
|
||||||
'--download-archive', metavar='FILE',
|
'--download-archive', metavar='FILE',
|
||||||
dest='download_archive',
|
dest='download_archive',
|
||||||
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
|
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
|
||||||
|
selection.add_option(
|
||||||
|
'--break-on-existing',
|
||||||
|
action='store_true', dest='break_on_existing', default=False,
|
||||||
|
help="Stop the download process after attempting to download a file that's in the archive.")
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--include-ads',
|
'--include-ads',
|
||||||
dest='include_ads', action='store_true',
|
dest='include_ads', action='store_true',
|
||||||
|
@ -582,7 +586,7 @@ def parseOpts(overrideArguments=None):
|
||||||
'along with --min-sleep-interval.'))
|
'along with --min-sleep-interval.'))
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--sleep-subtitles',
|
'--sleep-subtitles',
|
||||||
dest='sleep_interval_subtitles', action='store_true', default=0,
|
dest='sleep_interval_subtitles', default=0, type=int,
|
||||||
help='Enforce sleep interval on subtitles as well')
|
help='Enforce sleep interval on subtitles as well')
|
||||||
|
|
||||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||||
|
|
|
@ -89,12 +89,15 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||||
|
|
||||||
elif info['ext'] == 'mkv':
|
elif info['ext'] == 'mkv':
|
||||||
os.rename(encodeFilename(thumbnail_filename), encodeFilename('cover.jpg'))
|
|
||||||
old_thumbnail_filename = thumbnail_filename
|
old_thumbnail_filename = thumbnail_filename
|
||||||
thumbnail_filename = 'cover.jpg'
|
thumbnail_filename = os.path.join(os.path.dirname(old_thumbnail_filename), 'cover.jpg')
|
||||||
|
if os.path.exists(thumbnail_filename):
|
||||||
|
os.remove(encodeFilename(thumbnail_filename))
|
||||||
|
os.rename(encodeFilename(old_thumbnail_filename), encodeFilename(thumbnail_filename))
|
||||||
|
|
||||||
options = [
|
options = [
|
||||||
'-c', 'copy', '-attach', thumbnail_filename, '-metadata:s:t', 'mimetype=image/jpeg']
|
'-c', 'copy', '-map', '0',
|
||||||
|
'-attach', thumbnail_filename, '-metadata:s:t', 'mimetype=image/jpeg']
|
||||||
|
|
||||||
self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
|
self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
|
||||||
|
|
||||||
|
@ -140,6 +143,6 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||||
os.remove(encodeFilename(filename))
|
os.remove(encodeFilename(filename))
|
||||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||||
else:
|
else:
|
||||||
raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
|
raise EmbedThumbnailPPError('Only mp3, mkv, m4a and mp4 are supported for thumbnail embedding for now.')
|
||||||
|
|
||||||
return [], info
|
return [], info
|
||||||
|
|
|
@ -359,7 +359,7 @@ class FFmpegVideoRemuxerPP(FFmpegPostProcessor):
|
||||||
if information['ext'] == self._preferedformat:
|
if information['ext'] == self._preferedformat:
|
||||||
self._downloader.to_screen('[ffmpeg] Not remuxing video file %s - already is in target format %s' % (path, self._preferedformat))
|
self._downloader.to_screen('[ffmpeg] Not remuxing video file %s - already is in target format %s' % (path, self._preferedformat))
|
||||||
return [], information
|
return [], information
|
||||||
options = ['-c', 'copy']
|
options = ['-c', 'copy', '-map', '0']
|
||||||
prefix, sep, ext = path.rpartition('.')
|
prefix, sep, ext = path.rpartition('.')
|
||||||
outpath = prefix + sep + self._preferedformat
|
outpath = prefix + sep + self._preferedformat
|
||||||
self._downloader.to_screen('[' + 'ffmpeg' + '] Remuxing video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
|
self._downloader.to_screen('[' + 'ffmpeg' + '] Remuxing video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
|
||||||
|
@ -412,7 +412,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||||
|
|
||||||
for lang, sub_info in subtitles.items():
|
for lang, sub_info in subtitles.items():
|
||||||
sub_ext = sub_info['ext']
|
sub_ext = sub_info['ext']
|
||||||
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
|
if sub_ext == 'json':
|
||||||
|
self._downloader.to_screen('[ffmpeg] JSON subtitles cannot be embedded')
|
||||||
|
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
|
||||||
sub_langs.append(lang)
|
sub_langs.append(lang)
|
||||||
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
|
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
|
||||||
else:
|
else:
|
||||||
|
@ -426,8 +428,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||||
input_files = [filename] + sub_filenames
|
input_files = [filename] + sub_filenames
|
||||||
|
|
||||||
opts = [
|
opts = [
|
||||||
'-map', '0',
|
'-c', 'copy', '-map', '0',
|
||||||
'-c', 'copy',
|
|
||||||
# Don't copy the existing subtitles, we may be running the
|
# Don't copy the existing subtitles, we may be running the
|
||||||
# postprocessor a second time
|
# postprocessor a second time
|
||||||
'-map', '-0:s',
|
'-map', '-0:s',
|
||||||
|
@ -577,7 +578,7 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
|
||||||
filename = info['filepath']
|
filename = info['filepath']
|
||||||
temp_filename = prepend_extension(filename, 'temp')
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
|
|
||||||
options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
|
options = ['-c', 'copy', '-map', '0', '-aspect', '%f' % stretched_ratio]
|
||||||
self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
|
self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
|
||||||
self.run_ffmpeg(filename, temp_filename, options)
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
|
|
||||||
|
@ -595,7 +596,7 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
|
||||||
filename = info['filepath']
|
filename = info['filepath']
|
||||||
temp_filename = prepend_extension(filename, 'temp')
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
|
|
||||||
options = ['-c', 'copy', '-f', 'mp4']
|
options = ['-c', 'copy', '-map', '0', '-f', 'mp4']
|
||||||
self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
|
self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
|
||||||
self.run_ffmpeg(filename, temp_filename, options)
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
|
|
||||||
|
@ -611,7 +612,7 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor):
|
||||||
if self.get_audio_codec(filename) == 'aac':
|
if self.get_audio_codec(filename) == 'aac':
|
||||||
temp_filename = prepend_extension(filename, 'temp')
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
|
|
||||||
options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
options = ['-c', 'copy', '-map', '0', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||||
self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
|
self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
|
||||||
self.run_ffmpeg(filename, temp_filename, options)
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
|
|
||||||
|
@ -643,13 +644,18 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||||
self._downloader.to_screen(
|
self._downloader.to_screen(
|
||||||
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
|
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
|
||||||
continue
|
continue
|
||||||
|
elif ext == 'json':
|
||||||
|
self._downloader.to_screen(
|
||||||
|
'[ffmpeg] You have requested to convert json subtitles into another format, '
|
||||||
|
'which is currently not possible')
|
||||||
|
continue
|
||||||
old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
|
old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
|
||||||
sub_filenames.append(old_file)
|
sub_filenames.append(old_file)
|
||||||
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
|
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
|
||||||
|
|
||||||
if ext in ('dfxp', 'ttml', 'tt'):
|
if ext in ('dfxp', 'ttml', 'tt'):
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
'[ffmpeg] You have requested to convert dfxp (TTML) subtitles into another format, '
|
||||||
'which results in style information loss')
|
'which results in style information loss')
|
||||||
|
|
||||||
dfxp_file = old_file
|
dfxp_file = old_file
|
||||||
|
|
|
@ -7,6 +7,7 @@ import hashlib
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import platform
|
||||||
from zipimport import zipimporter
|
from zipimport import zipimporter
|
||||||
|
|
||||||
from .compat import compat_realpath
|
from .compat import compat_realpath
|
||||||
|
@ -15,18 +16,173 @@ from .utils import encode_compat_str
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
|
||||||
|
|
||||||
def rsa_verify(message, signature, key):
|
def update_binary(ydl):
|
||||||
from hashlib import sha256
|
LATEST_URL = 'https://api.github.com/repos/blackjack4494/yt-dlc/releases/latest'
|
||||||
assert isinstance(message, bytes)
|
|
||||||
byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8
|
def sha256sum(path):
|
||||||
signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode()
|
h = hashlib.sha256()
|
||||||
signature = (byte_size * 2 - len(signature)) * b'0' + signature
|
b = bytearray(128 * 1024)
|
||||||
asn1 = b'3031300d060960864801650304020105000420'
|
mv = memoryview(b)
|
||||||
asn1 += sha256(message).hexdigest().encode()
|
with open(os.path.realpath(path), 'rb', buffering=0) as f:
|
||||||
if byte_size < len(asn1) // 2 + 11:
|
for n in iter(lambda: f.readinto(mv), 0):
|
||||||
return False
|
h.update(mv[:n])
|
||||||
expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1
|
return h.hexdigest()
|
||||||
return expected == signature
|
|
||||||
|
if isinstance(globals().get('__loader__'), zipimporter):
|
||||||
|
pass
|
||||||
|
elif hasattr(sys, 'frozen'):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
return ydl.to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update')
|
||||||
|
|
||||||
|
filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0])
|
||||||
|
build_hash = sha256sum(filename)
|
||||||
|
ydl.to_screen('Current Build Hash %s' % build_hash)
|
||||||
|
|
||||||
|
# Download and check versions info
|
||||||
|
try:
|
||||||
|
latest = ydl._opener.open(LATEST_URL).read().decode('utf-8')
|
||||||
|
latest_json = json.loads(latest)
|
||||||
|
except Exception:
|
||||||
|
if ydl.verbose:
|
||||||
|
ydl.to_screen(encode_compat_str(traceback.format_exc()))
|
||||||
|
ydl.to_screen('ERROR: can\'t obtain versions info. Please try again later.')
|
||||||
|
return ydl.to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
|
||||||
|
|
||||||
|
latest_version = latest_json['tag_name']
|
||||||
|
|
||||||
|
def download_sha256sums():
|
||||||
|
download_url = ''
|
||||||
|
for _ in latest_json['assets']:
|
||||||
|
if 'SHA2-256SUMS' in _['name']:
|
||||||
|
download_url = _['browser_download_url']
|
||||||
|
break
|
||||||
|
if download_url:
|
||||||
|
return ydl._opener.open(download_url).read().decode('utf-8')
|
||||||
|
else:
|
||||||
|
ydl.to_screen('ERROR: can\'t obtain SHA256SUMS. Please try again later.')
|
||||||
|
return None
|
||||||
|
|
||||||
|
sha256sums = download_sha256sums()
|
||||||
|
sha256sums_dict = dict(_.split(':') in sha256sums for _ in sha256sums.split('\n'))
|
||||||
|
sha256sums_version = sha256sums_dict.get('version')
|
||||||
|
if sha256sums_version:
|
||||||
|
if build_hash in sha256sums_dict.values():
|
||||||
|
ydl.to_screen('SHA256 checksum successfully validated.')
|
||||||
|
if __version__.split('.') >= sha256sums_version.split('.') and __version__.split('.') >= latest_version.split('.'):
|
||||||
|
ydl.to_screen('youtube-dlc is up to date (%s)' % __version__)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
ydl.to_screen('Something is wrong here. Trying to update anyway.')
|
||||||
|
else:
|
||||||
|
if build_hash in sha256sums_dict.values():
|
||||||
|
ydl.to_screen('SHA256 checksum successfully validated.')
|
||||||
|
if __version__.split('.') >= latest_version.split('.'):
|
||||||
|
ydl.to_screen('youtube-dlc is up to date (%s)' % __version__)
|
||||||
|
return
|
||||||
|
|
||||||
|
if not latest_version == sha256sums_version:
|
||||||
|
ydl.to_screen('WARNING: available youtube-dlc versions differ. Trying to update anyway.')
|
||||||
|
|
||||||
|
ydl.to_screen('Updating to version ' + latest_version + ' ...')
|
||||||
|
|
||||||
|
version_labels = {
|
||||||
|
'zip_3': '',
|
||||||
|
'exe_64': '.exe',
|
||||||
|
'exe_32': '_x86.exe',
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_bin_info(bin_or_exe, version):
|
||||||
|
label = version_labels['%s_%s' % (bin_or_exe, version)]
|
||||||
|
return next((_ for _ in latest_json['assets'] if _['name'] == 'youtube-dlc%s' % label), {})
|
||||||
|
|
||||||
|
if not os.access(filename, os.W_OK):
|
||||||
|
return ydl.to_screen('no write permissions on %s' % filename)
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
if hasattr(sys, 'frozen'):
|
||||||
|
exe = filename
|
||||||
|
directory = os.path.dirname(exe)
|
||||||
|
if not os.access(directory, os.W_OK):
|
||||||
|
return ydl.to_screen('no write permissions on %s' % directory)
|
||||||
|
try:
|
||||||
|
if os.path.exists(filename + '.old'):
|
||||||
|
os.remove(filename + '.old')
|
||||||
|
except (IOError, OSError):
|
||||||
|
return ydl.to_screen('unable to remove the old version')
|
||||||
|
|
||||||
|
try:
|
||||||
|
arch = platform.architecture()[0][:2]
|
||||||
|
url = get_bin_info('exe', arch).get('browser_download_url')
|
||||||
|
if not url:
|
||||||
|
return ydl.to_screen('unable to fetch updates')
|
||||||
|
urlh = ydl._opener.open(url)
|
||||||
|
newcontent = urlh.read()
|
||||||
|
urlh.close()
|
||||||
|
except (IOError, OSError, StopIteration):
|
||||||
|
return ydl.to_screen('unable to download latest version')
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(exe + '.new', 'wb') as outf:
|
||||||
|
outf.write(newcontent)
|
||||||
|
except (IOError, OSError):
|
||||||
|
return ydl.to_screen('unable to write the new version')
|
||||||
|
|
||||||
|
expected_sum = sha256sums_dict.get('youtube-dlc%s' % version_labels['%s_%s' % ('exe', arch)])
|
||||||
|
if not expected_sum:
|
||||||
|
ydl.report_warning('no hash information found for the release')
|
||||||
|
elif sha256sum(exe + '.new') != expected_sum:
|
||||||
|
ydl.to_screen('unable to verify the new executable')
|
||||||
|
try:
|
||||||
|
os.remove(exe + '.new')
|
||||||
|
except OSError:
|
||||||
|
return ydl.to_screen('unable to remove corrupt download')
|
||||||
|
|
||||||
|
try:
|
||||||
|
os.rename(exe, exe + '.old')
|
||||||
|
except (IOError, OSError):
|
||||||
|
return ydl.to_screen('unable to move current version')
|
||||||
|
try:
|
||||||
|
os.rename(exe + '.new', exe)
|
||||||
|
except (IOError, OSError):
|
||||||
|
ydl.to_screen('unable to overwrite current version')
|
||||||
|
os.rename(exe + '.old', exe)
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
# Continues to run in the background
|
||||||
|
subprocess.Popen(
|
||||||
|
'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % exe,
|
||||||
|
shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||||
|
ydl.to_screen('Updated youtube-dlc to version %s' % latest_version)
|
||||||
|
return True # Exit app
|
||||||
|
except OSError:
|
||||||
|
ydl.to_screen('unable to delete old version')
|
||||||
|
|
||||||
|
# Zip unix package
|
||||||
|
elif isinstance(globals().get('__loader__'), zipimporter):
|
||||||
|
try:
|
||||||
|
url = get_bin_info('zip', '3').get('browser_download_url')
|
||||||
|
if not url:
|
||||||
|
return ydl.to_screen('unable to fetch updates')
|
||||||
|
urlh = ydl._opener.open(url)
|
||||||
|
newcontent = urlh.read()
|
||||||
|
urlh.close()
|
||||||
|
except (IOError, OSError, StopIteration):
|
||||||
|
return ydl.to_screen('unable to download latest version')
|
||||||
|
|
||||||
|
expected_sum = sha256sums_dict.get('youtube-dlc%s' % version_labels['%s_%s' % ('zip', '3')])
|
||||||
|
if not expected_sum:
|
||||||
|
ydl.report_warning('no hash information found for the release')
|
||||||
|
elif hashlib.sha256(newcontent).hexdigest() != expected_sum:
|
||||||
|
return ydl.to_screen('unable to verify the new zip')
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(filename, 'wb') as outf:
|
||||||
|
outf.write(newcontent)
|
||||||
|
except (IOError, OSError):
|
||||||
|
return ydl.to_screen('unable to overwrite current version')
|
||||||
|
|
||||||
|
ydl.to_screen('Updated youtube-dlc to version %s; Restart youtube-dlc to use the new version' % latest_version)
|
||||||
|
|
||||||
|
|
||||||
def update_self(to_screen, verbose, opener):
|
def update_self(to_screen, verbose, opener):
|
||||||
|
@ -35,7 +191,7 @@ def update_self(to_screen, verbose, opener):
|
||||||
UPDATE_URL = 'https://blackjack4494.github.io//update/'
|
UPDATE_URL = 'https://blackjack4494.github.io//update/'
|
||||||
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
|
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
|
||||||
JSON_URL = UPDATE_URL + 'versions.json'
|
JSON_URL = UPDATE_URL + 'versions.json'
|
||||||
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
|
# UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
|
||||||
|
|
||||||
def sha256sum():
|
def sha256sum():
|
||||||
h = hashlib.sha256()
|
h = hashlib.sha256()
|
||||||
|
@ -83,11 +239,8 @@ def update_self(to_screen, verbose, opener):
|
||||||
if 'signature' not in versions_info:
|
if 'signature' not in versions_info:
|
||||||
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
|
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
|
||||||
return
|
return
|
||||||
signature = versions_info['signature']
|
# signature = versions_info['signature']
|
||||||
del versions_info['signature']
|
del versions_info['signature']
|
||||||
if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
|
|
||||||
to_screen('ERROR: the versions file signature is invalid. Aborting.')
|
|
||||||
return
|
|
||||||
|
|
||||||
version_id = versions_info['latest']
|
version_id = versions_info['latest']
|
||||||
|
|
||||||
|
|
|
@ -2460,7 +2460,7 @@ class XAttrMetadataError(YoutubeDLError):
|
||||||
|
|
||||||
# Parsing code and msg
|
# Parsing code and msg
|
||||||
if (self.code in (errno.ENOSPC, errno.EDQUOT)
|
if (self.code in (errno.ENOSPC, errno.EDQUOT)
|
||||||
or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
|
or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
|
||||||
self.reason = 'NO_SPACE'
|
self.reason = 'NO_SPACE'
|
||||||
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
|
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
|
||||||
self.reason = 'VALUE_TOO_LONG'
|
self.reason = 'VALUE_TOO_LONG'
|
||||||
|
@ -4085,7 +4085,7 @@ def js_to_json(code):
|
||||||
v = m.group(0)
|
v = m.group(0)
|
||||||
if v in ('true', 'false', 'null'):
|
if v in ('true', 'false', 'null'):
|
||||||
return v
|
return v
|
||||||
elif v.startswith('/*') or v.startswith('//') or v == ',':
|
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
if v[0] in ("'", '"'):
|
if v[0] in ("'", '"'):
|
||||||
|
@ -4095,12 +4095,12 @@ def js_to_json(code):
|
||||||
'\\\n': '',
|
'\\\n': '',
|
||||||
'\\x': '\\u00',
|
'\\x': '\\u00',
|
||||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
}.get(m.group(0), m.group(0)), v[1:-1])
|
||||||
|
else:
|
||||||
for regex, base in INTEGER_TABLE:
|
for regex, base in INTEGER_TABLE:
|
||||||
im = re.match(regex, v)
|
im = re.match(regex, v)
|
||||||
if im:
|
if im:
|
||||||
i = int(im.group(1), base)
|
i = int(im.group(1), base)
|
||||||
return '"%d":' % i if v.endswith(':') else '%d' % i
|
return '"%d":' % i if v.endswith(':') else '%d' % i
|
||||||
|
|
||||||
return '"%s"' % v
|
return '"%s"' % v
|
||||||
|
|
||||||
|
@ -4110,7 +4110,8 @@ def js_to_json(code):
|
||||||
{comment}|,(?={skip}[\]}}])|
|
{comment}|,(?={skip}[\]}}])|
|
||||||
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
||||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
||||||
[0-9]+(?={skip}:)
|
[0-9]+(?={skip}:)|
|
||||||
|
!+
|
||||||
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4214,10 +4215,10 @@ def parse_codecs(codecs_str):
|
||||||
# http://tools.ietf.org/html/rfc6381
|
# http://tools.ietf.org/html/rfc6381
|
||||||
if not codecs_str:
|
if not codecs_str:
|
||||||
return {}
|
return {}
|
||||||
splited_codecs = list(filter(None, map(
|
split_codecs = list(filter(None, map(
|
||||||
lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
|
lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
|
||||||
vcodec, acodec = None, None
|
vcodec, acodec = None, None
|
||||||
for full_codec in splited_codecs:
|
for full_codec in split_codecs:
|
||||||
codec = full_codec.split('.')[0]
|
codec = full_codec.split('.')[0]
|
||||||
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
|
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
|
||||||
if not vcodec:
|
if not vcodec:
|
||||||
|
@ -4228,10 +4229,10 @@ def parse_codecs(codecs_str):
|
||||||
else:
|
else:
|
||||||
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
|
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
|
||||||
if not vcodec and not acodec:
|
if not vcodec and not acodec:
|
||||||
if len(splited_codecs) == 2:
|
if len(split_codecs) == 2:
|
||||||
return {
|
return {
|
||||||
'vcodec': splited_codecs[0],
|
'vcodec': split_codecs[0],
|
||||||
'acodec': splited_codecs[1],
|
'acodec': split_codecs[1],
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
return {
|
return {
|
||||||
|
@ -5470,7 +5471,7 @@ def encode_base_n(num, n, table=None):
|
||||||
|
|
||||||
def decode_packed_codes(code):
|
def decode_packed_codes(code):
|
||||||
mobj = re.search(PACKED_CODES_RE, code)
|
mobj = re.search(PACKED_CODES_RE, code)
|
||||||
obfucasted_code, base, count, symbols = mobj.groups()
|
obfuscated_code, base, count, symbols = mobj.groups()
|
||||||
base = int(base)
|
base = int(base)
|
||||||
count = int(count)
|
count = int(count)
|
||||||
symbols = symbols.split('|')
|
symbols = symbols.split('|')
|
||||||
|
@ -5483,7 +5484,7 @@ def decode_packed_codes(code):
|
||||||
|
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
|
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
|
||||||
obfucasted_code)
|
obfuscated_code)
|
||||||
|
|
||||||
|
|
||||||
def caesar(s, alphabet, shift):
|
def caesar(s, alphabet, shift):
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2020.10.25'
|
__version__ = '2020.11.11-2'
|
||||||
|
|
Loading…
Reference in New Issue