Merge branch 'UP/youtube-dl' into dl/YoutubeSearchURLIE
This commit is contained in:
+163
-123
@@ -163,6 +163,7 @@ class YoutubeDL(object):
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code. See options.py for more information.
|
||||
outtmpl: Template for output names.
|
||||
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
ignoreerrors: Do not stop on download errors.
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
@@ -338,6 +339,8 @@ class YoutubeDL(object):
|
||||
_pps = []
|
||||
_download_retcode = None
|
||||
_num_downloads = None
|
||||
_playlist_level = 0
|
||||
_playlist_urls = set()
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params=None, auto_init=True):
|
||||
@@ -656,7 +659,7 @@ class YoutubeDL(object):
|
||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
|
||||
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
|
||||
@@ -676,8 +679,8 @@ class YoutubeDL(object):
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string 'NA' is returned for missing fields. We will patch output
|
||||
# template for missing fields to meet string presentation type.
|
||||
# string NA placeholder is returned for missing fields. We will patch
|
||||
# output template for missing fields to meet string presentation type.
|
||||
for numeric_field in self._NUMERIC_FIELDS:
|
||||
if numeric_field not in template_dict:
|
||||
# As of [1] format syntax is:
|
||||
@@ -770,11 +773,20 @@ class YoutubeDL(object):
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||
process=True, force_generic_extractor=False):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
"""
|
||||
Return a list with a dictionary for each video extracted.
|
||||
|
||||
Arguments:
|
||||
url -- URL to extract
|
||||
|
||||
Keyword arguments:
|
||||
download -- whether to download videos during extraction
|
||||
ie_key -- extractor key hint
|
||||
extra_info -- dictionary containing the extra values to add to each result
|
||||
process -- whether to resolve all unresolved references (URLs, playlist items),
|
||||
must be True for download to work.
|
||||
force_generic_extractor -- force using the generic extractor
|
||||
"""
|
||||
|
||||
if not ie_key and force_generic_extractor:
|
||||
ie_key = 'Generic'
|
||||
@@ -906,115 +918,23 @@ class YoutubeDL(object):
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
elif result_type in ('playlist', 'multi_video'):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend')
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items')
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
for string_segment in format.split(','):
|
||||
if '-' in string_segment:
|
||||
start, end = string_segment.split('-')
|
||||
for item in range(int(start), int(end) + 1):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
# Protect from infinite recursion due to recursively nested playlists
|
||||
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
||||
webpage_url = ie_result['webpage_url']
|
||||
if webpage_url in self._playlist_urls:
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
'[download] Skipping already downloaded playlist: %s'
|
||||
% ie_result.get('title') or ie_result.get('id'))
|
||||
return
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
for item in playlistitems:
|
||||
entries.extend(ie_entries.getslice(
|
||||
item - 1, item
|
||||
))
|
||||
else:
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
# minimal changes
|
||||
if x_forwarded_for:
|
||||
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
return ie_result
|
||||
self._playlist_level += 1
|
||||
self._playlist_urls.add(webpage_url)
|
||||
try:
|
||||
return self.__process_playlist(ie_result, download)
|
||||
finally:
|
||||
self._playlist_level -= 1
|
||||
if not self._playlist_level:
|
||||
self._playlist_urls.clear()
|
||||
elif result_type == 'compat_list':
|
||||
self.report_warning(
|
||||
'Extractor %s returned a compat_list result. '
|
||||
@@ -1039,6 +959,118 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
def __process_playlist(self, ie_result, download):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend')
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items')
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
for string_segment in format.split(','):
|
||||
if '-' in string_segment:
|
||||
start, end = string_segment.split('-')
|
||||
for item in range(int(start), int(end) + 1):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
for item in playlistitems:
|
||||
entries.extend(ie_entries.getslice(
|
||||
item - 1, item
|
||||
))
|
||||
else:
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
# minimal changes
|
||||
if x_forwarded_for:
|
||||
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
return ie_result
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __process_iterable_entry(self, entry, download, extra_info):
|
||||
return self.process_ie_result(
|
||||
@@ -1226,6 +1258,8 @@ class YoutubeDL(object):
|
||||
group = _parse_format_selection(tokens, inside_group=True)
|
||||
current_selector = FormatSelector(GROUP, group, [])
|
||||
elif string == '+':
|
||||
if inside_merge:
|
||||
raise syntax_error('Unexpected "+"', start)
|
||||
video_selector = current_selector
|
||||
audio_selector = _parse_format_selection(tokens, inside_merge=True)
|
||||
if not video_selector or not audio_selector:
|
||||
@@ -1486,14 +1520,18 @@ class YoutubeDL(object):
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
for ts_key, date_key in (
|
||||
('timestamp', 'upload_date'),
|
||||
('release_timestamp', 'release_date'),
|
||||
):
|
||||
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
# Auto generate title fields corresponding to the *_number fields when missing
|
||||
# in order to always have clean titles. This is very common for TV series.
|
||||
@@ -1777,6 +1815,8 @@ class YoutubeDL(object):
|
||||
os.makedirs(dn)
|
||||
return True
|
||||
except (OSError, IOError) as err:
|
||||
if isinstance(err, OSError) and err.errno == errno.EEXIST:
|
||||
return True
|
||||
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
||||
return False
|
||||
|
||||
|
||||
@@ -340,6 +340,7 @@ def _real_main(argv=None):
|
||||
'format': opts.format,
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'autonumber_start': opts.autonumber_start,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
|
||||
@@ -73,6 +73,15 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import Cookie as compat_cookies
|
||||
|
||||
if sys.version_info[0] == 2:
|
||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||
def load(self, rawdata):
|
||||
if isinstance(rawdata, compat_str):
|
||||
rawdata = str(rawdata)
|
||||
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||
else:
|
||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||
|
||||
try:
|
||||
import html.entities as compat_html_entities
|
||||
except ImportError: # Python 2
|
||||
@@ -3000,6 +3009,7 @@ __all__ = [
|
||||
'compat_cookiejar',
|
||||
'compat_cookiejar_Cookie',
|
||||
'compat_cookies',
|
||||
'compat_cookies_SimpleCookie',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_Element',
|
||||
'compat_etree_fromstring',
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import re
|
||||
import time
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
|
||||
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||
(?:
|
||||
abcnews\.go\.com/
|
||||
(?:
|
||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/embed\?.*?\bid=
|
||||
(?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/(?:embed|itemfeed)\?.*?\bid=
|
||||
)|
|
||||
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||
)
|
||||
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
||||
'duration': 180,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1380454200,
|
||||
'upload_date': '20130929',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
# Youtube Embeds
|
||||
'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
|
||||
'info_dict': {
|
||||
'id': '10505354',
|
||||
'ext': 'flv',
|
||||
'display_id': 'dramatic-video-rare-death-job-america',
|
||||
'title': 'Occupational Hazards',
|
||||
'description': 'Nightline investigates the dangers that lurk at various jobs.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20100428',
|
||||
'timestamp': 1272412800,
|
||||
'id': '51286501',
|
||||
'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
|
||||
'description': 'Billingsley went from a child actor to Hollywood power player.',
|
||||
},
|
||||
'add_ie': ['AbcNewsVideo'],
|
||||
'playlist_count': 5,
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||
'info_dict': {
|
||||
'id': '38897857',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
||||
'upload_date': '20160515',
|
||||
'timestamp': 1463329500,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462442280,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# inline.type == 'video'
|
||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
story_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, story_id)
|
||||
story = self._parse_json(self._search_regex(
|
||||
r"window\['__abcnews__'\]\s*=\s*({.+?});",
|
||||
webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
|
||||
article_contents = story.get('articleContents') or {}
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
||||
def entries():
|
||||
featured_video = story.get('featuredVideo') or {}
|
||||
feed = try_get(featured_video, lambda x: x['video']['feed'])
|
||||
if feed:
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': featured_video.get('id'),
|
||||
'title': featured_video.get('name'),
|
||||
'url': feed,
|
||||
'thumbnail': featured_video.get('images'),
|
||||
'description': featured_video.get('description'),
|
||||
'timestamp': parse_iso8601(featured_video.get('uploadDate')),
|
||||
'duration': parse_duration(featured_video.get('duration')),
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
}
|
||||
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
for inline in (article_contents.get('inlines') or []):
|
||||
inline_type = inline.get('type')
|
||||
if inline_type == 'iframe':
|
||||
iframe_url = try_get(inline, lambda x: x['attrs']['src'])
|
||||
if iframe_url:
|
||||
yield self.url_result(iframe_url)
|
||||
elif inline_type == 'video':
|
||||
video_id = inline.get('id')
|
||||
if video_id:
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
|
||||
'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
|
||||
'description': inline.get('description'),
|
||||
'duration': parse_duration(inline.get('duration')),
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
}
|
||||
|
||||
timestamp = None
|
||||
date_str = self._html_search_regex(
|
||||
r'<span[^>]+class="timestamp">([^<]+)</span>',
|
||||
webpage, 'timestamp', fatal=False)
|
||||
if date_str:
|
||||
tz_offset = 0
|
||||
if date_str.endswith(' ET'): # Eastern Time
|
||||
tz_offset = -5
|
||||
date_str = date_str[:-3]
|
||||
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
|
||||
for date_format in date_formats:
|
||||
try:
|
||||
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
|
||||
except ValueError:
|
||||
continue
|
||||
if timestamp is not None:
|
||||
timestamp -= tz_offset * 3600
|
||||
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
'url': full_video_url,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
if youtube_url:
|
||||
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
return entry
|
||||
return self.playlist_result(
|
||||
entries(), story_id, article_contents.get('headline'),
|
||||
article_contents.get('subHead'))
|
||||
|
||||
+128
-66
@@ -10,6 +10,7 @@ import random
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
@@ -18,11 +19,14 @@ from ..utils import (
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,16 +35,30 @@ class ADNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'md5': 'e497370d847fd79d9d4c74be55575c7a',
|
||||
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||
'info_dict': {
|
||||
'id': '7778',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
'series': 'Blue Exorcist - Kyôto Saga',
|
||||
'duration': 1467,
|
||||
'release_date': '20170106',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'season_number': 2,
|
||||
'episode': 'Début des hostilités',
|
||||
'episode_number': 1,
|
||||
}
|
||||
}
|
||||
|
||||
_NETRC_MACHINE = 'animedigitalnetwork'
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
@@ -54,26 +72,24 @@ class ADNIE(InfoExtractor):
|
||||
def _ass_subtitles_timecode(seconds):
|
||||
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
||||
|
||||
def _get_subtitles(self, sub_path, video_id):
|
||||
if not sub_path:
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, sub_path),
|
||||
video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||
sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
||||
if subtitle_location:
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, subtitle_location),
|
||||
video_id, 'Downloading subtitles data', fatal=False,
|
||||
headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
subtitle_location, video_id, 'Downloading subtitles data',
|
||||
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
@@ -117,61 +133,100 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
}])
|
||||
return subtitles
|
||||
|
||||
def _real_initialize(self):
|
||||
username, password = self._get_login_info()
|
||||
if not username:
|
||||
return
|
||||
try:
|
||||
access_token = (self._download_json(
|
||||
self._API_BASE_URL + 'authentication/login', None,
|
||||
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'password': password,
|
||||
'rememberMe': False,
|
||||
'source': 'Web',
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
resp = self._parse_json(
|
||||
e.cause.read().decode(), None, fatal=False) or {}
|
||||
message = resp.get('message') or resp.get('code')
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_config = self._parse_json(self._search_regex(
|
||||
r'playerConfig\s*=\s*({.+});', webpage,
|
||||
'player config', default='{}'), video_id, fatal=False)
|
||||
if not player_config:
|
||||
config_url = urljoin(self._BASE_URL, self._search_regex(
|
||||
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
|
||||
webpage, 'config url'))
|
||||
player_config = self._download_json(
|
||||
config_url, video_id,
|
||||
'Downloading player config JSON metadata')['player']
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata',
|
||||
headers=self._HEADERS)['player']
|
||||
options = player['options']
|
||||
|
||||
video_info = {}
|
||||
video_info_str = self._search_regex(
|
||||
r'videoInfo\s*=\s*({.+});', webpage,
|
||||
'video info', fatal=False)
|
||||
if video_info_str:
|
||||
video_info = self._parse_json(
|
||||
video_info_str, video_id, fatal=False) or {}
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
self.raise_login_required()
|
||||
|
||||
options = player_config.get('options') or {}
|
||||
metas = options.get('metas') or {}
|
||||
links = player_config.get('links') or {}
|
||||
sub_path = player_config.get('subtitles')
|
||||
error = None
|
||||
if not links:
|
||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||
token = options['token']
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
'e': 60,
|
||||
't': token,
|
||||
}))
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}))
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
links_data = self._download_json(
|
||||
urljoin(self._BASE_URL, links_url), video_id,
|
||||
'Downloading links JSON metadata', headers={
|
||||
'Authorization': 'Bearer ' + authorization,
|
||||
})
|
||||
links = links_data.get('links') or {}
|
||||
metas = metas or links_data.get('meta') or {}
|
||||
sub_path = sub_path or links_data.get('subtitles') or \
|
||||
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
|
||||
sub_path += '&token=' + token
|
||||
error = links_data.get('error')
|
||||
title = metas.get('title') or video_info['title']
|
||||
|
||||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
'withMetadata': 'true',
|
||||
'source': 'Web'
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError):
|
||||
raise e
|
||||
|
||||
if e.cause.code == 401:
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
raise ExtractorError('Giving up retrying')
|
||||
|
||||
links = links_data.get('links') or {}
|
||||
metas = links_data.get('metadata') or {}
|
||||
sub_url = (links.get('subtitles') or {}).get('all')
|
||||
video_info = links_data.get('video') or {}
|
||||
title = metas['title']
|
||||
|
||||
formats = []
|
||||
for format_id, qualities in links.items():
|
||||
for format_id, qualities in (links.get('streaming') or {}).items():
|
||||
if not isinstance(qualities, dict):
|
||||
continue
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
@@ -189,19 +244,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
formats.extend(m3u8_formats)
|
||||
if not error:
|
||||
error = options.get('error')
|
||||
if not formats and error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'description': strip_or_none(metas.get('summary') or video.get('summary')),
|
||||
'thumbnail': video_info.get('image') or player.get('image'),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
||||
'series': video_info.get('playlistTitle'),
|
||||
'subtitles': self.extract_subtitles(sub_url, video_id),
|
||||
'episode': metas.get('subtitle') or video.get('name'),
|
||||
'episode_number': int_or_none(video.get('shortNumber')),
|
||||
'series': show.get('title'),
|
||||
'season_number': int_or_none(video.get('season')),
|
||||
'duration': int_or_none(video_info.get('duration') or video.get('duration')),
|
||||
'release_date': unified_strdate(video.get('releaseDate')),
|
||||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
||||
@@ -252,11 +252,11 @@ class AENetworksShowIE(AENetworksListBaseIE):
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/ancient-aliens',
|
||||
'info_dict': {
|
||||
'id': 'SH012427480000',
|
||||
'id': 'SERIES1574',
|
||||
'title': 'Ancient Aliens',
|
||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||
},
|
||||
'playlist_mincount': 168,
|
||||
'playlist_mincount': 150,
|
||||
}]
|
||||
_RESOURCE = 'series'
|
||||
_ITEMS_KEY = 'episodes'
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
|
||||
'info_dict': {
|
||||
'id': '3792260579001',
|
||||
'ext': 'mp4',
|
||||
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
||||
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
post_type, name = re.match(self._VALID_URL, url).groups()
|
||||
post_type = {
|
||||
'features': 'post',
|
||||
'program': 'episode',
|
||||
'videos': 'video',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
'https://www.aljazeera.com/graphql', name, query={
|
||||
'operationName': 'SingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': name,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
'wp-site': 'aje',
|
||||
})['data']['article']['video']
|
||||
video_id = video['id']
|
||||
account_id = video.get('accountId') or '665003303001'
|
||||
player_id = video.get('playerId') or 'BkeSH5BDb'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'timestamp': 1523318400,
|
||||
'upload_date': '20180410',
|
||||
'release_date': '20180410',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
@@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
|
||||
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
|
||||
'md5': '06451608c57651e985a498e69cec17e5',
|
||||
'info_dict': {
|
||||
'id': '5fbe8c61bda2010001c6763b',
|
||||
'title': 'Simple Chicken Dinner',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1610755200,
|
||||
'upload_date': '20210116',
|
||||
'release_date': '20210116',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 21,
|
||||
'episode': 'Simple Chicken Dinner',
|
||||
'episode_number': 3,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
@@ -60,7 +84,76 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
'release_date': unified_strdate(video.get('publishDate')),
|
||||
'episode_number': int_or_none(episode.get('number')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'series': try_get(episode, lambda x: x['show']['title']),
|
||||
'episode': episode.get('title'),
|
||||
}
|
||||
|
||||
|
||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# ATK Season
|
||||
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||
'info_dict': {
|
||||
'id': 'season_1',
|
||||
'title': 'Season 1',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# Cooks Country Season
|
||||
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||
'info_dict': {
|
||||
'id': 'season_12',
|
||||
'title': 'Season 12',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_name, season_number = re.match(self._VALID_URL, url).groups()
|
||||
season_number = int(season_number)
|
||||
|
||||
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
|
||||
|
||||
season = 'Season %d' % season_number
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
season, headers={
|
||||
'Origin': 'https://www.%s.com' % show_name,
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps([
|
||||
'search_season_list:' + season,
|
||||
'search_document_klass:episode',
|
||||
'search_show_slug:' + slug,
|
||||
]),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode in (season_search.get('hits') or []):
|
||||
search_url = episode.get('search_url')
|
||||
if not search_url:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': 'https://www.%s.com%s' % (show_name, search_url),
|
||||
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||
}
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), 'season_%d' % season_number, season)
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -88,7 +89,7 @@ class AMPIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -116,8 +116,6 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
||||
webpage, 'anime description', default=None)
|
||||
|
||||
entries = []
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
@@ -233,7 +231,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
yield f
|
||||
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
@@ -247,7 +245,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'title': m.group('title'),
|
||||
'url': urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
yield f
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
@@ -275,7 +273,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
for e in extract_entries(episode_html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
@@ -283,11 +282,18 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
for e in extract_entries(html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
extract_episodes(webpage)
|
||||
def entries():
|
||||
has_episodes = False
|
||||
for e in extract_episodes(webpage):
|
||||
has_episodes = True
|
||||
yield e
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
if not has_episodes:
|
||||
for e in extract_film(webpage, anime_id):
|
||||
yield e
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
return self.playlist_result(
|
||||
entries(), anime_id, anime_title, anime_description)
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .yahoo import YahooIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -15,9 +15,9 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
class AolIE(YahooIE):
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Yahoo video
|
||||
'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if '-' in video_id:
|
||||
return self._extract_yahoo_video(video_id, 'us')
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
|
||||
+24
-23
@@ -6,25 +6,21 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class APAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||
'info_dict': {
|
||||
'id': 'jjv85FdZ',
|
||||
'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'ext': 'mp4',
|
||||
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 254,
|
||||
'timestamp': 1519211149,
|
||||
'upload_date': '20180221',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
||||
@@ -46,9 +42,11 @@ class APAIE(InfoExtractor):
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'%s/player/%s' % (base_url, video_id), video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
@@ -59,16 +57,18 @@ class APAIE(InfoExtractor):
|
||||
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
||||
video_id=video_id)
|
||||
|
||||
sources = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
|
||||
video_id, transform_source=js_to_json)
|
||||
def extract(field, name=None):
|
||||
return self._search_regex(
|
||||
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||
webpage, name or field, default=None, group='value')
|
||||
|
||||
title = extract('title') or video_id
|
||||
description = extract('description')
|
||||
thumbnail = extract('poster', 'thumbnail')
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = url_or_none(source.get('file'))
|
||||
for format_id in ('hls', 'progressive'):
|
||||
source_url = url_or_none(extract(format_id))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
@@ -77,18 +77,19 @@ class APAIE(InfoExtractor):
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)\.mp4', source_url, 'height', default=None))
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -9,10 +9,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
||||
'md5': 'c1d41f72c8bcaf222e089434619316e4',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'ext': 'm4v',
|
||||
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
|
||||
|
||||
video_data = self._parse_json(video_json, video_id)
|
||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -42,6 +42,7 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id)
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode = ember_data['data']['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
|
||||
@@ -2,15 +2,17 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||
@@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'ext': 'ogg',
|
||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||
'upload_date': '19681210',
|
||||
'uploader': 'SRI International'
|
||||
'creator': 'SRI International',
|
||||
'release_date': '19681210',
|
||||
'uploader': 'SRI International',
|
||||
'timestamp': 1268695290,
|
||||
'upload_date': '20100315',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
@@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'id': 'Cops1922',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
|
||||
'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
|
||||
'timestamp': 1387699629,
|
||||
'upload_date': '20131222',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://archive.org/embed/' + video_id, video_id)
|
||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||
webpage, 'jwplayer playlist'), video_id)
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
|
||||
playlist = None
|
||||
play8 = self._search_regex(
|
||||
r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
|
||||
'playlist', default=None)
|
||||
if play8:
|
||||
attrs = extract_attributes(play8)
|
||||
playlist = attrs.get('value')
|
||||
if not playlist:
|
||||
# Old jwplayer fallback
|
||||
playlist = self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||
webpage, 'jwplayer playlist', default='[]')
|
||||
jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
|
||||
if jwplayer_playlist:
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
else:
|
||||
# HTML5 media fallback
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info['id'] = video_id
|
||||
|
||||
def get_optional(metadata, field):
|
||||
return metadata.get(field, [None])[0]
|
||||
@@ -58,8 +84,12 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': clean_html(get_optional(metadata, 'description')),
|
||||
})
|
||||
if info.get('_type') != 'playlist':
|
||||
creator = get_optional(metadata, 'creator')
|
||||
info.update({
|
||||
'uploader': get_optional(metadata, 'creator'),
|
||||
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
||||
'creator': creator,
|
||||
'release_date': unified_strdate(get_optional(metadata, 'date')),
|
||||
'uploader': get_optional(metadata, 'publisher') or creator,
|
||||
'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
|
||||
'language': get_optional(metadata, 'language'),
|
||||
})
|
||||
return info
|
||||
|
||||
+74
-44
@@ -187,13 +187,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
title = self._html_search_regex(
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
@@ -249,31 +249,40 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
# available till 7.01.2022
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||
'info_dict': {
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'id': 'maischberger-die-woche-video100',
|
||||
'display_id': 'maischberger-die-woche-video100',
|
||||
'ext': 'mp4',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'duration': 3687.0,
|
||||
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||
'upload_date': '20210107',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
|
||||
'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('id')
|
||||
|
||||
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
||||
doc = self._download_xml(player_url, display_id)
|
||||
@@ -284,25 +293,47 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for a in video_node.findall('.//asset'):
|
||||
file_name = xpath_text(a, './fileName', default=None)
|
||||
if not file_name:
|
||||
continue
|
||||
format_type = a.attrib.get('type')
|
||||
format_url = url_or_none(file_name)
|
||||
if format_url:
|
||||
ext = determine_ext(file_name)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_type or 'hls', fatal=False))
|
||||
continue
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(format_url, {'hdcore': '3.7.0'}),
|
||||
display_id, f4m_id=format_type or 'hds', fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'format_id': a.attrib['type'],
|
||||
'width': int_or_none(a.find('./frameWidth').text),
|
||||
'height': int_or_none(a.find('./frameHeight').text),
|
||||
'vbr': int_or_none(a.find('./bitrateVideo').text),
|
||||
'abr': int_or_none(a.find('./bitrateAudio').text),
|
||||
'vcodec': a.find('./codecVideo').text,
|
||||
'tbr': int_or_none(a.find('./totalBitrate').text),
|
||||
'format_id': format_type,
|
||||
'width': int_or_none(xpath_text(a, './frameWidth')),
|
||||
'height': int_or_none(xpath_text(a, './frameHeight')),
|
||||
'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
|
||||
'abr': int_or_none(xpath_text(a, './bitrateAudio')),
|
||||
'vcodec': xpath_text(a, './codecVideo'),
|
||||
'tbr': int_or_none(xpath_text(a, './totalBitrate')),
|
||||
}
|
||||
if a.find('./serverPrefix').text:
|
||||
f['url'] = a.find('./serverPrefix').text
|
||||
f['playpath'] = a.find('./fileName').text
|
||||
server_prefix = xpath_text(a, './serverPrefix', default=None)
|
||||
if server_prefix:
|
||||
f.update({
|
||||
'url': server_prefix,
|
||||
'playpath': file_name,
|
||||
})
|
||||
else:
|
||||
f['url'] = a.find('./fileName').text
|
||||
if not format_url:
|
||||
continue
|
||||
f['url'] = format_url
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': mobj.group('id'),
|
||||
'id': xpath_text(video_node, './videoId', default=display_id),
|
||||
'formats': formats,
|
||||
'display_id': display_id,
|
||||
'title': video_node.find('./title').text,
|
||||
@@ -313,19 +344,19 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': '70153354',
|
||||
'id': '78566716',
|
||||
'title': 'Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
||||
'timestamp': 1577047500,
|
||||
'upload_date': '20191222',
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
|
||||
'timestamp': 1596658200,
|
||||
'upload_date': '20200805',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
@@ -343,22 +374,22 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
if display_id:
|
||||
display_id = display_id.rstrip('/')
|
||||
if not display_id:
|
||||
display_id = video_id
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
display_id, data=json.dumps({
|
||||
video_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
playerPage(client: "ard", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
@@ -388,7 +419,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (mobj.group('client'), video_id),
|
||||
}''' % video_id,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
@@ -413,7 +444,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class ArnesIE(InfoExtractor):
|
||||
IE_NAME = 'video.arnes.si'
|
||||
IE_DESC = 'Arnes Video'
|
||||
_VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
|
||||
'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
|
||||
'info_dict': {
|
||||
'id': 'a1qrWTOQfVoU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Linearna neodvisnost, definicija',
|
||||
'description': 'Linearna neodvisnost, definicija',
|
||||
'license': 'PRIVATE',
|
||||
'creator': 'Polona Oblak',
|
||||
'timestamp': 1585063725,
|
||||
'upload_date': '20200324',
|
||||
'channel': 'Polona Oblak',
|
||||
'channel_id': 'q6pc04hw24cj',
|
||||
'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
|
||||
'duration': 596.75,
|
||||
'view_count': int,
|
||||
'tags': ['linearna_algebra'],
|
||||
'start_time': 10,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_BASE_URL = 'https://video.arnes.si'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
|
||||
title = video['title']
|
||||
|
||||
formats = []
|
||||
for media in (video.get('media') or []):
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._BASE_URL + media_url,
|
||||
'format_id': remove_start(media.get('format'), 'FORMAT_'),
|
||||
'format_note': media.get('formatTranslation'),
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
channel = video.get('channel') or {}
|
||||
channel_id = channel.get('url')
|
||||
thumbnail = video.get('thumbnailUrl')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': self._BASE_URL + thumbnail,
|
||||
'description': video.get('description'),
|
||||
'license': video.get('license'),
|
||||
'creator': video.get('author'),
|
||||
'timestamp': parse_iso8601(video.get('creationTime')),
|
||||
'channel': channel.get('name'),
|
||||
'channel_id': channel_id,
|
||||
'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'tags': video.get('hashtags'),
|
||||
'start_time': int_or_none(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
|
||||
}
|
||||
@@ -48,6 +48,7 @@ class AWAANBaseIE(InfoExtractor):
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||
'is_live': is_live,
|
||||
'uploader_id': video_data.get('user_id'),
|
||||
}
|
||||
|
||||
|
||||
@@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'upload_date': '20150107',
|
||||
'timestamp': 1420588800,
|
||||
'uploader_id': '71',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
|
||||
@@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BandaiChannelIE(BrightcoveNewIE):
|
||||
IE_NAME = 'bandaichannel'
|
||||
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.b-ch.com/titles/514/001',
|
||||
'md5': 'a0f2d787baa5729bed71108257f613a4',
|
||||
'info_dict': {
|
||||
'id': '6128044564001',
|
||||
'ext': 'mp4',
|
||||
'title': 'メタルファイターMIKU 第1話',
|
||||
'timestamp': 1580354056,
|
||||
'uploader_id': '5797077852001',
|
||||
'upload_date': '20200130',
|
||||
'duration': 1387.733,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
attrs = extract_attributes(self._search_regex(
|
||||
r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
|
||||
bc = self._download_json(
|
||||
'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
|
||||
video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
|
||||
return self._parse_brightcove_metadata(bc, bc['id'])
|
||||
@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader': 'Ben Prunty',
|
||||
'timestamp': 1396508491,
|
||||
'upload_date': '20140403',
|
||||
'release_timestamp': 1396483200,
|
||||
'release_date': '20140403',
|
||||
'duration': 260.877,
|
||||
'track': 'Lanius (Battle)',
|
||||
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader': 'Mastodon',
|
||||
'timestamp': 1322005399,
|
||||
'upload_date': '20111122',
|
||||
'release_timestamp': 1076112000,
|
||||
'release_date': '20040207',
|
||||
'duration': 120.79,
|
||||
'track': 'Hail to Fire',
|
||||
@@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||
'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
|
||||
+273
-24
@@ -1,31 +1,39 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
@@ -756,8 +764,17 @@ class BBCIE(BBCCoUkIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# custom redirection to www.bbc.com
|
||||
# also, video with window.__INITIAL_DATA__
|
||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 'p02xzws1',
|
||||
'ext': 'mp4',
|
||||
'title': "Pluto may have 'nitrogen glaciers'",
|
||||
'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||
@@ -793,11 +810,25 @@ class BBCIE(BBCCoUkIE):
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}, {
|
||||
# BBC Reel
|
||||
'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
|
||||
'info_dict': {
|
||||
'id': 'p07c6sb9',
|
||||
'ext': 'mp4',
|
||||
'title': 'How positive thinking is harming your happiness',
|
||||
'alt_title': 'The downsides of positive thinking',
|
||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||
'duration': 235,
|
||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
|
||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
|
||||
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
||||
else super(BBCIE, cls).suitable(url))
|
||||
|
||||
@@ -929,7 +960,7 @@ class BBCIE(BBCCoUkIE):
|
||||
else:
|
||||
entry['title'] = info['title']
|
||||
entry['formats'].extend(info['formats'])
|
||||
except Exception as e:
|
||||
except ExtractorError as e:
|
||||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
@@ -980,6 +1011,37 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
|
||||
initial_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
|
||||
webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
init_data = try_get(
|
||||
initial_data, lambda x: x['initData']['items'][0], dict) or {}
|
||||
smp_data = init_data.get('smpData') or {}
|
||||
clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
|
||||
version_id = clip_data.get('versionID')
|
||||
if version_id:
|
||||
title = smp_data['title']
|
||||
formats, subtitles = self._download_media_selector(version_id)
|
||||
self._sort_formats(formats)
|
||||
image_url = smp_data.get('holdingImageURL')
|
||||
display_date = init_data.get('displayDate')
|
||||
topic_title = init_data.get('topicTitle')
|
||||
|
||||
return {
|
||||
'id': version_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'alt_title': init_data.get('shortTitle'),
|
||||
'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
|
||||
'description': smp_data.get('summary') or init_data.get('shortSummary'),
|
||||
'upload_date': display_date.replace('-', '') if display_date else None,
|
||||
'subtitles': subtitles,
|
||||
'duration': int_or_none(clip_data.get('duration')),
|
||||
'categories': [topic_title] if topic_title else None,
|
||||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
@@ -1041,7 +1103,7 @@ class BBCIE(BBCCoUkIE):
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', '1920x1920')
|
||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
@@ -1114,12 +1176,29 @@ class BBCIE(BBCCoUkIE):
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
self._sort_formats(formats)
|
||||
item_desc = None
|
||||
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
|
||||
if blocks:
|
||||
summary = []
|
||||
for block in blocks:
|
||||
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
||||
if text:
|
||||
summary.append(text)
|
||||
if summary:
|
||||
item_desc = '\n\n'.join(summary)
|
||||
item_time = None
|
||||
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
|
||||
if try_get(meta, lambda x: x['label']) == 'Published':
|
||||
item_time = unified_timestamp(meta.get('timestamp'))
|
||||
break
|
||||
entries.append({
|
||||
'id': item_id,
|
||||
'title': item_title,
|
||||
'thumbnail': item.get('holdingImageUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
@@ -1293,21 +1372,149 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
||||
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
|
||||
_VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
|
||||
@staticmethod
|
||||
def _get_default(episode, key, default_key='default'):
|
||||
return try_get(episode, lambda x: x[key][default_key])
|
||||
|
||||
def _get_description(self, data):
|
||||
synopsis = data.get(self._DESCRIPTION_KEY) or {}
|
||||
return dict_get(synopsis, ('large', 'medium', 'small'))
|
||||
|
||||
def _fetch_page(self, programme_id, per_page, series_id, page):
|
||||
elements = self._get_elements(self._call_api(
|
||||
programme_id, per_page, page + 1, series_id))
|
||||
for element in elements:
|
||||
episode = self._get_episode(element)
|
||||
episode_id = episode.get('id')
|
||||
if not episode_id:
|
||||
continue
|
||||
thumbnail = None
|
||||
image = self._get_episode_image(episode)
|
||||
if image:
|
||||
thumbnail = image.replace('{recipe}', 'raw')
|
||||
category = self._get_default(episode, 'labels', 'category')
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': episode_id,
|
||||
'title': self._get_episode_field(episode, 'subtitle'),
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
|
||||
'thumbnail': thumbnail,
|
||||
'description': self._get_description(episode),
|
||||
'categories': [category] if category else None,
|
||||
'series': self._get_episode_field(episode, 'title'),
|
||||
'ie_key': BBCCoUkIE.ie_key(),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
pid = self._match_id(url)
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
series_id = qs.get('seriesId', [None])[0]
|
||||
page = qs.get('page', [None])[0]
|
||||
per_page = 36 if page else self._PAGE_SIZE
|
||||
fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
|
||||
entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
|
||||
playlist_data = self._get_playlist_data(self._call_api(pid, 1))
|
||||
return self.playlist_result(
|
||||
entries, pid, self._get_playlist_title(playlist_data),
|
||||
self._get_description(playlist_data))
|
||||
|
||||
|
||||
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:episodes'
|
||||
_VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||
'info_dict': {
|
||||
'id': 'b05rcz9v',
|
||||
'title': 'The Disappearance',
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
'skip': 'This programme is not currently available on BBC iPlayer',
|
||||
'playlist_mincount': 8,
|
||||
}, {
|
||||
# all seasons
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
|
||||
'info_dict': {
|
||||
'id': 'b094m5t9',
|
||||
'title': 'Doctor Foster',
|
||||
'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
# explicit season
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
|
||||
'info_dict': {
|
||||
'id': 'b094m5t9',
|
||||
'title': 'Doctor Foster',
|
||||
'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
# all pages
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
|
||||
'info_dict': {
|
||||
'id': 'm0004c4v',
|
||||
'title': 'Beechgrove',
|
||||
'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
|
||||
},
|
||||
'playlist_mincount': 37,
|
||||
}, {
|
||||
# explicit page
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
|
||||
'info_dict': {
|
||||
'id': 'm0004c4v',
|
||||
'title': 'Beechgrove',
|
||||
'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
_DESCRIPTION_KEY = 'synopsis'
|
||||
|
||||
def _get_episode_image(self, episode):
|
||||
return self._get_default(episode, 'image')
|
||||
|
||||
def _get_episode_field(self, episode, field):
|
||||
return self._get_default(episode, field)
|
||||
|
||||
@staticmethod
|
||||
def _get_elements(data):
|
||||
return data['entities']['results']
|
||||
|
||||
@staticmethod
|
||||
def _get_episode(element):
|
||||
return element.get('episode') or {}
|
||||
|
||||
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||
variables = {
|
||||
'id': pid,
|
||||
'page': page,
|
||||
'perPage': per_page,
|
||||
}
|
||||
if series_id:
|
||||
variables['sliceId'] = series_id
|
||||
return self._download_json(
|
||||
'https://graph.ibl.api.bbc.co.uk/', pid, headers={
|
||||
'Content-Type': 'application/json'
|
||||
}, data=json.dumps({
|
||||
'id': '5692d93d5aac8d796a0305e895e61551',
|
||||
'variables': variables,
|
||||
}).encode('utf-8'))['data']['programme']
|
||||
|
||||
@staticmethod
|
||||
def _get_playlist_data(data):
|
||||
return data
|
||||
|
||||
def _get_playlist_title(self, data):
|
||||
return self._get_default(data, 'title')
|
||||
|
||||
|
||||
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:group'
|
||||
_VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
|
||||
_TESTS = [{
|
||||
# Available for over a year unlike 30 days for most other programmes
|
||||
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||
'info_dict': {
|
||||
@@ -1316,14 +1523,56 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
# all pages
|
||||
'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
|
||||
'info_dict': {
|
||||
'id': 'p081d7j7',
|
||||
'title': 'Music in Scotland',
|
||||
'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
|
||||
},
|
||||
'playlist_mincount': 47,
|
||||
}, {
|
||||
# explicit page
|
||||
'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
|
||||
'info_dict': {
|
||||
'id': 'p081d7j7',
|
||||
'title': 'Music in Scotland',
|
||||
'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}]
|
||||
_PAGE_SIZE = 200
|
||||
_DESCRIPTION_KEY = 'synopses'
|
||||
|
||||
def _extract_title_and_description(self, webpage):
|
||||
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
description = self._search_regex(
|
||||
r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
|
||||
webpage, 'description', fatal=False, group='value')
|
||||
return title, description
|
||||
def _get_episode_image(self, episode):
|
||||
return self._get_default(episode, 'images', 'standard')
|
||||
|
||||
def _get_episode_field(self, episode, field):
|
||||
return episode.get(field)
|
||||
|
||||
@staticmethod
|
||||
def _get_elements(data):
|
||||
return data['elements']
|
||||
|
||||
@staticmethod
|
||||
def _get_episode(element):
|
||||
return element
|
||||
|
||||
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||
return self._download_json(
|
||||
'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
|
||||
pid, query={
|
||||
'page': page,
|
||||
'per_page': per_page,
|
||||
})['group_episodes']
|
||||
|
||||
@staticmethod
|
||||
def _get_playlist_data(data):
|
||||
return data['group']
|
||||
|
||||
def _get_playlist_title(self, data):
|
||||
return data.get('title')
|
||||
|
||||
|
||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
|
||||
@@ -156,6 +156,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
cid = js['result']['cid']
|
||||
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
@@ -232,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader': uploader_mobj.group('name').strip(),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
if not info.get('uploader'):
|
||||
|
||||
@@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||
'md5': '670b2d73f48549da032861130488c681',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||
'upload_date': '20150723',
|
||||
'timestamp': 1437679032,
|
||||
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Unable to download f4m manifest'
|
||||
]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
IE_NAME = 'blinkx'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||
'info_dict': {
|
||||
'id': 'Da0Gw3xc',
|
||||
'ext': 'mp4',
|
||||
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||
'uploader': 'IGN News',
|
||||
'upload_date': '20150217',
|
||||
'timestamp': 1424215740,
|
||||
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||
'duration': 47.743333,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id[:8]
|
||||
|
||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
|
||||
+ 'video=%s' % video_id)
|
||||
data_json = self._download_webpage(api_url, display_id)
|
||||
data = json.loads(data_json)['api']['results'][0]
|
||||
duration = None
|
||||
thumbnails = []
|
||||
formats = []
|
||||
for m in data['media']:
|
||||
if m['type'] == 'jpg':
|
||||
thumbnails.append({
|
||||
'url': m['link'],
|
||||
'width': int(m['w']),
|
||||
'height': int(m['h']),
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = float(m['d'])
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||
tbr = vbr + abr if vbr and abr else None
|
||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(m.get('w')),
|
||||
'height': int_or_none(m.get('h')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'fullid': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'uploader': data['channel_name'],
|
||||
'timestamp': data['pubdate_epoch'],
|
||||
'description': data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
}
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
||||
@@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
||||
@@ -53,11 +56,14 @@ class BravoTVIE(AdobePassIE):
|
||||
tp_path = release_pid = tve['release_pid']
|
||||
if tve.get('entitlement') == 'auth':
|
||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
||||
if site == 'bravotv':
|
||||
site = 'bravo'
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'bravo'),
|
||||
adobe_pass.get('adobePassResourceId') or site,
|
||||
tve['title'], release_pid, tve.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
||||
url, release_pid,
|
||||
adobe_pass.get('adobePassRequestorId') or site, resource)
|
||||
else:
|
||||
shared_playlist = settings['ls_playlist']
|
||||
account_pid = shared_playlist['account_pid']
|
||||
|
||||
@@ -7,19 +7,21 @@ from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||
@@ -332,3 +334,51 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'display_id': display_id,
|
||||
'season_number': int_or_none(page.get('episode_season')),
|
||||
})
|
||||
|
||||
|
||||
class DagelijkseKostIE(InfoExtractor):
|
||||
IE_DESC = 'dagelijksekost.een.be'
|
||||
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
|
||||
'md5': '30bfffc323009a3e5f689bef6efa2365',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
|
||||
'display_id': 'hachis-parmentier-met-witloof',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hachis parmentier met witloof',
|
||||
'description': 'md5:9960478392d87f63567b5b117688cdc5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 283.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = strip_or_none(get_element_by_class(
|
||||
'dish-metadata__title', webpage
|
||||
) or self._html_search_meta(
|
||||
'twitter:title', webpage))
|
||||
|
||||
description = clean_html(get_element_by_class(
|
||||
'dish-description', webpage)
|
||||
) or self._html_search_meta(
|
||||
('description', 'twitter:description', 'og:description'),
|
||||
webpage)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ class CBSBaseIE(ThePlatformFeedIE):
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
@@ -52,6 +52,9 @@ class CBSIE(CBSBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||
|
||||
@@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
|
||||
def _real_extract(self, url):
|
||||
item = self._parse_json(zlib.decompress(compat_b64decode(
|
||||
compat_urllib_parse_unquote(self._match_id(url))),
|
||||
-zlib.MAX_WBITS), None)['video']['items'][0]
|
||||
-zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
|
||||
@@ -1,38 +1,113 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .cbs import CBSBaseIE
|
||||
import re
|
||||
|
||||
# from .cbs import CBSBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
|
||||
|
||||
# class CBSSportsEmbedIE(CBSBaseIE):
|
||||
class CBSSportsEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'cbssports:embed'
|
||||
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
|
||||
(?:
|
||||
ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
|
||||
pcid%3D(?P<pcid>\d+)
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
|
||||
'info_dict': {
|
||||
'id': '1214315075735',
|
||||
'ext': 'mp4',
|
||||
'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
|
||||
'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
|
||||
'timestamp': 1524111457,
|
||||
'upload_date': '20180419',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
|
||||
'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
# def _extract_video_info(self, filter_query, video_id):
|
||||
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
uuid, pcid = re.match(self._VALID_URL, url).groups()
|
||||
query = {'id': uuid} if uuid else {'pcid': pcid}
|
||||
video = self._download_json(
|
||||
'https://www.cbssports.com/api/content/video/',
|
||||
uuid or pcid, query=query)[0]
|
||||
video_id = video['id']
|
||||
title = video['title']
|
||||
metadata = video.get('metaData') or {}
|
||||
# return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
|
||||
# return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
metadata['files'][0]['url'], video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(formats)
|
||||
|
||||
image = video.get('image')
|
||||
thumbnails = None
|
||||
if image:
|
||||
image_path = image.get('path')
|
||||
if image_path:
|
||||
thumbnails = [{
|
||||
'url': image_path,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
'filesize': int_or_none(image.get('size')),
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': video.get('description'),
|
||||
'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
class CBSSportsBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
|
||||
webpage, 'video id')
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
|
||||
webpage, 'embed url')
|
||||
return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
|
||||
|
||||
|
||||
class CBSSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = 'cbssports'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
|
||||
'info_dict': {
|
||||
'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cover 3: Stanford Spring Gleaning',
|
||||
'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
|
||||
'timestamp': 1617218398,
|
||||
'upload_date': '20210331',
|
||||
'duration': 502,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = '247sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
|
||||
'info_dict': {
|
||||
'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
|
||||
'ext': 'mp4',
|
||||
'title': '2021 QB Jake Garcia senior highlights through five games',
|
||||
'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
|
||||
'timestamp': 1607114223,
|
||||
'upload_date': '20201204',
|
||||
'duration': 208,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_timezone,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -24,8 +27,9 @@ class CCMAIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1470918540,
|
||||
'upload_date': '20160811',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
@@ -35,8 +39,24 @@ class CCMAIE(InfoExtractor):
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20171205',
|
||||
'timestamp': 1512507300,
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'series': 'Crims',
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -72,17 +92,28 @@ class CCMAIE(InfoExtractor):
|
||||
|
||||
informacio = media['informacio']
|
||||
title = informacio['titol']
|
||||
durada = informacio.get('durada', {})
|
||||
durada = informacio.get('durada') or {}
|
||||
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
|
||||
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
|
||||
tematica = try_get(informacio, lambda x: x['tematica']['text'])
|
||||
|
||||
timestamp = None
|
||||
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
|
||||
try:
|
||||
timezone, data_utc = extract_timezone(data_utc)
|
||||
timestamp = calendar.timegm((datetime.datetime.strptime(
|
||||
data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
subtitles = {}
|
||||
subtitols = media.get('subtitols', {})
|
||||
if subtitols:
|
||||
sub_url = subtitols.get('url')
|
||||
subtitols = media.get('subtitols') or []
|
||||
if isinstance(subtitols, dict):
|
||||
subtitols = [subtitols]
|
||||
for st in subtitols:
|
||||
sub_url = st.get('url')
|
||||
if sub_url:
|
||||
subtitles.setdefault(
|
||||
subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
|
||||
st.get('iso') or st.get('text') or 'ca', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
@@ -97,6 +128,16 @@ class CCMAIE(InfoExtractor):
|
||||
'height': int_or_none(imatges.get('alcada')),
|
||||
}]
|
||||
|
||||
age_limit = None
|
||||
codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
|
||||
if codi_etic:
|
||||
codi_etic_s = codi_etic.split('_')
|
||||
if len(codi_etic_s) == 2:
|
||||
if codi_etic_s[1] == 'TP':
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = int_or_none(codi_etic_s[1])
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
@@ -106,4 +147,9 @@ class CCMAIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
'alt_title': informacio.get('titol_complet'),
|
||||
'episode_number': int_or_none(informacio.get('capitol')),
|
||||
'categories': [tematica] if tematica else None,
|
||||
'series': informacio.get('programa'),
|
||||
}
|
||||
|
||||
@@ -95,8 +95,11 @@ class CDAIE(InfoExtractor):
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
if re.search(r'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
|
||||
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
@@ -130,6 +133,8 @@ class CDAIE(InfoExtractor):
|
||||
'age_limit': 18 if need_confirm_age else 0,
|
||||
}
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
||||
def decrypt_file(a):
|
||||
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
|
||||
@@ -194,7 +199,7 @@ class CDAIE(InfoExtractor):
|
||||
handler = self._download_webpage
|
||||
|
||||
webpage = handler(
|
||||
self._BASE_URL + href, video_id,
|
||||
urljoin(self._BASE_URL, href), video_id,
|
||||
'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
# Manually report warning because empty page is returned when
|
||||
@@ -206,6 +211,4 @@ class CDAIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
||||
@@ -1,142 +1,51 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
|
||||
/(?P<title>.*)'''
|
||||
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
|
||||
'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
|
||||
'md5': 'b8acb347177c680ff18a292aa2166f80',
|
||||
'info_dict': {
|
||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||
'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
|
||||
'ext': 'mp4',
|
||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
'timestamp': 1376798400,
|
||||
'upload_date': '20130818',
|
||||
'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
|
||||
'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
|
||||
'timestamp': 1598670000,
|
||||
'upload_date': '20200829',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(?:full-episodes|shows(?=/[^/]+/full-episodes))
|
||||
/(?P<id>[^?]+)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
|
||||
'info_dict': {
|
||||
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
|
||||
'title': 'November 28, 2016 - Ryan Speedo Green',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
return videos_info
|
||||
|
||||
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'info_dict': {
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'title': 'Twitter Users Share Summer Plans',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||
'info_dict': {
|
||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
# It's really reported to be published on year 2077
|
||||
'upload_date': '20770610',
|
||||
'timestamp': 3390510600,
|
||||
'subtitles': {
|
||||
'en': 'mincount:3',
|
||||
},
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||
'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||
'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
|
||||
'ext': 'mp4',
|
||||
'title': 'Josh Investigates',
|
||||
'description': 'Steht uns das Ende der Welt bevor?',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mrss_url = self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
||||
|
||||
class ComedyCentralShortnameIE(InfoExtractor):
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
|
||||
_TESTS = [{
|
||||
'url': ':tds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':thedailyshow',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':theopposition',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
shortcut_map = {
|
||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'accountOverride': 'intl.mtvi.com',
|
||||
'arcEp': 'web.cc.tv',
|
||||
'ep': 'b9032c3a',
|
||||
'imageEp': 'web.cc.tv',
|
||||
'mgid': uri,
|
||||
}
|
||||
return self.url_result(shortcut_map[video_id])
|
||||
|
||||
@@ -17,7 +17,7 @@ import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar_Cookie,
|
||||
compat_cookies,
|
||||
compat_cookies_SimpleCookie,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
@@ -230,8 +230,10 @@ class InfoExtractor(object):
|
||||
uploader: Full name of the video uploader.
|
||||
license: License name the video is licensed under.
|
||||
creator: The creator of the video.
|
||||
release_timestamp: UNIX timestamp of the moment the video was released.
|
||||
release_date: The date (YYYYMMDD) when the video was released.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
timestamp: UNIX timestamp of the moment the video became available
|
||||
(uploaded).
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
@@ -1273,6 +1275,7 @@ class InfoExtractor(object):
|
||||
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
author = e.get('author')
|
||||
info.update({
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
@@ -1280,7 +1283,11 @@ class InfoExtractor(object):
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'uploader': str_or_none(e.get('author')),
|
||||
# author can be an instance of 'Organization' or 'Person' types.
|
||||
# both types can have 'name' property(inherited from 'Thing' type). [1]
|
||||
# however some websites are using 'Text' type instead.
|
||||
# 1. https://schema.org/VideoObject
|
||||
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
@@ -2064,7 +2071,7 @@ class InfoExtractor(object):
|
||||
})
|
||||
return entries
|
||||
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||
res = self._download_xml_handle(
|
||||
mpd_url, video_id,
|
||||
note=note or 'Downloading MPD manifest',
|
||||
@@ -2078,10 +2085,9 @@ class InfoExtractor(object):
|
||||
mpd_base_url = base_url(urlh.geturl())
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
@@ -2359,15 +2365,7 @@ class InfoExtractor(object):
|
||||
else:
|
||||
# Assuming direct URL to unfragmented media.
|
||||
f['url'] = base_url
|
||||
|
||||
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
|
||||
# is not necessarily unique within a Period thus formats with
|
||||
# the same `format_id` are quite possible. There are numerous examples
|
||||
# of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/13919)
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
formats.append(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
@@ -2903,10 +2901,10 @@ class InfoExtractor(object):
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
||||
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||
req = sanitized_Request(url)
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
|
||||
@@ -8,11 +8,14 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
@@ -116,8 +119,30 @@ class CSpanIE(InfoExtractor):
|
||||
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||
base_url=url)
|
||||
add_referer(info['formats'])
|
||||
for subtitles in info['subtitles'].values():
|
||||
for subtitle in subtitles:
|
||||
ext = determine_ext(subtitle['url'])
|
||||
if ext == 'php':
|
||||
ext = 'vtt'
|
||||
subtitle['ext'] = ext
|
||||
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||
return merge_dicts(info, ld_info)
|
||||
title = get_element_by_class('video-page-title', webpage) or \
|
||||
self._og_search_title(webpage)
|
||||
description = get_element_by_attribute('itemprop', 'description', webpage) or \
|
||||
self._html_search_meta(['og:description', 'description'], webpage)
|
||||
return merge_dicts(info, ld_info, {
|
||||
'title': title,
|
||||
'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
|
||||
'description': description,
|
||||
'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
|
||||
'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
|
||||
'duration': int_or_none(self._search_regex(
|
||||
r'jwsetup\.seclength\s*=\s*(\d+);',
|
||||
webpage, 'duration', fatal=False)),
|
||||
'view_count': str_to_int(self._search_regex(
|
||||
r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
|
||||
webpage, 'views', fatal=False)),
|
||||
})
|
||||
|
||||
# Obsolete
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
|
||||
@@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
def _call_api(self, path, video_id, query=None):
|
||||
headers = {}
|
||||
if self._auth_token:
|
||||
headers['X-Auth-Token'] = self._auth_token
|
||||
result = self._download_json(
|
||||
self._API_BASE_URL + path, video_id, headers=headers)
|
||||
self._API_BASE_URL + path, video_id, headers=headers, query=query)
|
||||
self._handle_errors(result)
|
||||
return result['data']
|
||||
|
||||
@@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for encoding in media.get('encodings', []):
|
||||
m3u8_url = encoding.get('master_playlist_url')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
for encoding_format in ('m3u8', 'mpd'):
|
||||
media = self._call_api('media/' + video_id, video_id, query={
|
||||
'encodingsNew': 'true',
|
||||
'encodingsFormat': encoding_format,
|
||||
})
|
||||
for encoding in media.get('encodings', []):
|
||||
playlist_url = encoding.get('master_playlist_url')
|
||||
if encoding_format == 'm3u8':
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
playlist_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif encoding_format == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
playlist_url, video_id, mpd_id='dash', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = media['title']
|
||||
|
||||
subtitles = {}
|
||||
for closed_caption in media.get('closed_captions', []):
|
||||
sub_url = closed_caption.get('file')
|
||||
@@ -132,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collection'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.curiositystream.com/collection/2',
|
||||
'info_dict': {
|
||||
@@ -140,10 +153,13 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/collections/36',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
||||
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# From https://gdcvault.com/play/1016624, empty speakerVideo
|
||||
'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
|
||||
'info_dict': {
|
||||
'id': '201210-822101_1349794556671DDDD',
|
||||
'ext': 'flv',
|
||||
'title': 'Pre-launch - Preparing to Take the Plunge',
|
||||
},
|
||||
}, {
|
||||
# From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
|
||||
'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_mp4(self, metadata):
|
||||
@@ -84,26 +96,20 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
'format_id': audio.get('code'),
|
||||
})
|
||||
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(slide_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'slide deck video',
|
||||
'quality': -2,
|
||||
'preference': -2,
|
||||
'format_id': 'slides',
|
||||
})
|
||||
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'speaker video',
|
||||
'quality': -1,
|
||||
'preference': -1,
|
||||
'format_id': 'speaker',
|
||||
})
|
||||
for video_key, format_id, preference in (
|
||||
('slide', 'slides', -2), ('speaker', 'speaker', -1)):
|
||||
video_path = xpath_text(metadata, './%sVideo' % video_key)
|
||||
if not video_path:
|
||||
continue
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': '%s video' % video_key,
|
||||
'quality': preference,
|
||||
'preference': preference,
|
||||
'format_id': format_id,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
+129
-30
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,11 +11,13 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
(?:www\.)?(?P<host>d
|
||||
@@ -24,7 +27,7 @@ class DPlayIE(InfoExtractor):
|
||||
)
|
||||
)|
|
||||
(?P<subdomain_country>es|it)\.dplay\.com
|
||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
||||
)/[^/]+''' + _PATH_REGEX
|
||||
|
||||
_TESTS = [{
|
||||
# non geo restricted, via secure api, unsigned download hls URL
|
||||
@@ -151,56 +154,79 @@ class DPlayIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _process_errors(self, e, geo_countries):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
elif error_code in ('access.denied.missingpackage', 'invalid.token'):
|
||||
raise ExtractorError(
|
||||
'This video is only available for registered users. You may want to use --cookies.', expected=True)
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['Authorization'] = 'Bearer ' + self._download_json(
|
||||
disco_base + 'token', display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
streaming = self._download_json(
|
||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||
video_id, headers=headers)['data']['attributes']['streaming']
|
||||
streaming_list = []
|
||||
for format_id, format_dict in streaming.items():
|
||||
streaming_list.append({
|
||||
'type': format_id,
|
||||
'url': format_dict.get('url'),
|
||||
})
|
||||
return streaming_list
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||
geo_countries = [country.upper()]
|
||||
self._initialize_geo_bypass({
|
||||
'countries': geo_countries,
|
||||
})
|
||||
disco_base = 'https://%s/' % disco_host
|
||||
token = self._download_json(
|
||||
disco_base + 'token', display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'Authorization': 'Bearer ' + token,
|
||||
}
|
||||
video = self._download_json(
|
||||
disco_base + 'content/videos/' + display_id, display_id,
|
||||
headers=headers, query={
|
||||
'fields[channel]': 'name',
|
||||
'fields[image]': 'height,src,width',
|
||||
'fields[show]': 'name',
|
||||
'fields[tag]': 'name',
|
||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
self._update_disco_api_headers(headers, disco_base, display_id, realm)
|
||||
try:
|
||||
video = self._download_json(
|
||||
disco_base + 'content/videos/' + display_id, display_id,
|
||||
headers=headers, query={
|
||||
'fields[channel]': 'name',
|
||||
'fields[image]': 'height,src,width',
|
||||
'fields[show]': 'name',
|
||||
'fields[tag]': 'name',
|
||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name'].strip()
|
||||
formats = []
|
||||
try:
|
||||
streaming = self._download_json(
|
||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||
display_id, headers=headers)['data']['attributes']['streaming']
|
||||
streaming = self._download_video_playback_info(
|
||||
disco_base, video_id, headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
elif error_code == 'access.denied.missingpackage':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
for format_id, format_dict in streaming.items():
|
||||
for format_dict in streaming:
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = format_dict.get('type')
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
@@ -248,7 +274,7 @@ class DPlayIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'description': strip_or_none(info.get('description')),
|
||||
'duration': float_or_none(info.get('videoDuration'), 1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
@@ -268,3 +294,76 @@ class DPlayIE(InfoExtractor):
|
||||
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, host, 'dplay' + country, country)
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
'id': '1140794',
|
||||
'display_id': 'property-brothers-forever-home/food-and-family',
|
||||
'ext': 'mp4',
|
||||
'title': 'Food and Family',
|
||||
'description': 'The brothers help a Richmond family expand their single-level home.',
|
||||
'duration': 2583.113,
|
||||
'timestamp': 1609304400,
|
||||
'upload_date': '20201230',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Property Brothers: Forever Home',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}]
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
'product': 'dplus_us',
|
||||
},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
+35
-185
@@ -1,193 +1,43 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
class DreiSatIE(ZDFIE):
|
||||
IE_NAME = '3sat'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||
'md5': 'be37228896d30a88f315b638900a026e',
|
||||
'info_dict': {
|
||||
'id': '45918',
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': 'SCHWEIZWEIT',
|
||||
'uploader_id': '100000210',
|
||||
'upload_date': '20140913'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_TESTS = [{
|
||||
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
|
||||
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
|
||||
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||
'info_dict': {
|
||||
'id': '141007_ab18_10wochensommer_film',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ab 18! - 10 Wochen Sommer',
|
||||
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
|
||||
'duration': 2660,
|
||||
'timestamp': 1608604200,
|
||||
'upload_date': '20201222',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
|
||||
'info_dict': {
|
||||
'id': '140913_sendung_schweizweit',
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'timestamp': 1410623100,
|
||||
'upload_date': '20140913'
|
||||
},
|
||||
]
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
param_groups = {}
|
||||
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
|
||||
group_id = param_group.get(self._xpath_ns(
|
||||
'id', 'http://www.w3.org/XML/1998/namespace'))
|
||||
params = {}
|
||||
for param in param_group:
|
||||
params[param.get('name')] = param.get('value')
|
||||
param_groups[group_id] = params
|
||||
|
||||
formats = []
|
||||
for video in smil.findall(self._xpath_ns('.//video', namespace)):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
group_id = video.get('paramGroup')
|
||||
param_group = param_groups[group_id]
|
||||
for proto in param_group['protocols'].split(','):
|
||||
formats.append({
|
||||
'url': '%s://%s' % (proto, param_group['host']),
|
||||
'app': param_group['app'],
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': '%s-%d' % (proto, bitrate),
|
||||
'tbr': bitrate,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def extract_from_xml_url(self, video_id, xml_url):
|
||||
doc = self._download_xml(
|
||||
xml_url, video_id,
|
||||
note='Downloading video info',
|
||||
errnote='Failed to download video info')
|
||||
|
||||
status_code = xpath_text(doc, './status/statuscode')
|
||||
if status_code and status_code != 'ok':
|
||||
if status_code == 'notVisibleAnymore':
|
||||
message = 'Video %s is not available' % video_id
|
||||
else:
|
||||
message = '%s returned error: %s' % (self.IE_NAME, status_code)
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
title = xpath_text(doc, './/information/title', 'title', True)
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for fnode in doc.findall('.//formitaeten/formitaet'):
|
||||
video_url = xpath_text(fnode, 'url')
|
||||
if not video_url or video_url in urls:
|
||||
continue
|
||||
urls.append(video_url)
|
||||
|
||||
is_available = 'http://www.metafilegenerator' not in video_url
|
||||
geoloced = 'static_geoloced_online' in video_url
|
||||
if not is_available or geoloced:
|
||||
continue
|
||||
|
||||
format_id = fnode.attrib['basetype']
|
||||
format_m = re.match(r'''(?x)
|
||||
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
||||
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
||||
''', format_id)
|
||||
|
||||
ext = determine_ext(video_url, None) or format_m.group('container')
|
||||
|
||||
if ext == 'meta':
|
||||
continue
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
# the certificates are misconfigured (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8665)
|
||||
if video_url.startswith('https://'):
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
quality = xpath_text(fnode, './quality')
|
||||
if quality:
|
||||
format_id += '-' + quality
|
||||
|
||||
abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
|
||||
vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
|
||||
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d+)k', video_url, 'bitrate', None))
|
||||
if tbr and vbr and not abr:
|
||||
abr = tbr - vbr
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'acodec': format_m.group('acodec'),
|
||||
'vcodec': format_m.group('vcodec'),
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(xpath_text(fnode, './width')),
|
||||
'height': int_or_none(xpath_text(fnode, './height')),
|
||||
'filesize': int_or_none(xpath_text(fnode, './filesize')),
|
||||
'protocol': format_m.group('proto').lower(),
|
||||
})
|
||||
|
||||
geolocation = xpath_text(doc, './/details/geolocation')
|
||||
if not formats and geolocation and geolocation != 'none':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for node in doc.findall('.//teaserimages/teaserimage'):
|
||||
thumbnail_url = node.text
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
thumbnail_key = node.get('key')
|
||||
if thumbnail_key:
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': xpath_text(doc, './/information/detail'),
|
||||
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': xpath_text(doc, './/details/originChannelTitle'),
|
||||
'uploader_id': xpath_text(doc, './/details/originChannelId'),
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
|
||||
return self.extract_from_xml_url(video_id, details_url)
|
||||
}, {
|
||||
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
|
||||
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
|
||||
'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -12,26 +12,35 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class EggheadCourseIE(InfoExtractor):
|
||||
class EggheadBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, video_id, resource, fatal=True):
|
||||
return self._download_json(
|
||||
'https://app.egghead.io/api/v1/' + path,
|
||||
video_id, 'Downloading %s JSON' % resource, fatal=fatal)
|
||||
|
||||
|
||||
class EggheadCourseIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io course'
|
||||
IE_NAME = 'egghead:course'
|
||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
||||
'playlist_count': 29,
|
||||
'info_dict': {
|
||||
'id': '72',
|
||||
'id': '432655',
|
||||
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
||||
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
lessons = self._download_json(
|
||||
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
|
||||
playlist_id, 'Downloading course lessons JSON')
|
||||
series_path = 'series/' + playlist_id
|
||||
lessons = self._call_api(
|
||||
series_path + '/lessons', playlist_id, 'course lessons')
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
@@ -44,9 +53,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||
entries.append(self.url_result(
|
||||
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
|
||||
|
||||
course = self._download_json(
|
||||
'https://egghead.io/api/v1/series/%s' % playlist_id,
|
||||
playlist_id, 'Downloading course JSON', fatal=False) or {}
|
||||
course = self._call_api(
|
||||
series_path, playlist_id, 'course', False) or {}
|
||||
|
||||
playlist_id = course.get('id')
|
||||
if playlist_id:
|
||||
@@ -57,10 +65,10 @@ class EggheadCourseIE(InfoExtractor):
|
||||
course.get('description'))
|
||||
|
||||
|
||||
class EggheadLessonIE(InfoExtractor):
|
||||
class EggheadLessonIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io lesson'
|
||||
IE_NAME = 'egghead:lesson'
|
||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||
'info_dict': {
|
||||
@@ -74,7 +82,7 @@ class EggheadLessonIE(InfoExtractor):
|
||||
'upload_date': '20161209',
|
||||
'duration': 304,
|
||||
'view_count': 0,
|
||||
'tags': ['javascript', 'free'],
|
||||
'tags': 'count:2',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -83,13 +91,16 @@ class EggheadLessonIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
lesson = self._download_json(
|
||||
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
|
||||
lesson = self._call_api(
|
||||
'lessons/' + display_id, display_id, 'lesson')
|
||||
|
||||
lesson_id = compat_str(lesson['id'])
|
||||
title = lesson['title']
|
||||
|
||||
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor):
|
||||
'title': 'sexy babe softcore',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
'skip': 'Video not found',
|
||||
}, {
|
||||
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
|
||||
'md5': '1baa9602ede46ce904c431f5418d8916',
|
||||
@@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor):
|
||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
video_url = unescapeHTML(self._search_regex(
|
||||
r'<source src="([^"]+)', webpage, 'video url'))
|
||||
title = self._html_search_regex(
|
||||
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
(r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
|
||||
webpage, 'title')
|
||||
|
||||
return {
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
@@ -42,7 +42,10 @@ from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amara import AmaraIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .americastestkitchen import AmericasTestKitchenIE
|
||||
from .americastestkitchen import (
|
||||
AmericasTestKitchenIE,
|
||||
AmericasTestKitchenSeasonIE,
|
||||
)
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anvato import AnvatoIE
|
||||
from .aol import AolIE
|
||||
@@ -69,6 +72,7 @@ from .arte import (
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
@@ -87,11 +91,13 @@ from .awaan import (
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bandaichannel import BandaiChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
BBCCoUkIPlayerPlaylistIE,
|
||||
BBCCoUkIPlayerEpisodesIE,
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
@@ -126,7 +132,6 @@ from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
)
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
@@ -160,6 +165,7 @@ from .canvas import (
|
||||
CanvasIE,
|
||||
CanvasEenIE,
|
||||
VrtNUIE,
|
||||
DagelijkseKostIE,
|
||||
)
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
@@ -184,7 +190,11 @@ from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .cbssports import (
|
||||
CBSSportsEmbedIE,
|
||||
CBSSportsIE,
|
||||
TwentyFourSevenSportsIE,
|
||||
)
|
||||
from .ccc import (
|
||||
CCCIE,
|
||||
CCCPlaylistIE,
|
||||
@@ -232,11 +242,8 @@ from .cnn import (
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralFullEpisodesIE,
|
||||
ComedyCentralIE,
|
||||
ComedyCentralShortnameIE,
|
||||
ComedyCentralTVIE,
|
||||
ToshIE,
|
||||
)
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import (
|
||||
@@ -287,7 +294,11 @@ from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
)
|
||||
from .dplay import DPlayIE
|
||||
from .dplay import (
|
||||
DPlayIE,
|
||||
DiscoveryPlusIE,
|
||||
HGTVDeIE,
|
||||
)
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
@@ -416,6 +427,7 @@ from .gamestar import GameStarIE
|
||||
from .gaskrank import GaskrankIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .gedidigital import GediDigitalIE
|
||||
from .generic import GenericIE
|
||||
from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
@@ -470,8 +482,8 @@ from .hungama import (
|
||||
from .hypem import HypemIE
|
||||
from .ign import (
|
||||
IGNIE,
|
||||
OneUPIE,
|
||||
PCMagIE,
|
||||
IGNVideoIE,
|
||||
IGNArticleIE,
|
||||
)
|
||||
from .iheart import (
|
||||
IHeartRadioIE,
|
||||
@@ -526,7 +538,10 @@ from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
KhanAcademyUnitIE,
|
||||
)
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
@@ -583,7 +598,11 @@ from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .line import (
|
||||
LineTVIE,
|
||||
LineLiveIE,
|
||||
LineLiveChannelIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
@@ -591,10 +610,6 @@ from .linkedin import (
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .litv import LiTVIE
|
||||
from .livejournal import LiveJournalIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
LiveLeakEmbedIE,
|
||||
)
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
LivestreamOriginalIE,
|
||||
@@ -620,6 +635,7 @@ from .mangomolo import (
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .maoritv import MaoriTVIE
|
||||
from .markiza import (
|
||||
MarkizaIE,
|
||||
MarkizaPageIE,
|
||||
@@ -648,6 +664,11 @@ from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyIE,
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
)
|
||||
from .minds import (
|
||||
MindsIE,
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
)
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
@@ -658,7 +679,10 @@ from .mixcloud import (
|
||||
MixcloudUserIE,
|
||||
MixcloudPlaylistIE,
|
||||
)
|
||||
from .mlb import MLBIE
|
||||
from .mlb import (
|
||||
MLBIE,
|
||||
MLBVideoIE,
|
||||
)
|
||||
from .mnet import MnetIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import (
|
||||
@@ -859,6 +883,11 @@ from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
)
|
||||
from .palcomp3 import (
|
||||
PalcoMP3IE,
|
||||
PalcoMP3ArtistIE,
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
@@ -892,6 +921,7 @@ from .platzi import (
|
||||
from .playfm import PlayFMIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playstuff import PlayStuffIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
@@ -1016,6 +1046,7 @@ from .safari import (
|
||||
SafariApiIE,
|
||||
SafariCourseIE,
|
||||
)
|
||||
from .samplefocus import SampleFocusIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
@@ -1048,6 +1079,11 @@ from .shared import (
|
||||
VivoIE,
|
||||
)
|
||||
from .showroomlive import ShowRoomLiveIE
|
||||
from .simplecast import (
|
||||
SimplecastIE,
|
||||
SimplecastEpisodeIE,
|
||||
SimplecastPodcastIE,
|
||||
)
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skyit import (
|
||||
@@ -1113,6 +1149,10 @@ from .stitcher import (
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .spotify import (
|
||||
SpotifyIE,
|
||||
SpotifyShowIE,
|
||||
)
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
@@ -1128,6 +1168,11 @@ from .srgssr import (
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .storyfire import (
|
||||
StoryFireIE,
|
||||
StoryFireUserIE,
|
||||
StoryFireSeriesIE,
|
||||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
@@ -1226,6 +1271,10 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trovo import (
|
||||
TrovoIE,
|
||||
TrovoVodIE,
|
||||
)
|
||||
from .trunews import TruNewsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
@@ -1244,6 +1293,7 @@ from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
KatsomoIE,
|
||||
MTVUutisetArticleIE,
|
||||
)
|
||||
from .tv2dk import (
|
||||
TV2DKIE,
|
||||
@@ -1382,7 +1432,6 @@ from .vidme import (
|
||||
VidmeUserIE,
|
||||
VidmeUserLikesIE,
|
||||
)
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewlift import (
|
||||
ViewLiftIE,
|
||||
@@ -1442,6 +1491,7 @@ from .vrv import (
|
||||
VRVSeriesIE,
|
||||
)
|
||||
from .vshare import VShareIE
|
||||
from .vtm import VTMIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
@@ -1585,5 +1635,10 @@ from .zattoo import (
|
||||
ZattooLiveIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3IE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zoom import ZoomIE
|
||||
from .zype import ZypeIE
|
||||
|
||||
@@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
expected=True)
|
||||
elif '>You must log in to continue' in webpage:
|
||||
elif any(p in webpage for p in (
|
||||
'>You must log in to continue',
|
||||
'id="login_form"',
|
||||
'id="loginbutton"')):
|
||||
self.raise_login_required()
|
||||
|
||||
if not video_data and '/watchparty/' in url:
|
||||
|
||||
@@ -5,29 +5,23 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
|
||||
_TEST = {
|
||||
'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
|
||||
'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
|
||||
'info_dict': {
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'id': '6060988138001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
'timestamp': 1463332814,
|
||||
'upload_date': '20160515',
|
||||
'uploader_id': '6057949432001',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
ooyala_embed_code = self._search_regex(
|
||||
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
|
||||
bc_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)
|
||||
|
||||
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||
'info_dict': {
|
||||
'id': 'rendez-vous-au-pays-des-geeks',
|
||||
@@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140301',
|
||||
'timestamp': 1393642916,
|
||||
'timestamp': 1393700400,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# no thumbnail
|
||||
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
|
||||
</h1>|
|
||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||
).*?
|
||||
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
||||
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
||||
''',
|
||||
webpage, 'video data'))
|
||||
|
||||
video_url = video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
|
||||
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
webpage, 'thumbnail', default=None)
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', default=None)
|
||||
@@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if ext == 'mp3' else None,
|
||||
'uploader': uploader,
|
||||
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
|
||||
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
||||
'duration': int_or_none(video_data.get('data-duration')),
|
||||
}
|
||||
|
||||
@@ -383,6 +383,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# "<figure id=" pattern (#28792)
|
||||
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -399,7 +403,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||
r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._make_url_result(video_id)
|
||||
|
||||
@@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
|
||||
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
|
||||
for f in formats:
|
||||
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
||||
if wh:
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_TOKEN = None
|
||||
@@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with lang code
|
||||
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
|
||||
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
urlencode_postdata,
|
||||
@@ -102,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
|
||||
'format': 'mp4-408',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
|
||||
'url': 'https://www.gdcvault.com/play/1025699',
|
||||
'info_dict': {
|
||||
'id': '0_zagynv0a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tech Toolbox',
|
||||
'upload_date': '20190408',
|
||||
'uploader_id': 'joe@blazestreaming.com',
|
||||
'timestamp': 1554764629,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# HTML5 video
|
||||
'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self, webpage_url, display_id):
|
||||
@@ -175,7 +196,18 @@ class GDCVaultIE(InfoExtractor):
|
||||
|
||||
xml_name = self._html_search_regex(
|
||||
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename')
|
||||
start_page, 'xml filename', default=None)
|
||||
if not xml_name:
|
||||
info = self._parse_html5_media_entries(url, start_page, video_id)[0]
|
||||
info.update({
|
||||
'title': remove_start(self._search_regex(
|
||||
r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
|
||||
'title', default=None) or self._og_search_title(
|
||||
start_page, default=None), 'GDC Vault - '),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info
|
||||
embed_url = '%s/xml/%s' % (xml_root, xml_name)
|
||||
ie_key = 'DigitallySpeaking'
|
||||
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GediDigitalIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://video\.
|
||||
(?:
|
||||
(?:
|
||||
(?:espresso\.)?repubblica
|
||||
|lastampa
|
||||
|ilsecoloxix
|
||||
)|
|
||||
(?:
|
||||
iltirreno
|
||||
|messaggeroveneto
|
||||
|ilpiccolo
|
||||
|gazzettadimantova
|
||||
|mattinopadova
|
||||
|laprovinciapavese
|
||||
|tribunatreviso
|
||||
|nuovavenezia
|
||||
|gazzettadimodena
|
||||
|lanuovaferrara
|
||||
|corrierealpi
|
||||
|lasentinella
|
||||
)\.gelocal
|
||||
)\.it(?:/[^/]+){2,3}?/(?P<id>\d+)(?:[/?&#]|$)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
|
||||
'md5': '84658d7fb9e55a6e57ecc77b73137494',
|
||||
'info_dict': {
|
||||
'id': '121559',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
|
||||
'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_meta(
|
||||
['twitter:title', 'og:title'], webpage, fatal=True)
|
||||
player_data = re.findall(
|
||||
r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
|
||||
webpage)
|
||||
|
||||
formats = []
|
||||
duration = thumb = None
|
||||
for t, n, v in player_data:
|
||||
if t == 'format':
|
||||
if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
|
||||
continue
|
||||
elif n.endswith('-vod-ak'):
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
v, video_id, {'http': 'media.gedidigital.it'}))
|
||||
else:
|
||||
ext = determine_ext(v)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'format_id': n,
|
||||
'url': v,
|
||||
}
|
||||
if ext == 'mp3':
|
||||
abr = int_or_none(self._search_regex(
|
||||
r'-mp3-audio-(\d+)', v, 'abr', default=None))
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
'vcodec': 'none'
|
||||
})
|
||||
else:
|
||||
mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(1)),
|
||||
'vbr': int_or_none(mobj.group(2)),
|
||||
})
|
||||
if not f.get('vbr'):
|
||||
f['vbr'] = int_or_none(self._search_regex(
|
||||
r'-video-rrtv-(\d+)', v, 'abr', default=None))
|
||||
formats.append(f)
|
||||
elif t == 'param':
|
||||
if n in ['image_full', 'image']:
|
||||
thumb = v
|
||||
elif n == 'videoDuration':
|
||||
duration = int_or_none(v)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
['twitter:description', 'og:description', 'description'], webpage),
|
||||
'thumbnail': thumb or self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
}
|
||||
@@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .arkena import ArkenaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .kaltura import KalturaIE
|
||||
@@ -126,8 +125,11 @@ from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .vk import VKIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .simplecast import SimplecastIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1626,31 +1628,6 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20160409',
|
||||
},
|
||||
},
|
||||
# LiveLeak embed
|
||||
{
|
||||
'url': 'http://www.wykop.pl/link/3088787/',
|
||||
'md5': '7619da8c820e835bef21a1efa2a0fc71',
|
||||
'info_dict': {
|
||||
'id': '874_1459135191',
|
||||
'ext': 'mp4',
|
||||
'title': 'Man shows poor quality of new apartment building',
|
||||
'description': 'The wall is like a sand pile.',
|
||||
'uploader': 'Lake8737',
|
||||
},
|
||||
'add_ie': [LiveLeakIE.ie_key()],
|
||||
},
|
||||
# Another LiveLeak embed pattern (#13336)
|
||||
{
|
||||
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
|
||||
'info_dict': {
|
||||
'id': '2eb_1496309988',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thief robs place where everyone was armed',
|
||||
'description': 'md5:694d73ee79e535953cf2488562288eee',
|
||||
'uploader': 'brazilwtf',
|
||||
},
|
||||
'add_ie': [LiveLeakIE.ie_key()],
|
||||
},
|
||||
# Duplicated embedded video URLs
|
||||
{
|
||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||
@@ -2223,6 +2200,34 @@ class GenericIE(InfoExtractor):
|
||||
'duration': 1581,
|
||||
},
|
||||
},
|
||||
{
|
||||
# MyChannels SDK embed
|
||||
# https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
|
||||
'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
|
||||
'md5': '90c0699c37006ef18e198c032d81739c',
|
||||
'info_dict': {
|
||||
'id': '194165',
|
||||
'ext': 'mp4',
|
||||
'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
|
||||
'timestamp': 1611740340,
|
||||
'upload_date': '20210127',
|
||||
'duration': 159,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Simplecast player embed
|
||||
'url': 'https://www.bio.org/podcast',
|
||||
'info_dict': {
|
||||
'id': 'podcast',
|
||||
'title': 'I AM BIO Podcast | BIO',
|
||||
},
|
||||
'playlist_mincount': 52,
|
||||
},
|
||||
{
|
||||
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
|
||||
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2462,6 +2467,9 @@ class GenericIE(InfoExtractor):
|
||||
webpage = self._webpage_read_content(
|
||||
full_response, url, video_id, prefix=first_bytes)
|
||||
|
||||
if '<title>DPG Media Privacy Gate</title>' in webpage:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||
@@ -2593,6 +2601,11 @@ class GenericIE(InfoExtractor):
|
||||
if arc_urls:
|
||||
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
|
||||
|
||||
mychannels_urls = MedialaanIE._extract_urls(webpage)
|
||||
if mychannels_urls:
|
||||
return self.playlist_from_matches(
|
||||
mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
@@ -2744,6 +2757,11 @@ class GenericIE(InfoExtractor):
|
||||
if odnoklassniki_url:
|
||||
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
|
||||
|
||||
# Look for sibnet embedded player
|
||||
sibnet_urls = VKIE._extract_sibnet_urls(webpage)
|
||||
if sibnet_urls:
|
||||
return self.playlist_from_matches(sibnet_urls, video_id, video_title)
|
||||
|
||||
# Look for embedded ivi player
|
||||
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
@@ -2769,6 +2787,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
|
||||
|
||||
# Look for Simplecast embeds
|
||||
simplecast_urls = SimplecastIE._extract_urls(webpage)
|
||||
if simplecast_urls:
|
||||
return self.playlist_from_matches(
|
||||
simplecast_urls, video_id, video_title)
|
||||
|
||||
# Look for BBC iPlayer embed
|
||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||
if matches:
|
||||
@@ -2914,7 +2938,7 @@ class GenericIE(InfoExtractor):
|
||||
webpage)
|
||||
if not mobj:
|
||||
mobj = re.search(
|
||||
r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
|
||||
r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'MLB')
|
||||
@@ -3129,11 +3153,6 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
|
||||
|
||||
# Look for LiveLeak embeds
|
||||
liveleak_urls = LiveLeakIE._extract_urls(webpage)
|
||||
if liveleak_urls:
|
||||
return self.playlist_from_matches(liveleak_urls, video_id, video_title)
|
||||
|
||||
# Look for 3Q SDN embeds
|
||||
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
|
||||
if threeqsdn_url:
|
||||
@@ -3361,6 +3380,9 @@ class GenericIE(InfoExtractor):
|
||||
'url': src,
|
||||
'ext': (mimetype2ext(src_type)
|
||||
or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
|
||||
'http_headers': {
|
||||
'Referer': full_response.geturl(),
|
||||
},
|
||||
})
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
@@ -3429,7 +3451,7 @@ class GenericIE(InfoExtractor):
|
||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||
found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
|
||||
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -116,6 +118,18 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
|
||||
'info_dict': {
|
||||
'id': 'VDKA22600213',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pilot',
|
||||
'description': 'md5:74306df917cfc199d76d061d66bebdb4',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
@@ -149,14 +163,30 @@ class GoIE(AdobePassIE):
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||
), webpage, 'video id', default=video_id)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
||||
'data', default='{}'),
|
||||
display_id or video_id, fatal=False)
|
||||
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
||||
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
||||
video_id = None
|
||||
if layout:
|
||||
video_id = try_get(
|
||||
layout,
|
||||
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
||||
compat_str)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# page.analytics.videoIdCode
|
||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||
), webpage, 'video id', default=video_id)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
|
||||
@@ -7,6 +7,7 @@ from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
try_get,
|
||||
@@ -237,7 +238,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
if confirmation_webpage:
|
||||
confirm = self._search_regex(
|
||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||
'confirmation code', fatal=False)
|
||||
'confirmation code', default=None)
|
||||
if confirm:
|
||||
confirmed_source_url = update_url_query(source_url, {
|
||||
'confirm': confirm,
|
||||
@@ -245,6 +246,11 @@ class GoogleDriveIE(InfoExtractor):
|
||||
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
||||
if urlh and urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(urlh)
|
||||
else:
|
||||
self.report_warning(
|
||||
get_element_by_class('uc-error-subcaption', confirmation_webpage)
|
||||
or get_element_by_class('uc-error-caption', confirmation_webpage)
|
||||
or 'unable to extract confirmation code')
|
||||
|
||||
if not formats and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
+198
-173
@@ -3,230 +3,255 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class IGNIE(InfoExtractor):
|
||||
class IGNBaseIE(InfoExtractor):
|
||||
def _call_api(self, slug):
|
||||
return self._download_json(
|
||||
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
||||
|
||||
|
||||
class IGNIE(IGNBaseIE):
|
||||
"""
|
||||
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
||||
Some videos of it.ign.com are also supported
|
||||
"""
|
||||
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
|
||||
IE_NAME = 'ign.com'
|
||||
_PAGE_TYPE = 'video'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s'
|
||||
_EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
||||
'md5': 'febda82c4bafecd2d44b6e1a18a595f8',
|
||||
'info_dict': {
|
||||
'id': '8f862beef863986b2785559b9e1aa599',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Last of Us Review',
|
||||
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
||||
'timestamp': 1370440800,
|
||||
'upload_date': '20130605',
|
||||
'uploader_id': 'cberidon@ign.com',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
'info_dict': {
|
||||
'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '5ebbd138523268b93c9141af17bec937',
|
||||
'ext': 'mp4',
|
||||
'title': 'GTA 5 Video Review',
|
||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||
'timestamp': 1379339880,
|
||||
'upload_date': '20130916',
|
||||
'uploader_id': 'danieljkrupa@gmail.com',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||
'ext': 'mp4',
|
||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||
'timestamp': 1386878820,
|
||||
'upload_date': '20131212',
|
||||
'uploader_id': 'togilvie@ign.com',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||
'md5': '618fedb9c901fd086f6f093564ef8558',
|
||||
'info_dict': {
|
||||
'id': '078fdd005f6d3c02f63d795faa1b984f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||
'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
|
||||
'timestamp': 1408047180,
|
||||
'upload_date': '20140814',
|
||||
'uploader_id': 'jamesduggan1990@gmail.com',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# videoId pattern
|
||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _find_video_id(self, webpage):
|
||||
res_id = [
|
||||
r'"video_id"\s*:\s*"(.*?)"',
|
||||
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
|
||||
r'data-video-id="(.+?)"',
|
||||
r'<object id="vid_(.+?)"',
|
||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||
r'videoId"\s*:\s*"(.+?)"',
|
||||
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
|
||||
]
|
||||
return self._search_regex(res_id, webpage, 'video id', default=None)
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
||||
'md5': 'd2e1586d9987d40fad7867bf96a018ea',
|
||||
'info_dict': {
|
||||
'id': '8f862beef863986b2785559b9e1aa599',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Last of Us Review',
|
||||
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
||||
'timestamp': 1370440800,
|
||||
'upload_date': '20130605',
|
||||
'tags': 'count:9',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
||||
'info_dict': {
|
||||
'id': 'ee10d774b508c9b8ec07e763b9125b91',
|
||||
'ext': 'mp4',
|
||||
'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
|
||||
'description': 'md5:817a20299de610bd56f13175386da6fa',
|
||||
'timestamp': 1420571160,
|
||||
'upload_date': '20150106',
|
||||
'tags': 'count:4',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name_or_id = mobj.group('name_or_id')
|
||||
page_type = mobj.group('type')
|
||||
webpage = self._download_webpage(url, name_or_id)
|
||||
if page_type != 'video':
|
||||
multiple_urls = re.findall(
|
||||
r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||
webpage)
|
||||
if multiple_urls:
|
||||
entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': name_or_id,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
video_id = self._find_video_id(webpage)
|
||||
if not video_id:
|
||||
return self.url_result(self._search_regex(
|
||||
self._EMBED_RE, webpage, 'embed url'))
|
||||
return self._get_video_info(video_id)
|
||||
|
||||
def _get_video_info(self, video_id):
|
||||
api_data = self._download_json(
|
||||
self._API_URL_TEMPLATE % video_id, video_id)
|
||||
display_id = self._match_id(url)
|
||||
video = self._call_api(display_id)
|
||||
video_id = video['videoId']
|
||||
metadata = video['metadata']
|
||||
title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
|
||||
|
||||
formats = []
|
||||
m3u8_url = api_data['refs'].get('m3uUrl')
|
||||
refs = video.get('refs') or {}
|
||||
|
||||
m3u8_url = refs.get('m3uUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
f4m_url = api_data['refs'].get('f4mUrl')
|
||||
|
||||
f4m_url = refs.get('f4mUrl')
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
for asset in api_data['assets']:
|
||||
|
||||
for asset in (video.get('assets') or []):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': asset['url'],
|
||||
'tbr': asset.get('actual_bitrate_kbps'),
|
||||
'fps': asset.get('frame_rate'),
|
||||
'url': asset_url,
|
||||
'tbr': int_or_none(asset.get('bitrate'), 1000),
|
||||
'fps': int_or_none(asset.get('frame_rate')),
|
||||
'height': int_or_none(asset.get('height')),
|
||||
'width': int_or_none(asset.get('width')),
|
||||
})
|
||||
|
||||
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
|
||||
if mezzanine_url:
|
||||
formats.append({
|
||||
'ext': determine_ext(mezzanine_url, 'mp4'),
|
||||
'format_id': 'mezzanine',
|
||||
'preference': 1,
|
||||
'url': mezzanine_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'url': thumbnail['url']
|
||||
} for thumbnail in api_data.get('thumbnails', [])]
|
||||
thumbnails = []
|
||||
for thumbnail in (video.get('thumbnails') or []):
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
})
|
||||
|
||||
metadata = api_data['metadata']
|
||||
tags = []
|
||||
for tag in (video.get('tags') or []):
|
||||
display_name = tag.get('displayName')
|
||||
if not display_name:
|
||||
continue
|
||||
tags.append(display_name)
|
||||
|
||||
return {
|
||||
'id': api_data.get('videoId') or video_id,
|
||||
'title': metadata.get('longTitle') or metadata.get('name') or metadata.get['title'],
|
||||
'description': metadata.get('description'),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(metadata.get('description')),
|
||||
'timestamp': parse_iso8601(metadata.get('publishDate')),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
'display_id': metadata.get('slug') or video_id,
|
||||
'uploader_id': metadata.get('creator'),
|
||||
'display_id': display_id,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
|
||||
class OneUPIE(IGNIE):
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
|
||||
IE_NAME = '1up.com'
|
||||
|
||||
class IGNVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
||||
_TESTS = [{
|
||||
'url': 'http://gamevideos.1up.com/video/id/34976.html',
|
||||
'md5': 'c9cc69e07acb675c31a16719f909e347',
|
||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||
'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
|
||||
'info_dict': {
|
||||
'id': '34976',
|
||||
'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sniper Elite V2 - Trailer',
|
||||
'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
|
||||
'timestamp': 1313099220,
|
||||
'upload_date': '20110811',
|
||||
'uploader_id': 'IGN',
|
||||
'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
|
||||
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
||||
'timestamp': 1444665600,
|
||||
'upload_date': '20151012',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Youtube embed
|
||||
'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Twitter embed
|
||||
'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Vimeo embed
|
||||
'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
result = super(OneUPIE, self)._real_extract(url)
|
||||
result['id'] = mobj.group('name_or_id')
|
||||
return result
|
||||
video_id = self._match_id(url)
|
||||
req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
|
||||
url = self._request_webpage(req, video_id).geturl()
|
||||
ign_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
|
||||
if ign_url:
|
||||
return self.url_result(ign_url, IGNIE.ie_key())
|
||||
return self.url_result(url)
|
||||
|
||||
|
||||
class PCMagIE(IGNIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
|
||||
IE_NAME = 'pcmag'
|
||||
|
||||
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
|
||||
|
||||
class IGNArticleIE(IGNBaseIE):
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||
_PAGE_TYPE = 'article'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||
'md5': '212d6154fd0361a2781075f1febbe9ad',
|
||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
'info_dict': {
|
||||
'id': 'ee10d774b508c9b8ec07e763b9125b91',
|
||||
'ext': 'mp4',
|
||||
'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
|
||||
'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
|
||||
'timestamp': 1420571160,
|
||||
'upload_date': '20150106',
|
||||
'uploader_id': 'cozzipix@gmail.com',
|
||||
}
|
||||
'id': '524497489e4e8ff5848ece34',
|
||||
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '5ebbd138523268b93c9141af17bec937',
|
||||
'ext': 'mp4',
|
||||
'title': 'GTA 5 Video Review',
|
||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||
'timestamp': 1379339880,
|
||||
'upload_date': '20130916',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||
'ext': 'mp4',
|
||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||
'timestamp': 1386878820,
|
||||
'upload_date': '20131212',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'playlist_items': '2-3',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
|
||||
'md5': '94130c1ca07ba0adb6088350681f16c1',
|
||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||
'info_dict': {
|
||||
'id': '042e560ba94823d43afcb12ddf7142ca',
|
||||
'ext': 'mp4',
|
||||
'title': 'HTC\'s Weird New Re Camera - What\'s New Now',
|
||||
'description': 'md5:53433c45df96d2ea5d0fda18be2ca908',
|
||||
'timestamp': 1412953920,
|
||||
'upload_date': '20141010',
|
||||
'uploader_id': 'chris_snyder@pcmag.com',
|
||||
}
|
||||
'id': '53ee806780a81ec46e0790f8',
|
||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# videoId pattern
|
||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Youtube embed
|
||||
'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# IMDB embed
|
||||
'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Facebook embed
|
||||
'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Brightcove embed
|
||||
'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
article = self._call_api(display_id)
|
||||
|
||||
def entries():
|
||||
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
|
||||
if media_url:
|
||||
yield self.url_result(media_url, IGNIE.ie_key())
|
||||
for content in (article.get('content') or []):
|
||||
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
||||
yield self.url_result(video_url)
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), article.get('articleId'),
|
||||
strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by britneyspears',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1453760977,
|
||||
'upload_date': '20160125',
|
||||
'uploader_id': 'britneyspears',
|
||||
@@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Post by instagram',
|
||||
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
||||
},
|
||||
}, {
|
||||
# IGTV
|
||||
'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
|
||||
'info_dict': {
|
||||
'id': 'BkfuX9UB-eK',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fingerboarding Tricks with @cass.fb',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 53.83,
|
||||
'timestamp': 1530032919,
|
||||
'upload_date': '20180626',
|
||||
'uploader_id': 'instagram',
|
||||
'uploader': 'Instagram',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
@@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
title = media.get('title')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
duration = float_or_none(media.get('video_duration'))
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
@@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'title': node.get('title') or 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'duration': float_or_none(node.get('video_duration')),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
@@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'title': title or 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
|
||||
@@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
|
||||
'id': '196219',
|
||||
'display_id': 'stories-from-emona-i',
|
||||
'ext': 'flac',
|
||||
'title': 'Maya Filipič - Stories from Emona I',
|
||||
'artist': 'Maya Filipič',
|
||||
# 'title': 'Maya Filipič - Stories from Emona I',
|
||||
'title': 'Stories from Emona I',
|
||||
# 'artist': 'Maya Filipič',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1217438117,
|
||||
'upload_date': '20080730',
|
||||
'license': 'by-nc-nd',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'average_rating': int,
|
||||
'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, resource, resource_id):
|
||||
path = '/api/%ss' % resource
|
||||
rand = compat_str(random.random())
|
||||
return self._download_json(
|
||||
'https://www.jamendo.com' + path, resource_id, query={
|
||||
'id[]': resource_id,
|
||||
}, headers={
|
||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||
})[0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||
webpage = self._download_webpage(
|
||||
'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
models = self._parse_json(self._html_search_regex(
|
||||
r"data-bundled-models='([^']+)",
|
||||
webpage, 'bundled models'), track_id)
|
||||
track = models['track']['models'][0]
|
||||
# webpage = self._download_webpage(
|
||||
# 'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
# models = self._parse_json(self._html_search_regex(
|
||||
# r"data-bundled-models='([^']+)",
|
||||
# webpage, 'bundled models'), track_id)
|
||||
# track = models['track']['models'][0]
|
||||
track = self._call_api('track', track_id)
|
||||
title = track_name = track['name']
|
||||
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
artist = get_model('artist')
|
||||
artist_name = artist.get('name')
|
||||
if artist_name:
|
||||
title = '%s - %s' % (artist_name, title)
|
||||
album = get_model('album')
|
||||
# get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
# artist = get_model('artist')
|
||||
# artist_name = artist.get('name')
|
||||
# if artist_name:
|
||||
# title = '%s - %s' % (artist_name, title)
|
||||
# album = get_model('album')
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
@@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
urls = []
|
||||
thumbnails = []
|
||||
for _, covers in track.get('cover', {}).items():
|
||||
for covers in (track.get('cover') or {}).values():
|
||||
for cover_id, cover_url in covers.items():
|
||||
if not cover_url or cover_url in urls:
|
||||
continue
|
||||
@@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
|
||||
})
|
||||
|
||||
tags = []
|
||||
for tag in track.get('tags', []):
|
||||
for tag in (track.get('tags') or []):
|
||||
tag_name = tag.get('name')
|
||||
if not tag_name:
|
||||
continue
|
||||
tags.append(tag_name)
|
||||
|
||||
stats = track.get('stats') or {}
|
||||
license = track.get('licenseCC') or []
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
@@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': track.get('description'),
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
'artist': artist_name,
|
||||
# 'artist': artist_name,
|
||||
'track': track_name,
|
||||
'album': album.get('name'),
|
||||
# 'album': album.get('name'),
|
||||
'formats': formats,
|
||||
'license': '-'.join(track.get('licenseCC', [])) or None,
|
||||
'license': '-'.join(license) if license else None,
|
||||
'timestamp': int_or_none(track.get('dateCreated')),
|
||||
'view_count': int_or_none(stats.get('listenedAll')),
|
||||
'like_count': int_or_none(stats.get('favorited')),
|
||||
@@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
class JamendoAlbumIE(JamendoIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
@@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
}
|
||||
}
|
||||
|
||||
def _call_api(self, resource, resource_id):
|
||||
path = '/api/%ss' % resource
|
||||
rand = compat_str(random.random())
|
||||
return self._download_json(
|
||||
'https://www.jamendo.com' + path, resource_id, query={
|
||||
'id[]': resource_id,
|
||||
}, headers={
|
||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||
})[0]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
@@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
album_name = album.get('name')
|
||||
|
||||
entries = []
|
||||
for track in album.get('tracks', []):
|
||||
for track in (album.get('tracks') or []):
|
||||
track_id = track.get('id')
|
||||
if not track_id:
|
||||
continue
|
||||
|
||||
@@ -3,10 +3,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -23,7 +26,7 @@ class KakaoIE(InfoExtractor):
|
||||
'id': '301965083',
|
||||
'ext': 'mp4',
|
||||
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
||||
'uploader_id': 2671005,
|
||||
'uploader_id': '2671005',
|
||||
'uploader': '그랑그랑이',
|
||||
'timestamp': 1488160199,
|
||||
'upload_date': '20170227',
|
||||
@@ -36,11 +39,15 @@ class KakaoIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||
'uploader_id': 2653210,
|
||||
'uploader_id': '2653210',
|
||||
'uploader': '쇼! 음악중심',
|
||||
'timestamp': 1485684628,
|
||||
'upload_date': '20170129',
|
||||
}
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -68,8 +75,7 @@ class KakaoIE(InfoExtractor):
|
||||
'fields': ','.join([
|
||||
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
|
||||
'description', 'channelId', 'createTime', 'duration', 'playCount',
|
||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name',
|
||||
'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
|
||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
|
||||
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
|
||||
}
|
||||
|
||||
@@ -82,24 +88,28 @@ class KakaoIE(InfoExtractor):
|
||||
|
||||
title = clip.get('title') or clip_link.get('displayTitle')
|
||||
|
||||
query['tid'] = impress.get('tid', '')
|
||||
query.update({
|
||||
'fields': '-*,code,message,url',
|
||||
'tid': impress.get('tid') or '',
|
||||
})
|
||||
|
||||
formats = []
|
||||
for fmt in clip.get('videoOutputList', []):
|
||||
for fmt in (clip.get('videoOutputList') or []):
|
||||
try:
|
||||
profile_name = fmt['profile']
|
||||
if profile_name == 'AUDIO':
|
||||
continue
|
||||
query.update({
|
||||
'profile': profile_name,
|
||||
'fields': '-*,url',
|
||||
})
|
||||
fmt_url_json = self._download_json(
|
||||
api_base + 'raw/videolocation', display_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query=query, headers=player_header, fatal=False)
|
||||
|
||||
if fmt_url_json is None:
|
||||
query['profile'] = profile_name
|
||||
try:
|
||||
fmt_url_json = self._download_json(
|
||||
api_base + 'raw/videolocation', display_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query=query, headers=player_header)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
resp = self._parse_json(e.cause.read().decode(), video_id)
|
||||
if resp.get('code') == 'GeoBlocked':
|
||||
self.raise_geo_restricted()
|
||||
continue
|
||||
|
||||
fmt_url = fmt_url_json['url']
|
||||
@@ -116,27 +126,13 @@ class KakaoIE(InfoExtractor):
|
||||
pass
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbs = []
|
||||
for thumb in clip.get('clipChapterThumbnailList', []):
|
||||
thumbs.append({
|
||||
'url': thumb.get('thumbnailUrl'),
|
||||
'id': compat_str(thumb.get('timeInSec')),
|
||||
'preference': -1 if thumb.get('isDefault') else 0
|
||||
})
|
||||
top_thumbnail = clip.get('thumbnailUrl')
|
||||
if top_thumbnail:
|
||||
thumbs.append({
|
||||
'url': top_thumbnail,
|
||||
'preference': 10,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(clip.get('description')),
|
||||
'uploader': clip_link.get('channel', {}).get('name'),
|
||||
'uploader_id': clip_link.get('channelId'),
|
||||
'thumbnails': thumbs,
|
||||
'uploader': try_get(clip_link, lambda x: x['channel']['name']),
|
||||
'uploader_id': str_or_none(clip_link.get('channelId')),
|
||||
'thumbnail': clip.get('thumbnailUrl'),
|
||||
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
||||
'duration': int_or_none(clip.get('duration')),
|
||||
'view_count': int_or_none(clip.get('playCount')),
|
||||
|
||||
@@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor):
|
||||
def _extract_urls(webpage):
|
||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||
finditer = (
|
||||
re.finditer(
|
||||
list(re.finditer(
|
||||
r"""(?xs)
|
||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||
\{.*?
|
||||
@@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor):
|
||||
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||
""", webpage)
|
||||
or re.finditer(
|
||||
""", webpage))
|
||||
or list(re.finditer(
|
||||
r'''(?xs)
|
||||
(?P<q1>["'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
@@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor):
|
||||
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
||||
)
|
||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage)
|
||||
or re.finditer(
|
||||
''', webpage))
|
||||
or list(re.finditer(
|
||||
r'''(?xs)
|
||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
|
||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
|
||||
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||
(?:(?!(?P=q1)).)*
|
||||
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
||||
(?:(?!(?P=q1)).)*
|
||||
(?P=q1)
|
||||
''', webpage)
|
||||
''', webpage))
|
||||
)
|
||||
urls = []
|
||||
for mobj in finditer:
|
||||
|
||||
@@ -1,82 +1,107 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class KhanAcademyIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
IE_NAME = 'KhanAcademy'
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.khanacademy.org/video/one-time-pad',
|
||||
'md5': '7b391cce85e758fb94f763ddc1bbb979',
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video['youtubeId'],
|
||||
'id': video.get('slug'),
|
||||
'title': video.get('title'),
|
||||
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'description': video.get('description'),
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
component_props = self._parse_json(self._download_json(
|
||||
'https://www.khanacademy.org/api/internal/graphql',
|
||||
display_id, query={
|
||||
'hash': 1604303425,
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'queryParams': '',
|
||||
}),
|
||||
})['data']['contentJson'], display_id)['componentProps']
|
||||
return self._parse_component_props(component_props)
|
||||
|
||||
|
||||
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy'
|
||||
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||
'info_dict': {
|
||||
'id': 'one-time-pad',
|
||||
'ext': 'webm',
|
||||
'id': 'FlIG3TvQCBQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'The one-time pad',
|
||||
'description': 'The perfect cipher',
|
||||
'duration': 176,
|
||||
'uploader': 'Brit Cruise',
|
||||
'uploader_id': 'khanacademy',
|
||||
'upload_date': '20120411',
|
||||
'timestamp': 1334170113,
|
||||
'license': 'cc-by-nc-sa',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
||||
}
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
video = component_props['tutorialPageData']['contentModel']
|
||||
info = self._parse_video(video)
|
||||
author_names = video.get('authorNames')
|
||||
info.update({
|
||||
'uploader': ', '.join(author_names) if author_names else None,
|
||||
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||
'license': video.get('kaUserLicense'),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy:unit'
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||
'info_dict': {
|
||||
'id': 'cryptography',
|
||||
'title': 'Journey into cryptography',
|
||||
'title': 'Cryptography',
|
||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
'playlist_mincount': 31,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
def _parse_component_props(self, component_props):
|
||||
curation = component_props['curation']
|
||||
|
||||
if m.group('key') == 'video':
|
||||
data = self._download_json(
|
||||
'http://api.khanacademy.org/api/v1/videos/' + video_id,
|
||||
video_id, 'Downloading video info')
|
||||
|
||||
upload_date = unified_strdate(data['date_added'])
|
||||
uploader = ', '.join(data['author_names'])
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': data['url'],
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['image_url'],
|
||||
'duration': data['duration'],
|
||||
'description': data['description'],
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
entries = []
|
||||
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||
chapter_info = {
|
||||
'chapter': tutorial.get('title'),
|
||||
'chapter_number': tutorial_number,
|
||||
'chapter_id': tutorial.get('id'),
|
||||
}
|
||||
else:
|
||||
# topic
|
||||
data = self._download_json(
|
||||
'http://api.khanacademy.org/api/v1/topic/' + video_id,
|
||||
video_id, 'Downloading topic info')
|
||||
for content_item in (tutorial.get('contentItems') or []):
|
||||
if content_item.get('kind') == 'Video':
|
||||
info = self._parse_video(content_item)
|
||||
info.update(chapter_info)
|
||||
entries.append(info)
|
||||
|
||||
entries = [
|
||||
{
|
||||
'_type': 'url',
|
||||
'url': c['url'],
|
||||
'id': c['id'],
|
||||
'title': c['title'],
|
||||
}
|
||||
for c in data['children'] if c['kind'] in ('Video', 'Topic')]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'entries': entries,
|
||||
}
|
||||
return self.playlist_result(
|
||||
entries, curation.get('unit'), curation.get('title'),
|
||||
curation.get('description'))
|
||||
|
||||
@@ -5,7 +5,12 @@ import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -57,6 +62,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
'description': stream_value.get('description'),
|
||||
'license': stream_value.get('license'),
|
||||
'timestamp': int_or_none(stream.get('timestamp')),
|
||||
'release_timestamp': int_or_none(stream_value.get('release_time')),
|
||||
'tags': stream_value.get('tags'),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'channel': try_get(signing_channel, lambda x: x['value']['title']),
|
||||
@@ -89,6 +95,8 @@ class LBRYIE(LBRYBaseIE):
|
||||
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||
'timestamp': 1595694354,
|
||||
'upload_date': '20200725',
|
||||
'release_timestamp': 1595340697,
|
||||
'release_date': '20200721',
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}
|
||||
@@ -103,6 +111,8 @@ class LBRYIE(LBRYBaseIE):
|
||||
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||
'timestamp': 1591312601,
|
||||
'upload_date': '20200604',
|
||||
'release_timestamp': 1591312421,
|
||||
'release_date': '20200604',
|
||||
'tags': list,
|
||||
'duration': 2570,
|
||||
'channel': 'The LBRY Foundation',
|
||||
@@ -110,6 +120,26 @@ class LBRYIE(LBRYBaseIE):
|
||||
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}, {
|
||||
# HLS
|
||||
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
|
||||
'md5': 'fc82f45ea54915b1495dd7cb5cc1289f',
|
||||
'info_dict': {
|
||||
'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
|
||||
'ext': 'mp4',
|
||||
'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁',
|
||||
'description': 'md5:9c539c6a03fb843956de61a4d5288d5e',
|
||||
'timestamp': 1618254123,
|
||||
'upload_date': '20210412',
|
||||
'release_timestamp': 1618254002,
|
||||
'release_date': '20210412',
|
||||
'tags': list,
|
||||
'duration': 554,
|
||||
'channel': 'Gardening In Canada',
|
||||
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'formats': 'mincount:3',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||
'only_matching': True,
|
||||
@@ -131,6 +161,9 @@ class LBRYIE(LBRYBaseIE):
|
||||
}, {
|
||||
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -139,6 +172,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||
else:
|
||||
display_id = display_id.replace(':', '#')
|
||||
display_id = compat_urllib_parse_unquote(display_id)
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._resolve_url(uri, display_id, 'stream')
|
||||
result_value = result['value']
|
||||
@@ -149,10 +183,18 @@ class LBRYIE(LBRYBaseIE):
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
info = self._parse_stream(result, url)
|
||||
urlh = self._request_webpage(
|
||||
streaming_url, display_id, note='Downloading streaming redirect url info')
|
||||
if determine_ext(urlh.geturl()) == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(info['formats'])
|
||||
else:
|
||||
info['url'] = streaming_url
|
||||
info.update({
|
||||
'id': claim_id,
|
||||
'title': title,
|
||||
'url': streaming_url,
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -174,17 +216,18 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, claim_id, url, page):
|
||||
def _fetch_page(self, claim_id, url, params, page):
|
||||
page += 1
|
||||
page_params = {
|
||||
'channel_ids': [claim_id],
|
||||
'claim_type': 'stream',
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
}
|
||||
page_params.update(params)
|
||||
result = self._call_api_proxy(
|
||||
'claim_search', claim_id, {
|
||||
'channel_ids': [claim_id],
|
||||
'claim_type': 'stream',
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
'stream_types': self._SUPPORTED_STREAM_TYPES,
|
||||
}, 'page %d' % page)
|
||||
'claim_search', claim_id, page_params, 'page %d' % page)
|
||||
for item in (result.get('items') or []):
|
||||
stream_claim_name = item.get('name')
|
||||
stream_claim_id = item.get('claim_id')
|
||||
@@ -205,8 +248,31 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
result = self._resolve_url(
|
||||
'lbry://' + display_id, display_id, 'channel')
|
||||
claim_id = result['claim_id']
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
content = qs.get('content', [None])[0]
|
||||
params = {
|
||||
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
|
||||
'order_by': {
|
||||
'new': ['release_time'],
|
||||
'top': ['effective_amount'],
|
||||
'trending': ['trending_group', 'trending_mixed'],
|
||||
}[qs.get('order', ['new'])[0]],
|
||||
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
|
||||
}
|
||||
duration = qs.get('duration', [None])[0]
|
||||
if duration:
|
||||
params['duration'] = {
|
||||
'long': '>=1200',
|
||||
'short': '<=240',
|
||||
}[duration]
|
||||
language = qs.get('language', ['all'])[0]
|
||||
if language != 'all':
|
||||
languages = [language]
|
||||
if language == 'en':
|
||||
languages.append('none')
|
||||
params['any_languages'] = languages
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, claim_id, url),
|
||||
functools.partial(self._fetch_page, claim_id, url, params),
|
||||
self._PAGE_SIZE)
|
||||
result_value = result.get('value') or {}
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -4,7 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LineTVIE(InfoExtractor):
|
||||
@@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
|
||||
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
||||
'view_count': video_info.get('meta', {}).get('count'),
|
||||
}
|
||||
|
||||
|
||||
class LineLiveBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
|
||||
|
||||
def _parse_broadcast_item(self, item):
|
||||
broadcast_id = compat_str(item['id'])
|
||||
title = item['title']
|
||||
is_live = item.get('isBroadcastingNow')
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail_url,
|
||||
})
|
||||
|
||||
channel = item.get('channel') or {}
|
||||
channel_id = str_or_none(channel.get('id'))
|
||||
|
||||
return {
|
||||
'id': broadcast_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': int_or_none(item.get('createdAt')),
|
||||
'channel': channel.get('name'),
|
||||
'channel_id': channel_id,
|
||||
'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
|
||||
'duration': int_or_none(item.get('archiveDuration')),
|
||||
'view_count': int_or_none(item.get('viewerCount')),
|
||||
'comment_count': int_or_none(item.get('chatCount')),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class LineLiveIE(LineLiveBaseIE):
|
||||
_VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
|
||||
'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
|
||||
'info_dict': {
|
||||
'id': '16331360',
|
||||
'title': '振りコピ講座😙😙😙',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1617095132,
|
||||
'upload_date': '20210330',
|
||||
'channel': '白川ゆめか',
|
||||
'channel_id': '4867368',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'is_live': False,
|
||||
}
|
||||
}, {
|
||||
# archiveStatus == 'DELETED'
|
||||
'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
|
||||
broadcast = self._download_json(
|
||||
self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
|
||||
broadcast_id)
|
||||
item = broadcast['item']
|
||||
info = self._parse_broadcast_item(item)
|
||||
protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
|
||||
formats = []
|
||||
for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
|
||||
if not v:
|
||||
continue
|
||||
if k == 'abr':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v, broadcast_id, 'mp4', protocol,
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'ext': 'mp4',
|
||||
'format_id': 'hls-' + k,
|
||||
'protocol': protocol,
|
||||
'url': v,
|
||||
}
|
||||
if not k.isdigit():
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
archive_status = item.get('archiveStatus')
|
||||
if archive_status != 'ARCHIVED':
|
||||
raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
|
||||
class LineLiveChannelIE(LineLiveBaseIE):
|
||||
_VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
|
||||
_TEST = {
|
||||
'url': 'https://live.line.me/channels/5893542',
|
||||
'info_dict': {
|
||||
'id': '5893542',
|
||||
'title': 'いくらちゃん',
|
||||
'description': 'md5:c3a4af801f43b2fac0b02294976580be',
|
||||
},
|
||||
'playlist_mincount': 29
|
||||
}
|
||||
|
||||
def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
|
||||
while True:
|
||||
for row in (archived_broadcasts.get('rows') or []):
|
||||
share_url = str_or_none(row.get('shareURL'))
|
||||
if not share_url:
|
||||
continue
|
||||
info = self._parse_broadcast_item(row)
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': share_url,
|
||||
'ie_key': LineLiveIE.ie_key(),
|
||||
})
|
||||
yield info
|
||||
if not archived_broadcasts.get('hasNextPage'):
|
||||
return
|
||||
archived_broadcasts = self._download_json(
|
||||
self._API_BASE_URL + channel_id + '/archived_broadcasts',
|
||||
channel_id, query={
|
||||
'lastId': info['id'],
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
|
||||
return self.playlist_result(
|
||||
self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
|
||||
channel_id, channel.get('title'), channel.get('information'))
|
||||
|
||||
@@ -1,191 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
'info_dict': {
|
||||
'id': '757_1364311680',
|
||||
'ext': 'mp4',
|
||||
'description': 'extremely bad day for this guy..!',
|
||||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident',
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||
'info_dict': {
|
||||
'id': 'f93_1390833151',
|
||||
'ext': 'mp4',
|
||||
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
||||
'uploader': 'ARD_Stinkt',
|
||||
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
}
|
||||
}, {
|
||||
# Prochan embed
|
||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
||||
'info_dict': {
|
||||
'id': '4f7_1392687779',
|
||||
'ext': 'mp4',
|
||||
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
|
||||
'uploader': 'CapObveus',
|
||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': 'Video is dead',
|
||||
}, {
|
||||
# Covers https://github.com/ytdl-org/youtube-dl/pull/5983
|
||||
# Multiple resolutions
|
||||
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
||||
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
|
||||
'info_dict': {
|
||||
'id': '801_1409392012',
|
||||
'ext': 'mp4',
|
||||
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
|
||||
'uploader': 'bony333',
|
||||
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
}
|
||||
}, {
|
||||
# Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
|
||||
'url': 'http://m.liveleak.com/view?i=763_1473349649',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': '763_1473349649',
|
||||
'ext': 'mp4',
|
||||
'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
|
||||
'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
|
||||
'uploader': 'Ziz',
|
||||
'upload_date': '20160908',
|
||||
'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?i=677_1439397581',
|
||||
'info_dict': {
|
||||
'id': '677_1439397581',
|
||||
'title': 'Fuel Depot in China Explosion caught on video',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No original video
|
||||
'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||
video_description = self._og_search_description(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'you confirm that you are ([0-9]+) years and over.',
|
||||
webpage, 'age limit', default=None))
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
if not entries:
|
||||
# Maybe an embed?
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
|
||||
webpage, 'embed URL')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
for idx, info_dict in enumerate(entries):
|
||||
formats = []
|
||||
for a_format in info_dict['formats']:
|
||||
if not a_format.get('height'):
|
||||
a_format['height'] = int_or_none(self._search_regex(
|
||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||
default=None))
|
||||
formats.append(a_format)
|
||||
|
||||
# Removing '.*.mp4' gives the raw video, which is essentially
|
||||
# the same video without the LiveLeak logo at the top (see
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/4768)
|
||||
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
|
||||
if a_format['url'] != orig_url:
|
||||
format_id = a_format.get('format_id')
|
||||
format_id = 'original' + ('-' + format_id if format_id else '')
|
||||
if self._is_valid_url(orig_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
|
||||
# Don't append entry ID for one-video pages to keep backward compatibility
|
||||
if len(entries) > 1:
|
||||
info_dict['id'] = '%s_%s' % (video_id, idx + 1)
|
||||
else:
|
||||
info_dict['id'] = video_id
|
||||
|
||||
info_dict.update({
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'age_limit': age_limit,
|
||||
'thumbnail': video_thumbnail,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
||||
|
||||
class LiveLeakEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
|
||||
|
||||
# See generic.py for actual test cases
|
||||
_TESTS = [{
|
||||
'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
kind, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if kind == 'f':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
liveleak_url = self._search_regex(
|
||||
r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||
webpage, 'LiveLeak URL', group='url')
|
||||
else:
|
||||
liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
|
||||
|
||||
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
||||
@@ -0,0 +1,31 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MaoriTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
|
||||
'md5': '5ade8ef53851b6a132c051b1cd858899',
|
||||
'info_dict': {
|
||||
'id': '4774724855001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kōrero Mai, Series 1 Episode 54',
|
||||
'upload_date': '20160226',
|
||||
'timestamp': 1456455018,
|
||||
'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
|
||||
'uploader_id': '1614493167001',
|
||||
},
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_id = self._search_regex(
|
||||
r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'BrightcoveNew', brightcove_id)
|
||||
@@ -15,33 +15,39 @@ from ..utils import (
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
|
||||
'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
|
||||
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||
'info_dict': {
|
||||
'id': '34934644',
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quad Cold',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'MowgliSB',
|
||||
'timestamp': 1603165266,
|
||||
'upload_date': '20201020',
|
||||
'uploader_id': 10619174,
|
||||
'uploader_id': '10619174',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/36787208',
|
||||
'url': 'https://medal.tv/clips/2um24TWdty0NA',
|
||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||
'info_dict': {
|
||||
'id': '36787208',
|
||||
'id': '2um24TWdty0NA',
|
||||
'ext': 'mp4',
|
||||
'title': 'u tk me i tk u bigger',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'Mimicc',
|
||||
'timestamp': 1605580939,
|
||||
'upload_date': '20201117',
|
||||
'uploader_id': 5156321,
|
||||
'uploader_id': '5156321',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -2,268 +2,113 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .gigya import GigyaBaseIE
|
||||
|
||||
from ..compat import compat_str
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class MedialaanIE(GigyaBaseIE):
|
||||
class MedialaanIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.|nieuws\.)?
|
||||
(?:
|
||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
||||
(?:
|
||||
video(?:/[^/]+/id/|/?\?.*?\baid=)|
|
||||
(?:[^/]+/)*
|
||||
)
|
||||
(?:embed\.)?mychannels.video/embed/|
|
||||
embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
|
||||
(?:www\.)?(?:
|
||||
(?:
|
||||
7sur7|
|
||||
demorgen|
|
||||
hln|
|
||||
joe|
|
||||
qmusic
|
||||
)\.be|
|
||||
(?:
|
||||
[abe]d|
|
||||
bndestem|
|
||||
destentor|
|
||||
gelderlander|
|
||||
pzc|
|
||||
tubantia|
|
||||
volkskrant
|
||||
)\.nl
|
||||
)/video/(?:[^/]+/)*[^/?&#]+~p
|
||||
)
|
||||
(?P<id>[^/?#&]+)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_NETRC_MACHINE = 'medialaan'
|
||||
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
|
||||
_SITE_TO_APP_ID = {
|
||||
'vtm': 'vtm_watch',
|
||||
'q2': 'q2',
|
||||
'vtmkzoom': 'vtmkzoom',
|
||||
}
|
||||
_TESTS = [{
|
||||
# vod
|
||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
|
||||
'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
|
||||
'info_dict': {
|
||||
'id': 'vtm_20170219_VM0678361_vtmwatch',
|
||||
'id': '193993',
|
||||
'ext': 'mp4',
|
||||
'title': 'Allemaal Chris afl. 6',
|
||||
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
|
||||
'timestamp': 1487533280,
|
||||
'upload_date': '20170219',
|
||||
'duration': 2562,
|
||||
'series': 'Allemaal Chris',
|
||||
'season': 'Allemaal Chris',
|
||||
'season_number': 1,
|
||||
'season_id': '256936078124527',
|
||||
'episode': 'Allemaal Chris afl. 6',
|
||||
'episode_number': 6,
|
||||
'episode_id': '256936078591527',
|
||||
'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
|
||||
'timestamp': 1611663540,
|
||||
'upload_date': '20210126',
|
||||
'duration': 238,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://vtm.be/video?aid=168332',
|
||||
'info_dict': {
|
||||
'id': '168332',
|
||||
'ext': 'mp4',
|
||||
'title': '"Veronique liegt!"',
|
||||
'description': 'md5:1385e2b743923afe54ba4adc38476155',
|
||||
'timestamp': 1489002029,
|
||||
'upload_date': '20170308',
|
||||
'duration': 96,
|
||||
},
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
|
||||
'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://vtm.be/video?aid=163157',
|
||||
'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
|
||||
'url': 'https://embed.mychannels.video/script/production/193993',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
||||
'url': 'https://embed.mychannels.video/production/193993',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# http/s redirect
|
||||
'url': 'https://vtmkzoom.be/video?aid=45724',
|
||||
'info_dict': {
|
||||
'id': '257136373657000',
|
||||
'ext': 'mp4',
|
||||
'title': 'K3 Dansstudio Ushuaia afl.6',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
'url': 'https://mychannels.video/embed/193993',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# nieuws.vtm.be
|
||||
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
|
||||
'url': 'https://embed.mychannels.video/embed/193993',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._logged_in = False
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required()
|
||||
|
||||
auth_data = {
|
||||
'APIKey': self._APIKEY,
|
||||
'sdk': 'js_6.1',
|
||||
'format': 'json',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
auth_info = self._gigya_login(auth_data)
|
||||
|
||||
self._uid = auth_info['UID']
|
||||
self._uid_signature = auth_info['UIDSignature']
|
||||
self._signature_timestamp = auth_info['signatureTimestamp']
|
||||
|
||||
self._logged_in = True
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
entries = []
|
||||
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
|
||||
mychannels_id = extract_attributes(element).get('data-mychannels-id')
|
||||
if mychannels_id:
|
||||
entries.append('https://mychannels.video/embed/' + mychannels_id)
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, site_id = mobj.group('id', 'site_id')
|
||||
production_id = self._match_id(url)
|
||||
production = self._download_json(
|
||||
'https://embed.mychannels.video/sdk/production/' + production_id,
|
||||
production_id, query={'options': 'UUUU_default'})['productions'][0]
|
||||
title = production['title']
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
|
||||
webpage, 'config', default='{}'), video_id,
|
||||
transform_source=lambda s: s.replace(
|
||||
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
|
||||
|
||||
vod_id = config.get('vodId') or self._search_regex(
|
||||
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
|
||||
r'"vodId"\s*:\s*"(.+?)"',
|
||||
r'<[^>]+id=["\']vod-(\d+)'),
|
||||
webpage, 'video_id', default=None)
|
||||
|
||||
# clip, no authentication required
|
||||
if not vod_id:
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
|
||||
default=''),
|
||||
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
||||
if player:
|
||||
video = player[-1]
|
||||
if video['videoUrl'] in ('http', 'https'):
|
||||
return self.url_result(video['url'], MedialaanIE.ie_key())
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video['videoUrl'],
|
||||
'title': video['title'],
|
||||
'thumbnail': video.get('imageUrl'),
|
||||
'timestamp': int_or_none(video.get('createdDate')),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
}
|
||||
formats = []
|
||||
for source in (production.get('sources') or []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, production_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
info = self._parse_html5_media_entries(
|
||||
url, webpage, video_id, m3u8_id='hls')[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': self._html_search_meta('description', webpage),
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'url': src,
|
||||
})
|
||||
# vod, authentication required
|
||||
else:
|
||||
if not self._logged_in:
|
||||
self._login()
|
||||
self._sort_formats(formats)
|
||||
|
||||
settings = self._parse_json(
|
||||
self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings', default='{}'),
|
||||
video_id)
|
||||
|
||||
def get(container, item):
|
||||
return try_get(
|
||||
settings, lambda x: x[container][item],
|
||||
compat_str) or self._search_regex(
|
||||
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
|
||||
default=None)
|
||||
|
||||
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
|
||||
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
|
||||
|
||||
data = self._download_json(
|
||||
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
|
||||
video_id, query={
|
||||
'app_id': app_id,
|
||||
'user_network': sso,
|
||||
'UID': self._uid,
|
||||
'UIDSignature': self._uid_signature,
|
||||
'signatureTimestamp': self._signature_timestamp,
|
||||
})
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
data['response']['uri'], video_id, entry_protocol='m3u8_native',
|
||||
ext='mp4', m3u8_id='hls')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': vod_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
api_key = get('vod', 'apiKey')
|
||||
channel = get('medialaanGigya', 'channel')
|
||||
|
||||
if api_key:
|
||||
videos = self._download_json(
|
||||
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
|
||||
query={
|
||||
'channels': channel,
|
||||
'ids': vod_id,
|
||||
'limit': 1,
|
||||
'apikey': api_key,
|
||||
})
|
||||
if videos:
|
||||
video = try_get(
|
||||
videos, lambda x: x['response']['videos'][0], dict)
|
||||
if video:
|
||||
def get(container, item, expected_type=None):
|
||||
return try_get(
|
||||
video, lambda x: x[container][item], expected_type)
|
||||
|
||||
def get_string(container, item):
|
||||
return get(container, item, compat_str)
|
||||
|
||||
info.update({
|
||||
'series': get_string('program', 'title'),
|
||||
'season': get_string('season', 'title'),
|
||||
'season_number': int_or_none(get('season', 'number')),
|
||||
'season_id': get_string('season', 'id'),
|
||||
'episode': get_string('episode', 'title'),
|
||||
'episode_number': int_or_none(get('episode', 'number')),
|
||||
'episode_id': get_string('episode', 'id'),
|
||||
'duration': int_or_none(
|
||||
video.get('duration')) or int_or_none(
|
||||
video.get('durationMillis'), scale=1000),
|
||||
'title': get_string('episode', 'title'),
|
||||
'description': get_string('episode', 'text'),
|
||||
'timestamp': unified_timestamp(get_string(
|
||||
'publication', 'begin')),
|
||||
})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = try_get(
|
||||
config, lambda x: x['videoConfig']['title'],
|
||||
compat_str) or self._html_search_regex(
|
||||
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
|
||||
if not info.get('description'):
|
||||
info['description'] = self._html_search_regex(
|
||||
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
return info
|
||||
return {
|
||||
'id': production_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': production.get('posterUrl'),
|
||||
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
|
||||
'duration': int_or_none(production.get('duration')) or None,
|
||||
}
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MindsBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
|
||||
|
||||
def _call_api(self, path, video_id, resource, query=None):
|
||||
api_url = 'https://www.minds.com/api/' + path
|
||||
token = self._get_cookies(api_url).get('XSRF-TOKEN')
|
||||
return self._download_json(
|
||||
api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
|
||||
'Referer': 'https://www.minds.com/',
|
||||
'X-XSRF-TOKEN': token.value if token else '',
|
||||
}, query=query)
|
||||
|
||||
|
||||
class MindsIE(MindsBaseIE):
|
||||
IE_NAME = 'minds'
|
||||
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.minds.com/media/100000000000086822',
|
||||
'md5': '215a658184a419764852239d4970b045',
|
||||
'info_dict': {
|
||||
'id': '100000000000086822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Minds intro sequence',
|
||||
'thumbnail': r're:https?://.+\.png',
|
||||
'uploader_id': 'ottman',
|
||||
'upload_date': '20130524',
|
||||
'timestamp': 1369404826,
|
||||
'uploader': 'Bill Ottman',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'tags': ['animation'],
|
||||
'comment_count': int,
|
||||
'license': 'attribution-cc',
|
||||
},
|
||||
}, {
|
||||
# entity.type == 'activity' and empty title
|
||||
'url': 'https://www.minds.com/newsfeed/798025111988506624',
|
||||
'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
|
||||
'info_dict': {
|
||||
'id': '798022190320226304',
|
||||
'ext': 'mp4',
|
||||
'title': '798022190320226304',
|
||||
'uploader': 'ColinFlaherty',
|
||||
'upload_date': '20180111',
|
||||
'timestamp': 1515639316,
|
||||
'uploader_id': 'ColinFlaherty',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.minds.com/archive/view/715172106794442752',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# youtube perma_url
|
||||
'url': 'https://www.minds.com/newsfeed/1197131838022602752',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
entity_id = self._match_id(url)
|
||||
entity = self._call_api(
|
||||
'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
|
||||
if entity.get('type') == 'activity':
|
||||
if entity.get('custom_type') == 'video':
|
||||
video_id = entity['entity_guid']
|
||||
else:
|
||||
return self.url_result(entity['perma_url'])
|
||||
else:
|
||||
assert(entity['subtype'] == 'video')
|
||||
video_id = entity_id
|
||||
# 1080p and webm formats available only on the sources array
|
||||
video = self._call_api(
|
||||
'v2/media/video/' + video_id, video_id, 'video')
|
||||
|
||||
formats = []
|
||||
for source in (video.get('sources') or []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': source.get('label'),
|
||||
'height': int_or_none(source.get('size')),
|
||||
'url': src,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entity = video.get('entity') or entity
|
||||
owner = entity.get('ownerObj') or {}
|
||||
uploader_id = owner.get('username')
|
||||
|
||||
tags = entity.get('tags')
|
||||
if tags and isinstance(tags, compat_str):
|
||||
tags = [tags]
|
||||
|
||||
thumbnail = None
|
||||
poster = video.get('poster') or entity.get('thumbnail_src')
|
||||
if poster:
|
||||
urlh = self._request_webpage(poster, video_id, fatal=False)
|
||||
if urlh:
|
||||
thumbnail = urlh.geturl()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': entity.get('title') or video_id,
|
||||
'formats': formats,
|
||||
'description': clean_html(entity.get('description')) or None,
|
||||
'license': str_or_none(entity.get('license')),
|
||||
'timestamp': int_or_none(entity.get('time_created')),
|
||||
'uploader': strip_or_none(owner.get('name')),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
|
||||
'view_count': int_or_none(entity.get('play:count')),
|
||||
'like_count': int_or_none(entity.get('thumbs:up:count')),
|
||||
'dislike_count': int_or_none(entity.get('thumbs:down:count')),
|
||||
'tags': tags,
|
||||
'comment_count': int_or_none(entity.get('comments:count')),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class MindsFeedBaseIE(MindsBaseIE):
|
||||
_PAGE_SIZE = 150
|
||||
|
||||
def _entries(self, feed_id):
|
||||
query = {'limit': self._PAGE_SIZE, 'sync': 1}
|
||||
i = 1
|
||||
while True:
|
||||
data = self._call_api(
|
||||
'v2/feeds/container/%s/videos' % feed_id,
|
||||
feed_id, 'page %s' % i, query)
|
||||
entities = data.get('entities') or []
|
||||
for entity in entities:
|
||||
guid = entity.get('guid')
|
||||
if not guid:
|
||||
continue
|
||||
yield self.url_result(
|
||||
'https://www.minds.com/newsfeed/' + guid,
|
||||
MindsIE.ie_key(), guid)
|
||||
query['from_timestamp'] = data['load-next']
|
||||
if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
|
||||
break
|
||||
i += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed_id = self._match_id(url)
|
||||
feed = self._call_api(
|
||||
'v1/%s/%s' % (self._FEED_PATH, feed_id),
|
||||
feed_id, self._FEED_TYPE)[self._FEED_TYPE]
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(feed['guid']), feed_id,
|
||||
strip_or_none(feed.get('name')),
|
||||
feed.get('briefdescription'))
|
||||
|
||||
|
||||
class MindsChannelIE(MindsFeedBaseIE):
|
||||
_FEED_TYPE = 'channel'
|
||||
IE_NAME = 'minds:' + _FEED_TYPE
|
||||
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
|
||||
_FEED_PATH = 'channel'
|
||||
_TEST = {
|
||||
'url': 'https://www.minds.com/ottman',
|
||||
'info_dict': {
|
||||
'id': 'ottman',
|
||||
'title': 'Bill Ottman',
|
||||
'description': 'Co-creator & CEO @minds',
|
||||
},
|
||||
'playlist_mincount': 54,
|
||||
}
|
||||
|
||||
|
||||
class MindsGroupIE(MindsFeedBaseIE):
|
||||
_FEED_TYPE = 'group'
|
||||
IE_NAME = 'minds:' + _FEED_TYPE
|
||||
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
|
||||
_FEED_PATH = 'groups/group'
|
||||
_TEST = {
|
||||
'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
|
||||
'info_dict': {
|
||||
'id': '785582576369672204',
|
||||
'title': 'Cooking Videos',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}
|
||||
@@ -251,8 +251,11 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
|
||||
cloudcast_url = cloudcast.get('url')
|
||||
if not cloudcast_url:
|
||||
continue
|
||||
slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
|
||||
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
|
||||
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
|
||||
entries.append(self.url_result(
|
||||
cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
|
||||
cloudcast_url, MixcloudIE.ie_key(), video_id))
|
||||
|
||||
page_info = items['pageInfo']
|
||||
has_next_page = page_info['hasNextPage']
|
||||
@@ -321,7 +324,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
_DESCRIPTION_KEY = 'biog'
|
||||
_ROOT_TYPE = 'user'
|
||||
_NODE_TEMPLATE = '''slug
|
||||
url'''
|
||||
url
|
||||
owner { username }'''
|
||||
|
||||
def _get_playlist_title(self, title, slug):
|
||||
return '%s (%s)' % (title, slug)
|
||||
@@ -345,6 +349,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||
_NODE_TEMPLATE = '''cloudcast {
|
||||
slug
|
||||
url
|
||||
owner { username }
|
||||
}'''
|
||||
|
||||
def _get_cloudcast(self, node):
|
||||
|
||||
+168
-21
@@ -1,15 +1,91 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .nhl import NHLBaseIE
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MLBIE(NHLBaseIE):
|
||||
class MLBBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
video = self._download_video_data(display_id)
|
||||
video_id = video['id']
|
||||
title = video['title']
|
||||
feed = self._get_feed(video)
|
||||
|
||||
formats = []
|
||||
for playback in (feed.get('playbacks') or []):
|
||||
playback_url = playback.get('url')
|
||||
if not playback_url:
|
||||
continue
|
||||
name = playback.get('name')
|
||||
ext = determine_ext(playback_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
playback_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id=name, fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'format_id': name,
|
||||
'url': playback_url,
|
||||
}
|
||||
mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(3)),
|
||||
'tbr': int(mobj.group(1)),
|
||||
'width': int(mobj.group(2)),
|
||||
})
|
||||
mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'fps': int(mobj.group(3)),
|
||||
'height': int(mobj.group(2)),
|
||||
'tbr': int(mobj.group(4)),
|
||||
'width': int(mobj.group(1)),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
|
||||
src = cut.get('src')
|
||||
if not src:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'height': int_or_none(cut.get('height')),
|
||||
'url': src,
|
||||
'width': int_or_none(cut.get('width')),
|
||||
})
|
||||
|
||||
language = (video.get('language') or 'EN').lower()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': video.get('description'),
|
||||
'duration': parse_duration(feed.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
|
||||
'subtitles': self._extract_mlb_subtitles(feed, language),
|
||||
}
|
||||
|
||||
|
||||
class MLBIE(MLBBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
|
||||
(?:[\da-z_-]+\.)*mlb\.com/
|
||||
(?:
|
||||
(?:
|
||||
(?:[^/]+/)*c-|
|
||||
(?:[^/]+/)*video/[^/]+/c-|
|
||||
(?:
|
||||
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
||||
(?:[^/]+/)+(?:play|index)\.jsp|
|
||||
@@ -18,7 +94,6 @@ class MLBIE(NHLBaseIE):
|
||||
(?P<id>\d+)
|
||||
)
|
||||
'''
|
||||
_CONTENT_DOMAIN = 'content.mlb.com'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
|
||||
@@ -76,18 +151,6 @@ class MLBIE(NHLBaseIE):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
|
||||
'md5': 'e09e37b552351fddbf4d9e699c924d68',
|
||||
'info_dict': {
|
||||
'id': '75609783',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must C: Pillar climbs for catch',
|
||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
||||
'timestamp': 1429139220,
|
||||
'upload_date': '20150415',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
|
||||
'only_matching': True,
|
||||
@@ -113,8 +176,92 @@ class MLBIE(NHLBaseIE):
|
||||
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
_TIMESTAMP_KEY = 'date'
|
||||
|
||||
@staticmethod
|
||||
def _get_feed(video):
|
||||
return video
|
||||
|
||||
@staticmethod
|
||||
def _extract_mlb_subtitles(feed, language):
|
||||
subtitles = {}
|
||||
for keyword in (feed.get('keywordsAll') or []):
|
||||
keyword_type = keyword.get('type')
|
||||
if keyword_type and keyword_type.startswith('closed_captions_location_'):
|
||||
cc_location = keyword.get('value')
|
||||
if cc_location:
|
||||
subtitles.setdefault(language, []).append({
|
||||
'url': cc_location,
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _download_video_data(self, display_id):
|
||||
return self._download_json(
|
||||
'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id,
|
||||
display_id)
|
||||
|
||||
|
||||
class MLBVideoIE(MLBBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
|
||||
'md5': '632358dacfceec06bad823b83d21df2d',
|
||||
'info_dict': {
|
||||
'id': 'c04a8863-f569-42e6-9f87-992393657614',
|
||||
'ext': 'mp4',
|
||||
'title': "Ackley's spectacular catch",
|
||||
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
|
||||
'duration': 66,
|
||||
'timestamp': 1405995000,
|
||||
'upload_date': '20140722',
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
}
|
||||
_TIMESTAMP_KEY = 'timestamp'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)
|
||||
|
||||
@staticmethod
|
||||
def _get_feed(video):
|
||||
return video['feeds'][0]
|
||||
|
||||
@staticmethod
|
||||
def _extract_mlb_subtitles(feed, language):
|
||||
subtitles = {}
|
||||
for cc_location in (feed.get('closedCaptions') or []):
|
||||
subtitles.setdefault(language, []).append({
|
||||
'url': cc_location,
|
||||
})
|
||||
|
||||
def _download_video_data(self, display_id):
|
||||
# https://www.mlb.com/data-service/en/videos/[SLUG]
|
||||
return self._download_json(
|
||||
'https://fastball-gateway.mlb.com/graphql',
|
||||
display_id, query={
|
||||
'query': '''{
|
||||
mediaPlayback(ids: "%s") {
|
||||
description
|
||||
feeds(types: CMS) {
|
||||
closedCaptions
|
||||
duration
|
||||
image {
|
||||
cuts {
|
||||
width
|
||||
height
|
||||
src
|
||||
}
|
||||
}
|
||||
playbacks {
|
||||
name
|
||||
url
|
||||
}
|
||||
}
|
||||
id
|
||||
timestamp
|
||||
title
|
||||
}
|
||||
}''' % display_id,
|
||||
})['data']['mediaPlayback'][0]
|
||||
|
||||
+15
-13
@@ -253,6 +253,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
|
||||
|
||||
@staticmethod
|
||||
def _extract_child_with_type(parent, t):
|
||||
for c in parent['children']:
|
||||
if c.get('type') == t:
|
||||
return c
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
try:
|
||||
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
||||
@@ -278,6 +284,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if not mgid:
|
||||
mgid = self._extract_triforce_mgid(webpage)
|
||||
|
||||
if not mgid:
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
|
||||
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
|
||||
mgid = video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
return mgid
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -309,7 +323,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
@@ -349,18 +363,6 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def extract_child_with_type(parent, t):
|
||||
children = parent['children']
|
||||
return next(c for c in children if c.get('type') == t)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self.extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
|
||||
return video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
|
||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtvjapan'
|
||||
|
||||
@@ -23,11 +23,9 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
destination_code, content_id = re.match(self._VALID_URL, url).groups()
|
||||
api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
|
||||
content = self._download_json(api_base_url, content_id, query={
|
||||
'$include': '[Media,Season,ContentPackages]',
|
||||
'$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
|
||||
})
|
||||
title = content['Name']
|
||||
if len(content['ContentPackages']) > 1:
|
||||
raise ExtractorError('multiple content packages')
|
||||
content_package = content['ContentPackages'][0]
|
||||
package_id = content_package['Id']
|
||||
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
|
||||
|
||||
+107
-81
@@ -1,104 +1,130 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NineGagIE(InfoExtractor):
|
||||
IE_NAME = '9gag'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||
'info_dict': {
|
||||
'id': 'kXzwOKyGlSA',
|
||||
'id': 'ae5Ag7B',
|
||||
'ext': 'mp4',
|
||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
||||
'uploader': 'CompilationChannel',
|
||||
'upload_date': '20131110',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'title': 'Capybara Agility Training',
|
||||
'upload_date': '20191108',
|
||||
'timestamp': 1573237208,
|
||||
'categories': ['Awesome'],
|
||||
'tags': ['Weimaraner', 'American Pit Bull Terrier'],
|
||||
'duration': 44,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/aKolP3',
|
||||
'info_dict': {
|
||||
'id': 'aKolP3',
|
||||
'ext': 'mp4',
|
||||
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
||||
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
||||
'uploader_id': 'rickmereki',
|
||||
'uploader': 'Rick Mereki',
|
||||
'upload_date': '20110803',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/KklwM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/Kk2X5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
||||
# HTML escaped title
|
||||
'url': 'https://9gag.com/gag/av5nvyb',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_EXTERNAL_VIDEO_PROVIDER = {
|
||||
'1': {
|
||||
'url': '%s',
|
||||
'ie_key': 'Youtube',
|
||||
},
|
||||
'2': {
|
||||
'url': 'http://player.vimeo.com/video/%s',
|
||||
'ie_key': 'Vimeo',
|
||||
},
|
||||
'3': {
|
||||
'url': 'http://instagram.com/p/%s',
|
||||
'ie_key': 'Instagram',
|
||||
},
|
||||
'4': {
|
||||
'url': 'http://vine.co/v/%s',
|
||||
'ie_key': 'Vine',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
post_id = self._match_id(url)
|
||||
post = self._download_json(
|
||||
'https://9gag.com/v1/post', post_id, query={
|
||||
'id': post_id
|
||||
})['data']['post']
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if post.get('type') != 'Animated':
|
||||
raise ExtractorError(
|
||||
'The given url does not contain a video',
|
||||
expected=True)
|
||||
|
||||
post_view = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
||||
webpage, 'post view'),
|
||||
display_id)
|
||||
title = unescapeHTML(post['title'])
|
||||
|
||||
ie_key = None
|
||||
source_url = post_view.get('sourceUrl')
|
||||
if not source_url:
|
||||
external_video_id = post_view['videoExternalId']
|
||||
external_video_provider = post_view['videoExternalProvider']
|
||||
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
||||
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
||||
title = post_view['title']
|
||||
description = post_view.get('description')
|
||||
view_count = str_to_int(post_view.get('externalView'))
|
||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
||||
duration = None
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for key, image in (post.get('images') or {}).items():
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
ext = determine_ext(image_url)
|
||||
image_id = key.strip('image')
|
||||
common = {
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
}
|
||||
if ext in ('jpg', 'png'):
|
||||
webp_url = image.get('webpUrl')
|
||||
if webp_url:
|
||||
t = common.copy()
|
||||
t.update({
|
||||
'id': image_id + '-webp',
|
||||
'url': webp_url,
|
||||
})
|
||||
thumbnails.append(t)
|
||||
common.update({
|
||||
'id': image_id,
|
||||
'ext': ext,
|
||||
})
|
||||
thumbnails.append(common)
|
||||
elif ext in ('webm', 'mp4'):
|
||||
if not duration:
|
||||
duration = int_or_none(image.get('duration'))
|
||||
common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
|
||||
for vcodec in ('vp8', 'vp9', 'h265'):
|
||||
c_url = image.get(vcodec + 'Url')
|
||||
if not c_url:
|
||||
continue
|
||||
c_f = common.copy()
|
||||
c_f.update({
|
||||
'format_id': image_id + '-' + vcodec,
|
||||
'url': c_url,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
formats.append(c_f)
|
||||
common.update({
|
||||
'ext': ext,
|
||||
'format_id': image_id,
|
||||
})
|
||||
formats.append(common)
|
||||
self._sort_formats(formats)
|
||||
|
||||
section = try_get(post, lambda x: x['postSection']['name'])
|
||||
|
||||
tags = None
|
||||
post_tags = post.get('tags')
|
||||
if post_tags:
|
||||
tags = []
|
||||
for tag in post_tags:
|
||||
tag_key = tag.get('key')
|
||||
if not tag_key:
|
||||
continue
|
||||
tags.append(tag_key)
|
||||
|
||||
get_count = lambda x: int_or_none(post.get(x + 'Count'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': source_url,
|
||||
'ie_key': ie_key,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'id': post_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'view_count': view_count,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': int_or_none(post.get('creationTs')),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'like_count': get_count('upVote'),
|
||||
'dislike_count': get_count('downVote'),
|
||||
'comment_count': get_count('comments'),
|
||||
'age_limit': 18 if post.get('nsfw') == 1 else None,
|
||||
'categories': [section] if section else None,
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
@@ -6,30 +6,40 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class NJPWWorldIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
||||
IE_DESC = '新日本プロレスワールド'
|
||||
_NETRC_MACHINE = 'njpwworld'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
|
||||
'info_dict': {
|
||||
'id': 's_series_00155_1_9',
|
||||
'ext': 'mp4',
|
||||
'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
|
||||
'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # AES-encrypted m3u8
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
|
||||
'info_dict': {
|
||||
'id': 's_series_00563_16_bs',
|
||||
'ext': 'mp4',
|
||||
'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
|
||||
'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
||||
|
||||
@@ -64,35 +74,27 @@ class NJPWWorldIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = []
|
||||
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
|
||||
player = extract_attributes(mobj.group(0))
|
||||
player_path = player.get('href')
|
||||
if not player_path:
|
||||
continue
|
||||
kind = self._search_regex(
|
||||
r'(low|high)$', player.get('class') or '', 'kind',
|
||||
default='low')
|
||||
for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
|
||||
player_path = '/intent?id=%s&type=url' % vid
|
||||
player_url = compat_urlparse.urljoin(url, player_path)
|
||||
player_page = self._download_webpage(
|
||||
player_url, video_id, note='Downloading player page')
|
||||
entries = self._parse_html5_media_entries(
|
||||
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
||||
m3u8_entry_protocol='m3u8_native')
|
||||
kind_formats = entries[0]['formats']
|
||||
for f in kind_formats:
|
||||
f['quality'] = 2 if kind == 'high' else 1
|
||||
formats.extend(kind_formats)
|
||||
formats.append({
|
||||
'url': player_url,
|
||||
'format_id': kind,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'quality': 2 if kind == 'high' else 1,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
post_content = get_element_by_class('post-content', webpage)
|
||||
tag_block = get_element_by_class('tag-block', webpage)
|
||||
tags = re.findall(
|
||||
r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
|
||||
) if post_content else None
|
||||
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
|
||||
) if tag_block else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
||||
return self._download_json(
|
||||
urljoin('http://psapi.nrk.no/', path),
|
||||
urljoin('https://psapi.nrk.no/', path),
|
||||
video_id, note or 'Downloading %s JSON' % item,
|
||||
fatal=fatal, query=query,
|
||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
||||
|
||||
@@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src, video_id, f4m_id=format_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -140,6 +143,25 @@ class ORFTVthekIE(InfoExtractor):
|
||||
})
|
||||
|
||||
upload_date = unified_strdate(sd.get('created_date'))
|
||||
|
||||
thumbnails = []
|
||||
preview = sd.get('preview_image_url')
|
||||
if preview:
|
||||
thumbnails.append({
|
||||
'id': 'preview',
|
||||
'url': preview,
|
||||
'preference': 0,
|
||||
})
|
||||
image = sd.get('image_full_url')
|
||||
if not image and len(data_jsb) == 1:
|
||||
image = self._og_search_thumbnail(webpage)
|
||||
if image:
|
||||
thumbnails.append({
|
||||
'id': 'full',
|
||||
'url': image,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
@@ -149,7 +171,7 @@ class ORFTVthekIE(InfoExtractor):
|
||||
'description': sd.get('description'),
|
||||
'duration': int_or_none(sd.get('duration_in_seconds')),
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': sd.get('image_full_url'),
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -182,7 +204,7 @@ class ORFRadioIE(InfoExtractor):
|
||||
duration = end - start if end and start else None
|
||||
entries.append({
|
||||
'id': loop_stream_id.replace('.mp3', ''),
|
||||
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
||||
'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
||||
'title': title,
|
||||
'description': clean_html(data.get('subtitle')),
|
||||
'duration': duration,
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class PalcoMP3BaseIE(InfoExtractor):
|
||||
_GQL_QUERY_TMPL = '''{
|
||||
artist(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}'''
|
||||
_ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
|
||||
%s
|
||||
}'''
|
||||
_MUSIC_FIELDS = '''duration
|
||||
hls
|
||||
mp3File
|
||||
musicID
|
||||
plays
|
||||
title'''
|
||||
|
||||
def _call_api(self, artist_slug, artist_fields):
|
||||
return self._download_json(
|
||||
'https://www.palcomp3.com.br/graphql/', artist_slug, query={
|
||||
'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
|
||||
})['data']
|
||||
|
||||
def _parse_music(self, music):
|
||||
music_id = compat_str(music['musicID'])
|
||||
title = music['title']
|
||||
|
||||
formats = []
|
||||
hls_url = music.get('hls')
|
||||
if hls_url:
|
||||
formats.append({
|
||||
'url': hls_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
mp3_file = music.get('mp3File')
|
||||
if mp3_file:
|
||||
formats.append({
|
||||
'url': mp3_file,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': music_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(music.get('duration')),
|
||||
'view_count': int_or_none(music.get('plays')),
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
|
||||
artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
|
||||
music = self._call_api(artist_slug, artist_fields)['artist']['music']
|
||||
return self._parse_music(music)
|
||||
|
||||
|
||||
class PalcoMP3IE(PalcoMP3BaseIE):
|
||||
IE_NAME = 'PalcoMP3:song'
|
||||
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
|
||||
'md5': '99fd6405b2d8fd589670f6db1ba3b358',
|
||||
'info_dict': {
|
||||
'id': '3162927',
|
||||
'ext': 'mp3',
|
||||
'title': 'Nossas Composições - CUIDA BEM DELA',
|
||||
'duration': 210,
|
||||
'view_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url)
|
||||
|
||||
|
||||
class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
||||
IE_NAME = 'PalcoMP3:artist'
|
||||
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.palcomp3.com.br/condedoforro/',
|
||||
'info_dict': {
|
||||
'id': '358396',
|
||||
'title': 'Conde do Forró',
|
||||
},
|
||||
'playlist_mincount': 188,
|
||||
}]
|
||||
_ARTIST_FIELDS_TMPL = '''artistID
|
||||
musics {
|
||||
nodes {
|
||||
%s
|
||||
}
|
||||
}
|
||||
name'''
|
||||
|
||||
@ classmethod
|
||||
def suitable(cls, url):
|
||||
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_slug = self._match_id(url)
|
||||
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
||||
|
||||
def entries():
|
||||
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
|
||||
yield self._parse_music(music)
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), str_or_none(artist.get('artistID')), artist.get('name'))
|
||||
|
||||
|
||||
class PalcoMP3VideoIE(PalcoMP3BaseIE):
|
||||
IE_NAME = 'PalcoMP3:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': '_pD1nR2qqPg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
||||
'description': 'md5:7043342c09a224598e93546e98e49282',
|
||||
'upload_date': '20161107',
|
||||
'uploader_id': 'maiaramaraisaoficial',
|
||||
'uploader': 'Maiara e Maraisa',
|
||||
}
|
||||
}]
|
||||
_MUSIC_FIELDS = 'youtubeID'
|
||||
|
||||
def _parse_music(self, music):
|
||||
youtube_id = music['youtubeID']
|
||||
return self.url_result(youtube_id, 'Youtube', youtube_id)
|
||||
@@ -413,7 +413,8 @@ class PeerTubeIE(InfoExtractor):
|
||||
peertube3\.cpy\.re|
|
||||
peertube2\.cpy\.re|
|
||||
videos\.tcit\.fr|
|
||||
peertube\.cpy\.re
|
||||
peertube\.cpy\.re|
|
||||
canard\.tube
|
||||
)'''
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
|
||||
@@ -450,6 +451,18 @@ class PeerTubeIE(InfoExtractor):
|
||||
'tags': ['framasoft', 'peertube'],
|
||||
'categories': ['Science & Technology'],
|
||||
}
|
||||
}, {
|
||||
# Issue #26002
|
||||
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||
'info_dict': {
|
||||
'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dot matrix printer shell demo',
|
||||
'uploader_id': '3',
|
||||
'timestamp': 1587401293,
|
||||
'upload_date': '20200420',
|
||||
'uploader': 'Drew DeVault',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||
'only_matching': True,
|
||||
@@ -526,7 +539,15 @@ class PeerTubeIE(InfoExtractor):
|
||||
title = video['name']
|
||||
|
||||
formats = []
|
||||
for file_ in video['files']:
|
||||
files = video.get('files') or []
|
||||
for playlist in (video.get('streamingPlaylists') or []):
|
||||
if not isinstance(playlist, dict):
|
||||
continue
|
||||
playlist_files = playlist.get('files')
|
||||
if not (playlist_files and isinstance(playlist_files, list)):
|
||||
continue
|
||||
files.extend(playlist_files)
|
||||
for file_ in files:
|
||||
if not isinstance(file_, dict):
|
||||
continue
|
||||
file_url = url_or_none(file_.get('fileUrl'))
|
||||
@@ -548,15 +569,15 @@ class PeerTubeIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
full_description = self._call_api(
|
||||
host, video_id, 'description', note='Downloading description JSON',
|
||||
fatal=False)
|
||||
description = video.get('description')
|
||||
if len(description) >= 250:
|
||||
# description is shortened
|
||||
full_description = self._call_api(
|
||||
host, video_id, 'description', note='Downloading description JSON',
|
||||
fatal=False)
|
||||
|
||||
description = None
|
||||
if isinstance(full_description, dict):
|
||||
description = str_or_none(full_description.get('description'))
|
||||
if not description:
|
||||
description = video.get('description')
|
||||
if isinstance(full_description, dict):
|
||||
description = str_or_none(full_description.get('description')) or description
|
||||
|
||||
subtitles = self.extract_subtitles(host, video_id)
|
||||
|
||||
@@ -578,11 +599,13 @@ class PeerTubeIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': urljoin(url, video.get('thumbnailPath')),
|
||||
'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
|
||||
'timestamp': unified_timestamp(video.get('publishedAt')),
|
||||
'uploader': account_data('displayName', compat_str),
|
||||
'uploader_id': str_or_none(account_data('id', int)),
|
||||
@@ -600,5 +623,6 @@ class PeerTubeIE(InfoExtractor):
|
||||
'tags': try_get(video, lambda x: x['tags'], list),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
'subtitles': subtitles,
|
||||
'webpage_url': webpage_url,
|
||||
}
|
||||
|
||||
@@ -12,6 +12,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class PeriscopeBaseIE(InfoExtractor):
|
||||
_M3U8_HEADERS = {
|
||||
'Referer': 'https://www.periscope.tv/'
|
||||
}
|
||||
|
||||
def _call_api(self, method, query, item_id):
|
||||
return self._download_json(
|
||||
'https://api.periscope.tv/api/v2/%s' % method,
|
||||
@@ -54,9 +58,11 @@ class PeriscopeBaseIE(InfoExtractor):
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native'
|
||||
if state in ('ended', 'timed_out') else 'm3u8',
|
||||
m3u8_id=format_id, fatal=fatal)
|
||||
m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
|
||||
if len(m3u8_formats) == 1:
|
||||
self._add_width_and_height(m3u8_formats[0], width, height)
|
||||
for f in m3u8_formats:
|
||||
f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
|
||||
return m3u8_formats
|
||||
|
||||
|
||||
|
||||
+121
-33
@@ -1,45 +1,133 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .dreisat import DreiSatIE
|
||||
import re
|
||||
|
||||
from .youtube import YoutubeIE
|
||||
from .zdf import ZDFBaseIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PhoenixIE(DreiSatIE):
|
||||
class PhoenixIE(ZDFBaseIE):
|
||||
IE_NAME = 'phoenix.de'
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
|
||||
(?:
|
||||
phoenix/die_sendungen/(?:[^/]+/)?
|
||||
)?
|
||||
(?P<id>[0-9]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.phoenix.de/content/884301',
|
||||
'md5': 'ed249f045256150c92e72dbb70eadec6',
|
||||
'info_dict': {
|
||||
'id': '884301',
|
||||
'ext': 'mp4',
|
||||
'title': 'Michael Krons mit Hans-Werner Sinn',
|
||||
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
|
||||
'upload_date': '20141025',
|
||||
'uploader': 'Im Dialog',
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
# Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
|
||||
'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
|
||||
'md5': '34ec321e7eb34231fd88616c65c92db0',
|
||||
'info_dict': {
|
||||
'id': '210222_phx_nachgehakt_corona_protest',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wohin führt der Protest in der Pandemie?',
|
||||
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
|
||||
'duration': 1691,
|
||||
'timestamp': 1613902500,
|
||||
'upload_date': '20210221',
|
||||
'uploader': 'Phoenix',
|
||||
'series': 'corona nachgehakt',
|
||||
'episode': 'Wohin führt der Protest in der Pandemie?',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Youtube embed
|
||||
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
|
||||
'info_dict': {
|
||||
'id': 'hMQtqFYjomk',
|
||||
'ext': 'mp4',
|
||||
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
|
||||
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
|
||||
'duration': 3509,
|
||||
'upload_date': '20201219',
|
||||
'uploader': 'phoenix',
|
||||
'uploader_id': 'phoenix',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
|
||||
'only_matching': True,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# no media
|
||||
'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
|
||||
'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
article_id = self._match_id(url)
|
||||
|
||||
internal_id = self._search_regex(
|
||||
r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
|
||||
webpage, 'internal video ID')
|
||||
article = self._download_json(
|
||||
'https://www.phoenix.de/response/id/%s' % article_id, article_id,
|
||||
'Downloading article JSON')
|
||||
|
||||
api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id
|
||||
return self.extract_from_xml_url(video_id, api_url)
|
||||
video = article['absaetze'][0]
|
||||
title = video.get('titel') or article.get('subtitel')
|
||||
|
||||
if video.get('typ') == 'video-youtube':
|
||||
video_id = video['id']
|
||||
return self.url_result(
|
||||
video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
|
||||
video_title=title)
|
||||
|
||||
video_id = compat_str(video.get('basename') or video.get('content'))
|
||||
|
||||
details = self._download_json(
|
||||
'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
|
||||
video_id, 'Downloading details JSON', query={
|
||||
'ak': 'web',
|
||||
'ptmd': 'true',
|
||||
'id': video_id,
|
||||
'profile': 'player2',
|
||||
})
|
||||
|
||||
title = title or details['title']
|
||||
content_id = details['tracking']['nielsen']['content']['assetid']
|
||||
|
||||
info = self._extract_ptmd(
|
||||
'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
|
||||
content_id, None, url)
|
||||
|
||||
duration = int_or_none(try_get(
|
||||
details, lambda x: x['tracking']['nielsen']['content']['length']))
|
||||
timestamp = unified_timestamp(details.get('editorialDate'))
|
||||
series = try_get(
|
||||
details, lambda x: x['tracking']['nielsen']['content']['program'],
|
||||
compat_str)
|
||||
episode = title if details.get('contentType') == 'episode' else None
|
||||
|
||||
thumbnails = []
|
||||
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
|
||||
for thumbnail_key, thumbnail_url in teaser_images.items():
|
||||
thumbnail_url = urljoin(url, thumbnail_url)
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': details.get('leadParagraph'),
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'uploader': details.get('tvService'),
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
})
|
||||
|
||||
@@ -1,22 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@@ -34,65 +27,46 @@ class PicartoIE(InfoExtractor):
|
||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||
channel_id)
|
||||
data = self._download_json(
|
||||
'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
|
||||
'query': '''{
|
||||
channel(name: "%s") {
|
||||
adult
|
||||
id
|
||||
online
|
||||
stream_name
|
||||
title
|
||||
}
|
||||
getLoadBalancerUrl(channel_name: "%s") {
|
||||
url
|
||||
}
|
||||
}''' % (channel_id, channel_id),
|
||||
})['data']
|
||||
metadata = data['channel']
|
||||
|
||||
if metadata.get('online') is False:
|
||||
if metadata.get('online') == 0:
|
||||
raise ExtractorError('Stream is offline', expected=True)
|
||||
title = metadata['title']
|
||||
|
||||
cdn_data = self._download_json(
|
||||
'https://picarto.tv/process/channel', channel_id,
|
||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||
note='Downloading load balancing info')
|
||||
data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
|
||||
channel_id, 'Downloading load balancing info')
|
||||
|
||||
token = mobj.group('token') or 'public'
|
||||
params = {
|
||||
'con': int(time.time() * 1000),
|
||||
'token': token,
|
||||
}
|
||||
|
||||
prefered_edge = cdn_data.get('preferedEdge')
|
||||
formats = []
|
||||
|
||||
for edge in cdn_data['edges']:
|
||||
edge_ep = edge.get('ep')
|
||||
if not edge_ep or not isinstance(edge_ep, compat_str):
|
||||
for source in (cdn_data.get('source') or []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
edge_id = edge.get('id')
|
||||
for tech in cdn_data['techs']:
|
||||
tech_label = tech.get('label')
|
||||
tech_type = tech.get('type')
|
||||
preference = 0
|
||||
if edge_id == prefered_edge:
|
||||
preference += 1
|
||||
format_id = []
|
||||
if edge_id:
|
||||
format_id.append(edge_id)
|
||||
if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
|
||||
format_id.append('hls')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
update_url_query(
|
||||
'https://%s/hls/%s/index.m3u8'
|
||||
% (edge_ep, channel_id), params),
|
||||
channel_id, 'mp4', preference=preference,
|
||||
m3u8_id='-'.join(format_id), fatal=False))
|
||||
continue
|
||||
elif tech_type == 'video/mp4' or tech_label == 'MP4':
|
||||
format_id.append('mp4')
|
||||
formats.append({
|
||||
'url': update_url_query(
|
||||
'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
|
||||
params),
|
||||
'format_id': '-'.join(format_id),
|
||||
'preference': preference,
|
||||
})
|
||||
else:
|
||||
# rtmp format does not seem to work
|
||||
continue
|
||||
source_type = source.get('type')
|
||||
if source_type == 'html5/application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'html5/video/mp4':
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
mature = metadata.get('adult')
|
||||
@@ -103,10 +77,10 @@ class PicartoIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(metadata.get('title') or channel_id),
|
||||
'title': self._live_title(title.strip()),
|
||||
'is_live': True,
|
||||
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||
'channel': channel_id,
|
||||
'channel_id': metadata.get('id'),
|
||||
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
|
||||
@@ -31,6 +31,7 @@ class PinterestBaseIE(InfoExtractor):
|
||||
|
||||
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
duration = None
|
||||
if extract_formats:
|
||||
@@ -38,8 +39,9 @@ class PinterestBaseIE(InfoExtractor):
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = url_or_none(format_dict.get('url'))
|
||||
if not format_url:
|
||||
if not format_url or format_url in urls:
|
||||
continue
|
||||
urls.append(format_url)
|
||||
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
||||
ext = determine_ext(format_url)
|
||||
if 'hls' in format_id.lower() or ext == 'm3u8':
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class PlayStuffIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
|
||||
'md5': 'c82d3669e5247c64bc382577843e5bd0',
|
||||
'info_dict': {
|
||||
'id': '6250584958001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
|
||||
'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
|
||||
'uploader_id': '6005208634001',
|
||||
'timestamp': 1619491027,
|
||||
'upload_date': '20210427',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
# geo restricted, bypassable
|
||||
'url': 'https://play.stuff.co.nz/details/_6155660351001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
state = self._parse_json(
|
||||
self._search_regex(
|
||||
r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'),
|
||||
video_id)
|
||||
|
||||
account_id = try_get(
|
||||
state, lambda x: x['configurations']['accountId'],
|
||||
compat_str) or '6005208634001'
|
||||
player_id = try_get(
|
||||
state, lambda x: x['configurations']['playerId'],
|
||||
compat_str) or 'default'
|
||||
|
||||
entries = []
|
||||
for item_id, video in state['items'].items():
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
asset_id = try_get(
|
||||
video, lambda x: x['content']['attributes']['assetId'],
|
||||
compat_str)
|
||||
if not asset_id:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id),
|
||||
{'geo_countries': ['NZ']}),
|
||||
'BrightcoveNew', video_id))
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
@@ -393,7 +393,7 @@ query viewClip {
|
||||
# To somewhat reduce the probability of these consequences
|
||||
# we will sleep random amount of time before each call to ViewClip.
|
||||
self._sleep(
|
||||
random.randint(2, 5), display_id,
|
||||
random.randint(5, 10), display_id,
|
||||
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
|
||||
|
||||
if not viewclip:
|
||||
|
||||
+198
-71
@@ -22,11 +22,16 @@ from ..utils import (
|
||||
orderedSet,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PornHubBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'pornhub'
|
||||
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
def dl(*args, **kwargs):
|
||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
@@ -52,17 +57,79 @@ class PornHubBaseIE(InfoExtractor):
|
||||
|
||||
return webpage, urlh
|
||||
|
||||
def _real_initialize(self):
|
||||
self._logged_in = False
|
||||
|
||||
def _login(self, host):
|
||||
if self._logged_in:
|
||||
return
|
||||
|
||||
site = host.split('.')[0]
|
||||
|
||||
# Both sites pornhub and pornhubpremium have separate accounts
|
||||
# so there should be an option to provide credentials for both.
|
||||
# At the same time some videos are available under the same video id
|
||||
# on both sites so that we have to identify them as the same video.
|
||||
# For that purpose we have to keep both in the same extractor
|
||||
# but under different netrc machines.
|
||||
username, password = self._get_login_info(netrc_machine=site)
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
|
||||
login_page = self._download_webpage(
|
||||
login_url, None, 'Downloading %s login page' % site)
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'class=["\']signOut',
|
||||
r'>Sign\s+[Oo]ut\s*<'))
|
||||
|
||||
if is_logged(login_page):
|
||||
self._logged_in = True
|
||||
return
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.%s/front/authenticate' % host, None,
|
||||
'Logging in to %s' % site,
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': login_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
|
||||
if response.get('success') == '1':
|
||||
self._logged_in = True
|
||||
return
|
||||
|
||||
message = response.get('message')
|
||||
if message is not None:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % message, expected=True)
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class PornHubIE(PornHubBaseIE):
|
||||
IE_DESC = 'PornHub and Thumbzilla'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:[^/]+\.)?
|
||||
%s
|
||||
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
'''
|
||||
''' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
|
||||
@@ -103,6 +170,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
|
||||
}, {
|
||||
# subtitles
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||
@@ -163,12 +231,27 @@ class PornHubIE(PornHubBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Some videos are available with the same id on both premium
|
||||
# and non-premium sites (e.g. this and the following test)
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
|
||||
webpage)
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
@@ -180,12 +263,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
host = mobj.group('host') or 'pornhub.com'
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if 'premium' in host:
|
||||
if not self._downloader.params.get('cookiefile'):
|
||||
raise ExtractorError(
|
||||
'PornHub Premium requires authentication.'
|
||||
' You may want to use --cookies.',
|
||||
expected=True)
|
||||
self._login(host)
|
||||
|
||||
self._set_cookie(host, 'age_verified', '1')
|
||||
|
||||
@@ -198,7 +276,8 @@ class PornHubIE(PornHubBaseIE):
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
(r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
|
||||
webpage, 'error message', default=None, group='error')
|
||||
if error_msg:
|
||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||
@@ -206,6 +285,11 @@ class PornHubIE(PornHubBaseIE):
|
||||
'PornHub said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
if any(re.search(p, webpage) for p in (
|
||||
r'class=["\']geoBlocked["\']',
|
||||
r'>\s*This content is unavailable in your country')):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||
# on that anymore.
|
||||
@@ -327,35 +411,49 @@ class PornHubIE(PornHubBaseIE):
|
||||
|
||||
upload_date = None
|
||||
formats = []
|
||||
|
||||
def add_format(format_url, height=None):
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
return
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
return
|
||||
if not height:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
|
||||
default=None))
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
for video_url, height in video_urls:
|
||||
if not upload_date:
|
||||
upload_date = self._search_regex(
|
||||
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('/', '')
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
if '/video/get_media' in video_url:
|
||||
medias = self._download_json(video_url, video_id, fatal=False)
|
||||
if isinstance(medias, list):
|
||||
for media in medias:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
video_url = url_or_none(media.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
height = int_or_none(media.get('quality'))
|
||||
add_format(video_url, height)
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
||||
if mobj:
|
||||
if not height:
|
||||
height = int(mobj.group('height'))
|
||||
tbr = int(mobj.group('tbr'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
add_format(video_url)
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'fps', 'format_id'))
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
@@ -405,6 +503,10 @@ class PornHubIE(PornHubBaseIE):
|
||||
|
||||
|
||||
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
def _extract_page(self, url):
|
||||
return int_or_none(self._search_regex(
|
||||
r'\bpage=(\d+)', url, 'page', default=None))
|
||||
|
||||
def _extract_entries(self, webpage, host):
|
||||
# Only process container div with main playlist content skipping
|
||||
# drop-down menu that uses similar pattern for videos (see
|
||||
@@ -422,29 +524,9 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
container))
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = self._extract_entries(webpage, host)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
|
||||
'playlist', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
title = playlist.get('title') or self._search_regex(
|
||||
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, title, playlist.get('description'))
|
||||
|
||||
|
||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||
'playlist_mincount': 118,
|
||||
@@ -463,14 +545,30 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Unavailable via /videos page, but available with direct pagination
|
||||
# on pornstar page (see [1]), requires premium
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Same as before, multi page
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('id')
|
||||
videos_url = '%s/videos' % mobj.group('url')
|
||||
page = self._extract_page(url)
|
||||
if page:
|
||||
videos_url = update_url_query(videos_url, {'page': page})
|
||||
return self.url_result(
|
||||
'%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
|
||||
video_id=user_id)
|
||||
videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
|
||||
|
||||
|
||||
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
@@ -483,36 +581,59 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
<button[^>]+\bid=["\']moreDataBtn
|
||||
''', webpage) is not None
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
item_id = mobj.group('id')
|
||||
def _entries(self, url, host, item_id):
|
||||
page = self._extract_page(url)
|
||||
|
||||
page = int_or_none(self._search_regex(
|
||||
r'\bpage=(\d+)', url, 'page', default=None))
|
||||
VIDEOS = '/videos'
|
||||
|
||||
entries = []
|
||||
for page_num in (page, ) if page is not None else itertools.count(1):
|
||||
def download_page(base_url, num, fallback=False):
|
||||
note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
|
||||
return self._download_webpage(
|
||||
base_url, item_id, note, query={'page': num})
|
||||
|
||||
def is_404(e):
|
||||
return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
|
||||
|
||||
base_url = url
|
||||
has_page = page is not None
|
||||
first_page = page if has_page else 1
|
||||
for page_num in (first_page, ) if has_page else itertools.count(first_page):
|
||||
try:
|
||||
webpage = self._download_webpage(
|
||||
url, item_id, 'Downloading page %d' % page_num,
|
||||
query={'page': page_num})
|
||||
try:
|
||||
webpage = download_page(base_url, page_num)
|
||||
except ExtractorError as e:
|
||||
# Some sources may not be available via /videos page,
|
||||
# trying to fallback to main page pagination (see [1])
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||
if is_404(e) and page_num == first_page and VIDEOS in base_url:
|
||||
base_url = base_url.replace(VIDEOS, '')
|
||||
webpage = download_page(base_url, page_num, fallback=True)
|
||||
else:
|
||||
raise
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
if is_404(e) and page_num != first_page:
|
||||
break
|
||||
raise
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
entries.extend(page_entries)
|
||||
for e in page_entries:
|
||||
yield e
|
||||
if not self._has_more(webpage):
|
||||
break
|
||||
|
||||
return self.playlist_result(orderedSet(entries), item_id)
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
item_id = mobj.group('id')
|
||||
|
||||
self._login(host)
|
||||
|
||||
return self.playlist_result(self._entries(url, host, item_id), item_id)
|
||||
|
||||
|
||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||
'only_matching': True,
|
||||
@@ -617,6 +738,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -627,7 +751,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
'info_dict': {
|
||||
@@ -637,4 +761,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -15,17 +15,17 @@ class RDSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
|
||||
# has two 9c9media ContentPackages, the web player selects the first ContentPackage
|
||||
'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606',
|
||||
'info_dict': {
|
||||
'id': '604333',
|
||||
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
|
||||
'id': '2083309',
|
||||
'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande',
|
||||
'ext': 'flv',
|
||||
'title': 'Fowler Jr. prend la direction de Jacksonville',
|
||||
'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
|
||||
'timestamp': 1430397346,
|
||||
'upload_date': '20150430',
|
||||
'duration': 154.354,
|
||||
'age_limit': 0,
|
||||
'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande',
|
||||
'description': 'md5:83fa38ecc4a79b19e433433254077f25',
|
||||
'timestamp': 1606129030,
|
||||
'upload_date': '20201123',
|
||||
'duration': 773.039,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
|
||||
|
||||
@@ -133,8 +133,10 @@ class RedBullEmbedIE(RedBullTVIE):
|
||||
rrn_id = self._match_id(url)
|
||||
asset_id = self._download_json(
|
||||
'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
|
||||
rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'},
|
||||
query={
|
||||
rrn_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'API-KEY': 'e90a1ff11335423998b100c929ecc866',
|
||||
}, query={
|
||||
'query': '''{
|
||||
resource(id: "%s", enforceGeoBlocking: false) {
|
||||
%s
|
||||
|
||||
@@ -6,11 +6,12 @@ import re
|
||||
from .srgssr import SRGSSRIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
determine_ext,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -21,7 +22,7 @@ class RTSIE(SRGSSRIE):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
|
||||
'md5': 'ff7f8450a90cf58dacb64e29707b4a8e',
|
||||
'md5': '753b877968ad8afaeddccc374d4256a5',
|
||||
'info_dict': {
|
||||
'id': '3449373',
|
||||
'display_id': 'les-enfants-terribles',
|
||||
@@ -35,6 +36,7 @@ class RTSIE(SRGSSRIE):
|
||||
'thumbnail': r're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
|
||||
@@ -63,11 +65,12 @@ class RTSIE(SRGSSRIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
|
||||
'skip': 'Blocked outside Switzerland',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
|
||||
'md5': '1bae984fe7b1f78e94abc74e802ed99f',
|
||||
'md5': '9bb06503773c07ce83d3cbd793cebb91',
|
||||
'info_dict': {
|
||||
'id': '5745356',
|
||||
'display_id': 'londres-cachee-par-un-epais-smog',
|
||||
@@ -81,6 +84,7 @@ class RTSIE(SRGSSRIE):
|
||||
'thumbnail': r're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
|
||||
@@ -160,7 +164,7 @@ class RTSIE(SRGSSRIE):
|
||||
media_type = 'video' if 'video' in all_info else 'audio'
|
||||
|
||||
# check for errors
|
||||
self.get_media_data('rts', media_type, media_id)
|
||||
self._get_media_data('rts', media_type, media_id)
|
||||
|
||||
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
|
||||
|
||||
@@ -194,6 +198,7 @@ class RTSIE(SRGSSRIE):
|
||||
'tbr': extract_bitrate(format_url),
|
||||
})
|
||||
|
||||
download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
|
||||
for media in info.get('media', []):
|
||||
media_url = media.get('url')
|
||||
if not media_url or re.match(r'https?://', media_url):
|
||||
@@ -205,7 +210,7 @@ class RTSIE(SRGSSRIE):
|
||||
format_id += '-%dk' % rate
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': 'http://download-video.rts.ch/' + media_url,
|
||||
'url': urljoin(download_base, media_url),
|
||||
'tbr': rate or extract_bitrate(media_url),
|
||||
})
|
||||
|
||||
|
||||
+104
-128
@@ -2,8 +2,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import re
|
||||
import time
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -14,56 +15,13 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = compat_b64decode(png)
|
||||
text_index = encrypted_data.find(b'tEXt')
|
||||
text_chunk = encrypted_data[text_index - 4:]
|
||||
length = compat_struct_unpack('!I', text_chunk[:4])[0]
|
||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
||||
data = bytearray(text_chunk[8:8 + length])
|
||||
data = [chr(b) for b in data if b != 0]
|
||||
hash_index = data.index('#')
|
||||
alphabet_data = data[:hash_index]
|
||||
url_data = data[hash_index + 1:]
|
||||
if url_data[0] == 'H' and url_data[3] == '%':
|
||||
# remove useless HQ%% at the start
|
||||
url_data = url_data[4:]
|
||||
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for l in alphabet_data:
|
||||
if d == 0:
|
||||
alphabet.append(l)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for letter in url_data:
|
||||
if f == 0:
|
||||
l = int(letter) * 10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(letter)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
|
||||
return url
|
||||
_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
|
||||
|
||||
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
@@ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||
'duration': 5024.566,
|
||||
'series': 'Balonmano',
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'note': 'Live stream',
|
||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||
'info_dict': {
|
||||
'id': '1694255',
|
||||
'ext': 'flv',
|
||||
'title': 'TODO',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
},
|
||||
'skip': 'The f4m manifest can\'t be used yet',
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||
'md5': 'e55e162379ad587e9640eda4f7353c0f',
|
||||
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
|
||||
'info_dict': {
|
||||
'id': '4236788',
|
||||
'ext': 'mp4',
|
||||
'title': 'Servir y proteger - Capítulo 104 ',
|
||||
'title': 'Servir y proteger - Capítulo 104',
|
||||
'duration': 3222.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
@@ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
|
||||
def _real_initialize(self):
|
||||
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||
manager_info = self._download_json(
|
||||
self._manager = self._download_json(
|
||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||
None, 'Fetching manager info')
|
||||
self._manager = manager_info['manager']
|
||||
None, 'Fetching manager info')['manager']
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
|
||||
while True:
|
||||
length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
|
||||
chunk_type = encrypted_data.read(4)
|
||||
if chunk_type == b'IEND':
|
||||
break
|
||||
data = encrypted_data.read(length)
|
||||
if chunk_type == b'tEXt':
|
||||
alphabet_data, text = data.split(b'\0')
|
||||
quality, url_data = text.split(b'%%')
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for l in _bytes_to_chr(alphabet_data):
|
||||
if d == 0:
|
||||
alphabet.append(l)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for letter in _bytes_to_chr(url_data):
|
||||
if f == 0:
|
||||
l = int(letter) * 10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(letter)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
|
||||
yield quality.decode(), url
|
||||
encrypted_data.read(4) # CRC
|
||||
|
||||
def _extract_png_formats(self, video_id):
|
||||
png = self._download_webpage(
|
||||
'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
|
||||
video_id, 'Downloading url information', query={'q': 'v2'})
|
||||
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
|
||||
formats = []
|
||||
for quality, video_url in self._decrypt_url(png):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, 'dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'quality': q(quality),
|
||||
'url': video_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||
video_id)['page']['items'][0]
|
||||
if info['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
title = info['title']
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||
png_request = sanitized_Request(png_url)
|
||||
png_request.add_header('Referer', url)
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
formats = []
|
||||
if not video_url.endswith('.f4m') and ext != 'm3u8':
|
||||
if '?' not in video_url:
|
||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
title = info['title'].strip()
|
||||
formats = self._extract_png_formats(video_id)
|
||||
|
||||
subtitles = None
|
||||
if info.get('sbtFile') is not None:
|
||||
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
|
||||
sbt_file = info.get('sbtFile')
|
||||
if sbt_file:
|
||||
subtitles = self.extract_subtitles(video_id, sbt_file)
|
||||
|
||||
is_live = info.get('live') is True
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('image'),
|
||||
'page_url': url,
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
'is_live': is_live,
|
||||
'series': info.get('programTitle'),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, sub_file):
|
||||
@@ -174,48 +179,26 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
for s in subs)
|
||||
|
||||
|
||||
class RTVEInfantilIE(InfoExtractor):
|
||||
class RTVEInfantilIE(RTVEALaCartaIE):
|
||||
IE_NAME = 'rtve.es:infantil'
|
||||
IE_DESC = 'RTVE infantil'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
||||
'md5': '915319587b33720b8e0357caaa6617e6',
|
||||
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
|
||||
'info_dict': {
|
||||
'id': '3040283',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maneras de vivir',
|
||||
'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
|
||||
'thumbnail': r're:https?://.+/1426182947956\.JPG',
|
||||
'duration': 357.958,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||
video_id)['page']['items'][0]
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vidplayer_id = self._search_regex(
|
||||
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
||||
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'thumbnail': info.get('image'),
|
||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
||||
}
|
||||
|
||||
|
||||
class RTVELiveIE(InfoExtractor):
|
||||
class RTVELiveIE(RTVEALaCartaIE):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||
@@ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'la-1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
@@ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
start_time = time.gmtime()
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||
title = remove_start(title, 'Estoy viendo ')
|
||||
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
||||
|
||||
vidplayer_id = self._search_regex(
|
||||
(r'playerId=player([0-9]+)',
|
||||
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
||||
r'data-id=["\'](\d+)'),
|
||||
webpage, 'internal video ID')
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
m3u8_url = _decrypt_url(png)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'title': self._live_title(title),
|
||||
'formats': self._extract_png_formats(vidplayer_id),
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SampleFocusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
|
||||
'md5': '48c8d62d60be467293912e0e619a5120',
|
||||
'info_dict': {
|
||||
'id': '40316',
|
||||
'display_id': 'lil-peep-sad-emo-guitar',
|
||||
'ext': 'mp3',
|
||||
'title': 'Lil Peep Sad Emo Guitar',
|
||||
'thumbnail': r're:^https?://.+\.png',
|
||||
'license': 'Standard License',
|
||||
'uploader': 'CapsCtrl',
|
||||
'uploader_id': 'capsctrl',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['Samples', 'Guitar', 'Electric guitar'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://samplefocus.com/samples/young-chop-kick',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
sample_id = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
|
||||
webpage, 'sample id', group='id')
|
||||
|
||||
title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
|
||||
r'<h1>(.+?)</h1>', webpage, 'title')
|
||||
|
||||
mp3_url = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
|
||||
webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
|
||||
r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
|
||||
webpage, 'mp3 url', group=0))['content']
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
|
||||
r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
|
||||
webpage, 'mp3', fatal=False, group='url')
|
||||
|
||||
comments = []
|
||||
for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage):
|
||||
comments.append({
|
||||
'author': author,
|
||||
'author_id': author_id,
|
||||
'text': body,
|
||||
})
|
||||
|
||||
uploader_id = uploader = None
|
||||
mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
|
||||
if mobj:
|
||||
uploader_id, uploader = mobj.groups()
|
||||
|
||||
breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
|
||||
categories = []
|
||||
if breadcrumb:
|
||||
for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb):
|
||||
categories.append(name)
|
||||
|
||||
def extract_count(klass):
|
||||
return int_or_none(self._html_search_regex(
|
||||
r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
|
||||
webpage, klass, fatal=False))
|
||||
|
||||
return {
|
||||
'id': sample_id,
|
||||
'title': title,
|
||||
'url': mp3_url,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'license': self._html_search_regex(
|
||||
r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
|
||||
webpage, 'license', fatal=False, group='license'),
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': extract_count('sample-%s-favorites' % sample_id),
|
||||
'comment_count': extract_count('comments'),
|
||||
'comments': comments,
|
||||
'categories': categories,
|
||||
}
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@@ -43,6 +43,9 @@ class SBSIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -2,12 +2,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ScreencastOMaticIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
|
||||
'md5': '483583cb80d92588f15ccbedd90f0c18',
|
||||
'info_dict': {
|
||||
@@ -16,22 +22,30 @@ class ScreencastOMaticIE(InfoExtractor):
|
||||
'title': 'Welcome to 3-4 Philosophy @ DECV!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
|
||||
'duration': 369.163,
|
||||
'duration': 369,
|
||||
'upload_date': '20141216',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
jwplayer_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
|
||||
info_dict.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
webpage = self._download_webpage(
|
||||
'https://screencast-o-matic.com/player/' + video_id, video_id)
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': get_element_by_class('overlayTitle', webpage),
|
||||
'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
|
||||
'duration': int_or_none(self._search_regex(
|
||||
r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
|
||||
webpage, 'duration', default=None)),
|
||||
'upload_date': unified_strdate(remove_start(
|
||||
get_element_by_class('overlayPublished', webpage), 'Published: ')),
|
||||
})
|
||||
return info_dict
|
||||
return info
|
||||
|
||||
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
class ShahidBaseIE(AWSIE):
|
||||
_AWS_PROXY_HOST = 'api2.shahid.net'
|
||||
_AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
|
||||
_VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
|
||||
|
||||
def _handle_error(self, e):
|
||||
fail_data = self._parse_json(
|
||||
@@ -49,15 +50,18 @@ class ShahidBaseIE(AWSIE):
|
||||
|
||||
class ShahidIE(ShahidBaseIE):
|
||||
_NETRC_MACHINE = 'shahid'
|
||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
|
||||
_VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
|
||||
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
|
||||
'info_dict': {
|
||||
'id': '275286',
|
||||
'id': '816924',
|
||||
'ext': 'mp4',
|
||||
'title': 'مجلس الشباب الموسم 1 كليب 1',
|
||||
'timestamp': 1506988800,
|
||||
'upload_date': '20171003',
|
||||
'title': 'متحف الدحيح الموسم 1 كليب 1',
|
||||
'timestamp': 1602806400,
|
||||
'upload_date': '20201016',
|
||||
'description': 'برومو',
|
||||
'duration': 22,
|
||||
'categories': ['كوميديا'],
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -70,6 +74,9 @@ class ShahidIE(ShahidBaseIE):
|
||||
# shahid plus subscriber only
|
||||
'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -109,12 +116,15 @@ class ShahidIE(ShahidBaseIE):
|
||||
page_type = 'episode'
|
||||
|
||||
playout = self._call_api(
|
||||
'playout/url/' + video_id, video_id)['playout']
|
||||
'playout/new/url/' + video_id, video_id)['playout']
|
||||
|
||||
if playout.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
|
||||
formats = self._extract_m3u8_formats(re.sub(
|
||||
# https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
|
||||
r'aws\.manifestfilter=[\w:;,-]+&?',
|
||||
'', playout['url']), video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
# video = self._call_api(
|
||||
@@ -162,7 +172,7 @@ class ShahidIE(ShahidBaseIE):
|
||||
|
||||
|
||||
class ShahidShowIE(ShahidBaseIE):
|
||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
|
||||
_VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
|
||||
'info_dict': {
|
||||
|
||||
@@ -86,10 +86,10 @@ class SharedIE(SharedBaseIE):
|
||||
|
||||
class VivoIE(SharedBaseIE):
|
||||
IE_DESC = 'vivo.sx'
|
||||
_VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
|
||||
_VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
|
||||
_FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://vivo.sx/d7ddda0e78',
|
||||
'md5': '15b3af41be0b4fe01f4df075c2678b2c',
|
||||
'info_dict': {
|
||||
@@ -98,7 +98,10 @@ class VivoIE(SharedBaseIE):
|
||||
'title': 'Chicken',
|
||||
'filesize': 515659,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vivo.st/d7ddda0e78',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_title(self, webpage):
|
||||
title = self._html_search_regex(
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class SimplecastBaseIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_API_BASE = 'https://api.simplecast.com/'
|
||||
|
||||
def _call_api(self, path_tmpl, video_id):
|
||||
return self._download_json(
|
||||
self._API_BASE + path_tmpl % video_id, video_id)
|
||||
|
||||
def _call_search_api(self, resource, resource_id, resource_url):
|
||||
return self._download_json(
|
||||
'https://api.simplecast.com/%ss/search' % resource, resource_id,
|
||||
data=urlencode_postdata({'url': resource_url}))
|
||||
|
||||
def _parse_episode(self, episode):
|
||||
episode_id = episode['id']
|
||||
title = episode['title'].strip()
|
||||
audio_file = episode.get('audio_file') or {}
|
||||
audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']
|
||||
|
||||
season = episode.get('season') or {}
|
||||
season_href = season.get('href')
|
||||
season_id = None
|
||||
if season_href:
|
||||
season_id = self._search_regex(
|
||||
r'https?://api.simplecast.com/seasons/(%s)' % self._UUID_REGEX,
|
||||
season_href, 'season id', default=None)
|
||||
|
||||
webpage_url = episode.get('episode_url')
|
||||
channel_url = None
|
||||
if webpage_url:
|
||||
channel_url = self._search_regex(
|
||||
r'(https?://[^/]+\.simplecast\.com)',
|
||||
webpage_url, 'channel url', default=None)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'display_id': episode.get('slug'),
|
||||
'title': title,
|
||||
'url': clean_podcast_url(audio_file_url),
|
||||
'webpage_url': webpage_url,
|
||||
'channel_url': channel_url,
|
||||
'series': try_get(episode, lambda x: x['podcast']['title']),
|
||||
'season_number': int_or_none(season.get('number')),
|
||||
'season_id': season_id,
|
||||
'thumbnail': episode.get('image_url'),
|
||||
'episode_id': episode_id,
|
||||
'episode_number': int_or_none(episode.get('number')),
|
||||
'description': strip_or_none(episode.get('description')),
|
||||
'timestamp': parse_iso8601(episode.get('published_at')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
|
||||
}
|
||||
|
||||
|
||||
class SimplecastIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast'
|
||||
_VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
|
||||
_COMMON_TEST_INFO = {
|
||||
'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'ext': 'mp3',
|
||||
'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
|
||||
'season_number': 1,
|
||||
'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
|
||||
'series': 'The RE:BIND.io Podcast',
|
||||
'duration': 5343,
|
||||
'timestamp': 1580979475,
|
||||
'upload_date': '20200206',
|
||||
'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'md5': '8c93be7be54251bf29ee97464eabe61c',
|
||||
'info_dict': _COMMON_TEST_INFO,
|
||||
}, {
|
||||
'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'''(?x)<iframe[^>]+src=["\']
|
||||
(
|
||||
https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
|
||||
player\.simplecast\.com/%s
|
||||
))''' % SimplecastBaseIE._UUID_REGEX, webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
episode = self._call_api('episodes/%s', episode_id)
|
||||
return self._parse_episode(episode)
|
||||
|
||||
|
||||
class SimplecastEpisodeIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast:episode'
|
||||
_VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'md5': '8c93be7be54251bf29ee97464eabe61c',
|
||||
'info_dict': SimplecastIE._COMMON_TEST_INFO,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
episode = self._call_search_api(
|
||||
'episode', mobj.group(1), mobj.group(0))
|
||||
return self._parse_episode(episode)
|
||||
|
||||
|
||||
class SimplecastPodcastIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast:podcast'
|
||||
_VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com',
|
||||
'playlist_mincount': 33,
|
||||
'info_dict': {
|
||||
'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
|
||||
'title': 'The RE:BIND.io Podcast',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
subdomain = self._match_id(url)
|
||||
site = self._call_search_api('site', subdomain, url)
|
||||
podcast = site['podcast']
|
||||
podcast_id = podcast['id']
|
||||
podcast_title = podcast.get('title')
|
||||
|
||||
def entries():
|
||||
episodes = self._call_api('podcasts/%s/episodes', podcast_id)
|
||||
for episode in (episodes.get('collection') or []):
|
||||
info = self._parse_episode(episode)
|
||||
info['series'] = podcast_title
|
||||
yield info
|
||||
|
||||
return self.playlist_result(entries(), podcast_id, podcast_title)
|
||||
@@ -6,9 +6,9 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
class SouthParkIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'southpark.cc.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
|
||||
@@ -23,8 +23,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'accountOverride': 'intl.mtvi.com',
|
||||
'arcEp': 'shared.southpark.global',
|
||||
'ep': '90877963',
|
||||
'imageEp': 'shared.southpark.global',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
||||
|
||||
class SouthParkEsIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.cc.com:español'
|
||||
|
||||
@@ -20,9 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
|
||||
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._extract_triforce_mgid(webpage)
|
||||
|
||||
|
||||
class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
|
||||
@@ -46,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'arcEp': 'paramountnetwork.com',
|
||||
'imageEp': 'paramountnetwork.com',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
root_data = self._parse_json(self._search_regex(
|
||||
r'window\.__DATA__\s*=\s*({.+})',
|
||||
webpage, 'data'), None)
|
||||
|
||||
def find_sub_data(data, data_type):
|
||||
return next(c for c in data['children'] if c.get('type') == data_type)
|
||||
|
||||
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
|
||||
return c['props']['media']['video']['config']['uri']
|
||||
|
||||
@@ -1,82 +1,105 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class SportDeutschlandIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
||||
'info_dict': {
|
||||
'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
|
||||
'id': '5318cac0275701382770543d7edaf0a0',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
|
||||
'categories': ['Badminton-Deutschland'],
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': int,
|
||||
'upload_date': '20200201',
|
||||
'description': 're:.*', # meaningless description for THIS video
|
||||
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
|
||||
'duration': 16106.36,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
||||
'info_dict': {
|
||||
'id': 'c6e2fdd01f63013854c47054d2ab776f',
|
||||
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
|
||||
'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
|
||||
'duration': 31397,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
sport_id = mobj.group('sport')
|
||||
|
||||
api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
|
||||
sport_id, video_id)
|
||||
req = sanitized_Request(api_url, headers={
|
||||
'Accept': 'application/vnd.vidibus.v2.html+json',
|
||||
'Referer': url,
|
||||
})
|
||||
data = self._download_json(req, video_id)
|
||||
|
||||
display_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
|
||||
display_id, query={'access_token': 'true'})
|
||||
asset = data['asset']
|
||||
categories = [data['section']['title']]
|
||||
|
||||
formats = []
|
||||
smil_url = asset['video']
|
||||
if '.smil' in smil_url:
|
||||
m3u8_url = smil_url.replace('.smil', '.m3u8')
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
|
||||
|
||||
smil_doc = self._download_xml(
|
||||
smil_url, video_id, note='Downloading SMIL metadata')
|
||||
base_url_el = smil_doc.find('./head/meta')
|
||||
if base_url_el:
|
||||
base_url = base_url_el.attrib['base']
|
||||
formats.extend([{
|
||||
'format_id': 'rmtp',
|
||||
'url': base_url if base_url_el else n.attrib['src'],
|
||||
'play_path': n.attrib['src'],
|
||||
'ext': 'flv',
|
||||
'preference': -100,
|
||||
'format_note': 'Seems to fail at example stream',
|
||||
} for n in smil_doc.findall('./body/video')])
|
||||
else:
|
||||
formats.append({'url': smil_url})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': asset['title'],
|
||||
'thumbnail': asset.get('image'),
|
||||
'description': asset.get('teaser'),
|
||||
'duration': asset.get('duration'),
|
||||
'categories': categories,
|
||||
'view_count': asset.get('views'),
|
||||
'rtmp_live': asset.get('live'),
|
||||
'timestamp': parse_iso8601(asset.get('date')),
|
||||
title = (asset.get('title') or asset['label']).strip()
|
||||
asset_id = asset.get('id') or asset.get('uuid')
|
||||
info = {
|
||||
'id': asset_id,
|
||||
'title': title,
|
||||
'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
|
||||
'duration': int_or_none(asset.get('seconds')),
|
||||
}
|
||||
videos = asset.get('videos') or []
|
||||
if len(videos) > 1:
|
||||
playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
|
||||
if playlist_id:
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
videos = [videos[int(playlist_id)]]
|
||||
self.to_screen('Downloading just a single video because of --no-playlist')
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
|
||||
|
||||
def entries():
|
||||
for i, video in enumerate(videos, 1):
|
||||
video_id = video.get('uuid')
|
||||
video_url = video.get('url')
|
||||
if not (video_id and video_url):
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
|
||||
if not formats:
|
||||
continue
|
||||
yield {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
}
|
||||
info.update({
|
||||
'_type': 'multi_video',
|
||||
'entries': entries(),
|
||||
})
|
||||
else:
|
||||
formats = self._extract_m3u8_formats(
|
||||
videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
|
||||
section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'display_id': asset.get('permalink'),
|
||||
'thumbnail': try_get(asset, lambda x: x['images'][0]),
|
||||
'categories': [section_title] if section_title else None,
|
||||
'view_count': int_or_none(asset.get('views')),
|
||||
'is_live': asset.get('is_live') is True,
|
||||
'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyBaseIE(InfoExtractor):
|
||||
_ACCESS_TOKEN = None
|
||||
_OPERATION_HASHES = {
|
||||
'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
|
||||
'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
|
||||
'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
|
||||
}
|
||||
_VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._ACCESS_TOKEN = self._download_json(
|
||||
'https://open.spotify.com/get_access_token', None)['accessToken']
|
||||
|
||||
def _call_api(self, operation, video_id, variables):
|
||||
return self._download_json(
|
||||
'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
|
||||
'operationName': 'query' + operation,
|
||||
'variables': json.dumps(variables),
|
||||
'extensions': json.dumps({
|
||||
'persistedQuery': {
|
||||
'sha256Hash': self._OPERATION_HASHES[operation],
|
||||
},
|
||||
})
|
||||
}, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
|
||||
|
||||
def _extract_episode(self, episode, series):
|
||||
episode_id = episode['id']
|
||||
title = episode['name'].strip()
|
||||
|
||||
formats = []
|
||||
audio_preview = episode.get('audioPreview') or {}
|
||||
audio_preview_url = audio_preview.get('url')
|
||||
if audio_preview_url:
|
||||
f = {
|
||||
'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
audio_preview_format = audio_preview.get('format')
|
||||
if audio_preview_format:
|
||||
f['format_id'] = audio_preview_format
|
||||
mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
|
||||
if mobj:
|
||||
f.update({
|
||||
'abr': int(mobj.group(2)),
|
||||
'ext': mobj.group(1).lower(),
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
for item in (try_get(episode, lambda x: x['audio']['items']) or []):
|
||||
item_url = item.get('url')
|
||||
if not (item_url and item.get('externallyHosted')):
|
||||
continue
|
||||
formats.append({
|
||||
'url': clean_podcast_url(item_url),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': strip_or_none(episode.get('description')),
|
||||
'duration': float_or_none(try_get(
|
||||
episode, lambda x: x['duration']['totalMilliseconds']), 1000),
|
||||
'release_date': unified_strdate(try_get(
|
||||
episode, lambda x: x['releaseDate']['isoString'])),
|
||||
'series': series,
|
||||
}
|
||||
|
||||
|
||||
class SpotifyIE(SpotifyBaseIE):
|
||||
IE_NAME = 'spotify'
|
||||
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
|
||||
_TEST = {
|
||||
'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
|
||||
'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
|
||||
'info_dict': {
|
||||
'id': '4Z7GAJ50bgctf6uclHlWKo',
|
||||
'ext': 'mp3',
|
||||
'title': 'From the archive: Why time management is ruining our lives',
|
||||
'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
|
||||
'duration': 2083.605,
|
||||
'release_date': '20201217',
|
||||
'series': "The Guardian's Audio Long Reads",
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
episode = self._call_api('Episode', episode_id, {
|
||||
'uri': 'spotify:episode:' + episode_id
|
||||
})['episode']
|
||||
return self._extract_episode(
|
||||
episode, try_get(episode, lambda x: x['podcast']['name']))
|
||||
|
||||
|
||||
class SpotifyShowIE(SpotifyBaseIE):
|
||||
IE_NAME = 'spotify:show'
|
||||
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
|
||||
_TEST = {
|
||||
'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
|
||||
'info_dict': {
|
||||
'id': '4PM9Ke6l66IRNpottHKV9M',
|
||||
'title': 'The Story from the Guardian',
|
||||
'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
|
||||
},
|
||||
'playlist_mincount': 36,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
podcast = self._call_api('ShowEpisodes', show_id, {
|
||||
'limit': 1000000000,
|
||||
'offset': 0,
|
||||
'uri': 'spotify:show:' + show_id,
|
||||
})['podcast']
|
||||
podcast_name = podcast.get('name')
|
||||
|
||||
entries = []
|
||||
for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
|
||||
episode = item.get('episode')
|
||||
if not episode:
|
||||
continue
|
||||
entries.append(self._extract_episode(episode, podcast_name))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, show_id, podcast_name, podcast.get('description'))
|
||||
+133
-73
@@ -4,16 +4,32 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class SRGSSRIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
|
||||
srgssr
|
||||
):
|
||||
(?P<bu>
|
||||
srf|rts|rsi|rtr|swi
|
||||
):(?:[^:]+:)?
|
||||
(?P<type>
|
||||
video|audio
|
||||
):
|
||||
(?P<id>
|
||||
[0-9a-f\-]{36}|\d+
|
||||
)
|
||||
'''
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['CH']
|
||||
|
||||
@@ -25,25 +41,39 @@ class SRGSSRIE(InfoExtractor):
|
||||
'LEGAL': 'The video cannot be transmitted for legal reasons.',
|
||||
'STARTDATE': 'This video is not yet available. Please try again later.',
|
||||
}
|
||||
_DEFAULT_LANGUAGE_CODES = {
|
||||
'srf': 'de',
|
||||
'rts': 'fr',
|
||||
'rsi': 'it',
|
||||
'rtr': 'rm',
|
||||
'swi': 'en',
|
||||
}
|
||||
|
||||
def _get_tokenized_src(self, url, video_id, format_id):
|
||||
sp = compat_urllib_parse_urlparse(url).path.split('/')
|
||||
token = self._download_json(
|
||||
'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]),
|
||||
'http://tp.srgssr.ch/akahd/token?acl=*',
|
||||
video_id, 'Downloading %s token' % format_id, fatal=False) or {}
|
||||
auth_params = token.get('token', {}).get('authparams')
|
||||
auth_params = try_get(token, lambda x: x['token']['authparams'])
|
||||
if auth_params:
|
||||
url += '?' + auth_params
|
||||
url += ('?' if '?' not in url else '&') + auth_params
|
||||
return url
|
||||
|
||||
def get_media_data(self, bu, media_type, media_id):
|
||||
media_data = self._download_json(
|
||||
'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
|
||||
media_id)[media_type.capitalize()]
|
||||
def _get_media_data(self, bu, media_type, media_id):
|
||||
query = {'onlyChapters': True} if media_type == 'video' else {}
|
||||
full_media_data = self._download_json(
|
||||
'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
|
||||
% (bu, media_type, media_id),
|
||||
media_id, query=query)['chapterList']
|
||||
try:
|
||||
media_data = next(
|
||||
x for x in full_media_data if x.get('id') == media_id)
|
||||
except StopIteration:
|
||||
raise ExtractorError('No media information found')
|
||||
|
||||
if media_data.get('block') and media_data['block'] in self._ERRORS:
|
||||
message = self._ERRORS[media_data['block']]
|
||||
if media_data['block'] == 'GEOBLOCK':
|
||||
block_reason = media_data.get('blockReason')
|
||||
if block_reason and block_reason in self._ERRORS:
|
||||
message = self._ERRORS[block_reason]
|
||||
if block_reason == 'GEOBLOCK':
|
||||
self.raise_geo_restricted(
|
||||
msg=message, countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
@@ -53,53 +83,75 @@ class SRGSSRIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
|
||||
media_data = self._get_media_data(bu, media_type, media_id)
|
||||
title = media_data['title']
|
||||
|
||||
media_data = self.get_media_data(bu, media_type, media_id)
|
||||
|
||||
metadata = media_data['AssetMetadatas']['AssetMetadata'][0]
|
||||
title = metadata['title']
|
||||
description = metadata.get('description')
|
||||
created_date = media_data.get('createdDate') or metadata.get('createdDate')
|
||||
timestamp = parse_iso8601(created_date)
|
||||
|
||||
thumbnails = [{
|
||||
'id': image.get('id'),
|
||||
'url': image['url'],
|
||||
} for image in media_data.get('Image', {}).get('ImageRepresentations', {}).get('ImageRepresentation', [])]
|
||||
|
||||
preference = qualities(['LQ', 'MQ', 'SD', 'HQ', 'HD'])
|
||||
formats = []
|
||||
for source in media_data.get('Playlists', {}).get('Playlist', []) + media_data.get('Downloads', {}).get('Download', []):
|
||||
protocol = source.get('@protocol')
|
||||
for asset in source['url']:
|
||||
asset_url = asset['text']
|
||||
quality = asset['@quality']
|
||||
format_id = '%s-%s' % (protocol, quality)
|
||||
if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'):
|
||||
asset_url = self._get_tokenized_src(asset_url, media_id, format_id)
|
||||
if protocol.startswith('HTTP-HDS'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0',
|
||||
media_id, f4m_id=format_id, fatal=False))
|
||||
elif protocol.startswith('HTTP-HLS'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, media_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': asset_url,
|
||||
'preference': preference(quality),
|
||||
'ext': 'flv' if protocol == 'RTMP' else None,
|
||||
})
|
||||
q = qualities(['SD', 'HD'])
|
||||
for source in (media_data.get('resourceList') or []):
|
||||
format_url = source.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
protocol = source.get('protocol')
|
||||
quality = source.get('quality')
|
||||
format_id = []
|
||||
for e in (protocol, source.get('encoding'), quality):
|
||||
if e:
|
||||
format_id.append(e)
|
||||
format_id = '-'.join(format_id)
|
||||
|
||||
if protocol in ('HDS', 'HLS'):
|
||||
if source.get('tokenType') == 'AKAMAI':
|
||||
format_url = self._get_tokenized_src(
|
||||
format_url, media_id, format_id)
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
format_url, media_id))
|
||||
elif protocol == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, media_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif protocol in ('HTTP', 'HTTPS'):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'quality': q(quality),
|
||||
})
|
||||
|
||||
# This is needed because for audio medias the podcast url is usually
|
||||
# always included, even if is only an audio segment and not the
|
||||
# whole episode.
|
||||
if int_or_none(media_data.get('position')) == 0:
|
||||
for p in ('S', 'H'):
|
||||
podcast_url = media_data.get('podcast%sdUrl' % p)
|
||||
if not podcast_url:
|
||||
continue
|
||||
quality = p + 'D'
|
||||
formats.append({
|
||||
'format_id': 'PODCAST-' + quality,
|
||||
'url': podcast_url,
|
||||
'quality': q(quality),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
if media_type == 'video':
|
||||
for sub in (media_data.get('subtitleList') or []):
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
lang = sub.get('locale') or self._DEFAULT_LANGUAGE_CODES[bu]
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnails': thumbnails,
|
||||
'description': media_data.get('description'),
|
||||
'timestamp': parse_iso8601(media_data.get('date')),
|
||||
'thumbnail': media_data.get('imageUrl'),
|
||||
'duration': float_or_none(media_data.get('duration'), 1000),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -119,26 +171,17 @@ class SRGSSRPlayIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
|
||||
'md5': 'da6b5b3ac9fa4761a942331cef20fcb3',
|
||||
'md5': '6db2226ba97f62ad42ce09783680046c',
|
||||
'info_dict': {
|
||||
'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130701',
|
||||
'title': 'Snowden beantragt Asyl in Russland',
|
||||
'timestamp': 1372713995,
|
||||
}
|
||||
}, {
|
||||
# No Speichern (Save) button
|
||||
'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
|
||||
'md5': '0a274ce38fda48c53c01890651985bc6',
|
||||
'info_dict': {
|
||||
'id': '677f5829-e473-4823-ac83-a1087fe97faa',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20130710',
|
||||
'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
|
||||
'description': 'md5:88604432b60d5a38787f152dec89cd56',
|
||||
'timestamp': 1373493600,
|
||||
'timestamp': 1372708215,
|
||||
'duration': 113.827,
|
||||
'thumbnail': r're:^https?://.*1383719781\.png$',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
|
||||
'info_dict': {
|
||||
@@ -146,7 +189,8 @@ class SRGSSRPlayIE(InfoExtractor):
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20151013',
|
||||
'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
|
||||
'timestamp': 1444750398,
|
||||
'timestamp': 1444709160,
|
||||
'duration': 336.816,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -159,19 +203,32 @@ class SRGSSRPlayIE(InfoExtractor):
|
||||
'id': '6348260',
|
||||
'display_id': '6348260',
|
||||
'ext': 'mp4',
|
||||
'duration': 1796,
|
||||
'duration': 1796.76,
|
||||
'title': 'Le 19h30',
|
||||
'description': '',
|
||||
'uploader': '19h30',
|
||||
'upload_date': '20141201',
|
||||
'timestamp': 1417458600,
|
||||
'thumbnail': r're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270',
|
||||
'info_dict': {
|
||||
'id': '42960270',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why people were against tax reforms',
|
||||
'description': 'md5:7ac442c558e9630e947427469c4b824d',
|
||||
'duration': 94.0,
|
||||
'upload_date': '20170215',
|
||||
'timestamp': 1487173560,
|
||||
'thumbnail': r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
|
||||
'subtitles': 'count:9',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
|
||||
'only_matching': True,
|
||||
@@ -181,6 +238,10 @@ class SRGSSRPlayIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio segment, has podcastSdUrl of the full episode
|
||||
'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -188,5 +249,4 @@ class SRGSSRPlayIE(InfoExtractor):
|
||||
bu = mobj.group('bu')
|
||||
media_type = mobj.group('type') or mobj.group('type_2')
|
||||
media_id = mobj.group('id')
|
||||
# other info can be extracted from url + '&layout=json'
|
||||
return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class StoryFireBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
|
||||
|
||||
def _call_api(self, path, video_id, resource, query=None):
|
||||
return self._download_json(
|
||||
'https://storyfire.com/app/%s/%s' % (path, video_id), video_id,
|
||||
'Downloading %s JSON metadata' % resource, query=query)
|
||||
|
||||
def _parse_video(self, video):
|
||||
title = video['title']
|
||||
vimeo_id = self._search_regex(
|
||||
r'https?://player\.vimeo\.com/external/(\d+)',
|
||||
video['vimeoVideoURL'], 'vimeo id')
|
||||
|
||||
# video_url = self._request_webpage(
|
||||
# HEADRequest(video['vimeoVideoURL']), video_id).geturl()
|
||||
# formats = []
|
||||
# for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]:
|
||||
# formats.extend(self._extract_m3u8_formats(
|
||||
# v_url, video_id, 'mp4', 'm3u8_native',
|
||||
# m3u8_id='hls' + suffix, fatal=False))
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# v_url.replace('.m3u8', '.mpd'), video_id,
|
||||
# mpd_id='dash' + suffix, fatal=False))
|
||||
# self._sort_formats(formats)
|
||||
|
||||
uploader_id = video.get('hostID')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': vimeo_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'url': smuggle_url(
|
||||
'https://player.vimeo.com/video/' + vimeo_id, {
|
||||
'http_headers': {
|
||||
'Referer': 'https://storyfire.com/',
|
||||
}
|
||||
}),
|
||||
# 'formats': formats,
|
||||
'thumbnail': video.get('storyImage'),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'like_count': int_or_none(video.get('likesCount')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
'duration': int_or_none(video.get('videoDuration')),
|
||||
'timestamp': int_or_none(video.get('publishDate')),
|
||||
'uploader': video.get('username'),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None,
|
||||
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
|
||||
}
|
||||
|
||||
|
||||
class StoryFireIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
|
||||
_TEST = {
|
||||
'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
|
||||
'md5': 'caec54b9e4621186d6079c7ec100c1eb',
|
||||
'info_dict': {
|
||||
'id': '378954662',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buzzfeed Teaches You About Memes',
|
||||
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
||||
'timestamp': 1576129028,
|
||||
'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
|
||||
'uploader': 'whang!',
|
||||
'upload_date': '20191212',
|
||||
'duration': 418,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata']
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video = self._call_api(
|
||||
'generic/video-detail', video_id, 'video')['video']
|
||||
return self._parse_video(video)
|
||||
|
||||
|
||||
class StoryFireUserIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
|
||||
_TEST = {
|
||||
'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
|
||||
'info_dict': {
|
||||
'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
|
||||
},
|
||||
'playlist_mincount': 151,
|
||||
}
|
||||
_PAGE_SIZE = 20
|
||||
|
||||
def _fetch_page(self, user_id, page):
|
||||
videos = self._call_api(
|
||||
'publicVideos', user_id, 'page %d' % (page + 1), {
|
||||
'skip': page * self._PAGE_SIZE,
|
||||
})['videos']
|
||||
for video in videos:
|
||||
yield self._parse_video(video)
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, user_id), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, user_id)
|
||||
|
||||
|
||||
class StoryFireSeriesIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
|
||||
'info_dict': {
|
||||
'id': '-Lq6MsuIHLODO6d2dDkr',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}, {
|
||||
'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
|
||||
'info_dict': {
|
||||
'id': 'the_mortal_one',
|
||||
},
|
||||
'playlist_count': 0,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, stories):
|
||||
for story in stories.values():
|
||||
if story.get('hasVideo'):
|
||||
yield self._parse_video(story)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
stories = self._call_api(
|
||||
'seriesStories', series_id, 'series stories')
|
||||
return self.playlist_result(self._extract_videos(stories), series_id)
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class StretchInternetIE(InfoExtractor):
|
||||
@@ -11,22 +10,28 @@ class StretchInternetIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '573272',
|
||||
'ext': 'mp4',
|
||||
'title': 'University of Mary Wrestling vs. Upper Iowa',
|
||||
'timestamp': 1575668361,
|
||||
'upload_date': '20191206',
|
||||
'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
|
||||
# 'timestamp': 1575668361,
|
||||
# 'upload_date': '20191206',
|
||||
'uploader_id': '99997',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
media_url = self._download_json(
|
||||
'https://core.stretchlive.com/trinity/event/tcg/' + video_id,
|
||||
video_id)[0]['media'][0]['url']
|
||||
event = self._download_json(
|
||||
'https://api.stretchinternet.com/trinity/event/tcg/' + video_id,
|
||||
video_id)[0]
|
||||
'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
|
||||
video_id, query={'eventID': video_id, 'token': 'asdf'})['event']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': event['title'],
|
||||
'timestamp': int_or_none(event.get('dateCreated'), 1000),
|
||||
'url': 'https://' + event['media'][0]['url'],
|
||||
# TODO: parse US timezone abbreviations
|
||||
# 'timestamp': event.get('dateTimeString'),
|
||||
'url': 'https://' + media_url,
|
||||
'uploader_id': event.get('ownerID'),
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user