Skip to content

Commit 4c08a6a

Browse files
committed
video work: improved yt handling:
- disable yt using yt api, for forced html/flash, disable on load - use yt error event to detect error - better fallback on recorded video - use separate cache for range and video info tracking - fix yt rules query to account for & and ?
1 parent ca17410 commit 4c08a6a

File tree

5 files changed

+176
-75
lines changed

5 files changed

+176
-75
lines changed

pywb/rules.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,12 @@ rules:
148148

149149
- url_prefix: 'com,youtube,c'
150150

151-
fuzzy_lookup: 'com,youtube,c.*/videogoodput.*(id=[^&]+)'
151+
fuzzy_lookup: 'com,youtube,c.*/videogoodput.*([?&]id=[^&]+)'
152152

153153
- url_prefix: 'com,googlevideo,'
154154

155155
fuzzy_lookup:
156-
match: 'com,googlevideo.*/videoplayback.*(id=[^&]+).*(itag=[^&]+).*(mime=[^&]+)'
156+
match: 'com,googlevideo.*/videoplayback.*([?&]id=[^&]+).*([?&]itag=[^&]+).*([?&]mime=[^&]+)'
157157
filter:
158158
- '~urlkey:{0}'
159159
- '!mimetype:text/plain'

pywb/static/vidrw.js

Lines changed: 98 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,42 @@ This file is part of pywb, https://github.com/ikreymer/pywb
1818
*/
1919

2020
// VidRw 1.0 -- video rewriting
21+
//
22+
//
23+
24+
var _pywbvid = "default";
25+
26+
var _pywb_yt_err = undefined;
27+
28+
if (window.location.hash) {
29+
var m = window.location.hash.match(/_pywbvid=([\w]+)/);
30+
if (m) {
31+
_pywbvid = m[1];
32+
}
33+
34+
if (_pywbvid == "html" || _pywbvid == "flash") {
35+
var YT_W_E_RX = /^(https?:\/\/.*youtube.com)\/(watch|embed).*$/;
36+
37+
if (wbinfo.url.match(YT_W_E_RX)) {
38+
// special case: prevent yt player from being inited
39+
Object.defineProperty(window, 'yt', {writeable: false});
40+
Object.defineProperty(window, 'ytplayer', {writeable: false});
41+
}
42+
}
43+
}
44+
2145

2246
__wbvidrw = (function() {
2347

2448
var found_embeds = false;
2549

26-
var vid_type = "default";
27-
2850
var FLASH_PLAYER = wbinfo.static_prefix + "/flowplayer/flowplayer-3.2.18.swf";
2951

3052
function check_videos() {
3153
if (found_embeds) {
3254
return;
3355
}
3456

35-
// extract_typ
36-
if (window.location.hash) {
37-
var m = window.location.hash.match(/_pywbvid=([\w]+)/);
38-
if (m) {
39-
vid_type = m[1];
40-
}
41-
}
42-
4357
function handle_all_embeds() {
4458
var embeds = document.getElementsByTagName("embed");
4559

@@ -61,10 +75,9 @@ __wbvidrw = (function() {
6175

6276
found_embeds = true;
6377

64-
handle_yt_videos(vid_type);
65-
66-
//window.setInterval(handle_all_embeds, 1000);
78+
handle_yt_videos(_pywbvid);
6779

80+
//window.setInterval(handle_all_embeds, 2000);
6881
//_wb_wombat.add_tag_handler("embed", handle_all_embeds);
6982
//_wb_wombat.add_tag_handler("object", handle_all_objects);
7083
}
@@ -100,8 +113,8 @@ __wbvidrw = (function() {
100113
return false;
101114
}
102115

103-
for (var j = 0; j < objects[i].children.length; j++) {
104-
var child = objects[i].children[j];
116+
for (var j = 0; j < elem.children.length; j++) {
117+
var child = elem.children[j];
105118

106119
if (child.tagName == "EMBED") {
107120
return false;
@@ -125,7 +138,7 @@ __wbvidrw = (function() {
125138

126139
elem._vidrw = true;
127140

128-
check_replacement(elem, src);
141+
check_replacement(elem, obj_url);
129142
return true;
130143
}
131144

@@ -136,41 +149,85 @@ __wbvidrw = (function() {
136149
var YT_V_RX = /^(https?:\/\/.*youtube.com)\/v\/([^&?]+)(.*)$/;
137150
var VIMEO_RX = /^https?:\/\/.*vimeo.*clip_id=([^&]+)/;
138151

139-
140-
function handle_yt_videos(vid_type)
152+
function remove_yt()
141153
{
142-
function do_yt_video_replace()
143-
{
144-
console.log("REPLACING YT: " + wbinfo.url);
145-
ytvideo[0].autoplay = false;
146-
ytvideo[0].preload = "none";
154+
// yt special case
155+
if (window.yt && window.yt.player && window.yt.player.getPlayerByElement) {
156+
//yt.player.Application.create("player-api", ytplayer.config).dispose();
147157

148-
var elem = ytvideo[0];
149-
// get ancestor 'div'
150-
if (elem.parentElement) {
151-
elem = elem.parentElement;
158+
var elem = window.yt.player.getPlayerByElement("player-api");
159+
160+
if (!elem) {
161+
elem = window.yt.player.getPlayerByElement("player");
152162
}
153-
if (elem.parentElement) {
154-
elem = elem.parentElement;
163+
164+
if (elem) {
165+
elem.destroy();
155166
}
156-
console.log(elem);
157167

158-
// Experimental
168+
delete window.yt;
169+
if (window.ytplayer) {
170+
delete window.ytplayer;
171+
}
172+
}
173+
// end yt special case
174+
}
159175

160-
check_replacement(elem, wbinfo.url);
176+
function handle_yt_videos(_pywbvid)
177+
{
178+
function do_yt_video_replace(elem)
179+
{
180+
remove_yt();
181+
182+
while (elem.hasChildNodes()) {
183+
elem.removeChild(elem.lastChild);
184+
}
185+
186+
//add placeholder child to remove
187+
var placeholder = document.createElement("div");
188+
elem.appendChild(placeholder);
189+
check_replacement(placeholder, wbinfo.url);
161190
}
162191

163192
// special case: yt
164193
if (wbinfo.url.match(YT_W_E_RX)) {
165-
var ytvideo = document.getElementsByTagName("video");
194+
//var ytvideo = document.getElementsByTagName("video");
195+
var player_div = document.getElementById("player-api");
196+
if (!player_div) {
197+
player_div = document.getElementById("player");
198+
}
199+
200+
//if (ytvideo.length == 1 && ytvideo[0].getAttribute("data-youtube-id") != "") {
201+
if (player_div) {
202+
if (_pywbvid == "html" || _pywbvid == "flash") {
203+
do_yt_video_replace(player_div);
204+
} else if (!wbinfo.is_live) {
205+
var player = window.yt.player.getPlayerByElement(player_div);
206+
207+
if (player) {
208+
_pywb_yt_err = function() {
209+
do_yt_video_replace(player_div);
210+
}
211+
212+
player.addEventListener("onError", "_pywb_yt_err");
213+
}
166214

167-
if (ytvideo.length == 1 && ytvideo[0].getAttribute("data-youtube-id") != "") {
168-
if (vid_type == "html") {
169-
do_yt_video_replace();
170-
} else {
171215
setTimeout(function() {
172-
if (!ytvideo || !ytvideo.length || ytvideo[0].readyState == 0) {
173-
do_yt_video_replace();
216+
if (!window.yt || !window.yt.player) {
217+
do_yt_video_replace(player_div);
218+
return;
219+
}
220+
221+
var state = -1;
222+
223+
if (player && player.getPlayerState) {
224+
state = player.getPlayerState();
225+
}
226+
227+
// if no player or player is still buffering (is this ok), then replace
228+
if (state < 0 || state == 3) {
229+
do_yt_video_replace(player_div);
230+
return;
174231
}
175232
}, 4000);
176233
}
@@ -197,7 +254,7 @@ __wbvidrw = (function() {
197254

198255
src = src.replace(VIMEO_RX, "http://player.vimeo.com/video/$1");
199256

200-
if (vid_type == "orig") {
257+
if (_pywbvid == "orig") {
201258
var repl_src = src.replace(YT_V_RX, "$1/embed/$2?$3&controls=0");
202259
if (repl_src != src) {
203260
do_replace_iframe(elem, repl_src);
@@ -275,12 +332,6 @@ __wbvidrw = (function() {
275332
} else {
276333
elem.parentNode.replaceChild(replacement, elem);
277334
}
278-
279-
if (window.yt) {
280-
yt.player.Application.create("player-api", ytplayer.config).dispose();
281-
delete window.yt;
282-
delete window.ytplayer;
283-
}
284335
}
285336

286337

@@ -315,7 +366,7 @@ __wbvidrw = (function() {
315366
if (type == "audio") {
316367
htmlelem = document.createElement("audio");
317368
}
318-
if (vid_type != "flash") {
369+
if (_pywbvid != "flash") {
319370
replacement = init_html_player(htmlelem, type, width, height, info, thumb_url);
320371
}
321372
}
@@ -402,7 +453,6 @@ __wbvidrw = (function() {
402453
return;
403454
}
404455

405-
//console.log("html5 " + type +" error");
406456
var replacement = document.createElement("div");
407457

408458
var vidId = "_wb_vid" + Date.now();

pywb/webapp/live_rewrite_handler.py

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def _live_request_headers(self, wbrequest):
6767

6868
def render_content(self, wbrequest):
6969
if wbrequest.wb_url.mod == 'vi_':
70-
return self.get_video_info(wbrequest)
70+
return self._get_video_info(wbrequest)
7171

7272
head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
7373
req_headers = self._live_request_headers(wbrequest)
@@ -79,6 +79,7 @@ def render_content(self, wbrequest):
7979
ignore_proxies = False
8080
use_206 = False
8181
url = None
82+
rangeres = None
8283

8384
readd_range = False
8485
cache_key = None
@@ -100,7 +101,7 @@ def render_content(self, wbrequest):
100101
ignore_proxies = True
101102

102103
# sets cache_key only if not already cached
103-
cache_key = self._check_url_cache(url)
104+
cache_key = self._get_cache_key('r:', url)
104105

105106
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
106107
wbrequest.urlrewriter,
@@ -124,6 +125,18 @@ def render_content(self, wbrequest):
124125
if cache_key:
125126
self._add_proxy_ping(cache_key, url, wbrequest, wbresponse)
126127

128+
if rangeres:
129+
referrer = wbrequest.env.get('REL_REFERER')
130+
131+
# also ping video info
132+
if referrer:
133+
try:
134+
resp = self._get_video_info(wbrequest,
135+
info_url=referrer,
136+
video_url=url)
137+
except:
138+
print('Error getting video info')
139+
127140
return wbresponse
128141

129142
def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
@@ -138,22 +151,26 @@ def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
138151

139152
return WbResponse(status_headers, gen)
140153

141-
def _check_url_cache(self, url):
154+
def _get_cache_key(self, prefix, url):
142155
if not self._cache:
143156
self._cache = create_cache()
144157

145-
hash_ = hashlib.md5()
146-
hash_.update(url)
147-
key = hash_.hexdigest()
158+
key = self.create_cache_key(prefix, url)
148159

149160
if key in self._cache:
150161
return None
151162

152163
return key
153164

154-
def _add_proxy_ping(self, key, url, wbrequest, wbresponse):
155-
referrer = wbrequest.env.get('REL_REFERER')
165+
@staticmethod
166+
def create_cache_key(prefix, url):
167+
hash_ = hashlib.md5()
168+
hash_.update(url)
169+
key = hash_.hexdigest()
170+
key = prefix + key
171+
return key
156172

173+
def _add_proxy_ping(self, key, url, wbrequest, wbresponse):
157174
def do_ping():
158175
headers = self._live_request_headers(wbrequest)
159176
headers['Connection'] = 'close'
@@ -175,12 +192,6 @@ def do_ping():
175192
del self._cache[key]
176193
raise
177194

178-
# also ping video info
179-
if referrer:
180-
resp = self.get_video_info(wbrequest,
181-
info_url=referrer,
182-
video_url=url)
183-
184195
def wrap_buff_gen(gen):
185196
for x in gen:
186197
yield x
@@ -194,7 +205,7 @@ def wrap_buff_gen(gen):
194205
wbresponse.body = wrap_buff_gen(wbresponse.body)
195206
return wbresponse
196207

197-
def get_video_info(self, wbrequest, info_url=None, video_url=None):
208+
def _get_video_info(self, wbrequest, info_url=None, video_url=None):
198209
if not self.youtubedl:
199210
self.youtubedl = YoutubeDLWrapper()
200211

@@ -204,12 +215,18 @@ def get_video_info(self, wbrequest, info_url=None, video_url=None):
204215
if not info_url:
205216
info_url = wbrequest.wb_url.url
206217

218+
cache_key = None
219+
if self.proxies:
220+
cache_key = self._get_cache_key('v:', video_url)
221+
207222
info = self.youtubedl.extract_info(video_url)
208223

224+
#if info and info.formats and len(info.formats) == 1:
225+
209226
content_type = self.YT_DL_TYPE
210227
metadata = json.dumps(info)
211228

212-
if self.proxies:
229+
if (self.proxies and cache_key):
213230
headers = self._live_request_headers(wbrequest)
214231
headers['Content-Type'] = content_type
215232

@@ -222,6 +239,8 @@ def get_video_info(self, wbrequest, info_url=None, video_url=None):
222239
proxies=self.proxies,
223240
verify=False)
224241

242+
self._cache[cache_key] = '1'
243+
225244
return WbResponse.text_response(metadata, content_type=content_type)
226245

227246
def __str__(self):

pywb/webapp/rangecache.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import yaml
88
import os
9+
from shutil import rmtree
910

1011
import atexit
1112

@@ -19,9 +20,8 @@ def __init__(self):
1920

2021
def cleanup(self):
2122
if self.temp_dir: # pragma: no cover
22-
import shutil
2323
print('Removing: ' + self.temp_dir)
24-
shutil.rmtree(self.temp_dir, True)
24+
rmtree(self.temp_dir, True)
2525
self.temp_dir = None
2626

2727
def handle_range(self, wbrequest, digest, wbresponse_func,

0 commit comments

Comments
 (0)