3
3
import os
4
4
5
5
os .chdir (os .path .dirname (__file__ ))
6
- import requests
7
6
import traceback
8
7
from datetime import datetime , timedelta
9
8
import re
10
9
import base64
11
10
import zlib
12
11
from time import time
13
12
from html import escape as html_escape
13
+ import threading
14
14
from urllib .parse import urljoin , urlsplit , urlunsplit
15
+ import requests
15
16
from flask import Flask , request , make_response , Response , redirect
16
17
from ColorfulPyPrint import * # TODO: Migrate logging tools to the stdlib
17
18
41
42
errprint ('Can Not Create Local File Cache: ' , e , ' local file cache is disabled automatically.' )
42
43
local_cache_enable = False
43
44
44
- __VERSION__ = '0.15 .1-dev'
45
+ __VERSION__ = '0.16 .1-dev'
45
46
__author__ = 'Aploium <i@z.codes>'
46
47
static_file_extensions_list = set (static_file_extensions_list )
47
48
external_domains_set = set (external_domains or [])
52
53
myurl_prefix_escaped = myurl_prefix .replace ('/' , r'\/' )
53
54
cdn_domains_number = len (CDN_domains )
54
55
56
+ # ## thread local var ##
57
+ thread_local = threading .local ()
58
+ thread_local .start_time = None
59
+
55
60
# ########## Handle dependencies #############
56
61
if not enable_static_resource_CDN :
57
62
mime_based_static_resource_CDN = False
110
115
)
111
116
regex_extract_base64_from_embedded_url = re .compile (
112
117
r'_ewm0(?P<gzip>z?)_\.(?P<b64>[a-zA-Z0-9-_]+=*)\._ewm1_\.[a-zA-Z\d]+\b' )
113
- # Basic url rewriter for external sites, see function response_text_rewrite()
114
- regex_basic_ext_url_rewriter = {}
115
- regex_basic_ext_url_esc_rewriter = {}
116
- for _domain in external_domains :
117
- regex_basic_ext_url_rewriter [_domain ] = re .compile (r'(https?:)?//' + re .escape (_domain ), flags = re .IGNORECASE )
118
- # TODO: Combine it together with regex_basic_ext_url_rewriter
119
- regex_basic_ext_url_esc_rewriter [_domain ] = re .compile (r'(https?:)?\\/\\/' + re .escape (_domain ),
120
- flags = re .IGNORECASE )
118
+
121
119
# Response Cookies Rewriter, see response_cookie_rewrite()
122
120
regex_cookie_rewriter = re .compile (r'\bdomain=(\.?([\w-]+\.)+\w+)\b' , flags = re .IGNORECASE )
123
121
# Request Domains Rewriter, see rewrite_client_requests_text()
@@ -419,6 +417,7 @@ def put_response_to_local_cache(url, our_resp, req, remote_resp):
419
417
def try_get_cached_response (url , client_header ):
420
418
"""
421
419
420
+ :param url: real url with query string
422
421
:type client_header: dict
423
422
"""
424
423
# Only use the cache when the client uses GET
@@ -484,10 +483,9 @@ def regex_url_reassemble(match_obj):
484
483
# only url(something) and @import are allowed to be unquoted
485
484
or ('url' not in prefix and 'import' not in prefix ) and (not quote_left or quote_right == ')' )
486
485
# for "key":"value" type replace, we must have at least one '/' in url path (for the value to be regarded as a url)
487
- or (':' in prefix and '/' not in path )):
486
+ or (':' in prefix and '/' not in path )
487
+ ):
488
488
return whole_match_string
489
- else :
490
- url_rewrite_cache_miss_count += 1
491
489
492
490
remote_path = request .path
493
491
if request .path [:11 ] == '/extdomains' :
@@ -564,6 +562,7 @@ def regex_url_reassemble(match_obj):
564
562
# write the adv rewrite cache only if CDN is disabled or we know whether this url is CDN-able
565
563
if not mime_based_static_resource_CDN or _we_knew_this_url :
566
564
url_rewrite_cache [match_obj .group ()] = reassembled # write cache
565
+ url_rewrite_cache_miss_count += 1
567
566
568
567
return reassembled
569
568
@@ -661,6 +660,7 @@ def copy_response(requests_response_obj, content=b''):
661
660
return resp
662
661
663
662
663
+ # noinspection PyProtectedMember
664
664
def response_cookies_deep_copy (req_obj ):
665
665
"""
666
666
It's a BAD hack to get RAW cookies headers, but so far, we don't have better way.
@@ -711,8 +711,8 @@ def response_content_rewrite(remote_resp_obj):
711
711
if custom_text_rewriter_enable and content_mime == 'text/html' :
712
712
resp_text2 = custom_response_html_rewriter (resp_text )
713
713
resp_text = resp_text2
714
- except Exception as e : # just print err and fallback to normal rewrite
715
- errprint ('Custom Rewrite Function "custom_response_html_rewriter(text)" in custom_func.py ERROR' , e )
714
+ except Exception as _e : # just print err and fallback to normal rewrite
715
+ errprint ('Custom Rewrite Function "custom_response_html_rewriter(text)" in custom_func.py ERROR' , _e )
716
716
traceback .print_exc ()
717
717
718
718
# then do the normal rewrites
@@ -751,28 +751,18 @@ def response_text_rewrite(resp_text):
751
751
resp_text = resp_text .replace (r'https:\/\/' + domain , # TODO: Combine it with non-escaped version
752
752
myurl_prefix_escaped + r'\/extdomains\/' + 'https-' + domain )
753
753
# Implicit schemes replace, will be replaced to the same as `my_host_scheme`, unless forced
754
- resp_text = regex_basic_ext_url_rewriter [domain ].sub (
755
- '{0}{1}/extdomains/{2}{3}' .format (
756
- my_host_scheme ,
757
- my_host_name ,
758
- ('https-' if ('NONE' != force_https_domains )
759
- and (
760
- 'ALL' == force_https_domains or domain in force_https_domains
761
- ) else '' ),
762
- domain ),
763
- resp_text
764
- )
765
754
766
- resp_text = regex_basic_ext_url_esc_rewriter [domain ].sub ( # TODO: Combine it with non-escaped version
767
- '{0}\\ /extdomains\\ /{1}{2}' .format (
768
- myurl_prefix_escaped ,
769
- ('https-' if ('NONE' != force_https_domains )
770
- and (
771
- 'ALL' == force_https_domains or domain in force_https_domains
772
- ) else '' ),
773
- domain ),
774
- resp_text
775
- )
755
+ buff = '{0}/extdomains/{1}{2}' .format (
756
+ myurl_prefix ,
757
+ ('https-' if ('NONE' != force_https_domains )
758
+ and (
759
+ 'ALL' == force_https_domains or domain in force_https_domains
760
+ ) else '' ),
761
+ domain )
762
+ resp_text = resp_text .replace ('http://' + domain , buff , )
763
+ resp_text = resp_text .replace ('http:\\ /\\ /' + domain , buff .replace ('/' , r'\/' ))
764
+ resp_text = resp_text .replace ('//' + domain , buff )
765
+ resp_text = resp_text .replace ('\\ /\\ /' + domain , buff .replace ('/' , r'\/' ), )
776
766
777
767
# rewrite "foo.domain.tld" and 'foo.domain.tld'
778
768
resp_text = resp_text .replace ('"%s"' % domain , '\" ' + my_host_name + '/extdomains/' + domain + '\" ' )
@@ -887,7 +877,7 @@ def send_request(url, method='GET', headers=None, param_get=None, data=None):
887
877
return r , req_time
888
878
889
879
890
- def request_remote_site_and_parse (actual_request_url , start_time = None ):
880
+ def request_remote_site_and_parse (actual_request_url ):
891
881
if verbose_level >= 3 : dbgprint ('actual_request_url:' , actual_request_url )
892
882
893
883
if mime_based_static_resource_CDN :
@@ -909,8 +899,8 @@ def request_remote_site_and_parse(actual_request_url, start_time=None):
909
899
resp = try_get_cached_response (actual_request_url , client_header )
910
900
if resp is not None :
911
901
dbgprint ('CacheHit,Return' )
912
- if start_time is not None :
913
- resp .headers .set ('X-CP-Time' , "%.4f" % (time () - start_time ))
902
+ if thread_local . start_time is not None :
903
+ resp .headers .set ('X-CP-Time' , "%.4f" % (time () - thread_local . start_time ))
914
904
return resp # If cache hit, just skip next steps
915
905
916
906
try : # send request to remote server
@@ -950,8 +940,8 @@ def request_remote_site_and_parse(actual_request_url, start_time=None):
950
940
951
941
if local_cache_enable : # store our server's entire response (headers included)
952
942
put_response_to_local_cache (actual_request_url , resp , request , r )
953
- if start_time is not None :
954
- resp .headers .add ('X-CP-Time' , "%.4f" % (time () - start_time - req_time ))
943
+ if thread_local . start_time is not None :
944
+ resp .headers .add ('X-CP-Time' , "%.4f" % (time () - thread_local . start_time - req_time ))
955
945
return resp
956
946
957
947
@@ -975,9 +965,8 @@ def filter_client_request():
975
965
if verbose_level >= 3 : dbgprint ('add to ip_whitelist because cookies:' , request .remote_addr )
976
966
else :
977
967
return redirect (
978
- "/ip_ban_verify_page?origin="
979
- + base64 .urlsafe_b64encode (str (request .url ).encode (encoding = 'utf-8' )).decode ()
980
- , code = 302 )
968
+ "/ip_ban_verify_page?origin=" + base64 .urlsafe_b64encode (str (request .url ).encode (encoding = 'utf-8' )).decode (),
969
+ code = 302 )
981
970
982
971
return None
983
972
@@ -1003,7 +992,6 @@ def rewrite_client_request():
1003
992
try :
1004
993
real_url = extract_real_url_from_embedded_url (request .url )
1005
994
if real_url is not None :
1006
- global request
1007
995
request .url = real_url
1008
996
request .path = urlsplit (real_url ).path
1009
997
except :
@@ -1120,7 +1108,7 @@ def ip_ban_verify_page():
1120
1108
@app .route ('/extdomains/<path:hostname>' , methods = ['GET' , 'POST' ])
1121
1109
@app .route ('/extdomains/<path:hostname>/<path:extpath>' , methods = ['GET' , 'POST' ])
1122
1110
def get_external_site (hostname , extpath = '/' ):
1123
- start_time = time () # to display compute time
1111
+ thread_local . start_time = time () # to display compute time
1124
1112
# pre-filter client's request
1125
1113
filter_or_rewrite_result = filter_client_request () or is_client_request_need_redirect ()
1126
1114
@@ -1145,13 +1133,13 @@ def get_external_site(hostname, extpath='/'):
1145
1133
if verbose_level >= 3 : dbgprint ('after extract, url:' , request .url , ' path:' , request .path )
1146
1134
actual_request_url = urljoin (urljoin (scheme + hostname , extpath ), '?' + urlsplit (request .url ).query )
1147
1135
1148
- return request_remote_site_and_parse (actual_request_url , start_time )
1136
+ return request_remote_site_and_parse (actual_request_url )
1149
1137
1150
1138
1151
1139
@app .route ('/' , methods = ['GET' , 'POST' ])
1152
1140
@app .route ('/<path:input_path>' , methods = ['GET' , 'POST' ])
1153
1141
def get_main_site (input_path = '/' ):
1154
- start_time = time () # to display compute time
1142
+ thread_local . start_time = time () # to display compute time
1155
1143
# pre-filter client's request
1156
1144
filter_or_rewrite_result = filter_client_request () or is_client_request_need_redirect ()
1157
1145
if filter_or_rewrite_result is not None :
@@ -1165,7 +1153,7 @@ def get_main_site(input_path='/'):
1165
1153
1166
1154
actual_request_url = urljoin (target_scheme + target_domain , extract_url_path_and_query (request .url ))
1167
1155
1168
- return request_remote_site_and_parse (actual_request_url , start_time )
1156
+ return request_remote_site_and_parse (actual_request_url )
1169
1157
1170
1158
1171
1159
# ################# End Flask #################
0 commit comments