Skip to content

Commit 5082742

Browse files
committed
Non-iconv conversion of non-UTF-8-encoded pages now works. Fixed other encoding issues and various Content-Type header problems. parseContentType now converts its input to lowercase.
1 parent c6e51fa commit 5082742

File tree

3 files changed

+42
-25
lines changed

3 files changed

+42
-25
lines changed

init.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,22 +83,31 @@ func (fn reqHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // Allo
8383
if e := fn(w, r); e != nil { // e is *appError, not os.Error
8484
if e.Code == 404 { // Serve a pretty (potentially cached) file for 404 errors, if it exists
8585
w.WriteHeader(404)
86-
if Config.Verbose {
86+
if Config.Verbose && e.Error != nil {
8787
fmt.Println(e.Error.Error(), "\n", e.Message) // Print the error message
8888
}
8989
if FileCache["404"] != nil { // Serve the cached file if one exists
9090
io.WriteString(w, string(FileCache["404"]))
9191
} else { // Read a non-cached file from disk and serve it because there isn't a cached one
9292
file, err := ioutil.ReadFile(Config.PublicDir + "/404.html")
9393
if err != nil {
94-
http.Error(w, e.Message+"\n"+e.Error.Error(), e.Code) // Serve a generic error message if the file isn't cahced and doesn't exist
94+
if e.Error == nil {
95+
http.Error(w, e.Message, e.Code) // Serve a generic error message if the file isn't cached and doesn't exist
96+
} else {
97+
http.Error(w, e.Message+"\n"+e.Error.Error(), e.Code) // Serve a generic error message if the file isn't cached and doesn't exist
98+
}
9599
return
96100
}
97101
io.WriteString(w, string(file))
98102
}
99103
} else { // If it's not a 404 error just serve a generic message
100-
fmt.Println(e.Error.Error(), "\n", e.Message) // Print the error message
101-
http.Error(w, e.Message+"\n"+e.Error.Error(), e.Code)
104+
if e.Error == nil {
105+
fmt.Println(e.Message)
106+
http.Error(w, e.Message, e.Code) // Not a 404, so just serve the generic error message
107+
} else {
108+
fmt.Println(e.Error.Error(), "\n", e.Message) // Print the error message
109+
http.Error(w, e.Message+"\n"+e.Error.Error(), e.Code) // Not a 404, so just serve the generic error message along with the underlying error
110+
}
102111
}
103112

104113
}

proxy.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import (
1414
"strings"
1515

1616
"github.com/PuerkitoBio/goquery"
17-
goenc "github.com/mattn/go-encoding"
17+
goenc "github.com/pietroglyph/go-encoding"
1818
// "golang.org/x/text/encoding"
1919
// "golang.org/x/text/transform"
2020
)
@@ -86,8 +86,6 @@ func proxy(resWriter http.ResponseWriter, reqHttp *http.Request) *reqError { //
8686
return &reqError{err, "Couldn't read returned body.", 500}
8787
}
8888

89-
copyHeaders(resWriter.Header(), httpCliResp.Header) // Copy headers from response to our request from the server we are proxying
90-
9189
prox.ConType, err = parseContentType(httpCliResp.Header.Get("Content-Type")) // Get the MIME type of what we received from the Content-Type header
9290
if err != nil {
9391
prox.ConType, err = parseContentType(http.DetectContentType(prox.Body)) // Looks like we couldn't parse the Content-Type header, so we'll have to detect content type from the actual response body
@@ -96,7 +94,23 @@ func proxy(resWriter http.ResponseWriter, reqHttp *http.Request) *reqError { //
9694
}
9795
}
9896

99-
if prox.ConType.Parameters["charset"] == "" {
97+
for header := range httpCliResp.Header { // Copy over headers from the http response to our http response writer
98+
if !Config.DisableCORS {
99+
if header == "Content-Type" && prox.ConType.Type == "text" && prox.ConType.Subtype == "html" {
100+
resWriter.Header().Add(header, "text/html; charset=utf-8")
101+
} else {
102+
resWriter.Header().Add(header, httpCliResp.Header.Get(header))
103+
}
104+
} else if header != "Content-Security-Policy" && header != "Content-Type" {
105+
resWriter.Header().Add(header, httpCliResp.Header.Get(header))
106+
} else if header == "Content-Type" && prox.ConType.Type == "text" && prox.ConType.Subtype == "html" {
107+
resWriter.Header().Add(header, "text/html; charset=utf-8")
108+
} else {
109+
resWriter.Header().Add(header, httpCliResp.Header.Get(header))
110+
}
111+
} // TODO: This conditional chain is a nightmare and should be fixed sometime
112+
113+
if prox.ConType.Parameters["charset"] == "" { // Make sure that we have a charset if the website doesn't provide one (which is fairly common)
100114
tempConType, err := parseContentType(http.DetectContentType(prox.Body))
101115
if err != nil {
102116
fmt.Println(err.Error()) // Instead of failing we will just give the user a non-formatted page and print the error
@@ -108,18 +122,23 @@ func proxy(resWriter http.ResponseWriter, reqHttp *http.Request) *reqError { //
108122
if prox.ConType.Type == "text" && prox.ConType.Subtype == "html" && prox.ConType.Parameters["charset"] != "" { // Does it say it's html with a valid charset
109123
resReader := strings.NewReader(string(prox.Body))
110124
if prox.ConType.Parameters["charset"] != "utf-8" {
111-
decoder := goenc.GetEncoding(prox.ConType.Parameters["charset"]).NewDecoder()
112-
prox.Document, err = goquery.NewDocumentFromReader(decoder.Reader(resReader)) // Parse the response from our target website
125+
encoding := goenc.GetEncoding(prox.ConType.Parameters["charset"])
126+
if encoding == nil {
127+
return &reqError{nil, prox.ConType.Parameters["charset"] + " is an invalid encoding.", 400}
128+
}
129+
fmt.Println(encoding, prox.ConType.Parameters["charset"])
130+
decoder := encoding.NewDecoder()
131+
prox.Document, err = goquery.NewDocumentFromReader(decoder.Reader(resReader)) // Parse the response from our target website whose body has been freshly utf-8 encoded
113132
if err != nil { // Looks like we can't parse this, let's just spit out the raw response
114133
fmt.Fprint(resWriter, string(prox.Body))
115-
fmt.Println(err.Error())
134+
fmt.Println(err.Error(), prox.Url)
116135
return nil
117136
}
118137
} else {
119138
prox.Document, err = goquery.NewDocumentFromReader(resReader)
120139
if err != nil { // Looks like we can't parse this, let's just spit out the raw response
121140
fmt.Fprint(resWriter, string(prox.Body))
122-
fmt.Println(err.Error())
141+
fmt.Println(err.Error(), prox.Url)
123142
return nil
124143
}
125144
}

util.go

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package main
77
import (
88
"encoding/base64"
99
"errors"
10-
"net/http"
1110
"net/url"
1211
"strings"
1312
)
@@ -19,12 +18,14 @@ type ContentType struct { // The ContentType type holds easily usable informatio
1918
}
2019

2120
func parseContentType(rawcontype string) (*ContentType, error) { // Parse a MIME string into a ContentType struct
21+
rawcontype = strings.ToLower(rawcontype)
2222
var contentType ContentType
2323
contentType.Parameters = make(map[string]string)
2424
contype := strings.Split(rawcontype, " ")
2525
contype[0] = strings.Replace(contype[0], ";", "", -1)
2626
mimetype := strings.Split(contype[0], "/")
2727
if len(mimetype) <= 1 {
28+
2829
return new(ContentType), errors.New("contype: malformed content-type MIME type provided")
2930
}
3031
if len(contype) > 1 {
@@ -65,15 +66,3 @@ func formatUri(rawurl string, host string, proxyhost string) (string, error) { /
6566
formattedurl := "http://" + proxyhost + "/p/?u=" + encodedurl
6667
return formattedurl, nil
6768
}
68-
69-
func copyHeaders(dest http.Header, source http.Header) { // Copy one http.Header to another http.Header
70-
for header := range source {
71-
if !Config.DisableCORS {
72-
dest.Add(header, source.Get(header))
73-
} else if header != "Content-Security-Policy" && header != "Content-Type" {
74-
dest.Add(header, source.Get(header))
75-
} else if header == "Content-Type" {
76-
dest.Add(header, "text/html; charset=utf-8")
77-
}
78-
}
79-
}

0 commit comments

Comments
 (0)