Skip to content

Commit 20aebb1

Browse files
authored
fix: handle WordPress blog hosted on sub-paths (#156)
Previously, the media URL for a file such as /blog/2.png on a blog hosted at "example.com/blog" was resolved incorrectly (example.com/blog/blog/2.png). This commit fixes that. Ref: #155
1 parent 95801e1 commit 20aebb1

File tree

5 files changed

+34
-17
lines changed

5 files changed

+34
-17
lines changed

src/wp2hugo/internal/hugogenerator/generate_hugo_config.go

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"github.com/ashishb/wp2hugo/src/wp2hugo/internal/wpparser"
88
"github.com/rs/zerolog/log"
99
"gopkg.in/yaml.v3"
10-
"net/url"
1110
"os"
1211
"path"
1312
"strings"
@@ -92,7 +91,7 @@ func updateConfig(siteDir string, info wpparser.WebsiteInfo) error {
9291
}
9392
// Ref: https://adityatelange.github.io/hugo-PaperMod/posts/papermod/papermod-faq/
9493
config.Title = info.Title()
95-
config.BaseURL = info.Link()
94+
config.BaseURL = info.Link().String()
9695
config.LanguageCode = info.Language()
9796
config.Taxonomies.Category = hugopage.CategoryName
9897
config.Taxonomies.Tag = hugopage.TagName
@@ -139,17 +138,12 @@ func addNavigationLinks(info wpparser.WebsiteInfo, config *_HugoConfig) error {
139138
if len(info.NavigationLinks()) <= 0 {
140139
return nil
141140
}
142-
hostName, err := url.Parse(info.Link())
143-
if err != nil {
144-
return fmt.Errorf("error parsing host name: %s", err)
145-
}
146141

147142
searchPresent := false
148-
149143
for i, link := range info.NavigationLinks() {
150144
config.Menu.Main = append(config.Menu.Main, _HugoNavMenu{
151145
Name: link.Title,
152-
URL: hugopage.ReplaceAbsoluteLinksWithRelative(hostName.Host, link.URL),
146+
URL: hugopage.ReplaceAbsoluteLinksWithRelative(info.Link().Host, link.URL),
153147
Weight: i + 1,
154148
})
155149
if strings.HasSuffix(link.URL, "/search/") {

src/wp2hugo/internal/hugogenerator/hugo_gen_setup.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func (g Generator) Generate() error {
121121
}
122122

123123
if g.downloadMedia {
124-
url1 := info.Link() + "/favicon.ico"
124+
url1 := info.Link().Scheme + "://" + info.Link().Host + "/favicon.ico"
125125
media, err := g.mediaProvider.GetReader(url1)
126126
if err != nil {
127127
log.Error().
@@ -458,9 +458,9 @@ func (g Generator) downloadPageMedia(outputMediaDirPath string, p *hugopage.Page
458458
Str("page", pageURL.String()).
459459
Int("links", len(links)).
460460
Msgf("Embedded media links")
461-
462461
log.Debug().
463462
Int("links", len(links)).
463+
Strs("links", links).
464464
Msg("Downloading media files")
465465

466466
hostname := pageURL.Host
@@ -487,8 +487,12 @@ func (g Generator) downloadPageMedia(outputMediaDirPath string, p *hugopage.Page
487487
}
488488
outputFilePath := fmt.Sprintf("%s/static/%s", outputMediaDirPath,
489489
strings.TrimSuffix(strings.Split(link, "?")[0], "/"))
490-
if !strings.HasPrefix(link, "http") {
491-
link = g.wpInfo.Link() + link
490+
if strings.HasPrefix(link, "http") {
491+
// do nothing in case of absolute URL
492+
} else if strings.HasPrefix(link, "/") { // relative URL to the base of the website
493+
link = g.wpInfo.Link().Scheme + "://" + g.wpInfo.Link().Host + link
494+
} else {
495+
link = strings.TrimSuffix(g.wpInfo.Link().String(), "/") + "/" + link
492496
}
493497

494498
// Try full-res images first.

src/wp2hugo/internal/mediacache/media_cache_setup.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/http"
1010
"os"
1111
"path"
12+
"strings"
1213
)
1314

1415
type MediaCache struct {
@@ -20,17 +21,28 @@ func New(cacheDirPath string) MediaCache {
2021
}
2122

2223
func (m MediaCache) GetReader(url string) (io.Reader, error) {
24+
if strings.Contains(url, "blog/blog") {
25+
log.Panic().
26+
Str("url", url).
27+
Msg("media url contains blog/blog")
28+
}
29+
2330
if err := utils.CreateDirIfNotExist(m.cacheDirPath); err != nil {
2431
return nil, fmt.Errorf("error creating cache directory: %s", err)
2532
}
2633

2734
key := getSHA256(url)
2835
file, err := os.OpenFile(path.Join(m.cacheDirPath, key), os.O_RDONLY, 0644)
2936
if err == nil {
30-
log.Info().Msgf("media %s found in cache", url)
37+
log.Info().
38+
Str("url", url).
39+
Msg("media found in cache")
3140
return file, nil
3241
}
33-
log.Info().Msgf("media %s will be fetched", url)
42+
43+
log.Info().
44+
Str("url", url).
45+
Msg("media will be fetched")
3446
resp, err := http.Get(url)
3547
if err != nil {
3648
return nil, fmt.Errorf("error fetching media %s: %s", url, err)

src/wp2hugo/internal/wpparser/wp_parser_setup.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"io"
8+
"net/url"
89
"regexp"
910
"strings"
1011
"time"
@@ -258,9 +259,14 @@ func (p *Parser) getWebsiteInfo(feed *rss.Feed, authors []string) (*WebsiteInfo,
258259
}
259260
}
260261

262+
linkURL, err := url.Parse(feed.Link)
263+
if err != nil {
264+
return nil, fmt.Errorf("error parsing feed link: %w", err)
265+
}
266+
261267
websiteInfo := WebsiteInfo{
262268
title: feed.Title,
263-
link: feed.Link,
269+
link: linkURL,
264270
Description: feed.Description,
265271
pubDate: feed.PubDateParsed,
266272
language: feed.Language,

src/wp2hugo/internal/wpparser/wp_website_info.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
package wpparser
22

33
import (
4+
"net/url"
45
"time"
56
)
67

78
type WebsiteInfo struct {
89
title string
9-
link string
10+
link *url.URL
1011
Description string
1112

1213
pubDate *time.Time
@@ -49,7 +50,7 @@ func (w *WebsiteInfo) Title() string {
4950
return w.title
5051
}
5152

52-
func (w *WebsiteInfo) Link() string {
53+
func (w *WebsiteInfo) Link() *url.URL {
5354
return w.link
5455
}
5556

0 commit comments

Comments
 (0)