Skip to content

Commit ae488b9

Browse files
committed
For some reason the tool is now receiving duplicate chapter nodes. There might be optimisations to be made to how chapters are fetched, but for the time being this is just a quick fix to ensure the chapters in the output data are de-duplicated.
Also added git attributes
1 parent 601fe19 commit ae488b9

File tree

4 files changed

+217
-4
lines changed

4 files changed

+217
-4
lines changed

.gitattributes

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
##
2+
# This template is taken from https://github.com/alexkaratarakis/gitattributes/blob/master/Web.gitattributes
3+
# This template has subsequently been customised
4+
5+
## GITATTRIBUTES FOR WEB PROJECTS
6+
#
7+
# These settings are for any web project.
8+
#
9+
# Details per file setting:
10+
# text These files should be normalized (i.e. convert CRLF to LF).
11+
# binary These files are binary and should be left untouched.
12+
#
13+
# Note that binary is a macro for -text -diff.
14+
######################################################################
15+
16+
# Auto detect
17+
## Handle line endings automatically for files detected as
18+
## text and leave all files detected as binary untouched.
19+
## This will handle all files NOT defined below.
20+
* text=auto
21+
22+
# Source code
23+
*.bash text eol=lf
24+
*.bat text eol=crlf
25+
*.cmd text eol=crlf
26+
*.coffee text
27+
*.css text diff=css
28+
*.htm text diff=html
29+
*.html text diff=html
30+
*.inc text
31+
*.ini text
32+
*.js text eol=lf
33+
*.json text eol=lf
34+
*.jsx text eol=lf
35+
*.less text
36+
*.ls text
37+
*.map text -diff
38+
*.od text
39+
*.onlydata text
40+
*.php text diff=php
41+
*.pl text
42+
*.ps1 text eol=crlf
43+
*.py text diff=python
44+
*.rb text diff=ruby
45+
*.sass text
46+
*.scm text
47+
*.scss text diff=css
48+
*.sh text eol=lf
49+
*.sql text
50+
*.styl text
51+
*.tag text
52+
*.ts text eol=lf
53+
*.tsx text eol=lf
54+
*.xml text
55+
*.xhtml text diff=html
56+
57+
# Docker
58+
Dockerfile text
59+
60+
# Documentation
61+
*.ipynb text
62+
*.markdown text diff=markdown
63+
*.md text diff=markdown
64+
*.mdwn text diff=markdown
65+
*.mdown text diff=markdown
66+
*.mkd text diff=markdown
67+
*.mkdn text diff=markdown
68+
*.mdtxt text
69+
*.mdtext text
70+
*.txt text
71+
AUTHORS text
72+
CHANGELOG text
73+
CHANGES text
74+
CONTRIBUTING text
75+
COPYING text
76+
copyright text
77+
*COPYRIGHT* text
78+
INSTALL text
79+
license text
80+
LICENSE text
81+
NEWS text
82+
readme text
83+
*README* text
84+
TODO text
85+
86+
# Templates
87+
*.dot text
88+
*.ejs text
89+
*.haml text
90+
*.handlebars text
91+
*.hbs text
92+
*.hbt text
93+
*.jade text
94+
*.latte text
95+
*.mustache text
96+
*.njk text
97+
*.phtml text
98+
*.svelte text
99+
*.tmpl text
100+
*.tpl text
101+
*.twig text
102+
*.vue text
103+
104+
# Configs
105+
*.cnf text
106+
*.conf text
107+
*.config text
108+
.editorconfig text
109+
.env text
110+
.gitattributes text
111+
.gitconfig text
112+
.htaccess text
113+
*.lock text -diff
114+
package.json text eol=lf
115+
package-lock.json text -diff
116+
pnpm-lock.yaml text eol=lf -diff
117+
.prettierrc text
118+
yarn.lock text -diff
119+
*.toml text
120+
*.yaml text
121+
*.yml text
122+
browserslist text
123+
Makefile text
124+
makefile text
125+
126+
# Heroku
127+
Procfile text
128+
129+
# Graphics
130+
*.ai binary
131+
*.bmp binary
132+
*.eps binary
133+
*.gif binary
134+
*.gifv binary
135+
*.ico binary
136+
*.jng binary
137+
*.jp2 binary
138+
*.jpg binary
139+
*.jpeg binary
140+
*.jpx binary
141+
*.jxr binary
142+
*.pdf binary
143+
*.png binary
144+
*.psb binary
145+
*.psd binary
146+
# SVG treated as text by default.
147+
*.svg text
148+
# If you want to treat it as binary,
149+
# use the following line instead.
150+
# *.svg binary
151+
*.svgz binary
152+
*.tif binary
153+
*.tiff binary
154+
*.wbmp binary
155+
*.webp binary
156+
157+
# Audio
158+
*.kar binary
159+
*.m4a binary
160+
*.mid binary
161+
*.midi binary
162+
*.mp3 binary
163+
*.ogg binary
164+
*.ra binary
165+
166+
# Video
167+
*.3gpp binary
168+
*.3gp binary
169+
*.as binary
170+
*.asf binary
171+
*.asx binary
172+
*.avi binary
173+
*.fla binary
174+
*.flv binary
175+
*.m4v binary
176+
*.mng binary
177+
*.mov binary
178+
*.mp4 binary
179+
*.mpeg binary
180+
*.mpg binary
181+
*.ogv binary
182+
*.swc binary
183+
*.swf binary
184+
*.webm binary
185+
186+
# Archives
187+
*.7z binary
188+
*.gz binary
189+
*.jar binary
190+
*.rar binary
191+
*.tar binary
192+
*.zip binary
193+
194+
# Fonts
195+
*.ttf binary
196+
*.eot binary
197+
*.otf binary
198+
*.woff binary
199+
*.woff2 binary
200+
201+
# Executables
202+
*.exe binary
203+
*.pyc binary
204+
205+
# RC files (like .babelrc or .eslintrc)
206+
*.*rc text
207+
208+
# Ignore files (like .npmignore or .gitignore)
209+
*.*ignore text

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
It'll archive stories for you.
44

5-
If you have no idea what you're doing then just download [this zip](https://github.com/Fiddlekins/akun-story-scraper/releases/download/1.8.0/akun-story-scraper.1.8.0.zip) and extract it in a nice friendly location like your desktop.
5+
If you have no idea what you're doing then just download [this zip](https://github.com/Fiddlekins/akun-story-scraper/releases/download/1.9.0/akun-story-scraper.1.9.0.zip) and extract it in a nice friendly location like your desktop.
66

77
Otherwise clone the project, do an npm install, it's pretty standard.
88

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "akun-story-scraper",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"description": "A tool to archive all quests on Akun",
55
"type": "module",
66
"main": "src/index.js",

src/Scraper.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ export default class Scraper {
176176
this._logger.log(`Archiving ${storyId}`);
177177
// I realised that trying to take an existing archive and only fetch new data means that edits wouldn't be picked up, which is unacceptable, so yay
178178
const imageUrls = new Set();
179-
const story = [];
179+
const storyChapterMap = {};
180180
let chat = [];
181181

182182
let metaData;
@@ -213,7 +213,7 @@ export default class Scraper {
213213
return this._api(`/api/anonkun/chapters/${storyId}/${startCt}/${ct}`);
214214
}, 30);
215215
for (const chapter of chapters) {
216-
story.push(chapter);
216+
storyChapterMap[chapter._id] = chapter;
217217
}
218218
} catch (err) {
219219
await this.logFatQuest(storyId);
@@ -222,6 +222,10 @@ export default class Scraper {
222222
startCt = ct;
223223
}
224224

225+
const story = Object.values(storyChapterMap).sort((a, b) => {
226+
return a.ct - b.ct;
227+
})
228+
225229
await fs.outputJson(path.join(archivePath, `${storyId}.chapters.json`), story);
226230
if (downloadImages) {
227231
Scraper.addImageUrlsFromStory(story, imageUrls, this._logger);

0 commit comments

Comments
 (0)