For some reason the tool is now receiving duplicate chapter nodes. There might be optimisations to be made with how chapters are fetched, but for the time being this is just a quick fix to ensure the output data has de-duped them.

Fiddlekins · Fiddlekins · commit ae488b93eb8b · 2024-07-09T15:49:30.000+08:00
Also added git attributes
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,209 @@
+##
+# This template is taken from https://github.com/alexkaratarakis/gitattributes/blob/master/Web.gitattributes
+# This template has subsequently been customised
+
+## GITATTRIBUTES FOR WEB PROJECTS
+#
+# These settings are for any web project.
+#
+# Details per file setting:
+#   text    These files should be normalized (i.e. convert CRLF to LF).
+#   binary  These files are binary and should be left untouched.
+#
+# Note that binary is a macro for -text -diff.
+######################################################################
+
+# Auto detect
+##   Handle line endings automatically for files detected as
+##   text and leave all files detected as binary untouched.
+##   This will handle all files NOT defined below.
+*                 text=auto
+
+# Source code
+*.bash            text eol=lf
+*.bat             text eol=crlf
+*.cmd             text eol=crlf
+*.coffee          text
+*.css             text diff=css
+*.htm             text diff=html
+*.html            text diff=html
+*.inc             text
+*.ini             text
+*.js              text eol=lf
+*.json            text eol=lf
+*.jsx             text eol=lf
+*.less            text
+*.ls              text
+*.map             text -diff
+*.od              text
+*.onlydata        text
+*.php             text diff=php
+*.pl              text
+*.ps1             text eol=crlf
+*.py              text diff=python
+*.rb              text diff=ruby
+*.sass            text
+*.scm             text
+*.scss            text diff=css
+*.sh              text eol=lf
+*.sql             text
+*.styl            text
+*.tag             text
+*.ts              text eol=lf
+*.tsx             text eol=lf
+*.xml             text
+*.xhtml           text diff=html
+
+# Docker
+Dockerfile        text
+
+# Documentation
+*.ipynb           text
+*.markdown        text diff=markdown
+*.md              text diff=markdown
+*.mdwn            text diff=markdown
+*.mdown           text diff=markdown
+*.mkd             text diff=markdown
+*.mkdn            text diff=markdown
+*.mdtxt           text
+*.mdtext          text
+*.txt             text
+AUTHORS           text
+CHANGELOG         text
+CHANGES           text
+CONTRIBUTING      text
+COPYING           text
+copyright         text
+*COPYRIGHT*       text
+INSTALL           text
+license           text
+LICENSE           text
+NEWS              text
+readme            text
+*README*          text
+TODO              text
+
+# Templates
+*.dot             text
+*.ejs             text
+*.haml            text
+*.handlebars      text
+*.hbs             text
+*.hbt             text
+*.jade            text
+*.latte           text
+*.mustache        text
+*.njk             text
+*.phtml           text
+*.svelte          text
+*.tmpl            text
+*.tpl             text
+*.twig            text
+*.vue             text
+
+# Configs
+*.cnf             text
+*.conf            text
+*.config          text
+.editorconfig     text
+.env              text
+.gitattributes    text
+.gitconfig        text
+.htaccess         text
+*.lock            text -diff
+package.json      text eol=lf
+package-lock.json text -diff
+pnpm-lock.yaml    text eol=lf -diff
+.prettierrc       text
+yarn.lock         text -diff
+*.toml            text
+*.yaml            text
+*.yml             text
+browserslist      text
+Makefile          text
+makefile          text
+
+# Heroku
+Procfile          text
+
+# Graphics
+*.ai              binary
+*.bmp             binary
+*.eps             binary
+*.gif             binary
+*.gifv            binary
+*.ico             binary
+*.jng             binary
+*.jp2             binary
+*.jpg             binary
+*.jpeg            binary
+*.jpx             binary
+*.jxr             binary
+*.pdf             binary
+*.png             binary
+*.psb             binary
+*.psd             binary
+# SVG treated as text by default.
+*.svg             text
+# If you want to treat it as binary,
+# use the following line instead.
+# *.svg           binary
+*.svgz            binary
+*.tif             binary
+*.tiff            binary
+*.wbmp            binary
+*.webp            binary
+
+# Audio
+*.kar             binary
+*.m4a             binary
+*.mid             binary
+*.midi            binary
+*.mp3             binary
+*.ogg             binary
+*.ra              binary
+
+# Video
+*.3gpp            binary
+*.3gp             binary
+*.as              binary
+*.asf             binary
+*.asx             binary
+*.avi             binary
+*.fla             binary
+*.flv             binary
+*.m4v             binary
+*.mng             binary
+*.mov             binary
+*.mp4             binary
+*.mpeg            binary
+*.mpg             binary
+*.ogv             binary
+*.swc             binary
+*.swf             binary
+*.webm            binary
+
+# Archives
+*.7z              binary
+*.gz              binary
+*.jar             binary
+*.rar             binary
+*.tar             binary
+*.zip             binary
+
+# Fonts
+*.ttf             binary
+*.eot             binary
+*.otf             binary
+*.woff            binary
+*.woff2           binary
+
+# Executables
+*.exe             binary
+*.pyc             binary
+
+# RC files (like .babelrc or .eslintrc)
+*.*rc             text
+
+# Ignore files (like .npmignore or .gitignore)
+*.*ignore         text
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 It'll archive stories for you.
 
-If you have no idea what you're doing then just download [this zip](https://github.com/Fiddlekins/akun-story-scraper/releases/download/1.8.0/akun-story-scraper.1.8.0.zip) and extract it in a nice friendly location like your desktop.
+If you have no idea what you're doing then just download [this zip](https://github.com/Fiddlekins/akun-story-scraper/releases/download/1.9.0/akun-story-scraper.1.9.0.zip) and extract it in a nice friendly location like your desktop.
 
 Otherwise clone the project, do an npm install, it's pretty standard.
 
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "akun-story-scraper",
-  "version": "1.8.0",
+  "version": "1.9.0",
   "description": "A tool to archive all quests on Akun",
   "type": "module",
   "main": "src/index.js",
diff --git a/src/Scraper.js b/src/Scraper.js
@@ -176,7 +176,7 @@ export default class Scraper {
 		this._logger.log(`Archiving ${storyId}`);
 		// I realised that trying to take an existing archive and only fetch new data means that edits wouldn't be picked up, which is unacceptable, so yay
 		const imageUrls = new Set();
-		const story = [];
+		const storyChapterMap = {};
 		let chat = [];
 
 		let metaData;
@@ -213,7 +213,7 @@ export default class Scraper {
 					return this._api(`/api/anonkun/chapters/${storyId}/${startCt}/${ct}`);
 				}, 30);
 				for (const chapter of chapters) {
-					story.push(chapter);
+          storyChapterMap[chapter._id] = chapter;
 				}
 			} catch (err) {
 				await this.logFatQuest(storyId);
@@ -222,6 +222,10 @@ export default class Scraper {
 			startCt = ct;
 		}
 
+    const story = Object.values(storyChapterMap).sort((a, b) => {
+      return a.ct - b.ct;
+    })
+
 		await fs.outputJson(path.join(archivePath, `${storyId}.chapters.json`), story);
 		if (downloadImages) {
 			Scraper.addImageUrlsFromStory(story, imageUrls, this._logger);

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "akun-story-scraper",`
`3`		`- "version": "1.8.0",`
	`3`	`+ "version": "1.9.0",`
`4`	`4`	`"description": "A tool to archive all quests on Akun",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "src/index.js",`