Skip to content

Commit 0d41ddf

Browse files
authored
Merge pull request #4 from steinybear/master
Windows patch and formatting bug fix
2 parents 24fc03b + 92f1ef7 commit 0d41ddf

File tree

2 files changed

+452
-218
lines changed

2 files changed

+452
-218
lines changed

main.rs

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
extern crate select;
2+
extern crate gen_epub_book;
3+
extern crate url;
4+
extern crate chrono;
5+
extern crate reqwest;
6+
use reqwest::Client;
7+
use url::Url;
8+
use std::path::PathBuf;
9+
use gen_epub_book::ops::{BookElement, EPubBook};
10+
use std::env;
11+
use std::io;
12+
use std::io::Write;
13+
use std::fs;
14+
use std::fs::File;
15+
use select::document::Document;
16+
use std::io::stdout;
17+
use select::node::Node;
18+
use chrono::DateTime;
19+
use std::fs::OpenOptions;
20+
use select::predicate::{Name, And, Class, Descendant};
21+
use std::thread::Builder;
22+
/// When `true`, each downloaded chapter is written to a file under `content/`
/// and added to the book by path; when `false`, chapters are kept in memory
/// and added to the book as strings.
const FILE_USE: bool = true;
23+
/// Keys of every book the scraper knows about, in the order `gen_all` downloads them.
const BOOKS: [&str;5] = ["worm","pact","twig","glow","ward"];
24+
/// Static metadata describing one web serial that can be scraped.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Book {
    /// Display title; also used (lower-cased) as the output file name.
    title: String,
    /// Address of the first chapter, without the `https://` scheme.
    start: String,
    /// Description embedded in the generated e-book.
    desc: String,
    /// Publication date as an RFC 2822 timestamp.
    date: String,
    /// Optional URL of a cover image.
    cover: Option<String>,
}
31+
struct DownloadedBook {
32+
title:String,
33+
content:Vec<BookElement>
34+
}
35+
fn main() {
36+
let builder = Builder::new()
37+
.name("reductor".into())
38+
.stack_size(32 * 1024 * 1024); //32 MB of stack space
39+
let handler = builder.spawn(|| {
40+
interpet_args();
41+
}).unwrap();
42+
handler.join().unwrap();
43+
}
44+
/// Reports whether `num` extra arguments are enough for `command`.
///
/// `num` is the number of arguments that follow the command name. No command
/// currently requires any extra argument, so this returns `true` for every
/// input; the `match` is the place to add per-command requirements.
fn check_args_num(num: usize, command: &str) -> bool {
    // Minimum number of arguments that must follow the command name.
    let need = match command {
        _ => 0,
    };
    num >= need
}
54+
fn get_info(key:&str) -> Book{
55+
return match key {
56+
"worm" => Book {
57+
title:String::from("Worm"),
58+
start:String::from("parahumans.wordpress.com/2011/06/11/1-1/"),
59+
desc:String::from("An introverted teenage girl with an unconventional superpower, Taylor goes out in costume to find escape from a deeply unhappy and frustrated civilian life. Her first attempt at taking down a supervillain sees her mistaken for one, thrusting her into the midst of the local ‘cape’ scene’s politics, unwritten rules, and ambiguous morals. As she risks life and limb, Taylor faces the dilemma of having to do the wrong things for the right reasons."),
60+
date:String::from("Tue, 19 Nov 2013 00:00:00 +0100"),
61+
cover:Some("https://i.imgur.com/g0fLbQ1.jpg".to_string()),
62+
},
63+
"pact" => Book {
64+
title:String::from("Pact"),
65+
start:String::from("pactwebserial.wordpress.com/category/story/arc-1-bonds/1-01/"),
66+
desc:String::from("Blake Thorburn was driven away from home and family by a vicious fight over inheritance, returning only for a deathbed visit with the grandmother who set it in motion. Blake soon finds himself next in line to inherit the property, a trove of dark supernatural knowledge, and the many enemies his grandmother left behind her in the small town of Jacob’s Bell."),
67+
date:String::from("Sat, 07 Mar 2015 00:00:00 +0100"),
68+
cover:Some("https://i.redd.it/uyfiofnoko8z.png".to_string()),
69+
},
70+
"twig" => Book {
71+
title:String::from("Twig"),
72+
start:String::from("twigserial.wordpress.com/2014/12/24/taking-root-1-1/"),
73+
desc:String::from("The year is 1921, and a little over a century has passed since a great mind unraveled the underpinnings of life itself. Every week, it seems, the papers announce great advances, solving the riddle of immortality, successfully reviving the dead, the cloning of living beings, or blending of two animals into one. For those on the ground, every week brings new mutterings of work taken by ‘stitched’ men of patchwork flesh that do not need to sleep, or more fearful glances as they have to step off the sidewalks to make room for great laboratory-grown beasts. Often felt but rarely voiced is the notion that events are already spiraling out of the control of the academies that teach these things. It is only this generation, they say, that the youth and children are able to take the mad changes in stride, accepting it all as a part of day to day life. Of those children, a small group of strange youths from the Lambsbridge Orphanage stand out, taking a more direct hand in events."),
74+
date:String::from("Tue, 17 Oct 2017 00:00:00 +0200"),
75+
cover:Some("https://i.imgur.com/3KeIJyz.jpg".to_string()),
76+
},
77+
"glow" => Book {
78+
title:String::from("Glow-worm"),
79+
start:String::from("parahumans.wordpress.com/2017/10/21/glowworm-p-1/"),
80+
desc:String::from("The bridge between Worm and Ward, Glow-worm introduces readers to the characters of Ward, and the consequences of Gold Morning"),
81+
date:String::from("Sat, 11 Nov 2017 00:00:00 +0100"),
82+
cover:None,
83+
},
84+
"ward" => Book {
85+
title:String::from("Ward"),
86+
start:String::from("parahumans.net/2017/09/11/daybreak-1-1/"),
87+
desc:String::from("The unwritten rules that govern the fights and outright wars between ‘capes’ have been amended: everyone gets their second chance. It’s an uneasy thing to come to terms with when notorious supervillains and even monsters are playing at being hero. The world ended two years ago, and as humanity straddles the old world and the new, there aren’t records, witnesses, or facilities to answer the villains’ past actions in the present. One of many compromises, uneasy truces and deceptions that are starting to splinter as humanity rebuilds. None feel the injustice of this new status quo or the lack of established footing more than the past residents of the parahuman asylums. The facilities hosted parahumans and their victims, but the facilities are ruined or gone; one of many fragile ex-patients is left to find a place in a fractured world. She’s perhaps the person least suited to have anything to do with this tenuous peace or to stand alongside these false heroes. She’s put in a position to make the decision: will she compromise to help forge what they call, with dark sentiment, a second golden age? Or will she stand tall as a gilded dark age dawns?"),
88+
date:String::from("Sat, 11 Nov 2017 00:00:00 +0100"),
89+
cover:None,
90+
},
91+
_ => Book {
92+
title:String::from("Worm"),
93+
start:String::from("parahumans.wordpress.com/2011/06/11/1-1/"),
94+
desc:String::from("An introverted teenage girl with an unconventional superpower, Taylor goes out in costume to find escape from a deeply unhappy and frustrated civilian life. Her first attempt at taking down a supervillain sees her mistaken for one, thrusting her into the midst of the local ‘cape’ scene’s politics, unwritten rules, and ambiguous morals. As she risks life and limb, Taylor faces the dilemma of having to do the wrong things for the right reasons."),
95+
date:String::from("Tue, 19 Nov 2013 00:00:00 +0100"),
96+
cover:Some("https://i.imgur.com/g0fLbQ1.jpg".to_string()),
97+
},
98+
99+
};
100+
}
101+
/// Asks on stdout whether the cover at `url` should be included for `title`
/// and reads the answer from stdin.
///
/// Returns `true` when the answer is "y" or "yes" (surrounding whitespace and
/// letter case are ignored), `false` for anything else.
///
/// # Panics
///
/// Panics if stdout cannot be flushed or stdin cannot be read.
fn prompt_cover(title: String, url: String) -> bool {
    print!("Would you like to include a cover for {}? Cover URL is {}. If it cannot be downloaded, program will not exit gracefully.(y/n)",title,url);
    // `print!` does not flush, so push the prompt out before blocking on input.
    io::stdout().flush().expect("Could not flush stdout");

    let mut buf = String::new();
    io::stdin().read_line(&mut buf).expect("Could not read from stdin");

    // `read_line` keeps the trailing line terminator, so the raw buffer can
    // never equal "y" or "yes"; compare the trimmed answer instead.
    let answer = buf.trim().to_lowercase();
    answer == "y" || answer == "yes"
}
110+
fn interpet_args() {
111+
let args: Vec<String> = env::args().collect();
112+
let command : &str;
113+
if args.len() < 2 {
114+
command = "help";
115+
} else {
116+
command = &args[1];
117+
if !check_args_num(args.len()- 2, command.as_ref()){
118+
println!("Not enough arguments for {}", &command);
119+
::std::process::exit(64);
120+
}
121+
}
122+
match command {
123+
"help" => print_help(),
124+
"all" => gen_all(),
125+
_ => process_book(download_book(get_info(command.as_ref())))
126+
}
127+
128+
}
129+
fn gen_all() {
130+
for book in BOOKS.iter() {
131+
let info = get_info(book);
132+
println!("Now downloading {}",info.title);
133+
process_book(download_book(info));
134+
}
135+
}
136+
/// Prints the program banner and the list of supported commands to stdout.
fn print_help() {
    println!("Rust Wildbow Scraper v0.0.1");
    println!("By Nicohman");
    println!("Commands:");
    println!("help: Shows this help screen");
    // `all` is handled by the argument parser but was missing from this list.
    println!("all: Scrapes all of the books below");
    println!("worm: Scrapes Worm");
    println!("pact: Scrapes Pact");
    println!("twig: Scrapes Twig");
    println!("glow: Scrapes Glow-worm");
    println!("ward: Scrapes Ward");
}
147+
fn download_book(book:Book) -> DownloadedBook {
148+
let mut elements = vec![BookElement::Name(book.title.clone()), BookElement::Author("John McCrae".to_string()), BookElement::Language("en-US".to_string()), BookElement::Date(DateTime::parse_from_rfc2822(&book.date).unwrap()), BookElement::StringDescription(book.desc)];
149+
if book.cover.is_some() {
150+
let cover = book.cover.unwrap();
151+
if prompt_cover(book.title.clone(),cover.clone()) {
152+
elements.push(BookElement::NetworkCover(Url::parse(&cover).unwrap()));
153+
}
154+
}
155+
let client = Client::new();
156+
if FILE_USE {
157+
if !fs::metadata("content").is_err() {
158+
println!("Content directory is already there. Please remove and try again.");
159+
::std::process::exit(73);
160+
} else {
161+
fs::create_dir("content").unwrap();
162+
}
163+
}
164+
let done = download_iter(&mut ("https://".to_string()+ &book.start, elements, client));
165+
return DownloadedBook {
166+
title:book.title,
167+
content:done.1
168+
}
169+
}
170+
fn download_iter( tup: &mut (String, Vec<BookElement>, Client)) -> (String, Vec<BookElement>, Client) {
171+
let page = tup.2.get(&tup.0).send().unwrap().text().unwrap();
172+
let doc = Document::from(page.as_ref());
173+
let check = doc.find(Descendant(And(Name("div"), Class("entry-content")),Descendant(Name("p"),Name("a")))).filter(|x|{
174+
if x.text().trim() == "Next Chapter" || x.text().trim() == "Next" {
175+
true
176+
} else {
177+
false
178+
}
179+
}).next();
180+
let mut title = doc.find(Name("title")).next().unwrap().text().split("|").next().unwrap().trim().replace(" - Parahumans 2", "").replace(" – Twig","").replace("Glow-worm – ","").replace("(Sequel is live!)","").to_string();
181+
if &title == "1.01" {
182+
title = "Bonds 1.1".to_string();
183+
}
184+
println!("Downloaded {}", title);
185+
let mut arr = doc.find(Descendant(And(Name("div"), Class("entry-content")),Name("p"))).collect::<Vec<Node>>();
186+
let to_sp = arr.len();
187+
188+
arr.truncate(to_sp);
189+
let num = tup.1.len().clone().to_string();
190+
let cont = arr.into_iter().fold("<?xml version='1.0' encoding='utf-8' ?><html xmlns='http://www.w3.org/1999/xhtml'><head><title>".to_string()+&title+"</title><meta http-equiv='Content-Type' content ='text/html'></meta><!-- ePub title: \"" +&title+ "\" -->\n</head><body><h1>"+&title+"</h1>\n", |acc, x|{
191+
acc + "<p>"+ &x.inner_html().replace("&nbsp;","&#160;").replace("<br>","<br></br>").replace("& ", "&amp;").replace("<Walk or->","&lt;Walk or-&gt;").replace("<Walk!>","&lt;Walk!&gt;").replace("Last Chapter", "").replace("Next Chapter", "")+"</p>\n"
192+
})+"</body></html>";
193+
if FILE_USE {
194+
let mut file = OpenOptions::new()
195+
.create(true)
196+
.write(true)
197+
.open("content/".to_string()+&num+".html")
198+
.unwrap();
199+
file.write_all((cont)
200+
.as_bytes()).unwrap();
201+
tup.1.push(BookElement::Content(PathBuf::from("content/".to_string()+&num+".html")));
202+
} else {
203+
tup.1.push(BookElement::StringContent(cont));
204+
}
205+
if check.is_none() || title == "P.9" {
206+
return tup.clone();
207+
} else {
208+
tup.0 = check.unwrap().attr("href").unwrap().to_string();
209+
if !tup.0.contains("https") {
210+
tup.0 = "https:".to_string()+&tup.0;
211+
}
212+
return download_iter(tup);
213+
}
214+
}
215+
fn process_book(book: DownloadedBook) {
216+
println!("Done downloading {}", book.title);
217+
let filename = book.title.clone().to_lowercase();
218+
println!("Converting to epub now at {}.epub", filename);
219+
let mut processed = EPubBook::from_elements(book.content).unwrap();
220+
processed.normalise_paths(&["./".parse().unwrap()], false, &mut stdout()).unwrap();
221+
processed.write_zip(&mut File::create(filename+".epub").unwrap(), false, &mut stdout()).expect("Couldn't export epub");
222+
if FILE_USE {
223+
fs::remove_dir_all("content").unwrap();
224+
}
225+
println!("Done downloading {}", book.title);
226+
}

0 commit comments

Comments
 (0)