Skip to content

Commit ba6a90e

Browse files
authored
Merge 49055d9 into 78ff73f
2 parents 78ff73f + 49055d9 commit ba6a90e

File tree

2 files changed

+92
-61
lines changed

2 files changed

+92
-61
lines changed
Lines changed: 50 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
package com.rarchives.ripme.ripper.rippers;
22

3-
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
4-
import com.rarchives.ripme.utils.Http;
53
import java.io.IOException;
64
import java.net.MalformedURLException;
75
import java.net.URI;
@@ -11,12 +9,14 @@
119
import java.util.List;
1210
import java.util.regex.Matcher;
1311
import java.util.regex.Pattern;
12+
1413
import org.jsoup.nodes.Document;
1514
import org.jsoup.nodes.Element;
1615

16+
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
17+
import com.rarchives.ripme.utils.Http;
1718

1819
public class MrCongRipper extends AbstractHTMLRipper {
19-
2020
private Document currDoc;
2121
private int lastPageNum;
2222
private int currPageNum;
@@ -29,43 +29,48 @@ public MrCongRipper(URL url) throws IOException {
2929

3030
@Override
3131
public String getHost() {
32-
return "mrcong";
32+
return "misskon";
3333
}
3434

3535
@Override
3636
public String getDomain() {
37-
return "mrcong.com";
37+
// NOTE: This was previously mrcong.com, which now redirects to
38+
// misskon.com. Some resources still refer to mrcong.com
39+
// but all the top level URLs are now misskon.com
40+
return "misskon.com";
3841
}
3942

4043
@Override
4144
public String getGID(URL url) throws MalformedURLException {
4245
System.out.println(url.toExternalForm());
43-
Pattern p = Pattern.compile("^https?://mrcong\\.com/(\\S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$");
44-
Pattern p2 = Pattern.compile("^https?://mrcong\\.com/tag/(\\S*)/$"); //Added 6-10-21
46+
Pattern p = Pattern.compile(
47+
"^https?://(?:[a-z]+\\.)?misskon\\.com/([-0-9a-zA-Z]+)(?:/?|/[0-9]+/?)?$");
48+
Pattern p2 = Pattern.compile("^https?://misskon\\.com/tag/(\\S*)/$");
4549
Matcher m = p.matcher(url.toExternalForm());
46-
Matcher m2 = p2.matcher(url.toExternalForm()); //6-10-21
50+
Matcher m2 = p2.matcher(url.toExternalForm());
4751
if (m.matches()) {
4852
return m.group(1);
49-
}
50-
else if(m2.matches()) { //Added 6-10-21
53+
} else if (m2.matches()) {
5154
tagPage = true;
5255
System.out.println("tagPage = TRUE");
5356
return m2.group(1);
5457
}
5558

56-
throw new MalformedURLException("Expected mrcong.com URL format: "
57-
+ "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead");
59+
throw new MalformedURLException("Expected misskon.com URL format: "
60+
+ "misskon.com/GALLERY_NAME (or /PAGE_NUMBER/) - got " + url
61+
+ " instead");
5862
}
5963

6064
@Override
61-
public Document getFirstPage() throws IOException { //returns the root gallery page regardless of actual page number
65+
public Document getFirstPage() throws IOException {
66+
// returns the root gallery page regardless of actual page number
6267
// "url" is an instance field of the superclass
6368
String rootUrlStr;
6469
URL rootUrl;
6570

66-
if(!tagPage) {
71+
if (!tagPage) {
6772
rootUrlStr = url.toExternalForm().replaceAll("(|/|/[0-9]+/?)$", "/");
68-
} else { //6-10-21
73+
} else { // 6-10-21
6974
rootUrlStr = url.toExternalForm().replaceAll("(page/[0-9]+/)$", "page/1/");
7075
}
7176

@@ -81,51 +86,56 @@ public Document getFirstPage() throws IOException { //returns the root gallery p
8186
public Document getNextPage(Document doc) throws IOException {
8287
int pageNum = currPageNum;
8388
String urlStr;
84-
if(!tagPage) {
89+
if (!tagPage) {
8590
if (pageNum == 1 && lastPageNum > 1) {
8691
urlStr = url.toExternalForm().concat((pageNum + 1) + "");
8792
System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
8893
} else if (pageNum < lastPageNum) {
8994
urlStr = url.toExternalForm().replaceAll("(/([0-9]*)/?)$", ("/" + (pageNum + 1) + "/"));
9095
System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
9196
} else {
92-
//System.out.printf("Error: Page number provided goes past last valid page number\n");
97+
// System.out.printf("Error: Page number provided goes past last valid page
98+
// number\n");
9399
throw (new IOException("Error: Page number provided goes past last valid page number\n"));
94100
}
95-
} else { //6-10-21
96-
//if (pageNum == 1 && lastPageNum >= 1) {
97-
if (pageNum == 1 && lastPageNum > 1) { //6-10-21
101+
} else { // 6-10-21
102+
// if (pageNum == 1 && lastPageNum >= 1) {
103+
if (pageNum == 1 && lastPageNum > 1) { // 6-10-21
98104
urlStr = url.toExternalForm().concat("page/" + (pageNum + 1) + "");
99105
System.out.printf("Old Str: %s New Str: %s\n", url.toExternalForm(), urlStr);
100106
} else if (pageNum < lastPageNum) {
101107
urlStr = url.toExternalForm().replaceAll("(page/([0-9]*)/?)$", ("page/" + (pageNum + 1) + "/"));
102108
System.out.printf("Old Str: %s New Str: %s\n", url.toString(), urlStr);
103109
} else {
104-
//System.out.printf("Error: Page number provided goes past last valid page number\n");
110+
// System.out.printf("Error: Page number provided goes past last valid page
111+
// number\n");
105112
System.out.print("Error: There is no next page!\n");
106113
return null;
107-
//throw (new IOException("Error: Page number provided goes past last valid page number\n"));
114+
// throw (new IOException("Error: Page number provided goes past last valid page
115+
// number\n"));
108116
}
109117
}
110118

111119
url = URI.create(urlStr).toURL();
112120
currDoc = Http.url(url).get();
113-
currPageNum ++;//hi
121+
currPageNum++;// hi
114122
return currDoc;
115123
}
116124

117125
private int getMaxPageNumber(Document doc) {
118-
if(!tagPage) {
126+
if (!tagPage) {
119127
try {
120-
lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text()); //gets the last possible page for the gallery
121-
} catch(Exception e) {
128+
// gets the last possible page for the gallery
129+
lastPageNum = Integer.parseInt(doc.select("div.page-link > a").last().text());
130+
} catch (Exception e) {
122131
return 1;
123132
}
124133
} else {
125134
try {
126-
lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text()); //gets the last possible page for the gallery
135+
// gets the last possible page for the gallery
136+
lastPageNum = Integer.parseInt(doc.select("div.pagination > a").last().text());
127137
System.out.println("The last page found for " + url + " was " + lastPageNum);
128-
} catch(Exception e) {
138+
} catch (Exception e) {
129139
return 1;
130140
}
131141
}
@@ -134,9 +144,9 @@ private int getMaxPageNumber(Document doc) {
134144
}
135145

136146
private int getCurrentPageNum(Document doc) {
137-
int currPage; //6-10-21
147+
int currPage; // 6-10-21
138148

139-
if(!tagPage) {
149+
if (!tagPage) {
140150
currPage = Integer.parseInt(doc.select("div.page-link > span").first().text());
141151
} else {
142152
currPage = Integer.parseInt(doc.select("div.pagination > span").first().text());
@@ -148,45 +158,25 @@ private int getCurrentPageNum(Document doc) {
148158
}
149159

150160
@Override
151-
public List<String> getURLsFromPage(Document doc) { //gets the urls of the images
161+
public List<String> getURLsFromPage(Document doc) { // gets the urls of the images
152162
List<String> result = new ArrayList<>();
153163

154-
if(!tagPage) {
164+
if (!tagPage) {
155165
for (Element el : doc.select("p > img")) {
156-
String imageSource = el.attr("src");
166+
String imageSource = el.attr("data-src");
157167
result.add(imageSource);
158168
}
159169

160170
System.out.println("\n1.)Printing List: " + result + "\n");
161-
} else { //6-10-21
162-
//List<String> gallery_set_list = new ArrayList<>();
163-
171+
} else {
164172
for (Element el : doc.select("h2 > a")) {
165173
String pageSource = el.attr("href");
166-
if(!pageSource.equals("https://mrcong.com/")) {
174+
if (!pageSource.equals("https://misskon.com/")) {
167175
result.add(pageSource);
168176
System.out.println("\n" + pageSource + " has been added to the list.");
169177
}
170178
}
171179

172-
/*for (String el2 : gallery_set_list) {
173-
try {
174-
URL temp_urL = URI.create(el2).toURL();
175-
MrCongRipper mcr = new MrCongRipper(temp_urL);
176-
System.out.println("URL being ripped: " + mcr.url.toString());
177-
result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));
178-
179-
Document nextPg = mcr.getNextPage(mcr.currDoc);
180-
while(nextPg != null) {
181-
result.addAll(mcr.getURLsFromPage(nextPg));
182-
nextPg = mcr.getNextPage(mcr.currDoc);
183-
}
184-
} catch (IOException e) {
185-
e.printStackTrace();
186-
}
187-
188-
}*/
189-
190180
System.out.println("\n2.)Printing List: " + result + "\n");
191181
}
192182

@@ -195,21 +185,20 @@ public List<String> getURLsFromPage(Document doc) { //gets the urls of the image
195185

196186
@Override
197187
public void downloadURL(URL url, int index) {
198-
//addURLToDownload(url, getPrefix(index));
199-
200-
if(!tagPage) {
188+
if (!tagPage) {
201189
addURLToDownload(url, getPrefix(index));
202190
} else {
203191
try {
204192
List<String> ls = this.getURLsFromPage(this.currDoc);
205193
Document np = this.getNextPage(this.currDoc);
206194

207-
while(np != null) { //Creates a list of all sets to download
195+
// Creates a list of all sets to download
196+
while (np != null) {
208197
ls.addAll(this.getURLsFromPage(np));
209198
np = this.getNextPage(np);
210199
}
211200

212-
for(String urlStr : ls) {
201+
for (String urlStr : ls) {
213202
MrCongRipper mcr = new MrCongRipper(URI.create(urlStr).toURL());
214203
mcr.setup();
215204
mcr.rip();
@@ -220,4 +209,4 @@ public void downloadURL(URL url, int index) {
220209
}
221210
}
222211
}
223-
}
212+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package com.rarchives.ripme.tst.ripper.rippers;
2+
3+
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import com.rarchives.ripme.ripper.rippers.MrCongRipper;
10+
11+
public class MrCongRipperTest extends RippersTest {
12+
@Test
13+
public void testMrCongAlbumRip1() throws IOException, URISyntaxException {
14+
MrCongRipper ripper = new MrCongRipper(new URI(
15+
"https://misskon.com/87161-xr-uncensored-lin-xing-lan-r18-xiu-ren-jue-mi-3wan-yuan-zi-liao-chao-shi-zhang-16k-qing-te-xie-1174-photos-1-video/")
16+
.toURL());
17+
testRipper(ripper);
18+
}
19+
20+
@Test
21+
public void testMrCongAlbumRip2() throws IOException, URISyntaxException {
22+
MrCongRipper ripper = new MrCongRipper(
23+
new URI("https://misskon.com/xiaoyu-vol-799-lin-xing-lan-87-anh/").toURL());
24+
25+
testRipper(ripper);
26+
}
27+
28+
@Test
29+
public void testMrCongAlbumRip3() throws IOException, URISyntaxException {
30+
MrCongRipper ripper = new MrCongRipper(
31+
new URI("https://misskon.com/87163-le-ledb-201b-dayoung-50-photos/").toURL());
32+
testRipper(ripper);
33+
}
34+
35+
// Ripping from tags is not yet implemented. Uncomment the @Test line when
36+
// implemented.
37+
// @Test
38+
public void testMrCongTagRip() throws IOException, URISyntaxException {
39+
MrCongRipper ripper = new MrCongRipper(new URI("https://misskon.com/tag/xr-uncensored/").toURL());
40+
testRipper(ripper);
41+
}
42+
}

0 commit comments

Comments
 (0)