1
1
package com .rarchives .ripme .ripper .rippers ;
2
2
3
- import com .rarchives .ripme .ripper .AbstractHTMLRipper ;
4
- import com .rarchives .ripme .utils .Http ;
5
3
import java .io .IOException ;
6
4
import java .net .MalformedURLException ;
7
5
import java .net .URI ;
11
9
import java .util .List ;
12
10
import java .util .regex .Matcher ;
13
11
import java .util .regex .Pattern ;
12
+
14
13
import org .jsoup .nodes .Document ;
15
14
import org .jsoup .nodes .Element ;
16
15
16
+ import com .rarchives .ripme .ripper .AbstractHTMLRipper ;
17
+ import com .rarchives .ripme .utils .Http ;
17
18
18
19
public class MrCongRipper extends AbstractHTMLRipper {
19
-
20
20
private Document currDoc ;
21
21
private int lastPageNum ;
22
22
private int currPageNum ;
@@ -29,43 +29,48 @@ public MrCongRipper(URL url) throws IOException {
29
29
30
30
@ Override
31
31
public String getHost () {
32
- return "mrcong " ;
32
+ return "misskon " ;
33
33
}
34
34
35
35
@ Override
36
36
public String getDomain () {
37
- return "mrcong.com" ;
37
+ // NOTE: This was previously mrcong.com, which now redirects to
38
+ // misskon.com. Some resources still refer to mrcong.com
39
+ // but all the top level URLs are now misskon.com
40
+ return "misskon.com" ;
38
41
}
39
42
40
43
@ Override
41
44
public String getGID (URL url ) throws MalformedURLException {
42
45
System .out .println (url .toExternalForm ());
43
- Pattern p = Pattern .compile ("^https?://mrcong\\ .com/(\\ S*)[0-9]+-anh(-[0-9]+-videos)?(|/|/[0-9]+)$" );
44
- Pattern p2 = Pattern .compile ("^https?://mrcong\\ .com/tag/(\\ S*)/$" ); //Added 6-10-21
46
+ Pattern p = Pattern .compile (
47
+ "^https?://(?:[a-z]+\\ .)?misskon\\ .com/([-0-9a-zA-Z]+)(?:/?|/[0-9]+/?)?$" );
48
+ Pattern p2 = Pattern .compile ("^https?://misskon\\ .com/tag/(\\ S*)/$" );
45
49
Matcher m = p .matcher (url .toExternalForm ());
46
- Matcher m2 = p2 .matcher (url .toExternalForm ()); //6-10-21
50
+ Matcher m2 = p2 .matcher (url .toExternalForm ());
47
51
if (m .matches ()) {
48
52
return m .group (1 );
49
- }
50
- else if (m2 .matches ()) { //Added 6-10-21
53
+ } else if (m2 .matches ()) {
51
54
tagPage = true ;
52
55
System .out .println ("tagPage = TRUE" );
53
56
return m2 .group (1 );
54
57
}
55
58
56
- throw new MalformedURLException ("Expected mrcong.com URL format: "
57
- + "mrcong.com/GALLERY_NAME(-anh OR -anh/ OR -anh/PAGE_NUMBER OR -anh/PAGE_NUMBER/) - got " + url + " instead" );
59
+ throw new MalformedURLException ("Expected misskon.com URL format: "
60
+ + "misskon.com/GALLERY_NAME (or /PAGE_NUMBER/) - got " + url
61
+ + " instead" );
58
62
}
59
63
60
64
@ Override
61
- public Document getFirstPage () throws IOException { //returns the root gallery page regardless of actual page number
65
+ public Document getFirstPage () throws IOException {
66
+ // returns the root gallery page regardless of actual page number
62
67
// "url" is an instance field of the superclass
63
68
String rootUrlStr ;
64
69
URL rootUrl ;
65
70
66
- if (!tagPage ) {
71
+ if (!tagPage ) {
67
72
rootUrlStr = url .toExternalForm ().replaceAll ("(|/|/[0-9]+/?)$" , "/" );
68
- } else { //6-10-21
73
+ } else { // 6-10-21
69
74
rootUrlStr = url .toExternalForm ().replaceAll ("(page/[0-9]+/)$" , "page/1/" );
70
75
}
71
76
@@ -81,51 +86,56 @@ public Document getFirstPage() throws IOException { //returns the root gallery p
81
86
public Document getNextPage (Document doc ) throws IOException {
82
87
int pageNum = currPageNum ;
83
88
String urlStr ;
84
- if (!tagPage ) {
89
+ if (!tagPage ) {
85
90
if (pageNum == 1 && lastPageNum > 1 ) {
86
91
urlStr = url .toExternalForm ().concat ((pageNum + 1 ) + "" );
87
92
System .out .printf ("Old Str: %s New Str: %s\n " , url .toExternalForm (), urlStr );
88
93
} else if (pageNum < lastPageNum ) {
89
94
urlStr = url .toExternalForm ().replaceAll ("(/([0-9]*)/?)$" , ("/" + (pageNum + 1 ) + "/" ));
90
95
System .out .printf ("Old Str: %s New Str: %s\n " , url .toString (), urlStr );
91
96
} else {
92
- //System.out.printf("Error: Page number provided goes past last valid page number\n");
97
+ // System.out.printf("Error: Page number provided goes past last valid page
98
+ // number\n");
93
99
throw (new IOException ("Error: Page number provided goes past last valid page number\n " ));
94
100
}
95
- } else { //6-10-21
96
- //if (pageNum == 1 && lastPageNum >= 1) {
97
- if (pageNum == 1 && lastPageNum > 1 ) { //6-10-21
101
+ } else { // 6-10-21
102
+ // if (pageNum == 1 && lastPageNum >= 1) {
103
+ if (pageNum == 1 && lastPageNum > 1 ) { // 6-10-21
98
104
urlStr = url .toExternalForm ().concat ("page/" + (pageNum + 1 ) + "" );
99
105
System .out .printf ("Old Str: %s New Str: %s\n " , url .toExternalForm (), urlStr );
100
106
} else if (pageNum < lastPageNum ) {
101
107
urlStr = url .toExternalForm ().replaceAll ("(page/([0-9]*)/?)$" , ("page/" + (pageNum + 1 ) + "/" ));
102
108
System .out .printf ("Old Str: %s New Str: %s\n " , url .toString (), urlStr );
103
109
} else {
104
- //System.out.printf("Error: Page number provided goes past last valid page number\n");
110
+ // System.out.printf("Error: Page number provided goes past last valid page
111
+ // number\n");
105
112
System .out .print ("Error: There is no next page!\n " );
106
113
return null ;
107
- //throw (new IOException("Error: Page number provided goes past last valid page number\n"));
114
+ // throw (new IOException("Error: Page number provided goes past last valid page
115
+ // number\n"));
108
116
}
109
117
}
110
118
111
119
url = URI .create (urlStr ).toURL ();
112
120
currDoc = Http .url (url ).get ();
113
- currPageNum ++;//hi
121
+ currPageNum ++;// hi
114
122
return currDoc ;
115
123
}
116
124
117
125
private int getMaxPageNumber (Document doc ) {
118
- if (!tagPage ) {
126
+ if (!tagPage ) {
119
127
try {
120
- lastPageNum = Integer .parseInt (doc .select ("div.page-link > a" ).last ().text ()); //gets the last possible page for the gallery
121
- } catch (Exception e ) {
128
+ // gets the last possible page for the gallery
129
+ lastPageNum = Integer .parseInt (doc .select ("div.page-link > a" ).last ().text ());
130
+ } catch (Exception e ) {
122
131
return 1 ;
123
132
}
124
133
} else {
125
134
try {
126
- lastPageNum = Integer .parseInt (doc .select ("div.pagination > a" ).last ().text ()); //gets the last possible page for the gallery
135
+ // gets the last possible page for the gallery
136
+ lastPageNum = Integer .parseInt (doc .select ("div.pagination > a" ).last ().text ());
127
137
System .out .println ("The last page found for " + url + " was " + lastPageNum );
128
- } catch (Exception e ) {
138
+ } catch (Exception e ) {
129
139
return 1 ;
130
140
}
131
141
}
@@ -134,9 +144,9 @@ private int getMaxPageNumber(Document doc) {
134
144
}
135
145
136
146
private int getCurrentPageNum (Document doc ) {
137
- int currPage ; //6-10-21
147
+ int currPage ; // 6-10-21
138
148
139
- if (!tagPage ) {
149
+ if (!tagPage ) {
140
150
currPage = Integer .parseInt (doc .select ("div.page-link > span" ).first ().text ());
141
151
} else {
142
152
currPage = Integer .parseInt (doc .select ("div.pagination > span" ).first ().text ());
@@ -148,45 +158,25 @@ private int getCurrentPageNum(Document doc) {
148
158
}
149
159
150
160
@ Override
151
- public List <String > getURLsFromPage (Document doc ) { //gets the urls of the images
161
+ public List <String > getURLsFromPage (Document doc ) { // gets the urls of the images
152
162
List <String > result = new ArrayList <>();
153
163
154
- if (!tagPage ) {
164
+ if (!tagPage ) {
155
165
for (Element el : doc .select ("p > img" )) {
156
- String imageSource = el .attr ("src" );
166
+ String imageSource = el .attr ("data- src" );
157
167
result .add (imageSource );
158
168
}
159
169
160
170
System .out .println ("\n 1.)Printing List: " + result + "\n " );
161
- } else { //6-10-21
162
- //List<String> gallery_set_list = new ArrayList<>();
163
-
171
+ } else {
164
172
for (Element el : doc .select ("h2 > a" )) {
165
173
String pageSource = el .attr ("href" );
166
- if (!pageSource .equals ("https://mrcong .com/" )) {
174
+ if (!pageSource .equals ("https://misskon .com/" )) {
167
175
result .add (pageSource );
168
176
System .out .println ("\n " + pageSource + " has been added to the list." );
169
177
}
170
178
}
171
179
172
- /*for (String el2 : gallery_set_list) {
173
- try {
174
- URL temp_urL = URI.create(el2).toURL();
175
- MrCongRipper mcr = new MrCongRipper(temp_urL);
176
- System.out.println("URL being ripped: " + mcr.url.toString());
177
- result.addAll(mcr.getURLsFromPage(mcr.getFirstPage()));
178
-
179
- Document nextPg = mcr.getNextPage(mcr.currDoc);
180
- while(nextPg != null) {
181
- result.addAll(mcr.getURLsFromPage(nextPg));
182
- nextPg = mcr.getNextPage(mcr.currDoc);
183
- }
184
- } catch (IOException e) {
185
- e.printStackTrace();
186
- }
187
-
188
- }*/
189
-
190
180
System .out .println ("\n 2.)Printing List: " + result + "\n " );
191
181
}
192
182
@@ -195,21 +185,20 @@ public List<String> getURLsFromPage(Document doc) { //gets the urls of the image
195
185
196
186
@ Override
197
187
public void downloadURL (URL url , int index ) {
198
- //addURLToDownload(url, getPrefix(index));
199
-
200
- if (!tagPage ) {
188
+ if (!tagPage ) {
201
189
addURLToDownload (url , getPrefix (index ));
202
190
} else {
203
191
try {
204
192
List <String > ls = this .getURLsFromPage (this .currDoc );
205
193
Document np = this .getNextPage (this .currDoc );
206
194
207
- while (np != null ) { //Creates a list of all sets to download
195
+ // Creates a list of all sets to download
196
+ while (np != null ) {
208
197
ls .addAll (this .getURLsFromPage (np ));
209
198
np = this .getNextPage (np );
210
199
}
211
200
212
- for (String urlStr : ls ) {
201
+ for (String urlStr : ls ) {
213
202
MrCongRipper mcr = new MrCongRipper (URI .create (urlStr ).toURL ());
214
203
mcr .setup ();
215
204
mcr .rip ();
@@ -220,4 +209,4 @@ public void downloadURL(URL url, int index) {
220
209
}
221
210
}
222
211
}
223
- }
212
+ }
0 commit comments