
Commit 3bddae2

Author: updating-bot (committed)
Commit message: mirroring bot - 2025/08/26
1 parent: 193a63a

24 files changed: +924 / -486 lines

svn_trunk/src/jd/plugins/decrypter/ArchiveOrgCrawler.java

Lines changed: 13 additions & 38 deletions
@@ -74,7 +74,7 @@
 import jd.plugins.download.HashInfo;
 import jd.plugins.hoster.ArchiveOrg;
 
-@DecrypterPlugin(revision = "$Revision: 51141 $", interfaceVersion = 2, names = { "archive.org", "subdomain.archive.org" }, urls = { "https?://(?:www\\.)?archive\\.org/((?:details|download|stream|embed)/.+|search\\?query=.+)", "https?://[^/]+\\.archive\\.org/view_archive\\.php\\?archive=[^\\&]+(?:\\&file=[^\\&]+)?" })
+@DecrypterPlugin(revision = "$Revision: 51367 $", interfaceVersion = 2, names = { "archive.org", "subdomain.archive.org" }, urls = { "https?://(?:www\\.)?archive\\.org/((?:details|download|stream|embed)/.+|search\\?query=.+)", "https?://[^/]+\\.archive\\.org/view_archive\\.php\\?archive=[^\\&]+(?:\\&file=[^\\&]+)?" })
 public class ArchiveOrgCrawler extends PluginForDecrypt {
     public ArchiveOrgCrawler(PluginWrapper wrapper) {
         super(wrapper);
@@ -367,7 +367,7 @@ private ArrayList<DownloadLink> crawlBetaSearchAPI(final String sourceurl, Strin
         do {
             query.addAndReplace("page", Integer.toString(page));
             /* This looks to be an internally used version of public crawl/search API v2 beta, see: https://archive.org/services/swagger/ */
-            brc.getPage("https://archive.org/services/search/beta/page_production/?" + query.toString());
+            brc.getPage("https://" + getHost() + "/services/search/beta/page_production/?" + query.toString());
             if (brc.getHttpConnection().getResponseCode() == 400) {
                 if (ret.size() > 0) {
                     logger.info("Stopping because: Surprisingly got http response 400 | Possibly missing items: " + (totalNumberofItems - ret.size()));
@@ -750,38 +750,11 @@ public ArrayList<DownloadLink> crawlBook(final Browser br, final String ajaxurl,
 
     private String findBookReaderURLWebsite(final Browser br) {
         String url = br.getRegex("(?:\\'|\")([^\\'\"]+BookReaderJSIA\\.php\\?[^\\'\"]+)").getMatch(0);
-        if (url != null) {
-            url = PluginJSonUtils.unescape(url);
-            return url;
-        }
-        return null;
-    }
-
-    @Deprecated
-    /** This function can parse the "track" field of json items from "/metadata/<identifier> */
-    private int[] parseAudioTrackPosition(final Object audioTrackPositionO) throws PluginException {
-        if (audioTrackPositionO == null) {
+        if (url == null) {
             return null;
-        } else if (audioTrackPositionO instanceof int[]) {
-            return (int[]) audioTrackPositionO;
-        } else if (audioTrackPositionO instanceof Number) {
-            return new int[] { ((Number) audioTrackPositionO).intValue() };
-        } else if (audioTrackPositionO instanceof String) {
-            final String string = audioTrackPositionO.toString();
-            final String xofY[] = new Regex(string, "(\\d+)\\s*/\\s*(\\d+)").getRow(0);
-            if (xofY != null) {
-                // 02/09
-                return new int[] { Integer.parseInt(xofY[0]) };
-            }
-            final String cdAndTrack[] = new Regex(string, "(\\d+)\\s*\\.\\s*(\\d+)").getRow(0);
-            if (cdAndTrack != null) {
-                // 1.01 and 3.09
-                return new int[] { Integer.parseInt(cdAndTrack[1]), Integer.parseInt(cdAndTrack[0]) };
-            }
-            return new int[] { Integer.parseInt(string) };
-        } else {
-            throw new PluginException(LinkStatus.ERROR_PLUGIN_DEFECT, "Unsupported:" + audioTrackPositionO);
         }
+        url = PluginJSonUtils.unescape(url);
+        return url;
     }
 
     /** Work in progress, see https://archive.org/metadata/<identifier> */
@@ -805,7 +778,7 @@ private ArrayList<DownloadLink> crawlMetadataJsonV2(final String identifier, fin
         final Browser brc = br.cloneBrowser();
         /* The json answer can be really big. */
         brc.setLoadLimit(Integer.MAX_VALUE);
-        brc.getPage("https://archive.org/metadata/" + Encoding.urlEncode(identifier));
+        brc.getPage("https://" + getHost() + "/metadata/" + Encoding.urlEncode(identifier));
         final Map<String, Object> root = restoreFromString(brc.getRequest().getHtmlCode(), TypeRef.MAP);
         // final Boolean is_dark = (Boolean) root.get("is_dark"); // This means that the content is offline(?)
         final List<Map<String, Object>> root_files = (List<Map<String, Object>>) root.get("files");
@@ -870,13 +843,15 @@ private ArrayList<DownloadLink> crawlMetadataJsonV2(final String identifier, fin
         DownloadLink singleDesiredFile = null;
         DownloadLink singleDesiredFile2 = null;
         final ArrayList<DownloadLink> selectedItems = new ArrayList<DownloadLink>();
+        /* FilePackage for all file items that are contained in the root of this identifier's filesystem. */
         final FilePackage fpRoot = FilePackage.getInstance();
         fpRoot.setName(identifier);
         if (!StringUtils.isEmpty(description)) {
             fpRoot.setComment(description);
         }
         final Map<String, FilePackage> packagemap = new HashMap<String, FilePackage>();
         packagemap.put(identifier, fpRoot);
+        // final Set<ArchiveOrgType> selectedTypes = cfg.getTypesToCrawl();
         final boolean crawlOriginalFilesOnly = cfg.isFileCrawlerCrawlOnlyOriginalVersions();
         final boolean crawlMetadataFiles = cfg.isFileCrawlerCrawlMetadataFiles();
         final boolean crawlThumbnails = cfg.isFileCrawlerCrawlThumbnails();
@@ -944,7 +919,7 @@ private ArrayList<DownloadLink> crawlMetadataJsonV2(final String identifier, fin
                 filename = pathWithFilename;
             }
             final Object fileSizeO = filemap.get("size");
-            String url = "https://archive.org/download/" + identifier;
+            String url = "https://" + getHost() + "/download/" + identifier;
             if (pathWithFilename.startsWith("/")) {
                 url += URLEncode.encodeURIComponent(pathWithFilename);
             } else {
@@ -1208,7 +1183,7 @@ private ArrayList<DownloadLink> crawlMetadataJsonV2(final String identifier, fin
              * Video can't be officially downloaded but it can be streamed in segments of X seconds each -> Generate those stream-links
              */
             for (int position = 0; position < numberofVideoSegments; position++) {
-                final String directurl = "https://archive.org/download/" + identifier + "/" + identifier + ".mp4?t=" + offsetSeconds + "/" + (offsetSeconds + secondsPerSegment) + "&ignore=x.mp4";
+                final String directurl = "https://" + getHost() + "/download/" + identifier + "/" + identifier + ".mp4?t=" + offsetSeconds + "/" + (offsetSeconds + secondsPerSegment) + "&ignore=x.mp4";
                 final DownloadLink video = this.createDownloadlink(directurl);
                 video.setProperty(ArchiveOrg.PROPERTY_FILETYPE, ArchiveOrg.FILETYPE_VIDEO);
                 video.setProperty(ArchiveOrg.PROPERTY_PLAYLIST_POSITION, position);
@@ -1638,7 +1613,7 @@ private ArrayList<DownloadLink> crawlPatternSlashDownloadWebsite(final String ur
     private ArrayList<DownloadLink> crawlFiles(final String contenturl) throws Exception {
         if (br.getHttpConnection().getResponseCode() == 404) {
             throw new PluginException(LinkStatus.ERROR_FILE_NOT_FOUND);
-        } else if (br.containsHTML("(?i)>\\s*The item is not available")) {
+        } else if (br.containsHTML(">\\s*The item is not available")) {
             throw new PluginException(LinkStatus.ERROR_FILE_NOT_FOUND);
         } else if (!br.containsHTML("\"/download/")) {
             throw new PluginException(LinkStatus.ERROR_FILE_NOT_FOUND, "Maybe invalid link or nothing there to download");
@@ -1824,7 +1799,7 @@ private ArrayList<DownloadLink> crawlXML(final String contenturl, final Browser
             throw new IllegalArgumentException();
         }
         String xmlResponse = null;
-        final String xmlurl = "https://archive.org/download/" + titleSlug + "/" + titleSlug + "_files.xml";
+        final String xmlurl = "https://" + getHost() + "/download/" + titleSlug + "/" + titleSlug + "_files.xml";
         final String cacheKey = xmlurl;
         final Object lock = requestLock(cacheKey);
         try {
@@ -1869,7 +1844,7 @@ private ArrayList<DownloadLink> crawlXML(final String contenturl, final Browser
             throw new DecrypterRetryException(RetryReason.EMPTY_FOLDER, path);
         }
         logger.info("Crawling all files below path: " + path);
-        final String basePath = "https://archive.org/download/" + titleSlug;
+        final String basePath = "https://" + getHost() + "/download/" + titleSlug;
         final List<String> skippedItems = new ArrayList<String>();
         for (final String item : items) {
             /* <old_version>true</old_version> */
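
The recurring edit in this file replaces hard-coded "https://archive.org/..." prefixes with "https://" + getHost() + "/...", so search, metadata and download requests all follow whatever host the plugin instance is currently bound to. A minimal, self-contained sketch of that URL-building pattern (the class and method names here are illustrative stand-ins, not part of the plugin):

    public class HostAwareUrlSketch {
        // Stand-in for the host value a JDownloader plugin would return from getHost().
        private final String host;

        public HostAwareUrlSketch(String host) {
            this.host = host;
        }

        private String getHost() {
            return host;
        }

        /** Builds a download URL against the currently configured host instead of a hard-coded domain. */
        public String buildDownloadUrl(String identifier, String path) {
            String url = "https://" + getHost() + "/download/" + identifier;
            if (path != null && !path.isEmpty()) {
                url += path.startsWith("/") ? path : "/" + path;
            }
            return url;
        }

        public static void main(String[] args) {
            System.out.println(new HostAwareUrlSketch("archive.org").buildDownloadUrl("some-identifier", "/example.pdf"));
        }
    }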

svn_trunk/src/jd/plugins/decrypter/GenericYetiShareFolder.java

Lines changed: 2 additions & 1 deletion
@@ -50,7 +50,7 @@
 import jd.plugins.PluginForHost;
 import jd.plugins.components.SiteType.SiteTemplate;
 
-@DecrypterPlugin(revision = "$Revision: 51349 $", interfaceVersion = 3, names = {}, urls = {})
+@DecrypterPlugin(revision = "$Revision: 51367 $", interfaceVersion = 3, names = {}, urls = {})
 public class GenericYetiShareFolder extends PluginForDecrypt {
     public GenericYetiShareFolder(PluginWrapper wrapper) {
         super(wrapper);
@@ -98,6 +98,7 @@ public static List<String[]> getPluginDomains() {
         ret.add(new String[] { "imgcubby.com" });
         ret.add(new String[] { "vidpirate.com" });
         ret.add(new String[] { "netnaijafiles.xyz" });
+        ret.add(new String[] { "way4share.com" });
         return ret;
     }

svn_trunk/src/jd/plugins/decrypter/KemonoPartyCrawler.java

Lines changed: 56 additions & 25 deletions
@@ -23,18 +23,7 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-
-import org.appwork.net.protocol.http.HTTPConstants;
-import org.appwork.storage.TypeRef;
-import org.appwork.utils.DebugMode;
-import org.appwork.utils.Regex;
-import org.appwork.utils.StringUtils;
-import org.appwork.utils.parser.UrlQuery;
-import org.jdownloader.plugins.components.config.KemonoPartyConfig;
-import org.jdownloader.plugins.components.config.KemonoPartyConfig.TextCrawlMode;
-import org.jdownloader.plugins.components.config.KemonoPartyConfigCoomerParty;
-import org.jdownloader.plugins.config.PluginJsonConfig;
-import org.jdownloader.plugins.controller.LazyPlugin;
+import java.util.Set;
 
 import jd.PluginWrapper;
 import jd.controlling.ProgressController;
@@ -54,7 +43,19 @@
 import jd.plugins.PluginForDecrypt;
 import jd.plugins.hoster.KemonoParty;
 
-@DecrypterPlugin(revision = "$Revision: 51362 $", interfaceVersion = 3, names = {}, urls = {})
+import org.appwork.net.protocol.http.HTTPConstants;
+import org.appwork.storage.TypeRef;
+import org.appwork.utils.DebugMode;
+import org.appwork.utils.Regex;
+import org.appwork.utils.StringUtils;
+import org.appwork.utils.parser.UrlQuery;
+import org.jdownloader.plugins.components.config.KemonoPartyConfig;
+import org.jdownloader.plugins.components.config.KemonoPartyConfig.TextCrawlMode;
+import org.jdownloader.plugins.components.config.KemonoPartyConfigCoomerParty;
+import org.jdownloader.plugins.config.PluginJsonConfig;
+import org.jdownloader.plugins.controller.LazyPlugin;
+
+@DecrypterPlugin(revision = "$Revision: 51371 $", interfaceVersion = 3, names = {}, urls = {})
 public class KemonoPartyCrawler extends PluginForDecrypt {
     public KemonoPartyCrawler(PluginWrapper wrapper) {
         super(wrapper);
@@ -118,7 +119,10 @@ private String getApiBase() {
         return "https://" + getHost() + "/api/v1";
     }
 
+    private KemonoPartyConfig cfg = null;
+
     public ArrayList<DownloadLink> decryptIt(final CryptedLink param, ProgressController progress) throws Exception {
+        cfg = PluginJsonConfig.get(getConfigInterface());
         cl = param;
         if (param.getCryptedUrl().matches(TYPE_PROFILE)) {
             return this.crawlProfile(param);
@@ -130,6 +134,12 @@ public ArrayList<DownloadLink> decryptIt(final CryptedLink param, ProgressContro
         }
     }
 
+    @Override
+    public void clean() {
+        cfg = null;
+        super.clean();
+    }
+
     private ArrayList<DownloadLink> crawlProfile(final CryptedLink param) throws Exception {
         final Regex urlinfo = new Regex(param.getCryptedUrl(), TYPE_PROFILE);
         if (!urlinfo.patternFind()) {
@@ -156,8 +166,8 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
             throw new PluginException(LinkStatus.ERROR_PLUGIN_DEFECT);
         }
         final HashSet<String> dupes = new HashSet<String>();
-        final boolean useAdvancedDupecheck = PluginJsonConfig.get(getConfigInterface()).isEnableProfileCrawlerAdvancedDupeFiltering();
-        final boolean perPostPackageEnabled = PluginJsonConfig.get(getConfigInterface()).isPerPostURLPackageEnabled();
+        final boolean useAdvancedDupecheck = cfg.isEnableProfileCrawlerAdvancedDupeFiltering();
+        final boolean perPostPackageEnabled = cfg.isPerPostURLPackageEnabled();
         final ArrayList<DownloadLink> ret = new ArrayList<DownloadLink>();
         final FilePackage profileFilePackage = getFilePackageForProfileCrawler(service, usernameOrUserID);
         int offset = 0;
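
The preceding hunks show the new pattern for config access: the crawler caches its KemonoPartyConfig once per decryptIt() run in the cfg field, reads settings from that field instead of calling PluginJsonConfig.get(...) repeatedly, and clears the field again in clean(). A simplified, self-contained sketch of that lifecycle (the Config interface and class name below are stand-ins, not the real plugin types):

    import java.util.ArrayList;

    public class ConfigCachingSketch {
        /** Stand-in for KemonoPartyConfig. */
        interface Config {
            boolean isPerPostURLPackageEnabled();
        }

        private Config cfg = null;

        public ArrayList<String> decryptIt(Config loadedConfig, String url) {
            cfg = loadedConfig; // cache once per run instead of re-resolving the config on every access
            final ArrayList<String> results = new ArrayList<String>();
            if (cfg.isPerPostURLPackageEnabled()) {
                results.add("per-post package for " + url);
            }
            return results;
        }

        public void clean() {
            cfg = null; // drop the cached config when the crawl run is cleaned up
        }

        public static void main(String[] args) {
            final ConfigCachingSketch sketch = new ConfigCachingSketch();
            System.out.println(sketch.decryptIt(() -> true, "https://example.invalid/service/user/1/post/2"));
            sketch.clean();
        }
    }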
@@ -169,6 +179,7 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
         final int maxItemsPerPage = 50;
         int numberofContinuousPagesWithoutAnyNewItems = 0;
         final int maxPagesWithoutNewItems = 15;
+        final Set<String> retryWithSinglePostAPI = new HashSet<String>();
         do {
             getPage(br, this.getApiBase() + "/" + service + "/user/" + Encoding.urlEncode(usernameOrUserID) + "/posts?o=" + offset);
             final List<Map<String, Object>> posts = (List<Map<String, Object>>) restoreFromString(br.getRequest().getHtmlCode(), TypeRef.OBJECT);
@@ -184,6 +195,14 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
             final int numberofUniqueItemsOld = dupes.size();
             for (final Map<String, Object> post : posts) {
                 final ArrayList<DownloadLink> thisresults = this.crawlProcessPostAPI(post, dupes, useAdvancedDupecheck);
+                if (post.get("content") == null && StringUtils.isNotEmpty(StringUtils.valueOfOrNull(post.get("substring")))) {
+                    // posts api no longer returns full post content but only a substring, so we have to retry with post api
+                    final TextCrawlMode mode = cfg.getTextCrawlMode();
+                    if (cfg.isCrawlHttpLinksFromPostContent() || mode == TextCrawlMode.ALWAYS || (mode == TextCrawlMode.ONLY_IF_NO_MEDIA_ITEMS_ARE_FOUND && thisresults.isEmpty())) {
+                        retryWithSinglePostAPI.add(post.get("id").toString());
+                        logger.info("Need to process item:" + post.get("id") + " again due to maybe incomplete post content");
+                    }
+                }
                 for (final DownloadLink thisresult : thisresults) {
                     if (!perPostPackageEnabled) {
                         thisresult._setFilePackage(profileFilePackage);
@@ -218,6 +237,19 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
                 page++;
             }
         } while (!this.isAbort());
+        logger.info("Need to process " + retryWithSinglePostAPI.size() + " items again due to maybe incomplete post content");
+        while (!this.isAbort() && retryWithSinglePostAPI.size() > 0) {
+            final String nextRetryPostID = retryWithSinglePostAPI.iterator().next();
+            retryWithSinglePostAPI.remove(nextRetryPostID);
+            final ArrayList<DownloadLink> thisresults = crawlPostAPI(br, service, usernameOrUserID, nextRetryPostID);
+            for (final DownloadLink thisresult : thisresults) {
+                if (!perPostPackageEnabled) {
+                    thisresult._setFilePackage(profileFilePackage);
+                }
+                distribute(thisresult);
+            }
+            ret.addAll(thisresults);
+        }
         return ret;
     }
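
The hunk above adds a two-pass flow: while paging through the posts list API, any post whose "content" is missing but whose "substring" field is set gets its ID queued in retryWithSinglePostAPI, and once pagination finishes those posts are re-fetched one by one via the single-post API. A self-contained sketch of that queue-then-retry idea (the fetch helpers below are hypothetical stand-ins, not the plugin's real API calls):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class DeferredRetrySketch {
        public static void main(String[] args) {
            final Set<String> retryWithSinglePostAPI = new HashSet<String>();
            final List<String> results = new ArrayList<String>();
            // Pass 1: paginated list endpoint; queue anything whose content looks truncated.
            for (final String postId : new String[] { "101", "102", "103" }) {
                final String content = fetchFromListApi(postId);
                if (content == null) {
                    retryWithSinglePostAPI.add(postId);
                } else {
                    results.add(content);
                }
            }
            // Pass 2: re-fetch each queued post individually.
            while (!retryWithSinglePostAPI.isEmpty()) {
                final String nextRetryPostID = retryWithSinglePostAPI.iterator().next();
                retryWithSinglePostAPI.remove(nextRetryPostID);
                results.add(fetchFromPostApi(nextRetryPostID));
            }
            System.out.println(results);
        }

        /** Hypothetical list-API fetch; returns null to simulate a truncated post. */
        private static String fetchFromListApi(String postId) {
            return "102".equals(postId) ? null : "list content of " + postId;
        }

        /** Hypothetical single-post-API fetch. */
        private static String fetchFromPostApi(String postId) {
            return "full content of " + postId;
        }
    }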

@@ -252,11 +284,11 @@ private ArrayList<DownloadLink> crawlPost(final CryptedLink param) throws Except
         final String service = urlinfo.getMatch(0);
         final String usernameOrUserID = urlinfo.getMatch(1);
         final String postID = urlinfo.getMatch(2);
-        return crawlPostAPI(param, service, usernameOrUserID, postID);
+        return crawlPostAPI(br, service, usernameOrUserID, postID);
     }
 
     /** API docs: https://kemono.su/api/schema */
-    private ArrayList<DownloadLink> crawlPostAPI(final CryptedLink param, final String service, final String userID, final String postID) throws Exception {
+    private ArrayList<DownloadLink> crawlPostAPI(final Browser br, final String service, final String userID, final String postID) throws Exception {
         if (service == null || userID == null || postID == null) {
             /* Developer mistake */
             throw new PluginException(LinkStatus.ERROR_PLUGIN_DEFECT);
@@ -282,7 +314,9 @@ private ArrayList<DownloadLink> crawlProcessPostAPI(final Map<String, Object> po
         final String postID = postmap.get("id").toString();
         final String posturl = "https://" + this.getHost() + "/" + service + "/user/" + usernameOrUserID + "/post/" + postID;
         final String postTitle = postmap.get("title").toString();
+        /* Every item has a "published" date */
         final String publishedDateStr = StringUtils.valueOfOrNull(postmap.get("published"));
+        /* Not all items have an "edited" date */
        final String editedDateStr = StringUtils.valueOfOrNull(postmap.get("edited"));
         final ArrayList<DownloadLink> kemonoResults = new ArrayList<DownloadLink>();
         int numberofResultsSimpleCount = 0;
@@ -313,7 +347,6 @@ private ArrayList<DownloadLink> crawlProcessPostAPI(final Map<String, Object> po
         final ArrayList<DownloadLink> ret = new ArrayList<DownloadLink>();
         final FilePackage postFilePackage = getFilePackageForPostCrawler(service, usernameOrUserID, postID, postTitle);
         String postTextContent = (String) postmap.get("content");
-        final KemonoPartyConfig cfg = PluginJsonConfig.get(getConfigInterface());
         if (!StringUtils.isEmpty(postTextContent)) {
             if (cfg.isCrawlHttpLinksFromPostContent()) {
                 /* Place number 1 where we can crawl external http links from */
@@ -432,15 +465,13 @@ private DownloadLink buildFileDownloadLinkAPI(final HashSet<String> dupes, final
     }
 
     private static Map<String, String> ID_TO_USERNAME = new LinkedHashMap<String, String>() {
-        protected boolean removeEldestEntry(Map.Entry<String, String> eldest) {
-            return size() > 100;
-        };
-    };
+        protected boolean removeEldestEntry(Map.Entry<String, String> eldest) {
+            return size() > 100;
+        };
+    };
 
     /**
-     * Returns userID for given username. </br>
-     * Uses API to find userID. </br>
-     * Throws Exception if it is unable to find userID.
+     * Returns userID for given username. </br> Uses API to find userID. </br> Throws Exception if it is unable to find userID.
      */
     private String findUsername(final String service, final String usernameOrUserID) throws Exception {
         synchronized (ID_TO_USERNAME) {
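
ID_TO_USERNAME in the hunk above is a LinkedHashMap whose removeEldestEntry override evicts the oldest mapping once the map holds more than 100 entries, i.e. a small insertion-ordered cache for ID-to-username lookups. A self-contained sketch of that bounded-cache idiom (the threshold of 100 matches the plugin; the demo values are made up):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class BoundedCacheSketch {
        private static final Map<String, String> ID_TO_USERNAME = new LinkedHashMap<String, String>() {
            @Override
            protected boolean removeEldestEntry(Map.Entry<String, String> eldest) {
                return size() > 100; // evict the eldest mapping once more than 100 entries are cached
            }
        };

        public static void main(String[] args) {
            for (int i = 0; i < 150; i++) {
                ID_TO_USERNAME.put("id" + i, "user" + i);
            }
            System.out.println(ID_TO_USERNAME.size());               // 100
            System.out.println(ID_TO_USERNAME.containsKey("id0"));   // false: evicted
            System.out.println(ID_TO_USERNAME.containsKey("id149")); // true: recent insertion kept
        }
    }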
