 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-
-import org.appwork.net.protocol.http.HTTPConstants;
-import org.appwork.storage.TypeRef;
-import org.appwork.utils.DebugMode;
-import org.appwork.utils.Regex;
-import org.appwork.utils.StringUtils;
-import org.appwork.utils.parser.UrlQuery;
-import org.jdownloader.plugins.components.config.KemonoPartyConfig;
-import org.jdownloader.plugins.components.config.KemonoPartyConfig.TextCrawlMode;
-import org.jdownloader.plugins.components.config.KemonoPartyConfigCoomerParty;
-import org.jdownloader.plugins.config.PluginJsonConfig;
-import org.jdownloader.plugins.controller.LazyPlugin;
+import java.util.Set;
 
 import jd.PluginWrapper;
 import jd.controlling.ProgressController;
 import jd.plugins.PluginForDecrypt;
 import jd.plugins.hoster.KemonoParty;
 
-@DecrypterPlugin(revision = "$Revision: 51362 $", interfaceVersion = 3, names = {}, urls = {})
+import org.appwork.net.protocol.http.HTTPConstants;
+import org.appwork.storage.TypeRef;
+import org.appwork.utils.DebugMode;
+import org.appwork.utils.Regex;
+import org.appwork.utils.StringUtils;
+import org.appwork.utils.parser.UrlQuery;
+import org.jdownloader.plugins.components.config.KemonoPartyConfig;
+import org.jdownloader.plugins.components.config.KemonoPartyConfig.TextCrawlMode;
+import org.jdownloader.plugins.components.config.KemonoPartyConfigCoomerParty;
+import org.jdownloader.plugins.config.PluginJsonConfig;
+import org.jdownloader.plugins.controller.LazyPlugin;
+
+@DecrypterPlugin(revision = "$Revision: 51371 $", interfaceVersion = 3, names = {}, urls = {})
 public class KemonoPartyCrawler extends PluginForDecrypt {
     public KemonoPartyCrawler(PluginWrapper wrapper) {
         super(wrapper);
@@ -118,7 +119,10 @@ private String getApiBase() {
         return "https://" + getHost() + "/api/v1";
     }
 
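+    /* Cached plugin settings; set at the start of each decryptIt run and cleared again in clean() */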
+    private KemonoPartyConfig cfg = null;
+
     public ArrayList<DownloadLink> decryptIt(final CryptedLink param, ProgressController progress) throws Exception {
+        cfg = PluginJsonConfig.get(getConfigInterface());
         cl = param;
         if (param.getCryptedUrl().matches(TYPE_PROFILE)) {
             return this.crawlProfile(param);
@@ -130,6 +134,12 @@ public ArrayList<DownloadLink> decryptIt(final CryptedLink param, ProgressContro
         }
     }
 
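+    /* Drop the cached config reference once this plugin instance gets cleaned up */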
+    @Override
+    public void clean() {
+        cfg = null;
+        super.clean();
+    }
+
     private ArrayList<DownloadLink> crawlProfile(final CryptedLink param) throws Exception {
         final Regex urlinfo = new Regex(param.getCryptedUrl(), TYPE_PROFILE);
         if (!urlinfo.patternFind()) {
@@ -156,8 +166,8 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
             throw new PluginException(LinkStatus.ERROR_PLUGIN_DEFECT);
         }
         final HashSet<String> dupes = new HashSet<String>();
-        final boolean useAdvancedDupecheck = PluginJsonConfig.get(getConfigInterface()).isEnableProfileCrawlerAdvancedDupeFiltering();
-        final boolean perPostPackageEnabled = PluginJsonConfig.get(getConfigInterface()).isPerPostURLPackageEnabled();
+        final boolean useAdvancedDupecheck = cfg.isEnableProfileCrawlerAdvancedDupeFiltering();
+        final boolean perPostPackageEnabled = cfg.isPerPostURLPackageEnabled();
         final ArrayList<DownloadLink> ret = new ArrayList<DownloadLink>();
         final FilePackage profileFilePackage = getFilePackageForProfileCrawler(service, usernameOrUserID);
         int offset = 0;
@@ -169,6 +179,7 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
         final int maxItemsPerPage = 50;
         int numberofContinuousPagesWithoutAnyNewItems = 0;
         final int maxPagesWithoutNewItems = 15;
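+        /* Collects IDs of posts whose text came back truncated from the list API; they are re-crawled below */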
+        final Set<String> retryWithSinglePostAPI = new HashSet<String>();
         do {
             getPage(br, this.getApiBase() + "/" + service + "/user/" + Encoding.urlEncode(usernameOrUserID) + "/posts?o=" + offset);
             final List<Map<String, Object>> posts = (List<Map<String, Object>>) restoreFromString(br.getRequest().getHtmlCode(), TypeRef.OBJECT);
@@ -184,6 +195,14 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
             final int numberofUniqueItemsOld = dupes.size();
             for (final Map<String, Object> post : posts) {
                 final ArrayList<DownloadLink> thisresults = this.crawlProcessPostAPI(post, dupes, useAdvancedDupecheck);
+                if (post.get("content") == null && StringUtils.isNotEmpty(StringUtils.valueOfOrNull(post.get("substring")))) {
+                    // The posts list API no longer returns the full post content, only a substring, so we have to retry via the single post API.
+                    final TextCrawlMode mode = cfg.getTextCrawlMode();
+                    if (cfg.isCrawlHttpLinksFromPostContent() || mode == TextCrawlMode.ALWAYS || (mode == TextCrawlMode.ONLY_IF_NO_MEDIA_ITEMS_ARE_FOUND && thisresults.isEmpty())) {
+                        retryWithSinglePostAPI.add(post.get("id").toString());
+                        logger.info("Need to process item " + post.get("id") + " again due to possibly incomplete post content");
+                    }
+                }
                 for (final DownloadLink thisresult : thisresults) {
                     if (!perPostPackageEnabled) {
                         thisresult._setFilePackage(profileFilePackage);
@@ -218,6 +237,19 @@ private ArrayList<DownloadLink> crawlProfileAPI(final String service, final Stri
                 page++;
             }
         } while (!this.isAbort());
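+        /* Second pass: re-crawl posts for which the list API returned incomplete content */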
+        logger.info("Need to process " + retryWithSinglePostAPI.size() + " items again due to possibly incomplete post content");
+        while (!this.isAbort() && retryWithSinglePostAPI.size() > 0) {
+            final String nextRetryPostID = retryWithSinglePostAPI.iterator().next();
+            retryWithSinglePostAPI.remove(nextRetryPostID);
+            final ArrayList<DownloadLink> thisresults = crawlPostAPI(br, service, usernameOrUserID, nextRetryPostID);
+            for (final DownloadLink thisresult : thisresults) {
+                if (!perPostPackageEnabled) {
+                    thisresult._setFilePackage(profileFilePackage);
+                }
+                distribute(thisresult);
+            }
+            ret.addAll(thisresults);
+        }
         return ret;
     }
 
@@ -252,11 +284,11 @@ private ArrayList<DownloadLink> crawlPost(final CryptedLink param) throws Except
         final String service = urlinfo.getMatch(0);
         final String usernameOrUserID = urlinfo.getMatch(1);
         final String postID = urlinfo.getMatch(2);
-        return crawlPostAPI(param, service, usernameOrUserID, postID);
+        return crawlPostAPI(br, service, usernameOrUserID, postID);
     }
 
     /** API docs: https://kemono.su/api/schema */
-    private ArrayList<DownloadLink> crawlPostAPI(final CryptedLink param, final String service, final String userID, final String postID) throws Exception {
+    private ArrayList<DownloadLink> crawlPostAPI(final Browser br, final String service, final String userID, final String postID) throws Exception {
         if (service == null || userID == null || postID == null) {
             /* Developer mistake */
             throw new PluginException(LinkStatus.ERROR_PLUGIN_DEFECT);
@@ -282,7 +314,9 @@ private ArrayList<DownloadLink> crawlProcessPostAPI(final Map<String, Object> po
         final String postID = postmap.get("id").toString();
         final String posturl = "https://" + this.getHost() + "/" + service + "/user/" + usernameOrUserID + "/post/" + postID;
         final String postTitle = postmap.get("title").toString();
+        /* Every item has a "published" date */
         final String publishedDateStr = StringUtils.valueOfOrNull(postmap.get("published"));
+        /* Not every item has an "edited" date */
         final String editedDateStr = StringUtils.valueOfOrNull(postmap.get("edited"));
         final ArrayList<DownloadLink> kemonoResults = new ArrayList<DownloadLink>();
         int numberofResultsSimpleCount = 0;
@@ -313,7 +347,6 @@ private ArrayList<DownloadLink> crawlProcessPostAPI(final Map<String, Object> po
         final ArrayList<DownloadLink> ret = new ArrayList<DownloadLink>();
         final FilePackage postFilePackage = getFilePackageForPostCrawler(service, usernameOrUserID, postID, postTitle);
         String postTextContent = (String) postmap.get("content");
-        final KemonoPartyConfig cfg = PluginJsonConfig.get(getConfigInterface());
         if (!StringUtils.isEmpty(postTextContent)) {
             if (cfg.isCrawlHttpLinksFromPostContent()) {
                 /* First place from which external http links can be crawled */
@@ -432,15 +465,13 @@ private DownloadLink buildFileDownloadLinkAPI(final HashSet<String> dupes, final
     }
 
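+    /* Keeps at most 100 entries; once the limit is exceeded the eldest entry gets evicted (small ID -> username cache) */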
     private static Map<String, String> ID_TO_USERNAME = new LinkedHashMap<String, String>() {
-        protected boolean removeEldestEntry(Map.Entry<String, String> eldest) {
-            return size() > 100;
-        };
-    };
+                                                           protected boolean removeEldestEntry(Map.Entry<String, String> eldest) {
+                                                               return size() > 100;
+                                                           };
+                                                       };
 
     /**
-     * Returns userID for given username. </br>
-     * Uses API to find userID. </br>
-     * Throws Exception if it is unable to find userID.
+     * Returns userID for given username. </br> Uses API to find userID. </br> Throws Exception if it is unable to find userID.
      */
     private String findUsername(final String service, final String usernameOrUserID) throws Exception {
         synchronized (ID_TO_USERNAME) {