698
698
< h1 > skbio.stats.isubsample< a class ="headerlink " href ="#skbio-stats-isubsample " title ="Link to this heading "> #</ a > </ h1 >
699
699
< dl class ="py function ">
700
700
< dt class ="sig sig-object py " id ="skbio.stats.isubsample ">
701
- < span class ="sig-prename descclassname "> < span class ="pre "> skbio.stats.</ span > </ span > < span class ="sig-name descname "> < span class ="pre "> isubsample</ span > </ span > < span class ="sig-paren "> (</ span > < em class ="sig-param "> < span class ="n "> < span class ="pre "> items</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> maximum</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> minimum</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> 1</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> buf_size</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> 1000</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> bin_f</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> None</ span > </ span > </ em > < span class ="sig-paren "> )</ span > < a class ="reference external " href ="https://github.com/scikit-bio/scikit-bio/blob/main/skbio/stats/_subsample.py#L21 "> < span class ="viewcode-link "> < span class ="pre "> [source]</ span > </ span > </ a > < a class ="headerlink " href ="#skbio.stats.isubsample " title ="Link to this definition "> #</ a > </ dt >
701
+ < span class ="sig-prename descclassname "> < span class ="pre "> skbio.stats.</ span > </ span > < span class ="sig-name descname "> < span class ="pre "> isubsample</ span > </ span > < span class ="sig-paren "> (</ span > < em class ="sig-param "> < span class ="n "> < span class ="pre "> items</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> maximum</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> minimum</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> 1</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> buf_size</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> 1000</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> bin_f</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> None</ span > </ span > </ em > , < em class ="sig-param "> < span class ="n "> < span class ="pre "> seed</ span > </ span > < span class ="o "> < span class ="pre "> =</ span > </ span > < span class ="default_value "> < span class ="pre "> None</ span > </ span > </ em > < span class ="sig-paren "> )</ span > < a class ="reference external " href ="https://github.com/scikit-bio/scikit-bio/blob/main/skbio/stats/_subsample.py#L21 "> < span class ="viewcode-link "> < span class ="pre "> [source]</ span > </ span > </ a > < a class ="headerlink " href ="#skbio.stats.isubsample " title ="Link to this definition "> #</ a > </ dt >
702
702
< dd > < p > Randomly subsample items from bins, without replacement.</ p >
703
703
< p > Randomly subsample items without replacement from an unknown number of
704
704
input items that may fall into an unknown number of bins. This method is
705
705
intended for either a) data that cannot fit into memory or b) subsampling
706
706
collections of arbitrary datatypes.</ p >
707
- < dl class ="field-list simple ">
707
+ < dl class ="field-list ">
708
708
< dt class ="field-odd "> Parameters< span class ="colon "> :</ span > </ dt >
709
- < dd class ="field-odd "> < dl class =" simple " >
709
+ < dd class ="field-odd "> < dl >
710
710
< dt > < strong > items</ strong > < span class ="classifier "> Iterable</ span > </ dt > < dd > < p > The items to evaluate.</ p >
711
711
</ dd >
712
712
< dt > < strong > maximum</ strong > < span class ="classifier "> unsigned int</ span > </ dt > < dd > < p > The maximum number of items per bin.</ p >
@@ -726,6 +726,11 @@ <h1>skbio.stats.isubsample<a class="headerlink" href="#skbio-stats-isubsample" t
726
726
return a hashable value indicating the bin that that entry should be
727
727
placed in.</ p >
728
728
</ dd >
729
+ < dt > < strong > seed</ strong > < span class ="classifier "> int or np.random.Generator, optional</ span > </ dt > < dd > < p > A user-provided random seed or random generator instance.</ p >
730
+ < div class ="versionadded ">
731
+ < p > < span class ="versionmodified added "> Added in version 0.6.3.</ span > </ p >
732
+ </ div >
733
+ </ dd >
729
734
</ dl >
730
735
</ dd >
731
736
< dt class ="field-even "> Returns< span class ="colon "> :</ span > </ dt >
@@ -760,32 +765,30 @@ <h1>skbio.stats.isubsample<a class="headerlink" href="#skbio-stats-isubsample" t
760
765
< p > Randomly keep up to 2 sequences per sample from a set of demultiplexed
761
766
sequences:</ p >
762
767
< div class ="doctest highlight-default notranslate "> < div class ="highlight "> < pre > < span > </ span > < span class ="gp "> >>> </ span > < span class ="kn "> from</ span > < span class ="nn "> skbio.stats</ span > < span class ="kn "> import</ span > < span class ="n "> isubsample</ span >
763
- < span class ="gp "> >>> </ span > < span class ="kn "> import</ span > < span class ="nn "> numpy</ span > < span class ="k "> as</ span > < span class ="nn "> np</ span >
764
- < span class ="gp "> >>> </ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> random</ span > < span class ="o "> .</ span > < span class ="n "> seed</ span > < span class ="p "> (</ span > < span class ="mi "> 123</ span > < span class ="p "> )</ span >
765
768
< span class ="gp "> >>> </ span > < span class ="n "> seqs</ span > < span class ="o "> =</ span > < span class ="p "> [(</ span > < span class ="s1 "> 'sampleA'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'AATTGG'</ span > < span class ="p "> ),</ span >
766
769
< span class ="gp "> ... </ span > < span class ="p "> (</ span > < span class ="s1 "> 'sampleB'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'ATATATAT'</ span > < span class ="p "> ),</ span >
767
770
< span class ="gp "> ... </ span > < span class ="p "> (</ span > < span class ="s1 "> 'sampleC'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'ATGGCC'</ span > < span class ="p "> ),</ span >
768
771
< span class ="gp "> ... </ span > < span class ="p "> (</ span > < span class ="s1 "> 'sampleB'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'ATGGCT'</ span > < span class ="p "> ),</ span >
769
772
< span class ="gp "> ... </ span > < span class ="p "> (</ span > < span class ="s1 "> 'sampleB'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'ATGGCG'</ span > < span class ="p "> ),</ span >
770
773
< span class ="gp "> ... </ span > < span class ="p "> (</ span > < span class ="s1 "> 'sampleA'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'ATGGCA'</ span > < span class ="p "> )]</ span >
771
774
< span class ="gp "> >>> </ span > < span class ="n "> bin_f</ span > < span class ="o "> =</ span > < span class ="k "> lambda</ span > < span class ="n "> item</ span > < span class ="p "> :</ span > < span class ="n "> item</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span >
772
- < span class ="gp "> >>> </ span > < span class ="k "> for</ span > < span class ="n "> bin_</ span > < span class ="p "> ,</ span > < span class ="n "> item</ span > < span class ="ow "> in</ span > < span class ="nb "> sorted</ span > < span class ="p "> (</ span > < span class ="n "> isubsample</ span > < span class ="p "> (</ span > < span class ="n "> seqs</ span > < span class ="p "> ,</ span > < span class ="mi "> 2</ span > < span class ="p "> ,</ span > < span class ="n "> bin_f</ span > < span class ="o "> =</ span > < span class ="n "> bin_f</ span > < span class ="p "> )):</ span >
775
+ < span class ="gp "> >>> </ span > < span class ="k "> for</ span > < span class ="n "> bin_</ span > < span class ="p "> ,</ span > < span class ="n "> item</ span > < span class ="ow "> in</ span > < span class ="nb "> sorted</ span > < span class ="p "> (</ span > < span class ="n "> isubsample</ span > < span class ="p "> (</ span > < span class ="n "> seqs</ span > < span class ="p "> ,</ span > < span class ="mi "> 2</ span > < span class ="p "> ,</ span > < span class ="n "> bin_f</ span > < span class ="o "> =</ span > < span class ="n "> bin_f</ span > < span class ="p "> , </ span > < span class =" n " > seed </ span > < span class =" o " > = </ span > < span class =" mi " > 123 </ span > < span class =" p " > )):</ span >
773
776
< span class ="gp "> ... </ span > < span class ="nb "> print</ span > < span class ="p "> (</ span > < span class ="n "> bin_</ span > < span class ="p "> ,</ span > < span class ="n "> item</ span > < span class ="p "> [</ span > < span class ="mi "> 1</ span > < span class ="p "> ])</ span >
774
777
< span class ="go "> sampleA AATTGG</ span >
775
778
< span class ="go "> sampleA ATGGCA</ span >
776
- < span class ="go "> sampleB ATATATAT</ span >
777
779
< span class ="go "> sampleB ATGGCG</ span >
780
+ < span class ="go "> sampleB ATGGCT</ span >
778
781
< span class ="go "> sampleC ATGGCC</ span >
779
782
</ pre > </ div >
780
783
</ div >
781
784
< p > Now, let’s set the minimum to 2:</ p >
782
785
< div class ="doctest highlight-default notranslate "> < div class ="highlight "> < pre > < span > </ span > < span class ="gp "> >>> </ span > < span class ="n "> bin_f</ span > < span class ="o "> =</ span > < span class ="k "> lambda</ span > < span class ="n "> item</ span > < span class ="p "> :</ span > < span class ="n "> item</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span >
783
- < span class ="gp "> >>> </ span > < span class ="k "> for</ span > < span class ="n "> bin_</ span > < span class ="p "> ,</ span > < span class ="n "> item</ span > < span class ="ow "> in</ span > < span class ="nb "> sorted</ span > < span class ="p "> (</ span > < span class ="n "> isubsample</ span > < span class ="p "> (</ span > < span class ="n "> seqs</ span > < span class ="p "> ,</ span > < span class ="mi "> 2</ span > < span class ="p "> ,</ span > < span class ="mi "> 2</ span > < span class ="p "> ,</ span > < span class ="n "> bin_f</ span > < span class ="o "> =</ span > < span class ="n "> bin_f</ span > < span class ="p "> )):</ span >
786
+ < span class ="gp "> >>> </ span > < span class ="k "> for</ span > < span class ="n "> bin_</ span > < span class ="p "> ,</ span > < span class ="n "> item</ span > < span class ="ow "> in</ span > < span class ="nb "> sorted</ span > < span class ="p "> (</ span > < span class ="n "> isubsample</ span > < span class ="p "> (</ span > < span class ="n "> seqs</ span > < span class ="p "> ,</ span > < span class ="mi "> 2</ span > < span class ="p "> ,</ span > < span class ="mi "> 2</ span > < span class ="p "> ,</ span > < span class ="n "> bin_f</ span > < span class ="o "> =</ span > < span class ="n "> bin_f</ span > < span class ="p "> , </ span > < span class =" n " > seed </ span > < span class =" o " > = </ span > < span class =" mi " > 123 </ span > < span class =" p " > )):</ span >
784
787
< span class ="gp "> ... </ span > < span class ="nb "> print</ span > < span class ="p "> (</ span > < span class ="n "> bin_</ span > < span class ="p "> ,</ span > < span class ="n "> item</ span > < span class ="p "> [</ span > < span class ="mi "> 1</ span > < span class ="p "> ])</ span >
785
788
< span class ="go "> sampleA AATTGG</ span >
786
789
< span class ="go "> sampleA ATGGCA</ span >
787
- < span class ="go "> sampleB ATATATAT</ span >
788
790
< span class ="go "> sampleB ATGGCG</ span >
791
+ < span class ="go "> sampleB ATGGCT</ span >
789
792
</ pre > </ div >
790
793
</ div >
791
794
</ dd > </ dl >
0 commit comments