Skip to content

Commit 05fbd1e

Browse files
committed
DNA mode: add the distance computations
1 parent f3cebb3 commit 05fbd1e

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

modules/flann/include/opencv2/flann.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ using ::cvflann::MaxDistance;
9595
using ::cvflann::HammingLUT;
9696
using ::cvflann::Hamming;
9797
using ::cvflann::Hamming2;
98+
using ::cvflann::DNAmmingLUT;
99+
using ::cvflann::DNAmming2;
98100
using ::cvflann::HistIntersectionDistance;
99101
using ::cvflann::HellingerDistance;
100102
using ::cvflann::ChiSquareDistance;
@@ -131,6 +133,14 @@ performed using library calls, if available. Lookup table implementation is used
131133
cv::flann::Hamming2 - %Hamming distance functor. Population count is
132134
implemented in 12 arithmetic operations (one of which is multiplication).
133135
136+
cv::flann::DNAmmingLUT - %Adaptation of the Hamming distance functor to DNA comparison.
137+
As the four bases A, C, G, T of DNA (or A, C, G, U of RNA) can each be encoded in 2 bits,
138+
it counts the bit-pair differences between two sequences using a lookup-table implementation.
139+
140+
cv::flann::DNAmming2 - %Adaptation of the Hamming distance functor to DNA comparison.
141+
Base-difference counting is vectorised using arithmetic operations on standard
142+
registers (AVX2 and AVX-512 support should come in the near future).
143+
134144
cv::flann::HistIntersectionDistance - The histogram
135145
intersection distance functor.
136146

modules/flann/include/opencv2/flann/defines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ enum flann_distance_t
128128
FLANN_DIST_KULLBACK_LEIBLER = 8,
129129
FLANN_DIST_KL = 8,
130130
FLANN_DIST_HAMMING = 9,
131+
FLANN_DIST_DNAMMING = 10,
131132

132133
// deprecated constants, should use the FLANN_DIST_* ones instead
133134
EUCLIDEAN = 1,

modules/flann/include/opencv2/flann/dist.h

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,157 @@ struct Hamming2
744744

745745
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
746746

747+
struct DNAmmingLUT
748+
{
749+
typedef False is_kdtree_distance;
750+
typedef False is_vector_space_distance;
751+
752+
typedef unsigned char ElementType;
753+
typedef int ResultType;
754+
typedef ElementType CentersType;
755+
756+
/** this will count the bits in a ^ b
757+
*/
758+
template<typename Iterator2>
759+
ResultType operator()(const unsigned char* a, const Iterator2 b, size_t size) const
760+
{
761+
static const uchar popCountTable[] =
762+
{
763+
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
764+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
765+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
766+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
767+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
768+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
769+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
770+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
771+
};
772+
ResultType result = 0;
773+
const unsigned char* b2 = reinterpret_cast<const unsigned char*> (b);
774+
for (size_t i = 0; i < size; i++) {
775+
result += popCountTable[a[i] ^ b2[i]];
776+
}
777+
return result;
778+
}
779+
780+
781+
ResultType operator()(const unsigned char* a, const ZeroIterator<unsigned char> b, size_t size) const
782+
{
783+
(void)b;
784+
static const uchar popCountTable[] =
785+
{
786+
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
787+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
788+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
789+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
790+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
791+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
792+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
793+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
794+
};
795+
ResultType result = 0;
796+
for (size_t i = 0; i < size; i++) {
797+
result += popCountTable[a[i]];
798+
}
799+
return result;
800+
}
801+
};
802+
803+
804+
template<typename T>
805+
struct DNAmming2
806+
{
807+
typedef False is_kdtree_distance;
808+
typedef False is_vector_space_distance;
809+
810+
typedef T ElementType;
811+
typedef int ResultType;
812+
typedef ElementType CentersType;
813+
814+
/** This is popcount_3() from:
815+
* http://en.wikipedia.org/wiki/Hamming_weight */
816+
unsigned int popcnt32(uint32_t n) const
817+
{
818+
n = ((n >> 1) | n) & 0x55555555;
819+
n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
820+
return (((n + (n >> 4))& 0x0F0F0F0F)* 0x01010101) >> 24;
821+
}
822+
823+
#ifdef FLANN_PLATFORM_64_BIT
824+
unsigned int popcnt64(uint64_t n) const
825+
{
826+
n = ((n >> 1) | n) & 0x5555555555555555;
827+
n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
828+
return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
829+
}
830+
#endif
831+
832+
template <typename Iterator1, typename Iterator2>
833+
ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
834+
{
835+
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
836+
837+
#ifdef FLANN_PLATFORM_64_BIT
838+
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
839+
const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
840+
ResultType result = 0;
841+
size /= long_word_size_;
842+
for(size_t i = 0; i < size; ++i ) {
843+
result += popcnt64(*pa ^ *pb);
844+
++pa;
845+
++pb;
846+
}
847+
#else
848+
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
849+
const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
850+
ResultType result = 0;
851+
size /= long_word_size_;
852+
for(size_t i = 0; i < size; ++i ) {
853+
result += popcnt32(*pa ^ *pb);
854+
++pa;
855+
++pb;
856+
}
857+
#endif
858+
return result;
859+
}
860+
861+
862+
template <typename Iterator1>
863+
ResultType operator()(const Iterator1 a, ZeroIterator<unsigned char> b, size_t size, ResultType /*worst_dist*/ = -1) const
864+
{
865+
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
866+
867+
(void)b;
868+
#ifdef FLANN_PLATFORM_64_BIT
869+
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
870+
ResultType result = 0;
871+
size /= long_word_size_;
872+
for(size_t i = 0; i < size; ++i ) {
873+
result += popcnt64(*pa);
874+
++pa;
875+
}
876+
#else
877+
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
878+
ResultType result = 0;
879+
size /= long_word_size_;
880+
for(size_t i = 0; i < size; ++i ) {
881+
result += popcnt32(*pa);
882+
++pa;
883+
}
884+
#endif
885+
return result;
886+
}
887+
888+
private:
889+
#ifdef FLANN_PLATFORM_64_BIT
890+
static const size_t long_word_size_= sizeof(uint64_t)/sizeof(unsigned char);
891+
#else
892+
static const size_t long_word_size_= sizeof(uint32_t)/sizeof(unsigned char);
893+
#endif
894+
};
895+
896+
897+
747898
template<class T>
748899
struct HistIntersectionDistance
749900
{

0 commit comments

Comments
 (0)