Skip to content

Commit fc0f9da

Browse files
committed
Merge pull request opencv#18084 from pemmanuelviel:pev--add-DNA-distances
2 parents 277961f + 05fbd1e commit fc0f9da

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

modules/flann/include/opencv2/flann.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ using ::cvflann::MaxDistance;
9595
using ::cvflann::HammingLUT;
9696
using ::cvflann::Hamming;
9797
using ::cvflann::Hamming2;
98+
using ::cvflann::DNAmmingLUT;
99+
using ::cvflann::DNAmming2;
98100
using ::cvflann::HistIntersectionDistance;
99101
using ::cvflann::HellingerDistance;
100102
using ::cvflann::ChiSquareDistance;
@@ -131,6 +133,14 @@ performed using library calls, if available. Lookup table implementation is used
131133
cv::flann::Hamming2 - %Hamming distance functor. Population count is
132134
implemented in 12 arithmetic operations (one of which is multiplication).
133135
136+
cv::flann::DNAmmingLUT - %Adaptation of the Hamming distance functor to DNA comparison.
137+
As the four bases A, C, G, T of the DNA (or A, G, C, U for RNA) can be coded on 2 bits,
138+
it counts the differing bit pairs between two sequences using a lookup table implementation.
139+
140+
cv::flann::DNAmming2 - %Adaptation of the Hamming distance functor to DNA comparison.
141+
The count of differing bases is vectorised using arithmetic operations on standard
142+
registers (AVX2 and AVX-512 support should come in the near future).
143+
134144
cv::flann::HistIntersectionDistance - The histogram
135145
intersection distance functor.
136146

modules/flann/include/opencv2/flann/defines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ enum flann_distance_t
128128
FLANN_DIST_KULLBACK_LEIBLER = 8,
129129
FLANN_DIST_KL = 8,
130130
FLANN_DIST_HAMMING = 9,
131+
FLANN_DIST_DNAMMING = 10,
131132

132133
// deprecated constants, should use the FLANN_DIST_* ones instead
133134
EUCLIDEAN = 1,

modules/flann/include/opencv2/flann/dist.h

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,157 @@ struct Hamming2
748748

749749
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
750750

751+
struct DNAmmingLUT
752+
{
753+
typedef False is_kdtree_distance;
754+
typedef False is_vector_space_distance;
755+
756+
typedef unsigned char ElementType;
757+
typedef int ResultType;
758+
typedef ElementType CentersType;
759+
760+
/** this will count the bits in a ^ b
761+
*/
762+
template<typename Iterator2>
763+
ResultType operator()(const unsigned char* a, const Iterator2 b, size_t size) const
764+
{
765+
static const uchar popCountTable[] =
766+
{
767+
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
768+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
769+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
770+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
771+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
772+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
773+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
774+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
775+
};
776+
ResultType result = 0;
777+
const unsigned char* b2 = reinterpret_cast<const unsigned char*> (b);
778+
for (size_t i = 0; i < size; i++) {
779+
result += popCountTable[a[i] ^ b2[i]];
780+
}
781+
return result;
782+
}
783+
784+
785+
ResultType operator()(const unsigned char* a, const ZeroIterator<unsigned char> b, size_t size) const
786+
{
787+
(void)b;
788+
static const uchar popCountTable[] =
789+
{
790+
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
791+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
792+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
793+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
794+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
795+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
796+
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
797+
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
798+
};
799+
ResultType result = 0;
800+
for (size_t i = 0; i < size; i++) {
801+
result += popCountTable[a[i]];
802+
}
803+
return result;
804+
}
805+
};
806+
807+
808+
template<typename T>
809+
struct DNAmming2
810+
{
811+
typedef False is_kdtree_distance;
812+
typedef False is_vector_space_distance;
813+
814+
typedef T ElementType;
815+
typedef int ResultType;
816+
typedef ElementType CentersType;
817+
818+
/** This is popcount_3() from:
819+
* http://en.wikipedia.org/wiki/Hamming_weight */
820+
unsigned int popcnt32(uint32_t n) const
821+
{
822+
n = ((n >> 1) | n) & 0x55555555;
823+
n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
824+
return (((n + (n >> 4))& 0x0F0F0F0F)* 0x01010101) >> 24;
825+
}
826+
827+
#ifdef FLANN_PLATFORM_64_BIT
828+
unsigned int popcnt64(uint64_t n) const
829+
{
830+
n = ((n >> 1) | n) & 0x5555555555555555;
831+
n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
832+
return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
833+
}
834+
#endif
835+
836+
template <typename Iterator1, typename Iterator2>
837+
ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
838+
{
839+
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
840+
841+
#ifdef FLANN_PLATFORM_64_BIT
842+
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
843+
const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
844+
ResultType result = 0;
845+
size /= long_word_size_;
846+
for(size_t i = 0; i < size; ++i ) {
847+
result += popcnt64(*pa ^ *pb);
848+
++pa;
849+
++pb;
850+
}
851+
#else
852+
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
853+
const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
854+
ResultType result = 0;
855+
size /= long_word_size_;
856+
for(size_t i = 0; i < size; ++i ) {
857+
result += popcnt32(*pa ^ *pb);
858+
++pa;
859+
++pb;
860+
}
861+
#endif
862+
return result;
863+
}
864+
865+
866+
template <typename Iterator1>
867+
ResultType operator()(const Iterator1 a, ZeroIterator<unsigned char> b, size_t size, ResultType /*worst_dist*/ = -1) const
868+
{
869+
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
870+
871+
(void)b;
872+
#ifdef FLANN_PLATFORM_64_BIT
873+
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
874+
ResultType result = 0;
875+
size /= long_word_size_;
876+
for(size_t i = 0; i < size; ++i ) {
877+
result += popcnt64(*pa);
878+
++pa;
879+
}
880+
#else
881+
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
882+
ResultType result = 0;
883+
size /= long_word_size_;
884+
for(size_t i = 0; i < size; ++i ) {
885+
result += popcnt32(*pa);
886+
++pa;
887+
}
888+
#endif
889+
return result;
890+
}
891+
892+
private:
893+
#ifdef FLANN_PLATFORM_64_BIT
894+
static const size_t long_word_size_= sizeof(uint64_t)/sizeof(unsigned char);
895+
#else
896+
static const size_t long_word_size_= sizeof(uint32_t)/sizeof(unsigned char);
897+
#endif
898+
};
899+
900+
901+
751902
template<class T>
752903
struct HistIntersectionDistance
753904
{

0 commit comments

Comments
 (0)