AliceVision
Photogrammetric Computer Vision Framework
metric.hpp
1 // This file is part of the AliceVision project.
2 // Copyright (c) 2016 AliceVision contributors.
3 // Copyright (c) 2012 openMVG contributors.
4 // This Source Code Form is subject to the terms of the Mozilla Public License,
5 // v. 2.0. If a copy of the MPL was not distributed with this file,
6 // You can obtain one at https://mozilla.org/MPL/2.0/.
7 
8 #pragma once
9 
10 #include "Hamming.hpp"
11 
12 #include <aliceVision/numeric/Accumulator.hpp>
13 #include <aliceVision/config.hpp>
14 
15 #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_SSE)
16  #include <aliceVision/system/Logger.hpp>
17  #include <xmmintrin.h>
18 #endif
19 
20 #include <cstddef>
21 
22 namespace aliceVision {
23 namespace feature {
24 
26 template<class T>
27 struct L2_Simple
28 {
29  typedef T ElementType;
30  typedef typename Accumulator<T>::Type ResultType;
31 
32  template<typename Iterator1, typename Iterator2>
33  inline ResultType operator()(Iterator1 a, Iterator2 b, size_t size) const
34  {
35  ResultType result = ResultType();
36  ResultType diff;
37  for (size_t i = 0; i < size; ++i)
38  {
39  diff = *a++ - *b++;
40  result += diff * diff;
41  }
42  return result;
43  }
44 };
45 
47 template<class T>
49 {
50  typedef T ElementType;
51  typedef typename Accumulator<T>::Type ResultType;
52 
53  template<typename Iterator1, typename Iterator2>
54  inline ResultType operator()(Iterator1 a, Iterator2 b, size_t size) const
55  {
56  ResultType result = ResultType();
57  ResultType diff0, diff1, diff2, diff3;
58  Iterator1 last = a + size;
59  Iterator1 lastgroup = last - 3;
60 
61  // Process 4 items with each loop for efficiency.
62  while (a < lastgroup)
63  {
64  diff0 = a[0] - b[0];
65  diff1 = a[1] - b[1];
66  diff2 = a[2] - b[2];
67  diff3 = a[3] - b[3];
68  result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
69  a += 4;
70  b += 4;
71  }
72  // Process last 0-3 pixels. Not needed for standard vector lengths.
73  while (a < last)
74  {
75  diff0 = *a++ - *b++;
76  result += diff0 * diff0;
77  }
78  return result;
79  }
80 };
81 
82 #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_SSE)
83 
84 namespace optim_ss2 {
85 
87 union sseRegisterHelper
88 {
89  __m128 m;
90  float f[4];
91 };
92 
93 // Euclidean distance (SSE method) (squared result)
94 inline float l2_sse(const float* b1, const float* b2, int size)
95 {
96  float* b1Pt = (float*)b1;
97  float* b2Pt = (float*)b2;
98  if (size % 4 == 0)
99  {
100  __m128 srcA, srcB, temp, cumSum;
101  float zeros[4] = {0.f, 0.f, 0.f, 0.f};
102  cumSum = _mm_load_ps(zeros);
103  for (int i = 0; i < size; i += 4)
104  {
105  srcA = _mm_load_ps(b1Pt + i);
106  srcB = _mm_load_ps(b2Pt + i);
107  //-- Subtract
108  temp = _mm_sub_ps(srcA, srcB);
109  //-- Multiply
110  temp = _mm_mul_ps(temp, temp);
111  //-- sum
112  cumSum = _mm_add_ps(cumSum, temp);
113  }
114  sseRegisterHelper res;
115  res.m = cumSum;
116  return (res.f[0] + res.f[1] + res.f[2] + res.f[3]);
117  }
118  else
119  {
120  ALICEVISION_LOG_WARNING("/!\\ size is not modulus 4, distance cannot be performed in SSE");
121  return 0.0f;
122  }
123 }
124 } // namespace optim_ss2
125 
126 // Template specification to run SSE L2 squared distance
127 // on float vector
128 template<>
129 struct L2_Vectorized<float>
130 {
131  typedef float ElementType;
132  typedef Accumulator<float>::Type ResultType;
133 
134  template<typename Iterator1, typename Iterator2>
135  inline ResultType operator()(Iterator1 a, Iterator2 b, size_t size) const
136  {
137  return optim_ss2::l2_sse(a, b, size);
138  }
139 };
140 
141 #endif // ALICEVISION_HAVE_SSE
142 
143 } // namespace feature
144 } // namespace aliceVision
aliceVision::feature::L2_Simple
Squared Euclidean distance functor.
Definition: metric.hpp:27
aliceVision
Definition: checkerDetector.cpp:32
aliceVision::feature::L2_Vectorized
Squared Euclidean distance functor (vectorized version)
Definition: metric.hpp:48