AliceVision
Photogrammetric Computer Vision Framework
VocabularyTree.hpp
1 // This file is part of the AliceVision project.
2 // Copyright (c) 2016 AliceVision contributors.
3 // This Source Code Form is subject to the terms of the Mozilla Public License,
4 // v. 2.0. If a copy of the MPL was not distributed with this file,
5 // You can obtain one at https://mozilla.org/MPL/2.0/.
6 
7 #pragma once
8 
9 #include <aliceVision/config.hpp>
10 #include "distance.hpp"
11 #include "DefaultAllocator.hpp"
12 
13 #include <aliceVision/feature/imageDescriberCommon.hpp>
14 #include <aliceVision/feature/regionsFactory.hpp>
15 
16 #include <aliceVision/types.hpp>
17 #include <aliceVision/system/Logger.hpp>
18 
19 #include <stdint.h>
20 #include <vector>
21 #include <map>
22 #include <cassert>
23 #include <limits>
24 #include <fstream>
25 #include <stdexcept>
26 #include <iostream>
27 
28 namespace aliceVision {
29 namespace voctree {
30 
31 typedef int32_t Word;
32 
33 typedef IndexT DocId;
34 
35 typedef std::vector<Word> Document;
36 typedef std::map<Word, std::vector<IndexT>> SparseHistogram;
37 typedef std::map<DocId, SparseHistogram> SparseHistogramPerImage;
38 
39 std::ostream& operator<<(std::ostream& os, const Document& doc);
40 
48 inline void computeSparseHistogram(const std::vector<Word>& document, SparseHistogram& v)
49 {
50  // for each visual word in the list
51  for (std::size_t i = 0; i < document.size(); ++i)
52  {
53  // update its weighted count inside the map
54  // the map v contains only the visual words that are associated to some features
55  // the visual words in v are unique unlikely the document
56  Word word = document[i];
57  v[word].push_back(i);
58  }
59 }
60 
62 {
63  public:
64  virtual ~IVocabularyTree() = 0;
65 
67  virtual void save(const std::string& file) const = 0;
69  virtual void load(const std::string& file) = 0;
70 
76  virtual SparseHistogram quantizeToSparse(const void* blindDescriptors) const = 0;
77 
79  virtual uint32_t levels() const = 0;
81  virtual uint32_t splits() const = 0;
83  virtual uint32_t words() const = 0;
84 
86  virtual void clear() = 0;
87 };
88 
89 inline IVocabularyTree::~IVocabularyTree() {}
90 
101 template<class Feature, template<typename, typename> class Distance = L2> // TODO: rename Feature into Descriptor
103 {
104  public:
105  VocabularyTree();
106 
111  VocabularyTree(const std::string& file);
112 
114  template<class DescriptorT>
115  Word quantize(const DescriptorT& feature) const;
116 
118  template<class DescriptorT>
119  std::vector<Word> quantize(const std::vector<DescriptorT>& features) const;
120 
122  template<class DescriptorT>
123  SparseHistogram quantizeToSparse(const std::vector<DescriptorT>& features) const;
124 
125  SparseHistogram quantizeToSparse(const void* blindDescriptors) const override
126  {
127  const std::vector<Feature>* descriptors = static_cast<const std::vector<Feature>*>(blindDescriptors);
128  return quantizeToSparse(*descriptors);
129  }
130 
132  uint32_t levels() const override;
134  uint32_t splits() const override;
136  uint32_t words() const override;
137 
139  void clear() override;
140 
142  void save(const std::string& file) const override;
144  void load(const std::string& file) override;
145 
146  bool operator==(const VocabularyTree& other) const
147  {
148  return (centers_ == other.centers_) && (valid_centers_ == other.valid_centers_) && (k_ == other.k_) && (levels_ == other.levels_) &&
149  (num_words_ == other.num_words_) && (word_start_ == other.word_start_);
150  }
151 
152  protected:
153  std::vector<Feature> centers_;
154  std::vector<uint8_t> valid_centers_;
155 
156  uint32_t k_; // splits, or branching factor
157  uint32_t levels_;
158  uint32_t num_words_; // number of leaf nodes
159  uint32_t word_start_; // number of non-leaf nodes, or offset to the first leaf node
160 
161  bool initialized() const { return num_words_ != 0; }
162 
163  void setNodeCounts();
164 };
165 
166 template<class Feature, template<typename, typename> class Distance>
167 VocabularyTree<Feature, Distance>::VocabularyTree()
168  : k_(0),
169  levels_(0),
170  num_words_(0),
171  word_start_(0)
172 {}
173 
174 template<class Feature, template<typename, typename> class Distance>
176  : k_(0),
177  levels_(0),
178  num_words_(0),
179  word_start_(0)
180 {
181  load(file);
182 }
183 
184 template<class Feature, template<typename, typename> class Distance>
185 template<class DescriptorT>
186 Word VocabularyTree<Feature, Distance>::quantize(const DescriptorT& feature) const
187 {
188  typedef typename Distance<Feature, DescriptorT>::result_type distance_type;
189 
190  // printf("asserting\n");
191  assert(initialized());
192  // printf("initialized\n");
193  int32_t index = -1; // virtual "root" index, which has no associated center.
194  for (unsigned level = 0; level < levels_; ++level)
195  {
196  // Calculate the offset to the first child of the current index.
197  int32_t first_child = (index + 1) * splits();
198  // Find the child center closest to the query.
199  int32_t best_child = first_child;
200  distance_type best_distance = std::numeric_limits<distance_type>::max();
201  for (int32_t child = first_child; child < first_child + (int32_t)splits(); ++child)
202  {
203  if (!valid_centers_[child])
204  break; // Fewer than splits() children.
205  distance_type child_distance = Distance<DescriptorT, Feature>()(feature, centers_[child]);
206  if (child_distance < best_distance)
207  {
208  best_child = child;
209  best_distance = child_distance;
210  }
211  }
212  index = best_child;
213  }
214 
215  return index - word_start_;
216 }
217 
218 template<class Feature, template<typename, typename> class Distance>
219 template<class DescriptorT>
220 std::vector<Word> VocabularyTree<Feature, Distance>::quantize(const std::vector<DescriptorT>& features) const
221 {
222  // ALICEVISION_LOG_DEBUG("VocabularyTree quantize: " << features.size());
223  std::vector<Word> imgVisualWords(features.size(), 0);
224 
225 // quantize the features
226 #pragma omp parallel for
227  for (ptrdiff_t j = 0; j < static_cast<ptrdiff_t>(features.size()); ++j)
228  {
229  // store the visual word associated to the feature in the temporary list
230  imgVisualWords[j] = quantize<DescriptorT>(features[j]);
231  }
232 
233  // add the vector to the documents
234  return imgVisualWords;
235 }
236 
237 template<class Feature, template<typename, typename> class Distance>
238 template<class DescriptorT>
239 SparseHistogram VocabularyTree<Feature, Distance>::quantizeToSparse(const std::vector<DescriptorT>& features) const
240 {
241  SparseHistogram histo;
242  std::vector<Word> doc = quantize(features);
243  computeSparseHistogram(doc, histo);
244  return histo;
245 }
246 
247 template<class Feature, template<typename, typename> class Distance>
249 {
250  return levels_;
251 }
252 
253 template<class Feature, template<typename, typename> class Distance>
255 {
256  return k_;
257 }
258 
259 template<class Feature, template<typename, typename> class Distance>
261 {
262  return num_words_;
263 }
264 
265 template<class Feature, template<typename, typename> class Distance>
267 {
268  centers_.clear();
269  valid_centers_.clear();
270  k_ = levels_ = num_words_ = word_start_ = 0;
271 }
272 
273 template<class Feature, template<typename, typename> class Distance>
274 void VocabularyTree<Feature, Distance>::save(const std::string& file) const
275 {
278  assert(initialized());
279 
280  std::ofstream out(file, std::ios_base::binary);
281  out.write((char*)(&k_), sizeof(uint32_t));
282  out.write((char*)(&levels_), sizeof(uint32_t));
283  uint32_t size = centers_.size();
284  out.write((char*)(&size), sizeof(uint32_t));
285  out.write((char*)(&centers_[0]), centers_.size() * sizeof(Feature));
286  out.write((char*)(&valid_centers_[0]), valid_centers_.size());
287 }
288 
289 template<class Feature, template<typename, typename> class Distance>
290 void VocabularyTree<Feature, Distance>::load(const std::string& file)
291 {
292  clear();
293 
294  std::ifstream in;
295  in.exceptions(std::ifstream::eofbit | std::ifstream::failbit | std::ifstream::badbit);
296 
297  uint32_t size;
298  try
299  {
300  in.open(file, std::ios_base::binary);
301  in.read((char*)(&k_), sizeof(uint32_t));
302  in.read((char*)(&levels_), sizeof(uint32_t));
303  in.read((char*)(&size), sizeof(uint32_t));
304  centers_.resize(size);
305  valid_centers_.resize(size);
306  in.read((char*)(&centers_[0]), centers_.size() * sizeof(Feature));
307  in.read((char*)(&valid_centers_[0]), valid_centers_.size());
308  }
309  catch (std::ifstream::failure& e)
310  {
311  throw std::runtime_error("Failed to load vocabulary tree file" + file);
312  }
313 
314  setNodeCounts();
315  assert(size == num_words_ + word_start_);
316 }
317 
318 template<class Feature, template<typename, typename> class Distance>
320 {
321  num_words_ = k_;
322  word_start_ = 0;
323  for (uint32_t i = 0; i < levels_ - 1; ++i)
324  {
325  word_start_ += num_words_;
326  num_words_ *= k_;
327  }
328 }
329 
339 float sparseDistance(const SparseHistogram& v1,
340  const SparseHistogram& v2,
341  const std::string& distanceMethod = "classic",
342  const std::vector<float>& word_weights = std::vector<float>());
343 
344 inline std::unique_ptr<IVocabularyTree> createVoctreeForDescriberType(feature::EImageDescriberType imageDescriberType)
345 {
346  using namespace aliceVision::feature;
347  std::unique_ptr<IVocabularyTree> res;
348 
349  switch (imageDescriberType)
350  {
351  case EImageDescriberType::SIFT:
352  res.reset(new VocabularyTree<SIFT_Regions::DescriptorT>);
353  break;
354  case EImageDescriberType::SIFT_FLOAT:
355  res.reset(new VocabularyTree<SIFT_Float_Regions::DescriptorT>);
356  break;
357  case EImageDescriberType::AKAZE:
358  res.reset(new VocabularyTree<AKAZE_Float_Regions::DescriptorT>);
359  break;
360  case EImageDescriberType::AKAZE_MLDB:
361  res.reset(new VocabularyTree<AKAZE_BinaryRegions::DescriptorT>);
362  break;
363 
364 #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CCTAG)
365  case EImageDescriberType::CCTAG3:
366  case EImageDescriberType::CCTAG4:
367  res.reset(new VocabularyTree<CCTAG_Regions::DescriptorT>);
368  break;
369 #endif // ALICEVISION_HAVE_CCTAG
370 
371 #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_OPENCV)
372  #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_OCVSIFT)
373  case EImageDescriberType::SIFT_OCV:
374  res.reset(new VocabularyTree<SIFT_Regions::DescriptorT>);
375  break;
376  #endif // ALICEVISION_HAVE_OCVSIFT
377  case EImageDescriberType::AKAZE_OCV:
378  res.reset(new VocabularyTree<AKAZE_Float_Regions::DescriptorT>);
379  break;
380 #endif // ALICEVISION_HAVE_OPENCV
381 
382  default:
383  throw std::out_of_range("Invalid imageDescriber enum");
384  }
385 
386  return res;
387 }
388 
389 inline void load(std::unique_ptr<IVocabularyTree>& out_voctree, feature::EImageDescriberType& out_descType, const std::string& filepath)
390 {
391  std::size_t lastDot = filepath.find_last_of(".");
392  if (lastDot == std::string::npos)
393  throw std::invalid_argument("Unrecognized Vocabulary tree filename (no extension): " + filepath);
394  std::size_t secondLastDot = filepath.find_last_of(".", lastDot - 1);
395  if (secondLastDot == std::string::npos)
396  throw std::invalid_argument("Unrecognized Vocabulary tree filename (no descType in extension): " + filepath);
397 
398  const std::string descTypeStr = filepath.substr((secondLastDot + 1), lastDot - (secondLastDot + 1));
399 
400  out_descType = feature::EImageDescriberType_stringToEnum(descTypeStr);
401  out_voctree = createVoctreeForDescriberType(out_descType);
402  out_voctree->load(filepath);
403 }
404 
405 } // namespace voctree
406 } // namespace aliceVision
aliceVision::voctree::IVocabularyTree::save
virtual void save(const std::string &file) const =0
Save vocabulary to a file.
aliceVision::voctree::VocabularyTree::splits
uint32_t splits() const override
Get the branching factor (max splits at each node) of the tree.
Definition: VocabularyTree.hpp:254
aliceVision::voctree::VocabularyTree::save
void save(const std::string &file) const override
Save vocabulary to a file.
Definition: VocabularyTree.hpp:274
aliceVision::voctree::VocabularyTree::levels
uint32_t levels() const override
Get the depth (number of levels) of the tree.
Definition: VocabularyTree.hpp:248
aliceVision::voctree::IVocabularyTree::quantizeToSparse
virtual SparseHistogram quantizeToSparse(const void *blindDescriptors) const =0
Create a SparseHistogram from a blind vector of descriptors.
aliceVision::voctree::IVocabularyTree::clear
virtual void clear()=0
Clears vocabulary, leaving an empty tree.
aliceVision
Definition: checkerDetector.cpp:32
aliceVision::voctree::VocabularyTree::quantize
Word quantize(const DescriptorT &feature) const
Quantizes a feature into a discrete word.
Definition: VocabularyTree.hpp:186
aliceVision::voctree::IVocabularyTree::levels
virtual uint32_t levels() const =0
Get the depth (number of levels) of the tree.
aliceVision::voctree::IVocabularyTree::splits
virtual uint32_t splits() const =0
Get the branching factor (max splits at each node) of the tree.
aliceVision::voctree::VocabularyTree::quantizeToSparse
SparseHistogram quantizeToSparse(const void *blindDescriptors) const override
Create a SparseHistogram from a blind vector of descriptors.
Definition: VocabularyTree.hpp:125
aliceVision::voctree::VocabularyTree::words
uint32_t words() const override
Get the number of words the tree contains.
Definition: VocabularyTree.hpp:260
aliceVision::voctree::IVocabularyTree
Definition: VocabularyTree.hpp:61
aliceVision::voctree::VocabularyTree
Optimized vocabulary tree quantizer, templated on feature type and distance metric for maximum effici...
Definition: VocabularyTree.hpp:102
aliceVision::voctree::IVocabularyTree::words
virtual uint32_t words() const =0
Get the number of words the tree contains.
aliceVision::voctree::VocabularyTree::clear
void clear() override
Clears vocabulary, leaving an empty tree.
Definition: VocabularyTree.hpp:266
aliceVision::voctree::VocabularyTree::load
void load(const std::string &file) override
Load vocabulary from a file.
Definition: VocabularyTree.hpp:290
aliceVision::voctree::VocabularyTree::quantizeToSparse
SparseHistogram quantizeToSparse(const std::vector< DescriptorT > &features) const
Quantizes a set of features into sparse histogram of visual words.
Definition: VocabularyTree.hpp:239
aliceVision::voctree::IVocabularyTree::load
virtual void load(const std::string &file)=0
Load vocabulary from a file.
aliceVision::voctree::VocabularyTree::k_
uint32_t k_
Definition: VocabularyTree.hpp:156