AliceVision
Photogrammetric Computer Vision Framework
Database.hpp
1 // This file is part of the AliceVision project.
2 // Copyright (c) 2016 AliceVision contributors.
3 // This Source Code Form is subject to the terms of the Mozilla Public License,
4 // v. 2.0. If a copy of the MPL was not distributed with this file,
5 // You can obtain one at https://mozilla.org/MPL/2.0/.
6 
7 #pragma once
8 
9 #include "VocabularyTree.hpp"
10 #include <aliceVision/types.hpp>
11 
12 #include <map>
13 #include <cstddef>
14 #include <string>
15 
16 namespace aliceVision {
17 namespace voctree {
18 
/// Struct representing a single database match.
/// Pairs the identifier of a document with the score obtained for it.
24 struct DocMatch
25 {
    /// Identifier of the matched document; UndefinedIndexT until a real id is assigned.
26  DocId id{UndefinedIndexT};
    /// Score associated with the match; 0 by default.
27  float score{0.0f};
28 
    /// Builds a match with an undefined id and a zero score (see the member initializers).
29  DocMatch() = default;
    /// Builds a match for document _id with score _score.
30  DocMatch(DocId _id, float _score)
31  : id(_id),
32  score(_score)
33  {}
34 
    /// Allows sorting DocMatches in best-to-worst order with std::sort.
    /// Only the score is compared (ascending, so lower scores sort first); the id is ignored.
36  bool operator<(const DocMatch& other) const { return score < other.score; }
37 
    /// Two matches are equal when both the id and the score are equal.
    /// The score is compared with an exact floating-point ==.
38  bool operator==(const DocMatch& other) const { return id == other.id && score == other.score; }
    /// Negation of operator==.
39  bool operator!=(const DocMatch& other) const { return !(*this == other); }
40 };
41 
/// List of database matches; same type as the `matches` output parameter of Database::find.
42 typedef std::vector<DocMatch> DocMatches;
43 
/// Stream insertion operator for a list of matches.
/// NOTE(review): the output format is defined in the implementation file, not visible here.
44 std::ostream& operator<<(std::ostream& os, const DocMatches& matches);
45 
/// Class for efficiently matching a bag-of-words representation of a document (image)
/// against a database of documents.
50 class Database
51 {
52  public:
    /// Constructor.
    /// @param num_words presumably the number of words of the vocabulary (0 by default) — confirm in Database.cpp
59  explicit Database(uint32_t num_words = 0);
60 
    /// Insert a new document.
    /// @param doc_id identifier of the document to insert
    /// @param document sparse histogram describing the document
    /// @return a document identifier (NOTE(review): presumably the id of the inserted document — confirm)
68  DocId insert(DocId doc_id, const SparseHistogram& document);
69 
    /// Perform a sanity check of the database by querying each document of the database
    /// and finding its top matches.
    /// @param N presumably the number of matches requested for each document — confirm
    /// @param matches output; lists of matches indexed by a std::size_t key
    ///        (NOTE(review): presumably the queried document — confirm)
77  void sanityCheck(std::size_t N, std::map<std::size_t, DocMatches>& matches) const;
78 
    /// Find the top N matches in the database for the query document.
    /// @param document the query document, given as a list of words
    /// @param N number of matches to find
    /// @param matches output; receives the matches found
    /// @param distanceMethod name of the distance used to compare documents ("strongCommonPoints" by default)
87  void find(const std::vector<Word>& document,
88  std::size_t N,
89  std::vector<DocMatch>& matches,
90  const std::string& distanceMethod = "strongCommonPoints") const;
91 
    /// Overload of find() taking the query as a sparse histogram instead of a list of words.
    /// @param query the query document, given as a sparse histogram
    /// @param N number of matches to find
    /// @param matches output; receives the matches found
    /// @param distanceMethod name of the distance used to compare documents ("strongCommonPoints" by default)
100  void find(const SparseHistogram& query,
101  std::size_t N,
102  std::vector<DocMatch>& matches,
103  const std::string& distanceMethod = "strongCommonPoints") const;
104 
    /// Compute the TF-IDF weights of all the words.
    /// To be called after inserting a corpus of training examples.
    /// @param default_weight NOTE(review): presumably the weight given to words for which no
    ///        TF-IDF weight can be computed (1.0 by default) — confirm in Database.cpp
111  void computeTfIdfWeights(float default_weight = 1.0f);
112 
    /// Return the size of the database in terms of number of documents.
117  std::size_t size() const;
118 
    /// Save the vocabulary word weights to a file.
120  void saveWeights(const std::string& file) const;
    /// Load the vocabulary word weights from a file.
122  void loadWeights(const std::string& file);
123 
124  // Save weights and documents
125  // void save(const std::string& file) const;
126  // void load(const std::string& file);
127 
    /// Read-only access to the sparse histograms stored in the database (the database_ member).
128  const SparseHistogramPerImage& getSparseHistogramPerImage() const { return database_; }
129 
130  private:
    /// Entry of an inverted file: a document id together with a count.
    /// NOTE(review): presumably the number of occurrences of a word in that document — confirm.
131  struct WordFrequency
132  {
133  DocId id;
134  uint32_t count;
135 
     /// NOTE(review): id and count have no member initializers, so a default-initialized
     /// WordFrequency leaves both members uninitialized (unlike DocMatch).
136  WordFrequency() = default;
     /// Builds an entry for document _id with count _count.
137  WordFrequency(DocId _id, uint32_t _count)
138  : id(_id),
139  count(_count)
140  {}
141  };
142 
143  // Stored in increasing order by DocId
144  typedef std::vector<WordFrequency> InvertedFile;
145 
147  // typedef std::vector< std::pair<Word, float> > DocumentVector;
148 
    /// Stream insertion operator for a sparse histogram, declared as a friend of Database.
149  friend std::ostream& operator<<(std::ostream& os, const SparseHistogram& dv);
150 
    // Inverted files (NOTE(review): presumably one per vocabulary word — confirm)
151  std::vector<InvertedFile> word_files_;
    // Word weights (NOTE(review): presumably one per vocabulary word, as handled by
    // computeTfIdfWeights / saveWeights / loadWeights — confirm)
152  std::vector<float> word_weights_;
153  SparseHistogramPerImage database_; // Precomputed for inserted documents
154 
    /// Normalizes the sparse histogram v in place.
    /// NOTE(review): the normalization scheme is defined in the implementation file.
159  void normalize(SparseHistogram& v) const;
160 };
161 
162 } // namespace voctree
163 } // namespace aliceVision
aliceVision::voctree::Database::insert
DocId insert(DocId doc_id, const SparseHistogram &document)
Insert a new document.
Definition: Database.cpp:45
aliceVision::voctree::Database::operator<<
friend std::ostream & operator<<(std::ostream &os, const SparseHistogram &dv)
Definition: Database.cpp:19
aliceVision::voctree::Database::computeTfIdfWeights
void computeTfIdfWeights(float default_weight=1.0f)
Compute the TF-IDF weights of all the words. To be called after inserting a corpus of training examples into the database.
Definition: Database.cpp:145
aliceVision::voctree::Database::size
std::size_t size() const
Return the size of the database in terms of number of documents.
Definition: Database.cpp:206
aliceVision::voctree::DocMatch
Struct representing a single database match.
Definition: Database.hpp:24
aliceVision
Definition: checkerDetector.cpp:32
aliceVision::voctree::Database::find
void find(const std::vector< Word > &document, std::size_t N, std::vector< DocMatch > &matches, const std::string &distanceMethod="strongCommonPoints") const
Find the top N matches in the database for the query document.
Definition: Database.cpp:105
aliceVision::voctree::Database::sanityCheck
void sanityCheck(std::size_t N, std::map< std::size_t, DocMatches > &matches) const
Perform a sanity check of the database by querying each document of the database and finding its top N matches.
Definition: Database.cpp:66
aliceVision::voctree::Database::Database
Database(uint32_t num_words=0)
Constructor.
Definition: Database.cpp:40
aliceVision::voctree::Database
Class for efficiently matching a bag-of-words representation of a document (image) against a database of known documents.
Definition: Database.hpp:50
aliceVision::voctree::DocMatch::operator<
bool operator<(const DocMatch &other) const
Allows sorting DocMatches in best-to-worst order with std::sort.
Definition: Database.hpp:36
aliceVision::voctree::Database::loadWeights
void loadWeights(const std::string &file)
Load the vocabulary word weights from a file.
Definition: Database.cpp:167
aliceVision::voctree::Database::saveWeights
void saveWeights(const std::string &file) const
Save the vocabulary word weights to a file.
Definition: Database.cpp:159