OTest2
A C++ testing framework
hirschberg.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2019 Ondrej Starek
3  *
4  * This file is part of OTest2.
5  *
6  * OTest2 is free software: you can redistribute it and/or modify it under
7  * the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License,
9  * or (at your option) any later version.
10  *
11  * OTest2 is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14  * License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with OTest2. If not, see <http://www.gnu.org/licenses/>.
18  */
19 #ifndef OTest2__LIB_HIRSCHBERG_H_
20 #define OTest2__LIB_HIRSCHBERG_H_
21 
22 #include <assert.h>
23 #include <cstdint>
24 #include <tuple>
25 #include <vector>
26 
27 #include <otest2/difflogreverse.h>
28 
29 namespace OTest2 {
30 
31 enum class DiffAction : uint8_t {
32  SUBSTR = 0,
33  CHANGE,
34  INSERT,
35  DELETE,
36 };
37 
41 template<typename Type_>
42 class DiffScoreLCS {
43  public:
44  std::tuple<bool, int> scoreSub(
45  int step_,
46  const Type_ left_[],
47  int left_index_,
48  const Type_ right_[],
49  int right_index_) const {
50  bool same_(left_[left_index_] == right_[right_index_]);
51  return std::make_tuple(same_, same_ ? 1 : -1);
52  }
53 
54  int scoreDel(
55  int step_,
56  const Type_ right_[],
57  int right_index_) const {
58  return -1;
59  }
60 
61  int scoreIns(
62  int step_,
63  const Type_ left_[],
64  int left_index_) const {
65  return -1;
66  }
67 
69  int score1_,
70  int score2_) const {
71  return score1_ > score2_;
72  }
73 };
74 
75 template<typename Type_, typename ScoreFce_ = DiffScoreLCS<Type_> >
76 class Hirschberg {
77  private:
78  struct ScoreRecord {
79  DiffAction action;
80  int score;
81  };
82 
83  static ScoreRecord selectAction(
84  const ScoreFce_& score_fce_,
85  int step_,
86  const Type_ left_[],
87  int left_index_,
88  const Type_ right_[],
89  int right_index_,
90  const ScoreRecord& sub_prev_,
91  const ScoreRecord& del_prev_,
92  const ScoreRecord& ins_prev_) {
93  /* -- compute scores for subsequence, insertion and deletion */
94  bool same_;
95  auto sub_result_(score_fce_.scoreSub(
96  step_, left_, left_index_, right_, right_index_));
97  int sub_score_(sub_prev_.score + std::get<1>(sub_result_));
98  int del_score_(
99  del_prev_.score + score_fce_.scoreDel(step_, right_, right_index_));
100  int ins_score_(
101  ins_prev_.score + score_fce_.scoreIns(step_, left_, left_index_));
102 
103  /* -- find the extreme */
104  if(score_fce_.betterScore(del_score_, ins_score_)) {
105  if(score_fce_.betterScore(del_score_, sub_score_)) {
106  return {DiffAction::DELETE, del_score_};
107  }
108  else {
109  if(std::get<0>(sub_result_))
110  return {DiffAction::SUBSTR, sub_score_};
111  else
112  return {DiffAction::CHANGE, sub_score_};
113  }
114  }
115  else {
116  if(score_fce_.betterScore(ins_score_, sub_score_)) {
117  return {DiffAction::INSERT, ins_score_};
118  }
119  else {
120  if(std::get<0>(sub_result_))
121  return {DiffAction::SUBSTR, sub_score_};
122  else
123  return {DiffAction::CHANGE, sub_score_};
124  }
125  }
126  }
127 
128  static void diffScore(
129  const ScoreFce_& score_fce_,
130  const Type_ left_[],
131  int left_begin_,
132  int left_end_,
133  const Type_ right_[],
134  int right_begin_,
135  int right_end_,
136  int step_,
137  std::vector<ScoreRecord>& score_) {
138  assert(step_ == 1 || step_ == -1);
139 
140  /* -- initialize first line */
141  const int left_len_(score_.size());
142  score_[0] = {DiffAction::SUBSTR, 0};
143  for(int i_(1); i_ < left_len_; ++i_) {
144  score_[i_] = {
146  score_[i_ - 1].score + score_fce_.scoreIns(
147  step_,
148  left_,
149  left_begin_ + (i_ - 1) * step_)};
150  }
151 
152  /* -- compute the scores */
153  std::vector<ScoreRecord> next_line_(left_len_, {DiffAction::INSERT, 0});
154  for(; right_begin_ != right_end_; right_begin_ += step_) {
155  next_line_[0] = {
157  score_[0].score + score_fce_.scoreDel(step_, right_,right_begin_)};
158  for(int i_(1); i_ < left_len_; ++i_) {
159  next_line_[i_] = selectAction(
160  score_fce_,
161  step_,
162  left_,
163  left_begin_ + (i_ - 1) * step_,
164  right_,
165  right_begin_,
166  score_[i_ - 1],
167  score_[i_],
168  next_line_[i_ - 1]);
169  }
170  score_.swap(next_line_);
171  }
172  }
173 
174  static void diffImpl(
175  const ScoreFce_& score_fce_,
176  const Type_ left_[],
177  int left_begin_,
178  int left_end_,
179  const Type_ right_[],
180  int right_begin_,
181  int right_end_,
182  DiffLogBuilder& diff_log_) {
183  assert(left_begin_ >= 0 && left_end_ >= 0);
184  assert(right_begin_ >= 0 && right_end_ >= 0);
185  assert(left_begin_ <= left_end_ && right_begin_ <= right_end_);
186 
187  /* -- left is empty, the right tail is deleted */
188  if(left_begin_ == left_end_) {
189  for(int i_(right_begin_); i_ < right_end_; ++i_)
190  diff_log_.addDelete(i_);
191  }
192  /* -- right is empty, the left tail must be inserted */
193  else if(right_begin_ == right_end_) {
194  for(int i_(left_begin_); i_ < left_end_; ++i_)
195  diff_log_.addInsert(i_);
196  }
197  /* -- only one line */
198  else if(left_begin_ + 1 == left_end_) {
199  ScoreRecord last_sub_{DiffAction::SUBSTR, 0};
200  ScoreRecord last_del_{
202  score_fce_.scoreIns(1, left_, left_begin_)};
203  int index_(-1);
204  DiffAction change_action_;
205  for(int i_(right_begin_); i_ < right_end_; ++i_) {
206  ScoreRecord last_ins_{
208  last_sub_.score + score_fce_.scoreDel(1, right_, i_)};
209  last_del_ = selectAction(
210  score_fce_,
211  1,
212  left_,
213  left_begin_,
214  right_,
215  i_,
216  last_sub_,
217  last_del_,
218  last_ins_);
219  if(last_del_.action == DiffAction::CHANGE
220  || last_del_.action == DiffAction::SUBSTR) {
221  index_ = i_;
222  change_action_ = last_del_.action;
223  }
224  last_sub_ = last_ins_;
225  }
226  for(int i_(right_begin_); i_ < right_end_; ++i_) {
227  if(i_ != index_)
228  diff_log_.addDelete(i_);
229  else if(change_action_ == DiffAction::CHANGE)
230  diff_log_.addChange(left_begin_, i_);
231  else
232  diff_log_.addMatch(left_begin_, i_);
233  }
234  }
235  /* -- only one column */
236  else if(right_begin_ + 1 == right_end_) {
237  ScoreRecord last_sub_{DiffAction::SUBSTR, 0};
238  ScoreRecord last_ins_{
240  score_fce_.scoreDel(1, right_, right_begin_)};
241  int index_(-1);
242  DiffAction change_action_;
243  for(int i_(left_begin_); i_ < left_end_; ++i_) {
244  ScoreRecord last_del_{
246  last_sub_.score + score_fce_.scoreIns(1, left_, i_)};
247  last_ins_ = selectAction(
248  score_fce_,
249  1,
250  left_,
251  i_,
252  right_,
253  right_begin_,
254  last_sub_,
255  last_del_,
256  last_ins_);
257  if(last_ins_.action == DiffAction::CHANGE
258  || last_ins_.action == DiffAction::SUBSTR) {
259  index_ = i_;
260  change_action_ = last_ins_.action;
261  }
262  last_sub_ = last_del_;
263  }
264  for(int i_(left_begin_); i_ < left_end_; ++i_) {
265  if(i_ != index_)
266  diff_log_.addInsert(i_);
267  else if(change_action_ == DiffAction::CHANGE)
268  diff_log_.addChange(i_, right_begin_);
269  else
270  diff_log_.addMatch(i_, right_begin_);
271  }
272  }
273  /* -- the algorithm core */
274  else {
275  const int rmid_((right_begin_ + right_end_) / 2);
276  const int left_len_(left_end_ - left_begin_);
277  int lmid_(0);
278 
279  {
280  /* -- prefixes */
281  std::vector<ScoreRecord> score_pre_(left_len_ + 1);
282  diffScore(
283  score_fce_,
284  left_,
285  left_begin_,
286  left_end_,
287  right_,
288  right_begin_,
289  rmid_,
290  1,
291  score_pre_);
292 
293  /* -- suffixes */
294  std::vector<ScoreRecord> score_suf_(left_len_ + 1);
295  diffScore(
296  score_fce_,
297  left_,
298  left_end_ - 1,
299  left_begin_ - 1,
300  right_,
301  right_end_ - 1,
302  rmid_ - 1,
303  -1,
304  score_suf_);
305 
306  /* -- find maximal score */
307  int score_max_(score_pre_[0].score + score_suf_[left_len_].score);
308  for(int i_(1); i_ <= left_len_; ++i_) {
309  const int score_(score_pre_[i_].score + score_suf_[left_len_ - i_].score);
310  if(score_ > score_max_) {
311  score_max_ = score_;
312  lmid_ = i_;
313  }
314  }
315  }
316 
317  /* -- recurrent invocation */
318  diffImpl(
319  score_fce_,
320  left_,
321  left_begin_,
322  left_begin_ + lmid_,
323  right_,
324  right_begin_,
325  rmid_,
326  diff_log_);
327  diffImpl(
328  score_fce_,
329  left_,
330  left_begin_ + lmid_,
331  left_end_,
332  right_,
333  rmid_,
334  right_end_,
335  diff_log_);
336  }
337  }
338 
339  public:
358  static void diff(
359  const ScoreFce_& score_fce_,
360  const Type_ left_[],
361  std::size_t left_len_,
362  const Type_ right_[],
363  std::size_t right_len_,
364  DiffLogBuilder& diff_log_) {
365  if(left_len_ <= right_len_) {
366  diffImpl(
367  score_fce_, left_, 0, left_len_, right_, 0, right_len_, diff_log_);
368  }
369  else {
370  /* -- we can spare memory - if last 2 lines for the left sequence
371  * are computed and kept in scoring algorithm. If the left
372  * sequence is shorter, the lines are shorter too. */
373  DiffLogBuilderReverse reverse_(&diff_log_);
374  diffImpl(
375  score_fce_, right_, 0, right_len_, left_, 0, left_len_, reverse_);
376  }
377  }
378 };
379 
390 template<typename Type_, typename ScoreFce_ = DiffScoreLCS<Type_> >
392  const Type_ left_[],
393  std::size_t left_len_,
394  const Type_ right_[],
395  std::size_t right_len_,
396  DiffLogBuilder& log_builder_,
397  ScoreFce_ score_fce_ = ScoreFce_()) {
398  /* -- do the job */
400  score_fce_, left_, left_len_, right_, right_len_, log_builder_);
401 }
402 
411 template<typename Type_, typename ScoreFce_ = DiffScoreLCS<Type_> >
413  const std::vector<Type_>& left_,
414  const std::vector<Type_>& right_,
415  DiffLogBuilder& log_builder_,
416  ScoreFce_ score_fce_ = ScoreFce_()) {
417  /* -- do the job */
419  score_fce_,
420  left_.data(),
421  left_.size(),
422  right_.data(),
423  right_.size(),
424  log_builder_);
425 }
426 
427 } /* -- namespace OTest2 */
428 
429 #endif /* OTest2__LIB_HIRSCHBERG_H_ */
OTest2::Hirschberg
Definition: hirschberg.h:76
OTest2::DiffLogBuilder::addMatch
virtual void addMatch(int left_index_, int right_index_)=0
Add match of characters in both sequences.
OTest2::hirschbergDiff
void hirschbergDiff(const Type_ left_[], std::size_t left_len_, const Type_ right_[], std::size_t right_len_, DiffLogArray &diff_log_, ScoreFce_ score_fce_=ScoreFce_())
Compute the diff algorithm for specified sequences.
Definition: difflogarray.h:100
OTest2::DiffScoreLCS::scoreIns
int scoreIns(int step_, const Type_ left_[], int left_index_) const
Definition: hirschberg.h:61
OTest2::DiffAction
DiffAction
Definition: hirschberg.h:31
OTest2::DiffAction::CHANGE
@ CHANGE
OTest2::DiffLogBuilder
Generic interface of the log builder.
Definition: difflogbuilder.h:30
OTest2::Hirschberg::diff
static void diff(const ScoreFce_ &score_fce_, const Type_ left_[], std::size_t left_len_, const Type_ right_[], std::size_t right_len_, DiffLogBuilder &diff_log_)
Compute the difference of two sequences.
Definition: hirschberg.h:358
OTest2
Definition: assertbean.h:25
OTest2::DiffAction::INSERT
@ INSERT
OTest2::DiffLogBuilder::addChange
virtual void addChange(int left_index_, int right_index_)=0
Add change of both sequences.
OTest2::DiffScoreLCS
Ordinary scoring function - standard LCS algorithm.
Definition: hirschberg.h:42
OTest2::DiffLogBuilderReverse
Simple decorator - inversion of operations.
Definition: difflogreverse.h:29
difflogreverse.h
OTest2::DiffScoreLCS::scoreDel
int scoreDel(int step_, const Type_ right_[], int right_index_) const
Definition: hirschberg.h:54
OTest2::DiffLogBuilder::addInsert
virtual void addInsert(int left_index_)=0
Add inserted item to the left sequence.
OTest2::DiffScoreLCS::betterScore
bool betterScore(int score1_, int score2_) const
Definition: hirschberg.h:68
OTest2::DiffAction::SUBSTR
@ SUBSTR
OTest2::DiffAction::DELETE
@ DELETE
OTest2::DiffScoreLCS::scoreSub
std::tuple< bool, int > scoreSub(int step_, const Type_ left_[], int left_index_, const Type_ right_[], int right_index_) const
Definition: hirschberg.h:44
OTest2::DiffLogBuilder::addDelete
virtual void addDelete(int right_index_)=0
Add deleted item from the right sequence.