| /////////////////////////////////////////////////////////////////////// |
| // File: detlinefit.h |
| // Description: Deterministic least upper-quartile squares line fitting. |
| // Author: Ray Smith |
| // Created: Thu Feb 28 14:35:01 PDT 2008 |
| // |
| // (C) Copyright 2008, Google Inc. |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| /////////////////////////////////////////////////////////////////////// |
| |
| #ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_ |
| #define TESSERACT_CCSTRUCT_DETLINEFIT_H_ |
| |
| #include "points.h" |
| |
| namespace tesseract { |
| |
| // This class fits a line to a set of ICOORD points. |
| // There is no restriction on the direction of the line, as it |
| // uses a vector method, ie no concern over infinite gradients. |
| // The fitted line has the least upper quartile of squares of perpendicular |
| // distances of all source points from the line, subject to the constraint |
| // that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}] |
| // i.e. the 9 combinations of one of the first 3 and last 3 points. |
| // A fundamental assumption of this algorithm is that one of the first 3 and |
| // one of the last 3 points are near the best line fit. |
| // The points must be Added in line order for the algorithm to work properly. |
| // No floating point calculations are needed* to make an accurate fit, |
| // and no random numbers are needed** so the algorithm is deterministic, |
| // architecture-stable, and compiler-stable as well as stable to minor |
| // changes in the input. |
| // *A single floating point division is used to compute each line's distance. |
| // This is unlikely to result in choice of a different line, but if it does, |
| // it would be easy to replace with a 64 bit integer calculation. |
| // **Random numbers are used in the nth_item function, but the worst |
| // non-determinism that can result is picking a different result among equals, |
| // and that wouldn't make any difference to the end-result distance, so the |
| // randomness does not affect the determinism of the algorithm. The random |
| // numbers are only there to guarantee average linear time. |
| // Fitting time is linear, but with a high constant, as it tries 9 different |
| // lines and computes the distance of all points each time. |
| // This class is aimed at replacing the LLSQ (linear least squares) and |
| // LMS (least median of squares) classes that are currently used for most |
| // of the line fitting in Tesseract. |
| class DetLineFit { |
| public: |
| DetLineFit(); |
| ~DetLineFit(); |
| |
| // Delete all Added points. |
| void Clear(); |
| |
| // Add a new point. Takes a copy - the pt doesn't need to stay in scope. |
| // Add must be called on points in sequence along the line. |
| void Add(const ICOORD& pt); |
| |
| // Fit a line to the points, returning the fitted line as a pair of |
| // points, and the upper quartile error. |
| double Fit(ICOORD* pt1, ICOORD* pt2); |
| |
| private: |
| double ComputeErrors(const ICOORD start, const ICOORD end, int* distances); |
| |
| ICOORDELT_LIST pt_list_; // All the added points. |
| }; |
| |
| } // namespace tesseract. |
| |
| #endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_ |
| |
| |