Commit c6b021cc authored by Oswald Buddenhagen, committed by The Qt Project
Browse files

make StringSimilarityMatcher instantiation cheap


... by removing an allocation.

move some of the functions out of the class to avoid polluting the header.

Change-Id: If0d3638215e59f7d88be7217e4d3abcbfd7a201e
Reviewed-by: hjk <qthjk@ovi.com>
parent e9b9b2d6
dev 5.10 5.11 5.12 5.12.1 5.12.10 5.12.11 5.12.12 5.12.2 5.12.3 5.12.4 5.12.5 5.12.6 5.12.7 5.12.8 5.12.9 5.13 5.13.0 5.13.1 5.13.2 5.14 5.14.0 5.14.1 5.14.2 5.15 5.15.0 5.15.1 5.15.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 5.9.8 6.0 6.0.0 6.1 6.1.0 6.1.1 6.1.2 6.1.3 6.2 6.2.0 6.2.1 6.2.2 old/5.0 old/5.1 old/5.2 wip/cmake wip/winrt v5.15.0-alpha1 v5.14.1 v5.14.0 v5.14.0-rc2 v5.14.0-rc1 v5.14.0-beta3 v5.14.0-beta2 v5.14.0-beta1 v5.14.0-alpha1 v5.13.2 v5.13.1 v5.13.0 v5.13.0-rc3 v5.13.0-rc2 v5.13.0-rc1 v5.13.0-beta4 v5.13.0-beta3 v5.13.0-beta2 v5.13.0-beta1 v5.13.0-alpha1 v5.12.7 v5.12.6 v5.12.5 v5.12.4 v5.12.3 v5.12.2 v5.12.1 v5.12.0 v5.12.0-rc2 v5.12.0-rc1 v5.12.0-beta4 v5.12.0-beta3 v5.12.0-beta2 v5.12.0-beta1 v5.12.0-alpha1 v5.11.3 v5.11.2 v5.11.1 v5.11.0 v5.11.0-rc2 v5.11.0-rc1 v5.11.0-beta4 v5.11.0-beta3 v5.11.0-beta2 v5.11.0-beta1 v5.11.0-alpha1 v5.10.1 v5.10.0 v5.10.0-rc3 v5.10.0-rc2 v5.10.0-rc1 v5.10.0-beta4 v5.10.0-beta3 v5.10.0-beta2 v5.10.0-beta1 v5.10.0-alpha1 v5.9.9 v5.9.8 v5.9.7 v5.9.6 v5.9.5 v5.9.4 v5.9.3 v5.9.2 v5.9.1 v5.9.0 v5.9.0-rc2 v5.9.0-rc1 v5.9.0-beta4 v5.9.0-beta3 v5.9.0-beta2 v5.9.0-beta1 v5.9.0-alpha1 v5.8.0 v5.8.0-rc1 v5.8.0-beta1 v5.8.0-alpha1 v5.7.1 v5.7.0 v5.7.0-rc1 v5.7.0-beta1 v5.7.0-alpha1 v5.6.3 v5.6.2 v5.6.1 v5.6.1-1 v5.6.0 v5.6.0-rc1 v5.6.0-beta1 v5.6.0-alpha1 v5.5.1 v5.5.0 v5.5.0-rc1 v5.5.0-beta1 v5.5.0-alpha1 v5.4.2 v5.4.1 v5.4.0 v5.4.0-rc1 v5.4.0-beta1 v5.4.0-alpha1 v5.3.2 v5.3.1 v5.3.0 v5.3.0-rc1 v5.3.0-beta1 v5.3.0-alpha1 v5.2.1 v5.2.0 v5.2.0-rc1 v5.2.0-beta1 v5.2.0-alpha1 v5.1.1 v5.1.0 v5.1.0-rc2 v5.1.0-rc1 v5.1.0-beta1 v5.1.0-alpha1 v5.0.2 v5.0.1 v5.0.0 v5.0.0-rc2 v5.0.0-rc1 v5.0.0-beta2
No related merge requests found
Showing with 47 additions and 51 deletions
...@@ -132,50 +132,38 @@ static const int bitCount[256] = { ...@@ -132,50 +132,38 @@ static const int bitCount[256] = {
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
}; };
struct CoMatrix static inline void setCoOccurence(CoMatrix &m, char c, char d)
{ {
int k = indexOf[(uchar) c] + 20 * indexOf[(uchar) d];
m.b[k >> 3] |= (1 << (k & 0x7));
}
CoMatrix::CoMatrix(const QString &str)
{
QByteArray ba = str.toUtf8();
const char *text = ba.constData();
char c = '\0', d;
memset( b, 0, 52 );
/* /*
The matrix has 20 * 20 = 400 entries. This requires 50 bytes, or 13 The Knuth books are not in the office only for show; they help make
words. Some operations are performed on words for more efficiency. loops 30% faster and 20% as readable.
*/ */
union { while ( (d = *text) != '\0' ) {
quint8 b[52]; setCoOccurence(*this, c, d);
quint32 w[13]; if ( (c = *++text) != '\0' ) {
}; setCoOccurence(*this, d, c);
text++;
CoMatrix() { memset( b, 0, 52 ); }
CoMatrix(const QString &str)
{
QByteArray ba = str.toUtf8();
const char *text = ba.constData();
char c = '\0', d;
memset( b, 0, 52 );
/*
The Knuth books are not in the office only for show; they help make
loops 30% faster and 20% as readable.
*/
while ( (d = *text) != '\0' ) {
setCoOccurence( c, d );
if ( (c = *++text) != '\0' ) {
setCoOccurence( d, c );
text++;
}
} }
} }
}
void setCoOccurence( char c, char d ) { static inline int worth(const CoMatrix &m)
int k = indexOf[(uchar) c] + 20 * indexOf[(uchar) d]; {
b[k >> 3] |= (1 << (k & 0x7)); int w = 0;
} for (int i = 0; i < 50; i++)
w += bitCount[m.b[i]];
int worth() const { return w;
int w = 0; }
for ( int i = 0; i < 50; i++ )
w += bitCount[b[i]];
return w;
}
};
static inline CoMatrix reunion(const CoMatrix &m, const CoMatrix &n) static inline CoMatrix reunion(const CoMatrix &m, const CoMatrix &n)
{ {
...@@ -194,8 +182,8 @@ static inline CoMatrix intersection(const CoMatrix &m, const CoMatrix &n) ...@@ -194,8 +182,8 @@ static inline CoMatrix intersection(const CoMatrix &m, const CoMatrix &n)
} }
StringSimilarityMatcher::StringSimilarityMatcher(const QString &stringToMatch) StringSimilarityMatcher::StringSimilarityMatcher(const QString &stringToMatch)
: m_cm(stringToMatch)
{ {
m_cm = new CoMatrix(stringToMatch);
m_length = stringToMatch.length(); m_length = stringToMatch.length();
} }
...@@ -203,16 +191,11 @@ int StringSimilarityMatcher::getSimilarityScore(const QString &strCandidate) ...@@ -203,16 +191,11 @@ int StringSimilarityMatcher::getSimilarityScore(const QString &strCandidate)
{ {
CoMatrix cmTarget(strCandidate); CoMatrix cmTarget(strCandidate);
int delta = qAbs(m_length - strCandidate.size()); int delta = qAbs(m_length - strCandidate.size());
int score = ( (intersection(*m_cm, cmTarget).worth() + 1) << 10 ) / int score = ( (worth(intersection(m_cm, cmTarget)) + 1) << 10 ) /
( reunion(*m_cm, cmTarget).worth() + (delta << 1) + 1 ); ( worth(reunion(m_cm, cmTarget)) + (delta << 1) + 1 );
return score; return score;
} }
StringSimilarityMatcher::~StringSimilarityMatcher()
{
delete m_cm;
}
/** /**
* Checks how similar two strings are. * Checks how similar two strings are.
* The return value is the score, and a higher score is more similar * The return value is the score, and a higher score is more similar
...@@ -226,8 +209,8 @@ int getSimilarityScore(const QString &str1, const QString &str2) ...@@ -226,8 +209,8 @@ int getSimilarityScore(const QString &str1, const QString &str2)
CoMatrix cm(str1); CoMatrix cm(str1);
int delta = qAbs(str1.size() - str2.size()); int delta = qAbs(str1.size() - str2.size());
int score = ( (intersection(cm, cmTarget).worth() + 1) << 10 ) int score = ( (worth(intersection(cm, cmTarget)) + 1) << 10 )
/ ( reunion(cm, cmTarget).worth() + (delta << 1) + 1 ); / ( worth(reunion(cm, cmTarget)) + (delta << 1) + 1 );
return score; return score;
} }
......
...@@ -71,7 +71,21 @@ inline bool operator!=( const Candidate& c, const Candidate& d ) { ...@@ -71,7 +71,21 @@ inline bool operator!=( const Candidate& c, const Candidate& d ) {
typedef QList<Candidate> CandidateList; typedef QList<Candidate> CandidateList;
struct CoMatrix; struct CoMatrix
{
CoMatrix(const QString &str);
CoMatrix() {}
/*
The matrix has 20 * 20 = 400 entries. This requires 50 bytes, or 13
words. Some operations are performed on words for more efficiency.
*/
union {
quint8 b[52];
quint32 w[13];
};
};
/** /**
* This class is more efficient for searching through a large array of candidate strings, since we only * This class is more efficient for searching through a large array of candidate strings, since we only
* have to construct the CoMatrix for the \a stringToMatch once, * have to construct the CoMatrix for the \a stringToMatch once,
...@@ -81,11 +95,10 @@ struct CoMatrix; ...@@ -81,11 +95,10 @@ struct CoMatrix;
class StringSimilarityMatcher { class StringSimilarityMatcher {
public: public:
StringSimilarityMatcher(const QString &stringToMatch); StringSimilarityMatcher(const QString &stringToMatch);
~StringSimilarityMatcher();
int getSimilarityScore(const QString &strCandidate); int getSimilarityScore(const QString &strCandidate);
private: private:
CoMatrix *m_cm; CoMatrix m_cm;
int m_length; int m_length;
}; };
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment