From c8bb939e7e45214f17daf8f576582e5bff2527ea Mon Sep 17 00:00:00 2001
From: Paul Clark <paul.clark@spalge.com>
Date: Sat, 13 Jan 2018 20:17:58 +1300
Subject: [PATCH] Support regex format include rules in user scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For user scripts with `@include` and `@exclude` directives in their
headers, if the value starts and ends with `/` then assume it is a
regular expression to be matched against a URL. This feature has been in
the original Greasemonkey module since 2011, but webengine currently
passes regex include rules blindly to the glob matcher.

An alternative to this approach of leaving the bracketing slashes on the
values as a kind of in-band signalling (and making the glob escaping
conditional) would be to add new members to the UserScriptData struct for
holding regex patterns.

This commit also applies cleanly to v5.8 and every release after it.

[ChangeLog][][UserScript] User script metadata parsing now supports regular
expressions in @include and @exclude rules.

Task-number: QTBUG-65484
Change-Id: Ie2e483419f6784f995131ffb7d2d5e91c2c55b1d
Reviewed-by: Jüri Valdmann <juri.valdmann@qt.io>
Reviewed-by: Szabolcs David <davidsz@inf.u-szeged.hu>
Reviewed-by: Florian Bruhin <qt-project.org@the-compiler.org>
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
---
 .../renderer/user_resource_controller.cpp     | 28 +++++++++++++++++--
 src/core/user_script.cpp                      | 17 +++++++----
 .../qmltests/data/script-with-metadata.js     |  4 ++-
 .../quick/qmltests/data/tst_userScripts.qml   | 12 +++++++-
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/src/core/renderer/user_resource_controller.cpp b/src/core/renderer/user_resource_controller.cpp
index bae1d95dc..eed520876 100644
--- a/src/core/renderer/user_resource_controller.cpp
+++ b/src/core/renderer/user_resource_controller.cpp
@@ -58,6 +58,8 @@
 #include "type_conversion.h"
 #include "user_script.h"
 
+#include <QRegularExpression>
+
 Q_GLOBAL_STATIC(UserResourceController, qt_webengine_userResourceController)
 
 static content::RenderView * const globalScriptsIndex = 0;
@@ -65,6 +67,28 @@ static content::RenderView * const globalScriptsIndex = 0;
 // Scripts meant to run after the load event will be run 500ms after DOMContentLoaded if the load event doesn't come within that delay.
 static const int afterLoadTimeout = 500;
 
+static bool regexMatchesURL(const std::string &pat, const GURL &url) {
+    // Rules are matched case-insensitively against url.spec(); a pattern
+    // that fails to compile never matches.
+    const QRegularExpression qre(QtWebEngineCore::toQt(pat),
+                                 QRegularExpression::CaseInsensitiveOption);
+    return qre.isValid() && qre.match(QtWebEngineCore::toQt(url.spec())).hasMatch();
+}
+
+static bool includeRuleMatchesURL(const std::string &pat, const GURL &url)
+{
+    // Match patterns for greasemonkey's @include and @exclude rules which can
+    // be either strings with wildcards or regular expressions. A rule wrapped
+    // in slashes (/.../) is a regular expression; anything else is a glob.
+    // The size check keeps "" and a lone "/" away from front()/back() and
+    // from the substring below, where they would be undefined behaviour.
+    if (pat.size() >= 2 && pat.front() == '/' && pat.back() == '/') {
+        const std::string re = pat.substr(1, pat.size() - 2);
+        return regexMatchesURL(re, url);
+    }
+    return base::MatchPattern(url.spec(), pat);
+}
+
 static bool scriptMatchesURL(const UserScriptData &scriptData, const GURL &url) {
     // Logic taken from Chromium (extensions/common/user_script.cc)
     bool matchFound;
@@ -82,7 +106,7 @@ static bool scriptMatchesURL(const UserScriptData &scriptData, const GURL &url)
     if (!scriptData.globs.empty()) {
         matchFound = false;
         for (auto it = scriptData.globs.begin(), end = scriptData.globs.end(); it != end; ++it) {
-            if (base::MatchPattern(url.spec(), *it))
+            if (includeRuleMatchesURL(*it, url))
                 matchFound = true;
         }
         if (!matchFound)
@@ -91,7 +115,7 @@ static bool scriptMatchesURL(const UserScriptData &scriptData, const GURL &url)
 
     if (!scriptData.excludeGlobs.empty()) {
         for (auto it = scriptData.excludeGlobs.begin(), end = scriptData.excludeGlobs.end(); it != end; ++it) {
-            if (base::MatchPattern(url.spec(), *it))
+            if (includeRuleMatchesURL(*it, url))
                 return false;
         }
     }
diff --git a/src/core/user_script.cpp b/src/core/user_script.cpp
index b33dd6a7d..9b9d66d55 100644
--- a/src/core/user_script.cpp
+++ b/src/core/user_script.cpp
@@ -244,13 +244,20 @@ void UserScript::parseMetadataHeader()
             if (GetDeclarationValue(line, kNameDeclaration, &value)) {
                 setName(toQt(value));
             } else if (GetDeclarationValue(line, kIncludeDeclaration, &value)) {
-                // We escape some characters that MatchPattern() considers special.
-                base::ReplaceSubstringsAfterOffset(&value, 0, "\\", "\\\\");
-                base::ReplaceSubstringsAfterOffset(&value, 0, "?", "\\?");
+                if (value.front() != '/' || value.back() != '/') {
+                  // The greasemonkey spec only allows for wildcards (*), so
+                  // escape the additional things which MatchPattern allows.
+                  base::ReplaceSubstringsAfterOffset(&value, 0, "\\", "\\\\");
+                  base::ReplaceSubstringsAfterOffset(&value, 0, "?", "\\?");
+                }
                 scriptData->globs.push_back(value);
             } else if (GetDeclarationValue(line, kExcludeDeclaration, &value)) {
-                base::ReplaceSubstringsAfterOffset(&value, 0, "\\", "\\\\");
-                base::ReplaceSubstringsAfterOffset(&value, 0, "?", "\\?");
+                if (value.front() != '/' || value.back() != '/') {
+                  // The greasemonkey spec only allows for wildcards (*), so
+                  // escape the additional things which MatchPattern allows.
+                  base::ReplaceSubstringsAfterOffset(&value, 0, "\\", "\\\\");
+                  base::ReplaceSubstringsAfterOffset(&value, 0, "?", "\\?");
+                }
                 scriptData->excludeGlobs.push_back(value);
             } else if (GetDeclarationValue(line, kMatchDeclaration, &value)) {
                 if (URLPattern::PARSE_SUCCESS == urlPatternParser.Parse(value))
diff --git a/tests/auto/quick/qmltests/data/script-with-metadata.js b/tests/auto/quick/qmltests/data/script-with-metadata.js
index 4dcf50f55..de2e3974c 100644
--- a/tests/auto/quick/qmltests/data/script-with-metadata.js
+++ b/tests/auto/quick/qmltests/data/script-with-metadata.js
@@ -2,8 +2,10 @@
 // @name           Test script
 // @homepageURL    http://www.qt.io/
 // @description    Test script with metadata block
-// @include        *test*.html
+// @include        *data/test*.html
+// @include        /favicon.html?$/
 // @exclude        *test2.html
+// @exclude        /test[-]iframe/
 // @run-at         document-end
 // ==/UserScript==
 
diff --git a/tests/auto/quick/qmltests/data/tst_userScripts.qml b/tests/auto/quick/qmltests/data/tst_userScripts.qml
index e9a4eba99..d7c7d5983 100644
--- a/tests/auto/quick/qmltests/data/tst_userScripts.qml
+++ b/tests/auto/quick/qmltests/data/tst_userScripts.qml
@@ -170,7 +170,7 @@ Item {
 
             webEngineView.userScripts = [ scriptWithMetadata ];
 
-            // @include *test*.html
+            // @include *data/test*.html
             webEngineView.url = Qt.resolvedUrl("test1.html");
             webEngineView.waitForLoadSucceeded();
             tryCompare(webEngineView, "title", "New title");
@@ -179,6 +179,16 @@ Item {
             webEngineView.url = Qt.resolvedUrl("test2.html");
             webEngineView.waitForLoadSucceeded();
             tryCompare(webEngineView, "title", "Test page with huge link area");
+
+            // @include /favicon.html?$/
+            webEngineView.url = Qt.resolvedUrl("favicon.html");
+            webEngineView.waitForLoadSucceeded();
+            tryCompare(webEngineView, "title", "New title");
+
+            // @exclude /test[-]iframe/
+            webEngineView.url = Qt.resolvedUrl("test-iframe.html");
+            webEngineView.waitForLoadSucceeded();
+            tryCompare(webEngineView, "title", "Test page with huge link area and iframe");
         }
 
         function test_profileWideScript() {
-- 
GitLab