Commit d8a7941d authored by Bettina's avatar Bettina Committed by Commit Bot

Remove log statements in phishing_dom_feature_extractor.

Also add more tests in phishing_url_feature_extractor.

Bug: 1069793
Change-Id: Iebc588915d1fcd003bd942419fae9c20323f25ca
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2242228Reviewed-by: default avatarDaniel Rubery <drubery@chromium.org>
Commit-Queue: Bettina Dea <bdea@chromium.org>
Cr-Commit-Position: refs/heads/master@{#778586}
parent 5e4d9ec7
......@@ -9,7 +9,6 @@
#include "base/bind.h"
#include "base/compiler_specific.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/single_thread_task_runner.h"
#include "base/strings/string_util.h"
......@@ -110,7 +109,9 @@ PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor()
PhishingDOMFeatureExtractor::~PhishingDOMFeatureExtractor() {
// The RenderView should have called CancelPendingExtraction() before
// we are destroyed.
CheckNoPendingExtraction();
DCHECK(done_callback_.is_null());
DCHECK(!cur_frame_data_.get());
DCHECK(cur_document_.IsNull());
}
void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document,
......@@ -118,7 +119,9 @@ void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document,
DoneCallback done_callback) {
// The RenderView should have called CancelPendingExtraction() before
// starting a new extraction, so DCHECK this.
CheckNoPendingExtraction();
DCHECK(done_callback_.is_null());
DCHECK(!cur_frame_data_.get());
DCHECK(cur_document_.IsNull());
// However, in an opt build, we will go ahead and clean up the pending
// extraction so that we can start in a known state.
CancelPendingExtraction();
......@@ -192,7 +195,6 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
base::TimeTicks now = clock_->NowTicks();
if (now - page_feature_state_->start_time >=
base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) {
DLOG(ERROR) << "Feature extraction took too long, giving up";
// We expect this to happen infrequently, so record when it does.
UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.DOMFeatureTimeout", 1);
RunCallback(false);
......@@ -232,20 +234,16 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
void PhishingDOMFeatureExtractor::HandleLink(
const blink::WebElement& element) {
// Count the number of times we link to a different host.
if (!element.HasAttribute("href")) {
DVLOG(1) << "Skipping anchor tag with no href";
if (!element.HasAttribute("href"))
return;
}
// Retrieve the link and resolve the link in case it's relative.
blink::WebURL full_url = CompleteURL(element, element.GetAttribute("href"));
std::string domain;
bool is_external = IsExternalDomain(full_url, &domain);
if (domain.empty()) {
DVLOG(1) << "Could not extract domain from link: " << full_url;
if (domain.empty())
return;
}
if (is_external) {
++page_feature_state_->external_links;
......@@ -278,10 +276,8 @@ void PhishingDOMFeatureExtractor::HandleForm(
std::string domain;
bool is_external = IsExternalDomain(full_url, &domain);
if (domain.empty()) {
DVLOG(1) << "Could not extract domain from form action: " << full_url;
if (domain.empty())
return;
}
if (is_external) {
++page_feature_state_->action_other_domain;
......@@ -291,22 +287,16 @@ void PhishingDOMFeatureExtractor::HandleForm(
void PhishingDOMFeatureExtractor::HandleImage(
const blink::WebElement& element) {
if (!element.HasAttribute("src")) {
DVLOG(1) << "Skipping img tag with no src";
}
// Record whether the image points to a different domain.
blink::WebURL full_url = CompleteURL(element, element.GetAttribute("src"));
std::string domain;
bool is_external = IsExternalDomain(full_url, &domain);
if (domain.empty()) {
DVLOG(1) << "Could not extract domain from image src: " << full_url;
if (domain.empty())
return;
}
if (is_external) {
if (is_external)
++page_feature_state_->img_other_domain;
}
++page_feature_state_->total_imgs;
}
......@@ -339,17 +329,6 @@ void PhishingDOMFeatureExtractor::HandleScript(
++page_feature_state_->num_script_tags;
}
void PhishingDOMFeatureExtractor::CheckNoPendingExtraction() {
DCHECK(done_callback_.is_null());
DCHECK(!cur_frame_data_.get());
DCHECK(cur_document_.IsNull());
if (!done_callback_.is_null() || cur_frame_data_.get() ||
!cur_document_.IsNull()) {
LOG(ERROR) << "Extraction in progress, missing call to "
<< "CancelPendingExtraction";
}
}
void PhishingDOMFeatureExtractor::RunCallback(bool success) {
// Record some timing stats that we can use to evaluate feature extraction
// performance. These include both successful and failed extractions.
......
......@@ -88,11 +88,6 @@ class PhishingDOMFeatureExtractor {
void HandleInput(const blink::WebElement& element);
void HandleScript(const blink::WebElement& element);
// Helper to verify that there is no pending feature extraction. Dies in
// debug builds if the state is not as expected. This is a no-op in release
// builds.
void CheckNoPendingExtraction();
// Runs |done_callback_| and then clears all internal state.
void RunCallback(bool success);
......
......@@ -6,6 +6,8 @@
#include <string>
#include <vector>
#include "base/format_macros.h"
#include "base/strings/stringprintf.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/test_utils.h"
#include "testing/gmock/include/gmock/gmock.h"
......@@ -25,10 +27,24 @@ class PhishingUrlFeatureExtractorTest : public ::testing::Test {
PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(full,
tokens);
}
void FillFeatureMap(size_t count, FeatureMap* features) {
for (size_t i = 0; i < count; ++i) {
EXPECT_TRUE(
features->AddBooleanFeature(base::StringPrintf("Feature%" PRIuS, i)));
}
}
};
TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
std::string url = "http://123.0.0.1/mydocuments/a.file.html";
FeatureMap features;
// If feature map is already full, features cannot be extracted.
FillFeatureMap(FeatureMap::kMaxFeatureMapSize, &features);
ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
features.Clear();
FeatureMap expected_features;
expected_features.AddBooleanFeature(features::kUrlHostIsIpAddress);
expected_features.AddBooleanFeature(features::kUrlPathToken +
......@@ -38,9 +54,13 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
expected_features.AddBooleanFeature(features::kUrlPathToken +
std::string("html"));
FeatureMap features;
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
// If feature map is already full, features cannot be extracted.
features.Clear();
FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 1, &features);
ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
features.Clear();
url = "http://www.www.cnn.co.uk/sports/sports/index.html?shouldnotappear";
expected_features.Clear();
......@@ -61,6 +81,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
features.Clear();
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
features.Clear();
// If feature map is already full, features cannot be extracted.
FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 5, &features);
ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
url = "http://justadomain.com/";
expected_features.Clear();
......@@ -72,6 +96,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
features.Clear();
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
// If feature map is already full, features cannot be extracted.
features.Clear();
FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 1, &features);
ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
url = "http://witharef.com/#abc";
expected_features.Clear();
......@@ -96,6 +124,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
features.Clear();
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
// If feature map is already full, features cannot be extracted.
features.Clear();
FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 2, &features);
ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
url = "http://unrecognized.tld/";
EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment