Commit 5575f886 authored by cjhopman@chromium.org

Roll DomDistillerJs (and make updater generate BUG=)

The updater now pulls BUG= lines out of the git log, then
processes, sorts, and uniquifies them and prints the result to
produce even better commit messages.

Picked up changes:
6ef4764 Extend visibility checks to images that are extracted.
6e8c385 Updated Boilerpipe with some more modern tags.
cde81dc Add extract_text_only option
0a31800 filter out invisible elements
82b35ba Fix crash when content extraction fails.

BUG=354157,356336,362085,364356,367243

Review URL: https://codereview.chromium.org/297563002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@271582 0039d316-1c4b-4281-b951-d872f2087c98
parent 6474bc33
Name: dom-distiller-js Name: dom-distiller-js
URL: https://code.google.com/p/dom-distiller URL: https://code.google.com/p/dom-distiller
Version: a15dc85486 Version: 6ef4764253
License: BSD License: BSD
Security Critical: yes Security Critical: yes
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -27,4 +27,6 @@ message DomDistillerResult { ...@@ -27,4 +27,6 @@ message DomDistillerResult {
} }
message DomDistillerOptions { message DomDistillerOptions {
// Whether to extract only the text (or to include the containing html).
optional bool extract_text_only = 1;
} }
...@@ -167,6 +167,13 @@ namespace dom_distiller { ...@@ -167,6 +167,13 @@ namespace dom_distiller {
dom_distiller::proto::DomDistillerOptions message; dom_distiller::proto::DomDistillerOptions message;
const base::DictionaryValue* dict; const base::DictionaryValue* dict;
if (!json->GetAsDictionary(&dict)) goto error; if (!json->GetAsDictionary(&dict)) goto error;
if (dict->HasKey("1")) {
bool field_value;
if (!dict->GetBoolean("1", &field_value)) {
goto error;
}
message.set_extract_text_only(field_value);
}
return message; return message;
error: error:
...@@ -175,6 +182,9 @@ namespace dom_distiller { ...@@ -175,6 +182,9 @@ namespace dom_distiller {
static scoped_ptr<base::Value> WriteToValue(const dom_distiller::proto::DomDistillerOptions& message) { static scoped_ptr<base::Value> WriteToValue(const dom_distiller::proto::DomDistillerOptions& message) {
scoped_ptr<base::DictionaryValue> dict(new base::DictionaryValue()); scoped_ptr<base::DictionaryValue> dict(new base::DictionaryValue());
if (message.has_extract_text_only()) {
dict->SetBoolean("1", message.extract_text_only());
}
return dict.PassAs<base::Value>(); return dict.PassAs<base::Value>();
} }
}; };
......
...@@ -15,7 +15,8 @@ ...@@ -15,7 +15,8 @@
dom_distiller_js_package=$dom_distiller_js_path/package dom_distiller_js_package=$dom_distiller_js_path/package
readme_chromium=$dom_distiller_js_path/README.chromium readme_chromium=$dom_distiller_js_path/README.chromium
tmpdir=/tmp/domdistiller-$$ tmpdir=/tmp/domdistiller-$$
changes=/tmp/domdistiller.changes changes=$tmpdir/domdistiller.changes
bugs=$tmpdir/domdistiller.bugs
curr_gitsha=$(grep 'Version:' $readme_chromium | awk '{print $2}') curr_gitsha=$(grep 'Version:' $readme_chromium | awk '{print $2}')
rm -rf $tmpdir rm -rf $tmpdir
...@@ -23,8 +24,27 @@ ...@@ -23,8 +24,27 @@
pushd $tmpdir pushd $tmpdir
git clone https://code.google.com/p/dom-distiller/ . git clone https://code.google.com/p/dom-distiller/ .
new_gitsha=$(git rev-parse --short=10 HEAD) new_gitsha=$(git rev-parse --short=10 HEAD)
git log --oneline ${curr_gitsha}.. > $changes git log --oneline ${curr_gitsha}.. > $changes
echo -n BUG= > $bugs
# This extracts BUG= lines from the log, extracts the numbers part, removes
# whitespace and deletes empty lines. Then, split on ',', sort, uniquify and
# rejoin. Finally, remove the trailing ',' and concat to $bugs.
git log \
| grep BUG= \
| sed -e 's/.*BUG=\(.*\)/\1/' -e 's/\s*//g' -e '/^$/d' \
| tr ',' '\n' \
| sort \
| uniq \
| tr '\n' ',' \
| head --bytes=-1 \
>> $bugs
echo >> $bugs # add a newline
ant package ant package
popd popd
...@@ -34,13 +54,18 @@ ...@@ -34,13 +54,18 @@
cp $tmpdir/LICENSE $dom_distiller_js_path/ cp $tmpdir/LICENSE $dom_distiller_js_path/
sed -i "s/Version: [0-9a-f]*/Version: $new_gitsha/" $readme_chromium sed -i "s/Version: [0-9a-f]*/Version: $new_gitsha/" $readme_chromium
echo
echo
echo "---Generated commit message---"
echo
echo "Picked up changes:" echo "Picked up changes:"
cat $changes cat $changes
echo
cat $bugs
# Run checklicenses.py on the pulled files, but only print the output on # Run checklicenses.py on the pulled files, but only print the output on
# failures. # failures.
tools/checklicenses/checklicenses.py $dom_distiller_js_path > $tmpdir/checklicenses.out || cat $tmpdir/checklicenses.out tools/checklicenses/checklicenses.py $dom_distiller_js_path > $tmpdir/checklicenses.out || cat $tmpdir/checklicenses.out
rm -rf $tmpdir rm -rf $tmpdir
rm $changes
) )
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment