From 29b5b46c87979f49e2d1c4e574429ac649ff2bdf Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 3 Dec 2017 17:28:30 -0600 Subject: [PATCH 1/6] Strip HTML from keyword mute input. #234. --- app/lib/feed_manager.rb | 17 +---------------- app/models/glitch/filter_helper.rb | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 16 deletions(-) create mode 100644 app/models/glitch/filter_helper.rb diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index fe5ebfc36..7507b37d2 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -178,22 +178,7 @@ class FeedManager end def keyword_filter?(status, receiver_id) - text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) - tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) - - should_filter = text_matcher.matches?(status.text) - should_filter ||= text_matcher.matches?(status.spoiler_text) - should_filter ||= tag_matcher.matches?(status.tags) - - if status.reblog? - reblog = status.reblog - - should_filter ||= text_matcher.matches?(reblog.text) - should_filter ||= text_matcher.matches?(reblog.spoiler_text) - should_filter ||= tag_matcher.matches?(status.tags) - end - - should_filter + Glitch::FilterHelper.new(receiver_id).matches?(status) end def filter_from_mentions?(status, receiver_id) diff --git a/app/models/glitch/filter_helper.rb b/app/models/glitch/filter_helper.rb new file mode 100644 index 000000000..0dfd69526 --- /dev/null +++ b/app/models/glitch/filter_helper.rb @@ -0,0 +1,23 @@ +class Glitch::FilterHelper + include ActionView::Helpers::SanitizeHelper + + attr_reader :text_matcher + attr_reader :tag_matcher + + def initialize(receiver_id) + @text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) + @tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) + end + + def matches?(status) + matchers_match?(status) || (status.reblog? && matchers_match?(status.reblog)) + end + + private + + def matchers_match?(status) + text_matcher.matches?(strip_tags(status.text)) || + text_matcher.matches?(strip_tags(status.spoiler_text)) || + tag_matcher.matches?(status.tags) + end +end From d263e3bc2de720f4e6749cf4a8f2074427b65d07 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 3 Dec 2017 21:40:28 -0600 Subject: [PATCH 2/6] Fill out some examples for Glitch::FilterHelper. #234. Also add HTML entity decoding to Glitch::FilterHelper, which is needed to e.g. match "<" to the tag-stripped version of "

<3

" or "

<3

". --- app/models/glitch/filter_helper.rb | 16 ++++++--- spec/models/glitch/filter_helper_spec.rb | 43 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 spec/models/glitch/filter_helper_spec.rb diff --git a/app/models/glitch/filter_helper.rb b/app/models/glitch/filter_helper.rb index 0dfd69526..11be877c1 100644 --- a/app/models/glitch/filter_helper.rb +++ b/app/models/glitch/filter_helper.rb @@ -1,12 +1,16 @@ +require 'htmlentities' + class Glitch::FilterHelper include ActionView::Helpers::SanitizeHelper attr_reader :text_matcher attr_reader :tag_matcher + attr_reader :entity_decoder def initialize(receiver_id) - @text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) - @tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) + @text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) + @tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) + @entity_decoder = HTMLEntities.new end def matches?(status) @@ -16,8 +20,12 @@ class Glitch::FilterHelper private def matchers_match?(status) - text_matcher.matches?(strip_tags(status.text)) || - text_matcher.matches?(strip_tags(status.spoiler_text)) || + text_matcher.matches?(prepare_text(status.text)) || + text_matcher.matches?(prepare_text(status.spoiler_text)) || tag_matcher.matches?(status.tags) end + + def prepare_text(text) + entity_decoder.decode(strip_tags(text)) + end end diff --git a/spec/models/glitch/filter_helper_spec.rb b/spec/models/glitch/filter_helper_spec.rb new file mode 100644 index 000000000..9a808667d --- /dev/null +++ b/spec/models/glitch/filter_helper_spec.rb @@ -0,0 +1,43 @@ +require 'rails_helper' + +RSpec.describe Glitch::FilterHelper do + describe '#matches?' do + let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } + let(:helper) { Glitch::FilterHelper.new(alice) } + + it 'ignores names of HTML tags in status text' do + status = Fabricate(:status, text: 'uh example') + Glitch::KeywordMute.create!(account: alice, keyword: 'addr') + + expect(helper.matches?(status)).to be false + end + + it 'ignores properties of HTML tags in status text' do + status = Fabricate(:status, text: 'uh example') + Glitch::KeywordMute.create!(account: alice, keyword: 'href') + + expect(helper.matches?(status)).to be false + end + + it 'matches text inside HTML tags' do + status = Fabricate(:status, text: '

HEY THIS IS SOMETHING ANNOYING

') + Glitch::KeywordMute.create!(account: alice, keyword: 'annoying') + + expect(helper.matches?(status)).to be true + end + + it 'matches < in HTML-stripped text' do + status = Fabricate(:status, text: '

I <3 oats

') + Glitch::KeywordMute.create!(account: alice, keyword: '<3') + + expect(helper.matches?(status)).to be true + end + + it 'matches < in HTML text' do + status = Fabricate(:status, text: '

I <3 oats

') + Glitch::KeywordMute.create!(account: alice, keyword: '<3') + + expect(helper.matches?(status)).to be true + end + end +end From 53c86b29f05049d77d17a35a0ca6287174431783 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 3 Dec 2017 21:49:55 -0600 Subject: [PATCH 3/6] Glitch::FilterHelper -> Glitch::KeywordMuteHelper. #234. The class helps out with keyword mutes, not just some general concept of "filtering". --- app/lib/feed_manager.rb | 2 +- .../glitch/{filter_helper.rb => keyword_mute_helper.rb} | 4 ++-- .../{filter_helper_spec.rb => keyword_mute_helper_spec.rb} | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename app/models/glitch/{filter_helper.rb => keyword_mute_helper.rb} (88%) rename spec/models/glitch/{filter_helper_spec.rb => keyword_mute_helper_spec.rb} (93%) diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 7507b37d2..c16b25635 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -178,7 +178,7 @@ class FeedManager end def keyword_filter?(status, receiver_id) - Glitch::FilterHelper.new(receiver_id).matches?(status) + Glitch::KeywordMuteHelper.new(receiver_id).matches?(status) end def filter_from_mentions?(status, receiver_id) diff --git a/app/models/glitch/filter_helper.rb b/app/models/glitch/keyword_mute_helper.rb similarity index 88% rename from app/models/glitch/filter_helper.rb rename to app/models/glitch/keyword_mute_helper.rb index 11be877c1..1b8c64e4e 100644 --- a/app/models/glitch/filter_helper.rb +++ b/app/models/glitch/keyword_mute_helper.rb @@ -1,6 +1,6 @@ require 'htmlentities' -class Glitch::FilterHelper +class Glitch::KeywordMuteHelper include ActionView::Helpers::SanitizeHelper attr_reader :text_matcher @@ -26,6 +26,6 @@ class Glitch::FilterHelper end def prepare_text(text) - entity_decoder.decode(strip_tags(text)) + entity_decoder.decode(strip_tags(text)).tap { |x| puts x } end end diff --git a/spec/models/glitch/filter_helper_spec.rb b/spec/models/glitch/keyword_mute_helper_spec.rb similarity index 93% rename from spec/models/glitch/filter_helper_spec.rb rename to spec/models/glitch/keyword_mute_helper_spec.rb index 9a808667d..9d09e58da 100644 --- a/spec/models/glitch/filter_helper_spec.rb +++ b/spec/models/glitch/keyword_mute_helper_spec.rb @@ -1,9 +1,9 @@ require 'rails_helper' -RSpec.describe Glitch::FilterHelper do +RSpec.describe Glitch::KeywordMuteHelper do describe '#matches?' do let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } - let(:helper) { Glitch::FilterHelper.new(alice) } + let(:helper) { Glitch::KeywordMuteHelper.new(alice) } it 'ignores names of HTML tags in status text' do status = Fabricate(:status, text: 'uh example') From 9105b0c95428e3bbecd6f8ad106508095eed5643 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 10 Feb 2018 02:32:39 -0600 Subject: [PATCH 4/6] Introduce html2text for extracting plaintext from statuses. #236. Unlike strip_tags, html2text will preserve text present in other nodes, e.g. anchor tags: [1] pry(main)> str = 'A link' => "A link" [2] pry(main)> Html2Text.convert(str) => "[A link](http://www.example.com)" [3] pry(main)> include ActionView::Helpers::SanitizeHelper => Object [4] pry(main)> strip_tags(str) => "A link" Preserving the href of an anchor allows keyword mutes to also match on URLs, which is something that the frontend regex filter can currently do. --- Gemfile | 1 + Gemfile.lock | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Gemfile b/Gemfile index 1d128d657..d2cd3b42d 100644 --- a/Gemfile +++ b/Gemfile @@ -42,6 +42,7 @@ gem 'fast_blank', '~> 1.0' gem 'goldfinger', '~> 2.1' gem 'hiredis', '~> 0.6' gem 'redis-namespace', '~> 1.5' +gem 'html2text' gem 'htmlentities', '~> 4.3' gem 'http', '~> 3.0' gem 'http_accept_language', '~> 2.1' diff --git a/Gemfile.lock b/Gemfile.lock index 3a65f35a5..3400b1a0f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -205,6 +205,8 @@ GEM highline (1.7.10) hiredis (0.6.1) hkdf (0.3.0) + html2text (0.2.1) + nokogiri (~> 1.6) htmlentities (4.3.4) http (3.0.0) addressable (~> 2.3) @@ -601,6 +603,7 @@ DEPENDENCIES goldfinger (~> 2.1) hamlit-rails (~> 0.2) hiredis (~> 0.6) + html2text htmlentities (~> 4.3) http (~> 3.0) http_accept_language (~> 2.1) From f1f67c46c5b369476090429a46dbc646d772ae25 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 10 Feb 2018 10:32:14 -0600 Subject: [PATCH 5/6] Use Html2Text to generate plaintext for keyword mutes. #236. This allows us to match URLs inside link hrefs. --- app/models/glitch/keyword_mute_helper.rb | 8 ++------ spec/models/glitch/keyword_mute_helper_spec.rb | 7 +++++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/app/models/glitch/keyword_mute_helper.rb b/app/models/glitch/keyword_mute_helper.rb index 1b8c64e4e..6d067947f 100644 --- a/app/models/glitch/keyword_mute_helper.rb +++ b/app/models/glitch/keyword_mute_helper.rb @@ -1,16 +1,12 @@ -require 'htmlentities' +require 'html2text' class Glitch::KeywordMuteHelper - include ActionView::Helpers::SanitizeHelper - attr_reader :text_matcher attr_reader :tag_matcher - attr_reader :entity_decoder def initialize(receiver_id) @text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) @tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) - @entity_decoder = HTMLEntities.new end def matches?(status) @@ -26,6 +22,6 @@ class Glitch::KeywordMuteHelper end def prepare_text(text) - entity_decoder.decode(strip_tags(text)).tap { |x| puts x } + Html2Text.convert(text) end end diff --git a/spec/models/glitch/keyword_mute_helper_spec.rb b/spec/models/glitch/keyword_mute_helper_spec.rb index 9d09e58da..b3f991d5b 100644 --- a/spec/models/glitch/keyword_mute_helper_spec.rb +++ b/spec/models/glitch/keyword_mute_helper_spec.rb @@ -39,5 +39,12 @@ RSpec.describe Glitch::KeywordMuteHelper do expect(helper.matches?(status)).to be true end + + it 'matches link hrefs in HTML text' do + status = Fabricate(:status, text: '

yep

') + Glitch::KeywordMute.create!(account: alice, keyword: 'milk') + + expect(helper.matches?(status)).to be true + end end end From 380989def863fb8eafbd7f6b53f8bfbf9f813528 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 10 Feb 2018 11:01:49 -0600 Subject: [PATCH 6/6] Remove NOKOGIRI_USE_SYSTEM_LIBRARIES from Travis. #236. Nokogiri linked to the version of libxml2 shipped in Travis' Trusty image exhibits incorrect behavior when parsing strings resembling

I <3 oats

In particular, Html2Text.convert (which uses nokogiri) will return "I" for the above string when it ought to return "I <3 oats". The version of libxml2 shipped with nokogiri 1.8.1+ exhibits correct parse behavior for this string, as does the libxml2 present in Ubuntu Xenial. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c9a442aed..505d8683e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,6 @@ env: - LOCAL_DOMAIN=cb6e6126.ngrok.io - LOCAL_HTTPS=true - RAILS_ENV=test - - NOKOGIRI_USE_SYSTEM_LIBRARIES=true - PARALLEL_TEST_PROCESSORS=2 - "PATH=$HOME:$PATH"