# frozen_string_literal: true

# Produces the normalized form of a hashtag string by running it through a
# fixed pipeline of text transformations (see #normalize for the order).
class HashtagNormalizer
  # Normalizes +str+ by applying, in this order:
  #
  # 1. Unicode NFKC normalization (#cjk_width)
  # 2. multibyte-aware lowercasing (#lowercase)
  # 3. ASCII folding via ASCIIFolding (#ascii_folding)
  # 4. removal of everything matched by Tag::HASHTAG_INVALID_CHARS_RE
  #    (#remove_invalid_characters)
  #
  # @param str [String] the raw hashtag text
  # @return [String] the normalized hashtag text
  def normalize(str)
    remove_invalid_characters(ascii_folding(lowercase(cjk_width(str))))
  end

  private

  # Deletes every substring matched by Tag::HASHTAG_INVALID_CHARS_RE.
  # The pattern is defined outside this file.
  def remove_invalid_characters(str)
    str.gsub(Tag::HASHTAG_INVALID_CHARS_RE, '')
  end

  # Delegates to ASCIIFolding#fold on a fresh ASCIIFolding instance.
  # NOTE(review): presumably maps accented letters to ASCII equivalents;
  # confirm against the ASCIIFolding implementation.
  def ascii_folding(str)
    ASCIIFolding.new.fold(str)
  end

  # Lowercases through the +mb_chars+ proxy and converts the result back to
  # a plain String.
  def lowercase(str)
    str.mb_chars.downcase.to_s
  end

  # Applies NFKC compatibility normalization, which (among other things)
  # replaces full-width and half-width character forms with their standard
  # counterparts.
  def cjk_width(str)
    str.unicode_normalize(:nfkc)
  end
end