harden message_body generator

Phillip Davis created

as such, resulting nil/"" + fix tests:
- guard s.body with .to_s in to_catapult_possible_oob
- accept OOB-only stanzas in message handler guard
- fix tests that don't handle nil/empty message_body
- fix the main handler's guard to accept stanzas with a body OR media
- test_invisible_separator fixed, needed to bypass changes to the body
  from XML roundtrip
- discovered unholy geometries in webmock's JSON parsing

Change summary

sgx-bwmsgsv2.rb                                    |  8 
test/property/generators/message.rb                |  2 
test/property/rantly_extensions/data_extensions.rb | 66 ++++++++++++++-
test/property/test_invisible_separator.rb          | 23 +++--
test/property/test_to_catapult.rb                  |  2 
test/property/test_webhook_handler.rb              |  8 +
6 files changed, 83 insertions(+), 26 deletions(-)

Detailed changes

sgx-bwmsgsv2.rb πŸ”—

@@ -417,7 +417,7 @@ module SGXbwmsgsv2
 		end
 		LOG.debug "Found OOB URL node, checking MMS eligibility"
 
-		body = s.respond_to?(:body) ? s.body : ''
+		body = s.respond_to?(:body) ? s.body.to_s : ''
 
 		if num_dest.is_a?(String) && num_dest !~ /^\+?1/
 			unless body.include?(un.text)
@@ -597,7 +597,7 @@ module SGXbwmsgsv2
 		true
 	end
 
-	message :body do |m|
+	message(->(m) { m.body || m.at("oob|x > oob|url", oob: "jabber:x:oob") }) do |m|
 		EMPromise.all([
 			validate_num(m),
 			fetch_catapult_cred_for(m.from)
@@ -1182,8 +1182,8 @@ class WebhookHandler < Goliath::API
 					return [200, {}, "OK"]
 				end
 			else
-				text = "unknown type (#{type})"\
-					" with text: " + jparams['text']
+				text = "unknown type (#{type})" \
+					" with text: #{jparams['text']}"
 
 				# TODO: log/notify of this properly
 				LOG.warn("Unknown inbound message type", text: text)

test/property/rantly_extensions/data_extensions.rb πŸ”—

@@ -22,6 +22,12 @@ class Rantly
 			# @return [String]
 			ITALIAN_PHONE = /\+39[0-9]{9,10}/.freeze
 
+			NON_ASCII_CHAR = /[Γ€-ΓΏΔ€-ΕΏΠ€-ΣΏδΈ€-δΏΏ]/.freeze
+			ASCII_PRINT_CHAR = /[[:print:]]/.freeze
+
+			SEGMENT_SIZE_GSM7 = 160
+			SEGMENT_SIZE_UCS2 = 70
+
 			# @return [String]
 			def french_phone
 				FRENCH_PHONE.random_example(**REGEXP_EXAMPLES_OPTS)
@@ -103,13 +109,6 @@ class Rantly
 				)
 			end
 
-			# @return [String]
-			def message_body
-				words = array(range(1, 6)) { sized(range(3, 10)) { string(:alnum) } }
-				guard(words.none? { |w| BADWORD_LIST.include?(w.downcase) })
-				words.join(" ")
-			end
-
 			# @return [String]
 			def media_url
 				user_id = sized(range(3, 10)) { string(:alnum) }
@@ -117,6 +116,59 @@ class Rantly
 				ext = choose(".jpg", ".png", ".gif", ".mp4", ".pdf", ".smil", ".txt", ".xml")
 				"https://messaging.bandwidth.com/api/v2/users/#{user_id}/media/#{name}#{ext}"
 			end
+
+			# @param ascii_only [Boolean, nil] truthy=force ASCII, nil=random
+			# @param nil_pct [Integer] weight (out of 100) for nil result
+			# @param empty_pct [Integer] weight (out of 100) for empty string result
+			# @return [String, nil]
+			def message_body(ascii_only: nil, nil_pct: 2, empty_pct: 2)
+				text_pct = 100 - nil_pct - empty_pct
+
+				freq(
+					[nil_pct, proc { nil }],
+					[empty_pct, proc { "" }],
+					[text_pct, proc { _message_body_text(ascii_only: ascii_only) }]
+				)
+			end
+
+			# @param ascii_only [Boolean, nil]
+			# @return [String]
+			def _message_body_text(ascii_only: nil)
+				use_ascii = ascii_only || boolean
+				segment_size = use_ascii ? SEGMENT_SIZE_GSM7 : SEGMENT_SIZE_UCS2
+				threshold = segment_size * 3
+
+				len = freq(
+					[70, proc { range(1, threshold) }],
+					[30, proc { range(threshold + 1, threshold + segment_size * 2) }]
+				)
+
+				body = if use_ascii
+					       sized(len) { string(:print) }
+				       else
+					       _utf8_body(len)
+				       end
+
+				guard(!body.downcase.match?(BADWORDS))
+				body
+			end
+
+			# @param length [Integer]
+			# @param non_ascii_fraction [Float] 0..1
+			# @return [String]
+			def _utf8_body(length, non_ascii_fraction = 0.3)
+				n_non_ascii = [((length * non_ascii_fraction).ceil), 1].max
+				n_ascii = length - n_non_ascii
+
+				ascii_chars = Array.new(n_ascii) {
+					ASCII_PRINT_CHAR.random_example(**REGEXP_EXAMPLES_OPTS)
+				}
+				non_ascii_chars = Array.new(n_non_ascii) {
+					NON_ASCII_CHAR.random_example(**REGEXP_EXAMPLES_OPTS)
+				}
+
+				(ascii_chars + non_ascii_chars).shuffle.join
+			end
 		end
 	end
 

test/property/test_invisible_separator.rb πŸ”—

@@ -17,7 +17,7 @@ class InvisibleSeparatorPropertyTest < Minitest::Test
 
 	def test_message_containing_invisible_separator_is_rejected
 		property_of {
-			chars = message_body.chars
+			chars = message_body(nil_pct: 0).chars
 			insertions = range(1, 3)
 			insertions.times { chars.insert(range(0, chars.length), "\u2063") }
 			body = chars.join
@@ -52,24 +52,27 @@ class InvisibleSeparatorPropertyTest < Minitest::Test
 	def test_message_without_invisible_separator_is_not_rejected_as_unavailable
 		property_of {
 			dest = nanpa_phone
-			[message_body, dest]
+			[message_body(nil_pct: 0, empty_pct: 0), dest]
 		}.check { |body, dest|
 			reset_stanzas!
 			reset_redis!
 			WebMock.reset!
 
-			bw_req = stub_request(:post, BW_MESSAGES_URL).with(
-				body: hash_including(
-					to: dest,
-					text: body
-				)
-			).to_return(
+			m = Blather::Stanza::Message.new("#{dest}@component", body)
+			m.from = "test@example.com"
+
+			# WebMock's JSON parser does ad-hoc conversion to YAML (!!!!)
+			# then returns the parsed YAML object and pretends it's JSON.
+			# The YAML parser chokes on (at least) backslashes. Avoid that
+			# mess when you can.
+			bw_req = stub_request(:post, BW_MESSAGES_URL).with { |req|
+				parsed = ::JSON.parse(req.body)
+				parsed["to"] == dest && parsed["text"] == m.body.to_s
+			}.to_return(
 				status: 201,
 				body: JSON.dump(id: "bw-msg-stub")
 			)
 
-			m = Blather::Stanza::Message.new("#{dest}@component", body)
-			m.from = "test@example.com"
 			process_stanza(m)
 
 			assert_empty written,

test/property/test_to_catapult.rb πŸ”—

@@ -15,7 +15,7 @@ class ToCatapultPropertyTest < Minitest::Test
 			resource = maybe_http_escapable_string
 			status = choose(201, 202)
 
-			[dest, message_body, stanza_id, resource, status]
+			[dest, message_body(nil_pct: 0, empty_pct: 0), stanza_id, resource, status]
 		}.check { |dest, body, stanza_id, resource, status|
 			reset_stanzas!
 			reset_redis!

test/property/test_webhook_handler.rb πŸ”—

@@ -255,6 +255,7 @@ class WebhookPropertyTest < Minitest::Test
 								[top_level_to] +
 								array(integer(2)) { nanpa_phone }
 						}
+						.text { message_body(nil_pct: 0, empty_pct: 0) }
 						.generate(registered, jid, dir)
 				}
 				.type { "message-received" }
@@ -303,6 +304,7 @@ class WebhookPropertyTest < Minitest::Test
 								[top_level_to] +
 								array(integer(2)) { nanpa_phone }
 						}
+						.text { message_body(nil_pct: 0, empty_pct: 0) }
 						.generate(registered, jid, dir)
 				}
 				.generate
@@ -332,7 +334,7 @@ class WebhookPropertyTest < Minitest::Test
 	end
 	em :test_inbound_resend_emits_correct_resend_stream_event
 
-	def test_inbound_empty_text_no_media_returns_400
+	def test_inbound_empty_or_nil_text_no_media_returns_400
 		property_of {
 			Webhook
 				.new(REDIS)
@@ -341,7 +343,7 @@ class WebhookPropertyTest < Minitest::Test
 					Message
 						.new(REDIS)
 						.to { [nanpa_phone] }
-						.text { "" }
+						.text { choose(nil, "") }
 						.media { nil }
 						.generate(registered, jid, dir)
 				}
@@ -354,7 +356,7 @@ class WebhookPropertyTest < Minitest::Test
 			assert_empty entries
 		}
 	end
-	em :test_inbound_empty_text_no_media_returns_400
+	em :test_inbound_empty_or_nil_text_no_media_returns_400
 
 	def test_inbound_unknown_type_sends_notification
 		property_of {