data_extensions.rb

  1# frozen_string_literal: true
  2
require "time"

require "regexp-examples"
  4
  5class Rantly
  6	module Data
  7		module Extensions
  8			REGEXP_EXAMPLES_OPTS = {
  9				max_group_results: 10,
 10				max_repeater_variance: 10
 11			}.freeze
 12
 13			# @see https://github.com/mnestorov/regex-patterns
 14			# @return [String]
 15			FRENCH_PHONE = /\+33[1-9]\d{8}/.freeze
 16			# @return [String]
 17			GERMAN_PHONE = /\+49[1-9]\d{3,12}/.freeze
 18			# @return [String]
 19			UK_PHONE = /\+44[1-9]\d{9,10}/.freeze
 20			# @return [String]
 21			SPANISH_PHONE = /\+34[6-9]\d{8}/.freeze
 22			# @return [String]
 23			ITALIAN_PHONE = /\+39[0-9]{9,10}/.freeze
 24
 25			NON_ASCII_CHAR = /[À-ÿĀ-ſЀ-ӿ一-俿]/.freeze
 26			ASCII_PRINT_CHAR = /[[:print:]]/.freeze
 27
 28			SEGMENT_SIZE_GSM7 = 160
 29			SEGMENT_SIZE_UCS2 = 70
 30
 31			# @return [String]
 32			def french_phone
 33				FRENCH_PHONE.random_example(**REGEXP_EXAMPLES_OPTS)
 34			end
 35
 36			# @return [String]
 37			def german_phone
 38				GERMAN_PHONE.random_example(**REGEXP_EXAMPLES_OPTS)
 39			end
 40
 41			# @return [String]
 42			def uk_phone
 43				UK_PHONE.random_example(**REGEXP_EXAMPLES_OPTS)
 44			end
 45
 46			# @return [String]
 47			def spanish_phone
 48				SPANISH_PHONE.random_example(**REGEXP_EXAMPLES_OPTS)
 49			end
 50
 51			# @return [String]
 52			def italian_phone
 53				ITALIAN_PHONE.random_example(**REGEXP_EXAMPLES_OPTS)
 54			end
 55
 56			# @return [String]
 57			def non_nanp_phone
 58				send(
 59					choose(
 60						:french_phone, :german_phone, :uk_phone,
 61						:spanish_phone, :italian_phone
 62					)
 63				)
 64			end
 65
 66			# @note https://stackoverflow.com/questions/6478875/regular-expression-matching-e-164-formatted-phone-numbers
 67			# @return [String]
 68			def nanpa_phone
 69				"+1" +
 70					sized(1) { string(/[2-9]/) } +
 71					sized(2) { string(/[0-9]/) } +
 72					sized(1) { string(/[2-9]/) } +
 73					sized(6) { string(/[0-9]/) }
 74			end
 75
 76			# @note https://stackoverflow.com/questions/4894198/how-to-generate-a-random-date-in-ruby
 77			# @return [String]
 78			def iso8601(from = 0.0, to = Time.now)
 79				value { Time.at(from + float * (to.to_f - from.to_f)).iso8601 }
 80			end
 81
 82			# @return [String]
 83			def bare_jid
 84				local = sized(range(3, 12)) { string(:alnum) }
 85				domain = sized(range(3, 8)) { string(:lower) }
 86				"#{local}@#{domain}.example.com"
 87			end
 88
 89			# @return [String]
 90			def bw_message_id
 91				sized(range(6, 19)) { string(:alnum) }
 92			end
 93
 94			# @return [String]
 95			def shortcode
 96				range(10000, 999999).to_s
 97			end
 98
 99			# @return [Array<String>]
100			HTTP_ESCAPABLE = " &=?/+@#".chars.freeze
101
102			# @return [String]
103			def maybe_http_escapable_string
104				base = sized(range(3, 8)) { string(:alnum) }
105				choose(
106					base,
107					base + choose(*HTTP_ESCAPABLE) +
108						sized(range(1, 5)) { string(:alnum) }
109				)
110			end
111
112			# @return [String]
113			def media_url
114				user_id = sized(range(3, 10)) { string(:alnum) }
115				name = sized(range(3, 12)) { string(:alnum) }
116				ext = choose(".jpg", ".png", ".gif", ".mp4", ".pdf", ".smil", ".txt", ".xml")
117				"https://messaging.bandwidth.com/api/v2/users/#{user_id}/media/#{name}#{ext}"
118			end
119
120			# @param ascii_only [Boolean, nil] truthy=force ASCII, nil=random
121			# @param nil_pct [Integer] weight (out of 100) for nil result
122			# @param empty_pct [Integer] weight (out of 100) for empty string result
123			# @return [String, nil]
124			def message_body(ascii_only: nil, nil_pct: 2, empty_pct: 2)
125				text_pct = 100 - nil_pct - empty_pct
126
127				freq(
128					[nil_pct, proc { nil }],
129					[empty_pct, proc { "" }],
130					[text_pct, proc { _message_body_text(ascii_only: ascii_only) }]
131				)
132			end
133
134			# @param ascii_only [Boolean, nil]
135			# @return [String]
136			def _message_body_text(ascii_only: nil)
137				use_ascii = ascii_only || boolean
138				segment_size = use_ascii ? SEGMENT_SIZE_GSM7 : SEGMENT_SIZE_UCS2
139				threshold = segment_size * 3
140
141				len = freq(
142					[70, proc { range(1, threshold) }],
143					[30, proc { range(threshold + 1, threshold + segment_size * 2) }]
144				)
145
146				body = if use_ascii
147					       sized(len) { string(:print) }
148				       else
149					       _utf8_body(len)
150				       end
151
152				guard(!body.downcase.match?(BADWORDS))
153				body
154			end
155
156			# @param length [Integer]
157			# @param non_ascii_fraction [Float] 0..1
158			# @return [String]
159			def _utf8_body(length, non_ascii_fraction = 0.3)
160				n_non_ascii = [((length * non_ascii_fraction).ceil), 1].max
161				n_ascii = length - n_non_ascii
162
163				ascii_chars = Array.new(n_ascii) {
164					ASCII_PRINT_CHAR.random_example(**REGEXP_EXAMPLES_OPTS)
165				}
166				non_ascii_chars = Array.new(n_non_ascii) {
167					NON_ASCII_CHAR.random_example(**REGEXP_EXAMPLES_OPTS)
168				}
169
170				(ascii_chars + non_ascii_chars).shuffle.join
171			end
172		end
173	end
174
175	include Data::Extensions
176end