wp/wp-admin/js/word-count.js
changeset 7 cf61fcea0001
parent 5 5e2f62d02dcd
child 9 177826044cd9
--- a/wp/wp-admin/js/word-count.js	Tue Jun 09 11:14:17 2015 +0000
+++ b/wp/wp-admin/js/word-count.js	Mon Oct 14 17:39:30 2019 +0200
@@ -1,44 +1,220 @@
-/* global wordCountL10n */
-var wpWordCount;
-(function($,undefined) {
-	wpWordCount = {
+/**
+ * Word or character counting functionality. Count words or characters in a provided text string.
+ *
+ * @summary   Count words or characters in a text.
+ *
+ * @namespace wp.utils
+ * @since     2.6.0
+ */
 
-		settings : {
-			strip : /<[a-zA-Z\/][^<>]*>/g, // strip HTML tags
-			clean : /[0-9.(),;:!?%#$¿'"_+=\\/-]+/g, // regexp to remove punctuation, etc.
-			w : /\S\s+/g, // word-counting regexp
-			c : /\S/g // char-counting regexp for asian languages
-		},
+( function() {
+	/**
+	 * Word counting utility
+	 *
+	 * @namespace wp.utils.wordcounter
+	 * @memberof  wp.utils
+	 *
+	 * @class
+	 *
+	 * @param {Object} settings                                   Optional. Key-value object containing overrides for
+	 *                                                            settings.
+	 * @param {RegExp} settings.HTMLRegExp                        Optional. Regular expression to find HTML elements.
+	 * @param {RegExp} settings.HTMLcommentRegExp                 Optional. Regular expression to find HTML comments.
+	 * @param {RegExp} settings.spaceRegExp                       Optional. Regular expression to find irregular space
+	 *                                                            characters.
+	 * @param {RegExp} settings.HTMLEntityRegExp                  Optional. Regular expression to find HTML entities.
+	 * @param {RegExp} settings.connectorRegExp                   Optional. Regular expression to find connectors that
+	 *                                                            split words.
+	 * @param {RegExp} settings.removeRegExp                      Optional. Regular expression to find unwanted
+	 *                                                            characters to remove, reducing false positives.
+	 * @param {RegExp} settings.astralRegExp                      Optional. Regular expression to find UTF-16 surrogate
+	 *                                                            pairs when counting characters rather than words.
+	 * @param {RegExp} settings.wordsRegExp                       Optional. Regular expression to find words by spaces.
+	 * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which
+	 *                                                            are non-spaces.
+	 * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters
+	 *                                                            including spaces.
+	 * @param {RegExp} settings.shortcodesRegExp                  Optional. Regular expression to find shortcodes.
+	 * @param {Object} settings.l10n                              Optional. Localization object containing specific
+	 *                                                            configuration for the current localization.
+	 * @param {String} settings.l10n.type                         Optional. Method of finding words to count.
+	 * @param {Array}  settings.l10n.shortcodes                   Optional. Array of shortcodes that should be removed
+	 *                                                            from the text.
+	 *
+	 * @return void
+	 */
+	function WordCounter( settings ) {
+		var key,
+			shortcodes;
 
-		block : 0,
+		// Apply provided settings to object settings.
+		if ( settings ) {
+			for ( key in settings ) {
 
-		wc : function(tx, type) {
-			var t = this, w = $('.word-count'), tc = 0;
+				// Only apply the object's own (non-inherited) properties.
+				if ( settings.hasOwnProperty( key ) ) {
+					this.settings[ key ] = settings[ key ];
+				}
+			}
+		}
+
+		shortcodes = this.settings.l10n.shortcodes;
+
+		// If there are any localization shortcodes, build a regular expression that matches them and store it in the settings.
+		if ( shortcodes && shortcodes.length ) {
+			this.settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' );
+		}
+	}
 
-			if ( type === undefined )
-				type = wordCountL10n.type;
-			if ( type !== 'w' && type !== 'c' )
-				type = 'w';
+	// Default settings.
+	WordCounter.prototype.settings = {
+		HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
+		HTMLcommentRegExp: /<!--[\s\S]*?-->/g,
+		spaceRegExp: /&nbsp;|&#160;/gi,
+		HTMLEntityRegExp: /&\S+?;/g,
+
+		// \u2014 = em-dash
+		connectorRegExp: /--|\u2014/g,
 
-			if ( t.block )
-				return;
+		// Characters to be removed from input text.
+		removeRegExp: new RegExp( [
+			'[',
 
-			t.block = 1;
+				// Basic Latin (extract)
+				'\u0021-\u0040\u005B-\u0060\u007B-\u007E',
+
+				// Latin-1 Supplement (extract)
+				'\u0080-\u00BF\u00D7\u00F7',
 
-			setTimeout( function() {
-				if ( tx ) {
-					tx = tx.replace( t.settings.strip, ' ' ).replace( /&nbsp;|&#160;/gi, ' ' );
-					tx = tx.replace( t.settings.clean, '' );
-					tx.replace( t.settings[type], function(){tc++;} );
-				}
-				w.html(tc.toString());
+				/*
+				 * The following range consists of:
+				 * General Punctuation
+				 * Superscripts and Subscripts
+				 * Currency Symbols
+				 * Combining Diacritical Marks for Symbols
+				 * Letterlike Symbols
+				 * Number Forms
+				 * Arrows
+				 * Mathematical Operators
+				 * Miscellaneous Technical
+				 * Control Pictures
+				 * Optical Character Recognition
+				 * Enclosed Alphanumerics
+				 * Box Drawing
+				 * Block Elements
+				 * Geometric Shapes
+				 * Miscellaneous Symbols
+				 * Dingbats
+				 * Miscellaneous Mathematical Symbols-A
+				 * Supplemental Arrows-A
+				 * Braille Patterns
+				 * Supplemental Arrows-B
+				 * Miscellaneous Mathematical Symbols-B
+				 * Supplemental Mathematical Operators
+				 * Miscellaneous Symbols and Arrows
+				 */
+				'\u2000-\u2BFF',
 
-				setTimeout( function() { t.block = 0; }, 2000 );
-			}, 1 );
-		}
+				// Supplemental Punctuation
+				'\u2E00-\u2E7F',
+			']'
+		].join( '' ), 'g' ),
+
+		// Match UTF-16 surrogate pairs, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
+		astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
+		wordsRegExp: /\S\s+/g,
+		characters_excluding_spacesRegExp: /\S/g,
+
+		/*
+		 * Match any character except the following formatting characters:
+		 * \f = form feed
+		 * \n = new line
+		 * \r = carriage return
+		 * \t = tab
+		 * \v = vertical tab
+		 * \u00AD = soft hyphen
+		 * \u2028 = line separator
+		 * \u2029 = paragraph separator
+		 */
+		characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,
+		l10n: window.wordCountL10n || {}
 	};
 
-	$(document).bind( 'wpcountwords', function(e, txt) {
-		wpWordCount.wc(txt);
-	});
-}(jQuery));
+	/**
+	 * Counts the number of words (or other specified type) in the specified text.
+	 *
+	 * @summary  Count the number of elements in a text.
+	 *
+	 * @since    2.6.0
+	 * @memberof wp.utils.wordcounter
+	 *
+	 * @param {String}  text Text to count elements in.
+	 * @param {String}  type Optional. Specify type to use.
+	 *
+	 * @return {Number} The number of items counted.
+	 */
+	WordCounter.prototype.count = function( text, type ) {
+		var count = 0;
+
+		// Use default type if none was provided.
+		type = type || this.settings.l10n.type;
+
+		// Sanitize type to one of three possibilities: 'words', 'characters_excluding_spaces' or 'characters_including_spaces'.
+		if ( type !== 'characters_excluding_spaces' && type !== 'characters_including_spaces' ) {
+			type = 'words';
+		}
+
+		// If we have any text at all.
+		if ( text ) {
+			text = text + '\n';
+
+			// Replace all HTML with a new-line.
+			text = text.replace( this.settings.HTMLRegExp, '\n' );
+
+			// Remove all HTML comments.
+			text = text.replace( this.settings.HTMLcommentRegExp, '' );
+
+			// If a shortcode regular expression has been provided use it to remove shortcodes.
+			if ( this.settings.shortcodesRegExp ) {
+				text = text.replace( this.settings.shortcodesRegExp, '\n' );
+			}
+
+			// Normalize non-breaking space to a normal space.
+			text = text.replace( this.settings.spaceRegExp, ' ' );
+
+			if ( type === 'words' ) {
+
+				// Remove HTML Entities.
+				text = text.replace( this.settings.HTMLEntityRegExp, '' );
+
+				// Convert connectors to spaces to count attached text as words.
+				text = text.replace( this.settings.connectorRegExp, ' ' );
+
+				// Remove unwanted characters.
+				text = text.replace( this.settings.removeRegExp, '' );
+			} else {
+
+				// Convert HTML Entities to "a".
+				text = text.replace( this.settings.HTMLEntityRegExp, 'a' );
+
+				// Replace each surrogate pair with "a" so it is counted as a single character.
+				text = text.replace( this.settings.astralRegExp, 'a' );
+			}
+
+			// Match with the selected type regular expression to count the items.
+			text = text.match( this.settings[ type + 'RegExp' ] );
+
+			// If we have any matches, set the count to the number of items found.
+			if ( text ) {
+				count = text.length;
+			}
+		}
+
+		return count;
+	};
+
+	// Add the WordCounter to the WP Utils.
+	window.wp = window.wp || {};
+	window.wp.utils = window.wp.utils || {};
+	window.wp.utils.WordCounter = WordCounter;
+} )();