/**
 * Word or character counting functionality. Count words or characters in a provided text string.
 *
 * @summary   Count words or characters in a text.
 *
 * @namespace wp.utils
 * @since     2.6.0
 */

( function( root ) {
	/**
	 * Word counting utility
	 *
	 * Each instance gets its own settings object that inherits from
	 * `WordCounter.prototype.settings`, so overrides passed to one instance never
	 * change the defaults seen by other instances.
	 *
	 * @namespace wp.utils.wordcounter
	 * @memberof  wp.utils
	 *
	 * @class
	 *
	 * @param {Object} settings                                   Optional. Key-value object containing overrides for
	 *                                                            settings.
	 * @param {RegExp} settings.HTMLRegExp                        Optional. Regular expression to find HTML elements.
	 * @param {RegExp} settings.HTMLcommentRegExp                 Optional. Regular expression to find HTML comments.
	 * @param {RegExp} settings.spaceRegExp                       Optional. Regular expression to find irregular space
	 *                                                            characters.
	 * @param {RegExp} settings.HTMLEntityRegExp                  Optional. Regular expression to find HTML entities.
	 * @param {RegExp} settings.connectorRegExp                   Optional. Regular expression to find connectors that
	 *                                                            split words.
	 * @param {RegExp} settings.removeRegExp                      Optional. Regular expression to find and remove unwanted
	 *                                                            characters to reduce false-positives.
	 * @param {RegExp} settings.astralRegExp                      Optional. Regular expression to find unwanted
	 *                                                            characters when searching for non-words.
	 * @param {RegExp} settings.wordsRegExp                       Optional. Regular expression to find words by spaces.
	 * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which
	 *                                                            are non-spaces.
	 * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters
	 *                                                            including spaces.
	 * @param {RegExp} settings.shortcodesRegExp                  Optional. Regular expression to find shortcodes.
	 * @param {Object} settings.l10n                              Optional. Localization object containing specific
	 *                                                            configuration for the current localization.
	 * @param {String} settings.l10n.type                         Optional. Method of finding words to count.
	 * @param {Array}  settings.l10n.shortcodes                   Optional. Array of shortcodes that should be removed
	 *                                                            from the text.
	 *
	 * @return void
	 */
	function WordCounter( settings ) {
		var hasOwn = Object.prototype.hasOwnProperty,
			key,
			l10n,
			shortcodes;

		/*
		 * Shadow the shared prototype defaults with a per-instance object.
		 * Writing straight into `this.settings` would otherwise mutate
		 * `WordCounter.prototype.settings` and leak into every other instance.
		 */
		this.settings = Object.create( this.settings );

		// Apply provided settings to object settings.
		if ( settings ) {
			for ( key in settings ) {

				// Only apply the caller's own properties (works for prototype-less objects too).
				if ( hasOwn.call( settings, key ) ) {
					this.settings[ key ] = settings[ key ];
				}
			}
		}

		// Tolerate a caller overriding `l10n` with null/undefined.
		l10n = this.settings.l10n || {};
		shortcodes = l10n.shortcodes;

		// If there are any localization shortcodes, build the regular expression that strips them.
		if ( shortcodes && shortcodes.length ) {
			this.settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' );
		}
	}

	// Default settings.
	WordCounter.prototype.settings = {
		HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
		HTMLcommentRegExp: /<!--[\s\S]*?-->/g,

		// Matches the textual non-breaking space entities, not the decoded character.
		spaceRegExp: /&nbsp;|&#160;/gi,
		HTMLEntityRegExp: /&\S+?;/g,

		// \u2014 = em-dash
		connectorRegExp: /--|\u2014/g,

		// Characters to be removed from input text.
		removeRegExp: new RegExp( [
			'[',

				// Basic Latin (extract)
				'\u0021-\u0040\u005B-\u0060\u007B-\u007E',

				// Latin-1 Supplement (extract)
				'\u0080-\u00BF\u00D7\u00F7',

				/*
				 * The following range consists of:
				 * General Punctuation
				 * Superscripts and Subscripts
				 * Currency Symbols
				 * Combining Diacritical Marks for Symbols
				 * Letterlike Symbols
				 * Number Forms
				 * Arrows
				 * Mathematical Operators
				 * Miscellaneous Technical
				 * Control Pictures
				 * Optical Character Recognition
				 * Enclosed Alphanumerics
				 * Box Drawing
				 * Block Elements
				 * Geometric Shapes
				 * Miscellaneous Symbols
				 * Dingbats
				 * Miscellaneous Mathematical Symbols-A
				 * Supplemental Arrows-A
				 * Braille Patterns
				 * Supplemental Arrows-B
				 * Miscellaneous Mathematical Symbols-B
				 * Supplemental Mathematical Operators
				 * Miscellaneous Symbols and Arrows
				 */
				'\u2000-\u2BFF',

				// Supplemental Punctuation
				'\u2E00-\u2E7F',
			']'
		].join( '' ), 'g' ),

		// Remove UTF-16 surrogate points, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
		astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
		wordsRegExp: /\S\s+/g,
		characters_excluding_spacesRegExp: /\S/g,

		/*
		 * Match anything that is not a formatting character, excluding:
		 * \f = form feed
		 * \n = new line
		 * \r = carriage return
		 * \t = tab
		 * \v = vertical tab
		 * \u00AD = soft hyphen
		 * \u2028 = line separator
		 * \u2029 = paragraph separator
		 */
		characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,
		l10n: root.wordCountL10n || {}
	};

	/**
	 * Counts the number of words (or other specified type) in the specified text.
	 *
	 * @summary  Count the number of elements in a text.
	 *
	 * @since    2.6.0
	 * @memberof wp.utils.wordcounter
	 *
	 * @param {String}  text Text to count elements in.
	 * @param {String}  type Optional. Specify type to use: 'words', 'characters_excluding_spaces'
	 *                       or 'characters_including_spaces'. Anything else falls back to 'words'.
	 *
	 * @return {Number} The number of items counted; 0 for empty or missing text.
	 */
	WordCounter.prototype.count = function( text, type ) {
		var settings = this.settings,
			l10n = settings.l10n || {},
			matches;

		// Use default type if none was provided.
		type = type || l10n.type;

		// Sanitize type to one of three possibilities: 'words', 'characters_excluding_spaces' or 'characters_including_spaces'.
		if ( type !== 'characters_excluding_spaces' && type !== 'characters_including_spaces' ) {
			type = 'words';
		}

		// Nothing to count.
		if ( ! text ) {
			return 0;
		}

		// A trailing new-line lets the word pattern (non-space followed by whitespace) see the last word.
		text = text + '\n';

		// Replace all HTML with a new-line.
		text = text.replace( settings.HTMLRegExp, '\n' );

		// Remove all HTML comments.
		text = text.replace( settings.HTMLcommentRegExp, '' );

		// If a shortcode regular expression has been provided use it to remove shortcodes.
		if ( settings.shortcodesRegExp ) {
			text = text.replace( settings.shortcodesRegExp, '\n' );
		}

		// Normalize non-breaking space to a normal space.
		text = text.replace( settings.spaceRegExp, ' ' );

		if ( type === 'words' ) {

			// Remove HTML Entities.
			text = text.replace( settings.HTMLEntityRegExp, '' );

			// Convert connectors to spaces to count attached text as words.
			text = text.replace( settings.connectorRegExp, ' ' );

			// Remove unwanted characters.
			text = text.replace( settings.removeRegExp, '' );
		} else {

			// Convert HTML Entities to "a" so each counts as one character.
			text = text.replace( settings.HTMLEntityRegExp, 'a' );

			// Collapse each surrogate pair to "a" so it counts as one character.
			text = text.replace( settings.astralRegExp, 'a' );
		}

		// Match with the selected type regular expression to count the items.
		matches = text.match( settings[ type + 'RegExp' ] );

		return matches ? matches.length : 0;
	};

	// Add the WordCounter to the WP Utils.
	root.wp = root.wp || {};
	root.wp.utils = root.wp.utils || {};
	root.wp.utils.WordCounter = WordCounter;

	// In browsers `root` is `window`; the fallbacks only make the module loadable elsewhere (e.g. tests).
} )( typeof window !== 'undefined' ? window : ( typeof globalThis !== 'undefined' ? globalThis : this ) );