/*!
 * XRegExp 3.0.0-pre
 *
 * Steven Levithan © 2007-2012 MIT License
 */

/**
 * XRegExp provides augmented, extensible regular expressions. You get new syntax, flags, and
 * methods beyond what browsers support natively. XRegExp is also a regex utility belt with tools
 * to make your client-side grepping simpler and more powerful, while freeing you from worrying
 * about pesky cross-browser inconsistencies and the dubious `lastIndex` property.
 */
// NOTE(review): the wrapper declares a parameter named `undefined`; presumably it is invoked
// without arguments so the name is guaranteed pristine -- the call site is outside this section
var XRegExp = (function(undefined) {
    'use strict';

/* ==============================
 * Private variables
 * ============================== */

    var // ...

    // Property name used for extended regex instance data
        REGEX_DATA = 'xregexp',

    // Internal reference to the `XRegExp` object
        self,

    // Optional features that can be installed and uninstalled
        features = {
            astral: false,
            natives: false
        },

    // Store native methods to use and restore ('native' is an ES3 reserved keyword). Private
    // helpers call these via `nativ.<method>.call(...)` so they keep working even while the
    // prototypes are overridden by `setNatives`
        nativ = {
            exec: RegExp.prototype.exec,
            test: RegExp.prototype.test,
            match: String.prototype.match,
            replace: String.prototype.replace,
            split: String.prototype.split
        },

    // Storage for fixed/extended native methods (populated outside this section)
        fixed = {},

    // Storage for regexes cached by `XRegExp.cache`
        cache = {},

    // Storage for pattern details cached by the `XRegExp` constructor
        patternCache = {},

    // Storage for regex syntax tokens added internally or by `XRegExp.addToken`
        tokens = [],

    // Token scopes
        defaultScope = 'default',
        classScope = 'class',

    // Regexes that match native regex syntax, including octals. Keyed by scope name, so they
    // can be looked up with `nativeTokens[scope]`
        nativeTokens = {
            // Any native multicharacter token in default scope, or any single character
            'default': /\\(?:0(?:[0-3][0-7]{0,2}|[4-7][0-7]?)?|[1-9]\d*|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S])|\(\?[:=!]|[?*+]\?|{\d+(?:,\d*)?}\??|[\s\S]/,
            // Any native multicharacter token in character class scope, or any single character
            'class': /\\(?:[0-3][0-7]{0,2}|[4-7][0-7]?|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S])|[\s\S]/
        },

    // Any backreference or dollar-prefixed character in replacement strings
        replacementToken = /\$(?:{([\w$]+)}|(\d\d?|[\s\S]))/g,

    // Check for correct `exec` handling of nonparticipating capturing groups: the lazy optional
    // group in `/()??/` is skipped when matching '', so backreference 1 should be `undefined`
        correctExecNpcg = nativ.exec.call(/()??/, '')[1] === undefined,

    // Check for flag y support
        hasNativeY = RegExp.prototype.sticky !== undefined,

    // Tracker for known flags, including addon flags. Flag y is only treated as known when the
    // engine supports it natively
        registeredFlags = {
            g: true,
            i: true,
            m: true,
            y: hasNativeY
        },

    // Shortcut to `Object.prototype.toString`
        toString = {}.toString,

    // Shortcut to `XRegExp.addToken` (assigned outside this section)
        add;

/* ==============================
 * Private functions
 * ============================== */

    /**
     * Attaches named capture data and `XRegExp.prototype` properties to a regex object.
     * @private
     * @param {RegExp} regex Regex to augment. It is modified in place and also returned.
     * @param {Array} captureNames Array with capture names, or `null`.
     * @param {Boolean} [addProto=false] Whether to attach `XRegExp.prototype` properties. Not
     *   attaching properties avoids a minor performance penalty.
     * @returns {RegExp} Augmented regex.
     */
    function augment(regex, captureNames, addProto) {
        var p;

        if (addProto) {
            // Can't auto-inherit these since the XRegExp constructor returns a nonprimitive value
            if (regex.__proto__) {
                // Where `__proto__` is available, swap the prototype instead of copying members
                regex.__proto__ = self.prototype;
            } else {
                for (p in self.prototype) {
                    // A `self.prototype.hasOwnProperty(p)` check wouldn't be worth it here, since
                    // this is performance sensitive, and enumerable `Object.prototype` or
                    // `RegExp.prototype` extensions exist on `regex.prototype` anyway
                    regex[p] = self.prototype[p];
                }
            }
        }

        // Any previously attached instance data is replaced wholesale by a fresh object
        regex[REGEX_DATA] = {captureNames: captureNames};

        return regex;
    }

    /**
     * Removes any duplicate characters from the provided string. A character is dropped whenever
     * the same character appears again later, so the last occurrence of each one is kept.
     * @private
     * @param {String} str String to remove duplicate characters from.
     * @returns {String} String with any duplicate characters removed.
     */
    function clipDuplicates(str) {
        return nativ.replace.call(str, /([\s\S])(?=[\s\S]*\1)/g, '');
    }

    /**
     * Copies a regex object while preserving special properties for named capture and augmenting with
     * `XRegExp.prototype` methods.
The copy has a fresh `lastIndex` property (set to zero). Allows * adding and removing native flags while copying the regex. * @private * @param {RegExp} regex Regex to copy. * @param {Object} [options] Allows specifying native flags to add or remove while copying the * regex, and whether to attach `XRegExp.prototype` properties. * @returns {RegExp} Copy of the provided regex, possibly with modified flags. */ function copy(regex, options) { if (!self.isRegExp(regex)) { throw new TypeError('Type RegExp expected'); } // Get native flags in use var flags = nativ.exec.call(/\/([a-z]*)$/i, String(regex))[1]; options = options || {}; if (options.add) { flags = clipDuplicates(flags + options.add); } if (options.remove) { // Would need to escape `options.remove` if this was public flags = nativ.replace.call(flags, new RegExp('[' + options.remove + ']+', 'g'), ''); } // Augment with `XRegExp.prototype` methods, but use the native `RegExp` constructor and // avoid searching for special tokens. That would be wrong for regexes constructed by // `RegExp`, and unnecessary for regexes constructed by `XRegExp` because the regex has // already undergone the translation to native regex syntax regex = augment( new RegExp(regex.source, flags), hasNamedCapture(regex) ? regex[REGEX_DATA].captureNames.slice(0) : null, options.addProto ); return regex; } /** * Returns a new copy of the object used to hold extended regex instance data, tailored for a * native nonaugmented regex. * @private * @returns {Object} Object with base regex instance data. */ function getBaseProps() { return {captureNames: null}; } /** * Determines whether a regex has extended instance data used to track capture names. * @private * @param {RegExp} regex Regex to check. * @returns {Boolean} Whether the regex uses named capture. */ function hasNamedCapture(regex) { return !!(regex[REGEX_DATA] && regex[REGEX_DATA].captureNames); } /** * Returns the first index at which a given value can be found in an array. 
* @private * @param {Array} array Array to search. * @param {*} value Value to locate in the array. * @returns {Number} Zero-based index at which the item is found, or -1. */ function indexOf(array, value) { // Use the native array method, if available if (Array.prototype.indexOf) { return array.indexOf(value); } var len = array.length, i; // Not a very good shim, but good enough for XRegExp's use of it for (i = 0; i < len; ++i) { if (array[i] === value) { return i; } } return -1; } /** * Determines whether a value is of the specified type, by resolving its internal [[Class]]. * @private * @param {*} value Object to check. * @param {String} type Type to check for, in TitleCase. * @returns {Boolean} Whether the object matches the type. */ function isType(value, type) { return toString.call(value) === '[object ' + type + ']'; } /** * Checks whether the next nonignorable token after the specified position is a quantifier. * @private * @param {String} pattern Pattern to search within. * @param {Number} pos Index in `pattern` to search at. * @param {String} flags Flags used by the pattern. * @returns {Boolean} Whether the next token is a quantifier. */ function isQuantifierNext(pattern, pos, flags) { return nativ.test.call( flags.indexOf('x') > -1 ? // Ignore any leading whitespace, line comments, and inline comments /^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/ : // Ignore any leading inline comments /^(?:\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/, pattern.slice(pos) ); } /** * Checks for flag-related errors, and strips/applies flags in a leading mode modifier. Offloads * the flag preparation logic from the `XRegExp` constructor. * @private * @param {String} pattern Regex pattern, possibly with a leading mode modifier. * @param {String} flags Any combination of flags. * @returns {Object} Object with properties `pattern` and `flags`. 
*/ function prepareFlags(pattern, flags) { var i; // Recent browsers throw on duplicate flags, so copy this behavior for nonnative flags if (clipDuplicates(flags) !== flags) { throw new SyntaxError('Invalid duplicate regex flag ' + flags); } // Strip and apply a leading mode modifier with any combination of flags except g or y pattern = nativ.replace.call(pattern, /^\(\?([\w$]+)\)/, function($0, $1) { if (nativ.test.call(/[gy]/, $1)) { throw new SyntaxError('Cannot use flag g or y in mode modifier ' + $0); } // Allow duplicate flags within the mode modifier flags = clipDuplicates(flags + $1); return ''; }); // Throw on unknown native or nonnative flags for (i = 0; i < flags.length; ++i) { if (!registeredFlags[flags.charAt(i)]) { throw new SyntaxError('Unknown regex flag ' + flags.charAt(i)); } } return { pattern: pattern, flags: flags }; } /** * Prepares an options object from the given value. * @private * @param {String|Object} value Value to convert to an options object. * @returns {Object} Options object. */ function prepareOptions(value) { value = value || {}; if (isType(value, 'String')) { value = self.forEach(value, /[^\s,]+/, function(match) { this[match] = true; }, {}); } return value; } /** * Registers a flag so it doesn't throw an 'unknown flag' error. * @private * @param {String} flag Single-character flag to register. */ function registerFlag(flag) { if (!/^[\w$]$/.test(flag)) { throw new Error('Flag must be a single character A-Za-z0-9_$'); } registeredFlags[flag] = true; } /** * Runs built-in and custom regex syntax tokens in reverse insertion order at the specified * position, until a match is found. * @private * @param {String} pattern Original pattern from which an XRegExp object is being built. * @param {String} flags Flags being used to construct the regex. * @param {Number} pos Position to search for tokens within `pattern`. * @param {Number} scope Regex scope to apply: 'default' or 'class'. 
* @param {Object} context Context object to use for token handler functions. * @returns {Object} Object with properties `matchLength`, `output`, and `reparse`; or `null`. */ function runTokens(pattern, flags, pos, scope, context) { var i = tokens.length, result = null, match, t; // Run in reverse insertion order while (i--) { t = tokens[i]; if ( (t.scope === scope || t.scope === 'all') && (!t.flag || flags.indexOf(t.flag) > -1) ) { match = self.exec(pattern, t.regex, pos, 'sticky'); if (match) { result = { matchLength: match[0].length, output: t.handler.call(context, match, scope, flags), reparse: t.reparse }; // Finished with token tests break; } } } return result; } /** * Enables or disables implicit astral mode opt-in. * @private * @param {Boolean} on `true` to enable; `false` to disable. */ function setAstral(on) { // Reset the pattern cache used by the `XRegExp` constructor, since the same pattern and // flags might now produce different results self.cache.flush('patterns'); features.astral = on; } /** * Enables or disables native method overrides. * @private * @param {Boolean} on `true` to enable; `false` to disable. */ function setNatives(on) { RegExp.prototype.exec = (on ? fixed : nativ).exec; RegExp.prototype.test = (on ? fixed : nativ).test; String.prototype.match = (on ? fixed : nativ).match; String.prototype.replace = (on ? fixed : nativ).replace; String.prototype.split = (on ? fixed : nativ).split; features.natives = on; } /** * Returns the object, or throws an error if it is `null` or `undefined`. This is used to follow * the ES5 abstract operation `ToObject`. * @private * @param {*} value Object to check and return. * @returns {*} The provided object. 
*/ function toObject(value) { // This matches both `null` and `undefined` if (value == null) { throw new TypeError('Cannot convert null or undefined to object'); } return value; } /* ============================== * Constructor * ============================== */ /** * Creates an extended regular expression object for matching text with a pattern. Differs from a * native regular expression in that additional syntax and flags are supported. The returned object * is in fact a native `RegExp` and works with all native methods. * @class XRegExp * @constructor * @param {String|RegExp} pattern Regex pattern string, or an existing regex object to copy. * @param {String} [flags] Any combination of flags. * Native flags: *
  • `g` - global *
  • `i` - ignore case *
  • `m` - multiline anchors *
  • `y` - sticky (Firefox 3+) * Additional XRegExp flags: *
  • `n` - explicit capture *
  • `s` - dot matches all (aka singleline) *
  • `x` - free-spacing and line comments (aka extended) *
  • `A` - astral (requires the Unicode Base addon)
     *   Flags cannot be provided when constructing one `RegExp` from another.
     * @returns {RegExp} Extended regular expression object.
     * @throws {TypeError} If `pattern` is a regex object and `flags` is also supplied.
     * @example
     *
     * // With named capture and flag x
     * XRegExp('(?<year>  [0-9]{4} ) -?  # year  \n\
     *          (?<month> [0-9]{2} ) -?  # month \n\
     *          (?<day>   [0-9]{2} )     # day   ', 'x');
     *
     * // Providing a regex object copies it. Native regexes are recompiled using native (not XRegExp)
     * // syntax. Copies maintain special properties for named capture, are augmented with
     * // `XRegExp.prototype` methods, and have fresh `lastIndex` properties (set to zero).
     * XRegExp(/regex/);
     */
    self = function(pattern, flags) {
        var context = {
                hasNamedCapture: false,
                captureNames: []
            },
            scope = defaultScope,
            output = '',
            pos = 0,
            result,
            token,
            key;

        if (self.isRegExp(pattern)) {
            if (flags !== undefined) {
                throw new TypeError('Cannot supply flags when copying a RegExp');
            }
            return copy(pattern, {addProto: true});
        }

        // Copy the argument behavior of `RegExp`
        pattern = pattern === undefined ? '' : String(pattern);
        flags = flags === undefined ? '' : String(flags);

        // Cache-lookup key; intentionally using an invalid regex sequence as the separator
        key = pattern + '***' + flags;

        // Translation only happens on a cache miss; a hit reuses the stored native pattern,
        // native flags, and capture names
        if (!patternCache[key]) {
            // Check for flag-related errors, and strip/apply flags in a leading mode modifier
            result = prepareFlags(pattern, flags);
            pattern = result.pattern;
            flags = result.flags;

            // Use XRegExp's syntax tokens to translate the pattern to a native regex pattern...
            // `pattern.length` may change on each iteration, if tokens use the `reparse` option
            while (pos < pattern.length) {
                do {
                    // Check for custom tokens at the current position
                    result = runTokens(pattern, flags, pos, scope, context);
                    // If the matched token used the `reparse` option, splice its output into the
                    // pattern before running tokens again at the same position
                    if (result && result.reparse) {
                        pattern = pattern.slice(0, pos) +
                            result.output +
                            pattern.slice(pos + result.matchLength);
                    }
                } while (result && result.reparse);

                if (result) {
                    output += result.output;
                    // Always advance by at least one character, even if the token matched an
                    // empty string, so the outer loop makes progress
                    pos += (result.matchLength || 1);
                } else {
                    // Get the native token at the current position
                    token = self.exec(pattern, nativeTokens[scope], pos, 'sticky')[0];
                    output += token;
                    pos += token.length;
                    // Track entry into and exit from character classes, since tokens are
                    // filtered by scope
                    if (token === '[' && scope === defaultScope) {
                        scope = classScope;
                    } else if (token === ']' && scope === classScope) {
                        scope = defaultScope;
                    }
                }
            }

            // NOTE(review): this assignment has no terminating semicolon and relies on automatic
            // semicolon insertion
            patternCache[key] = {
                // Cleanup token cruft: repeated `(?:)(?:)` and leading/trailing `(?:)`
                pattern: nativ.replace.call(output, /\(\?:\)(?=\(\?:\))|^\(\?:\)|\(\?:\)$/g, ''),
                // Strip all but native flags
                flags: nativ.replace.call(flags, /[^gimy]+/g, ''),
                // `context.captureNames` has an item for each capturing group, even if unnamed
                captures: context.hasNamedCapture ? context.captureNames : null
            }
        }

        // From here on `key` holds the cached entry rather than the lookup string.
        // NOTE(review): `key.captures` is handed to `augment` as is, so regexes built from the
        // same cache entry share one `captureNames` array
        key = patternCache[key];
        return augment(new RegExp(key.pattern, key.flags), key.captures, /*addProto*/ true);
    };

    // Add `RegExp.prototype` to the prototype chain
    self.prototype = new RegExp;

/* ==============================
 * Public properties
 * ============================== */

    /**
     * The XRegExp version number.
     * @static
     * @memberOf XRegExp
     * @type String
     */
    self.version = '3.0.0-pre';

/* ==============================
 * Public methods
 * ============================== */

    /**
     * Extends XRegExp syntax and allows custom flags. This is used internally and can be used to
     * create XRegExp addons. If more than one token can match the same string, the last added wins.
* @memberOf XRegExp * @param {RegExp} regex Regex object that matches the new token. * @param {Function} handler Function that returns a new pattern string (using native regex syntax) * to replace the matched token within all future XRegExp regexes. Has access to persistent * properties of the regex being built, through `this`. Invoked with three arguments: *
  • The match array, with named backreference properties. *
  • The regex scope where the match was found: 'default' or 'class'. *
  • The flags used by the regex, including any flags in a leading mode modifier. * The handler function becomes part of the XRegExp construction process, so be careful not to * construct XRegExps within the function or you will trigger infinite recursion. * @param {Object} [options] Options object with optional properties: *
  • `scope` {String} Scope where the token applies: 'default', 'class', or 'all'. *
  • `flag` {String} Single-character flag that triggers the token. This also registers the * flag, which prevents XRegExp from throwing an 'unknown flag' error when the flag is used. *
  • `optionalFlags` {String} Any custom flags checked for within the token `handler` that are * not required to trigger the token. This registers the flags, to prevent XRegExp from * throwing an 'unknown flag' error when any of the flags are used. *
  • `reparse` {Boolean} Whether the `handler` function's output should not be treated as * final, and instead be reparseable by other tokens (including the current token). Allows * token chaining or deferring. * @example * * // Basic usage: Add \a for the ALERT control code * XRegExp.addToken( * /\\a/, * function() {return '\\x07';}, * {scope: 'all'} * ); * XRegExp('\\a[\\a-\\n]+').test('\x07\n\x07'); // -> true * * // Add the U (ungreedy) flag from PCRE and RE2, which reverses greedy and lazy quantifiers * XRegExp.addToken( * /([?*+]|{\d+(?:,\d*)?})(\??)/, * function(match) {return match[1] + (match[2] ? '' : '?');}, * {flag: 'U'} * ); * XRegExp('a+', 'U').exec('aaa')[0]; // -> 'a' * XRegExp('a+?', 'U').exec('aaa')[0]; // -> 'aaa' */ self.addToken = function(regex, handler, options) { options = options || {}; var optionalFlags = options.optionalFlags, i; if (options.flag) { registerFlag(options.flag); } if (optionalFlags) { optionalFlags = nativ.split.call(optionalFlags, ''); for (i = 0; i < optionalFlags.length; ++i) { registerFlag(optionalFlags[i]); } } // Add to the private list of syntax tokens tokens.push({ regex: copy(regex, {add: 'g' + (hasNativeY ? 'y' : '')}), handler: handler, scope: options.scope || defaultScope, flag: options.flag, reparse: options.reparse }); // Reset the pattern cache used by the `XRegExp` constructor, since the same pattern and // flags might now produce different results self.cache.flush('patterns'); }; /** * Caches and returns the result of calling `XRegExp(pattern, flags)`. On any subsequent call with * the same pattern and flag combination, the cached copy of the regex is returned. * @memberOf XRegExp * @param {String} pattern Regex pattern string. * @param {String} [flags] Any combination of XRegExp flags. * @returns {RegExp} Cached XRegExp object. 
* @example * * while (match = XRegExp.cache('.', 'gs').exec(str)) { * // The regex is compiled once only * } */ self.cache = function(pattern, flags) { var key = pattern + '***' + (flags || ''); return cache[key] || (cache[key] = self(pattern, flags)); }; // Intentionally undocumented self.cache.flush = function(cacheName) { if (cacheName === 'patterns') { // Flush the pattern cache used by the `XRegExp` constructor patternCache = {}; } else { // Flush the regex object cache populated by `XRegExp.cache` cache = {}; } }; /** * Escapes any regular expression metacharacters, for use when matching literal strings. The result * can safely be used at any point within a regex that uses any flags. * @memberOf XRegExp * @param {String} str String to escape. * @returns {String} String with regex metacharacters escaped. * @example * * XRegExp.escape('Escaped? <.>'); * // -> 'Escaped\?\ <\.>' */ self.escape = function(str) { return nativ.replace.call(toObject(str), /[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&'); }; /** * Executes a regex search in a specified string. Returns a match array or `null`. If the provided * regex uses named capture, named backreference properties are included on the match array. * Optional `pos` and `sticky` arguments specify the search start position, and whether the match * must start at the specified position only. The `lastIndex` property of the provided regex is not * used, but is updated for compatibility. Also fixes browser bugs compared to the native * `RegExp.prototype.exec` and can be used reliably cross-browser. * @memberOf XRegExp * @param {String} str String to search. * @param {RegExp} regex Regex to search with. * @param {Number} [pos=0] Zero-based index at which to start the search. * @param {Boolean|String} [sticky=false] Whether the match must start at the specified position * only. The string `'sticky'` is accepted as an alternative to `true`. * @returns {Array} Match array with named backreference properties, or `null`. 
* @example
     *
     * // Basic use, with named backreference
     * var match = XRegExp.exec('U+2620', XRegExp('U\\+(?<hex>[0-9A-F]{4})'));
     * match.hex; // -> '2620'
     *
     * // With pos and sticky, in a loop
     * var pos = 2, result = [], match;
     * while (match = XRegExp.exec('<1><2><3><4>5<6>', /<(\d)>/, pos, 'sticky')) {
     *   result.push(match[1]);
     *   pos = match.index + match[0].length;
     * }
     * // result -> ['2', '3', '4']
     */
    self.exec = function(str, regex, pos, sticky) {
        // The working copy always has flag g, so its `lastIndex` controls where the search starts
        var cacheFlags = 'g',
            match,
            r2;

        // Use native flag y when available, if stickiness was requested explicitly or the
        // regex is itself sticky and the caller did not pass `false` to opt out
        if (hasNativeY && (sticky || (regex.sticky && sticky !== false))) {
            cacheFlags += 'y';
        }

        regex[REGEX_DATA] = regex[REGEX_DATA] || getBaseProps();

        // Shares cached copies with `XRegExp.match`/`replace`
        r2 = regex[REGEX_DATA][cacheFlags] || (
            regex[REGEX_DATA][cacheFlags] = copy(regex, {
                add: cacheFlags,
                remove: sticky === false ? 'y' : ''
            })
        );

        // A missing (or zero) `pos` starts the search at the beginning of the string
        r2.lastIndex = pos = pos || 0;

        // Fixed `exec` required for `lastIndex` fix, named backreferences, etc.
        match = fixed.exec.call(r2, str);

        // Enforces stickiness when the working copy could not use flag y: a match that starts
        // anywhere other than `pos` is discarded
        if (sticky && match && match.index !== pos) {
            match = null;
        }

        // Only global regexes get their `lastIndex` updated, mirroring the working copy's
        if (regex.global) {
            regex.lastIndex = match ? r2.lastIndex : 0;
        }

        return match;
    };

    /**
     * Executes a provided function once per regex match.
     * @memberOf XRegExp
     * @param {String} str String to search.
     * @param {RegExp} regex Regex to search with.
     * @param {Function} callback Function to execute for each match. Invoked with four arguments: *
  • The match array, with named backreference properties. *
  • The zero-based match index. *
  • The string being traversed. *
  • The regex object being used to traverse the string. * @param {*} [context] Object to use as `this` when executing `callback`. * @returns {*} Provided `context` object. * @example * * // Extracts every other digit from a string * XRegExp.forEach('1a2345', /\d/, function(match, i) { * if (i % 2) this.push(+match[0]); * }, []); * // -> [2, 4] */ self.forEach = function(str, regex, callback, context) { var pos = 0, i = -1, match; while ((match = self.exec(str, regex, pos))) { // Because `regex` is provided to `callback`, the function can use the deprecated/ // nonstandard `RegExp.prototype.compile` to mutate the regex. However, since // `XRegExp.exec` doesn't use `lastIndex` to set the search position, this can't lead // to an infinite loop, at least. Actually, because of the way `XRegExp.exec` caches // globalized versions of regexes, mutating the regex will not have any effect on the // iteration or matched strings, which is a nice side effect that brings extra safety callback.call(context, match, ++i, str, regex); pos = match.index + (match[0].length || 1); } return context; }; /** * Copies a regex object and adds flag `g`. The copy maintains special properties for named * capture, is augmented with `XRegExp.prototype` methods, and has a fresh `lastIndex` property * (set to zero). Native regexes are not recompiled using XRegExp syntax. * @memberOf XRegExp * @param {RegExp} regex Regex to globalize. * @returns {RegExp} Copy of the provided regex with flag `g` added. * @example * * var globalCopy = XRegExp.globalize(/regex/); * globalCopy.global; // -> true */ self.globalize = function(regex) { return copy(regex, {add: 'g', addProto: true}); }; /** * Installs optional features according to the specified options. Can be undone using * {@link #XRegExp.uninstall}. * @memberOf XRegExp * @param {Object|String} options Options object or string. 
* @example * * // With an options object * XRegExp.install({ * // Enables support for astral code points in Unicode addons (implicitly sets flag A) * astral: true, * * // Overrides native regex methods with fixed/extended versions that support named * // backreferences and fix numerous cross-browser bugs * natives: true * }); * * // With an options string * XRegExp.install('astral natives'); */ self.install = function(options) { options = prepareOptions(options); if (!features.astral && options.astral) { setAstral(true); } if (!features.natives && options.natives) { setNatives(true); } }; /** * Checks whether an individual optional feature is installed. * @memberOf XRegExp * @param {String} feature Name of the feature to check. One of: *
  • `natives` *
  • `astral` * @returns {Boolean} Whether the feature is installed. * @example * * XRegExp.isInstalled('natives'); */ self.isInstalled = function(feature) { return !!(features[feature]); }; /** * Returns `true` if an object is a regex; `false` if it isn't. This works correctly for regexes * created in another frame, when `instanceof` and `constructor` checks would fail. * @memberOf XRegExp * @param {*} value Object to check. * @returns {Boolean} Whether the object is a `RegExp` object. * @example * * XRegExp.isRegExp('string'); // -> false * XRegExp.isRegExp(/regex/i); // -> true * XRegExp.isRegExp(RegExp('^', 'm')); // -> true * XRegExp.isRegExp(XRegExp('(?s).')); // -> true */ self.isRegExp = function(value) { return toString.call(value) === '[object RegExp]'; //return isType(value, 'RegExp'); }; /** * Returns the first matched string, or in global mode, an array containing all matched strings. * This is essentially a more convenient re-implementation of `String.prototype.match` that gives * the result types you actually want (string instead of `exec`-style array in match-first mode, * and an empty array instead of `null` when no matches are found in match-all mode). It also lets * you override flag g and ignore `lastIndex`, and fixes browser bugs. * @memberOf XRegExp * @param {String} str String to search. * @param {RegExp} regex Regex to search with. * @param {String} [scope='one'] Use 'one' to return the first match as a string. Use 'all' to * return an array of all matched strings. If not explicitly specified and `regex` uses flag g, * `scope` is 'all'. * @returns {String|Array} In match-first mode: First match as a string, or `null`. In match-all * mode: Array of all matched strings, or an empty array. 
* @example
     *
     * // Match first
     * XRegExp.match('abc', /\w/); // -> 'a'
     * XRegExp.match('abc', /\w/g, 'one'); // -> 'a'
     * XRegExp.match('abc', /x/g, 'one'); // -> null
     *
     * // Match all
     * XRegExp.match('abc', /\w/g); // -> ['a', 'b', 'c']
     * XRegExp.match('abc', /\w/, 'all'); // -> ['a', 'b', 'c']
     * XRegExp.match('abc', /x/, 'all'); // -> []
     */
    self.match = function(str, regex, scope) {
        // An explicit `scope` takes precedence over the regex's own flag g
        var global = (regex.global && scope !== 'one') || scope === 'all',
            cacheFlags = (global ? 'g' : '') + (regex.sticky ? 'y' : ''),
            result,
            r2;

        regex[REGEX_DATA] = regex[REGEX_DATA] || getBaseProps();

        // Shares cached copies with `XRegExp.exec`/`replace`. The copy with neither flag g nor
        // flag y is stored under the key 'noGY', since an empty string can't serve as the key here
        r2 = regex[REGEX_DATA][cacheFlags || 'noGY'] || (
            regex[REGEX_DATA][cacheFlags || 'noGY'] = copy(regex, {
                add: cacheFlags,
                remove: scope === 'one' ? 'g' : ''
            })
        );

        result = nativ.match.call(toObject(str), r2);

        // Keep the caller's global regex in a state comparable to native behavior: positioned
        // after the match in explicit match-first mode, otherwise reset to zero
        if (regex.global) {
            regex.lastIndex = (
                (scope === 'one' && result) ?
                    // Can't use `r2.lastIndex` since `r2` is nonglobal in this case
                    (result.index + result[0].length) :
                    0
            );
        }

        // Match-all: array of strings (empty if none). Match-first: the matched string or `null`
        return global ? (result || []) : (result && result[0]);
    };

    /**
     * Retrieves the matches from searching a string using a chain of regexes that successively search
     * within previous matches. The provided `chain` array can contain regexes and objects with `regex`
     * and `backref` properties. When a backreference is specified, the named or numbered backreference
     * is passed forward to the next regex or returned.
     * @memberOf XRegExp
     * @param {String} str String to search.
     * @param {Array} chain Regexes that each search for matches within preceding results.
     * @returns {Array} Matches by the last regex in the chain, or an empty array.
* @example
     *
     * // Basic usage; matches numbers within <b> tags
     * XRegExp.matchChain('1 <b>2</b> 3 <b>4 a 56</b>', [
     *   XRegExp('(?is)<b>.*?</b>'),
     *   /\d+/
     * ]);
     * // -> ['2', '4', '56']
     *
     * // Passing forward and returning specific backreferences
     * html = '<a href="http://xregexp.com/api/">XRegExp</a>\
     *         <a href="http://www.google.com/">Google</a>';
     * XRegExp.matchChain(html, [
     *   {regex: /<a href="([^"]+)">/i, backref: 1},
     *   {regex: XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'}
     * ]);
     * // -> ['xregexp.com', 'www.google.com']
     */
    self.matchChain = function(str, chain) {
        // Each level runs one chain item against every string produced by the previous level,
        // starting with the original string at level 0
        return (function recurseChain(values, level) {
            // Normalize a bare regex to the `{regex: ...}` form, so both kinds of chain item
            // are handled the same way below
            var item = chain[level].regex ? chain[level] : {regex: chain[level]},
                matches = [],
                addMatch = function(match) {
                    // A falsy `backref` (including 0) collects the whole match instead
                    if (item.backref) {
                        /* Safari 4.0.5 (but not 5.0.5+) inappropriately uses sparse arrays to hold
                         * the `undefined`s for backreferences to nonparticipating capturing
                         * groups. In such cases, a `hasOwnProperty` or `in` check on its own would
                         * inappropriately throw the exception, so also check if the backreference
                         * is a number that is within the bounds of the array.
                         */
                        if (!(match.hasOwnProperty(item.backref) || +item.backref < match.length)) {
                            throw new ReferenceError('Backreference to undefined group: ' + item.backref);
                        }

                        // A group that didn't participate in the match contributes an empty string
                        matches.push(match[item.backref] || '');
                    } else {
                        matches.push(match[0]);
                    }
                },
                i;

            for (i = 0; i < values.length; ++i) {
                self.forEach(values[i], item.regex, addMatch);
            }

            // Stop at the last chain item, or as soon as a level produces nothing to search in
            return ((level === chain.length - 1) || !matches.length) ?
                matches :
                recurseChain(matches, level + 1);
        }([str], 0));
    };

    /**
     * Returns a new string with one or all matches of a pattern replaced. The pattern can be a string
     * or regex, and the replacement can be a string or a function to be called for each match. To
     * perform a global search and replace, use the optional `scope` argument or include flag g if
     * using a regex. Replacement strings can use `${n}` for named and numbered backreferences.
     * Replacement functions can use named backreferences via `arguments[0].name`.
Also fixes browser * bugs compared to the native `String.prototype.replace` and can be used reliably cross-browser. * @memberOf XRegExp * @param {String} str String to search. * @param {RegExp|String} search Search pattern to be replaced. * @param {String|Function} replacement Replacement string or a function invoked to create it. * Replacement strings can include special replacement syntax: *
  • $$ - Inserts a literal $ character. *
  • $&, $0 - Inserts the matched substring. *
  • $` - Inserts the string that precedes the matched substring (left context). *
  • $' - Inserts the string that follows the matched substring (right context). *
  • $n, $nn - Where n/nn are digits referencing an existent capturing group, inserts * backreference n/nn. *
  • ${n} - Where n is a name or any number of digits that reference an existent capturing * group, inserts backreference n. * Replacement functions are invoked with three or more arguments: *
  • The matched substring (corresponds to $& above). Named backreferences are accessible as * properties of this first argument. *
  • 0..n arguments, one for each backreference (corresponding to $1, $2, etc. above). *
  • The zero-based index of the match within the total search string. *
  • The total string being searched.
     * @param {String} [scope='one'] Use 'one' to replace the first match only, or 'all'. If not
     *   explicitly specified and using a regex with flag g, `scope` is 'all'.
     * @returns {String} New string with one or all matches replaced.
     * @example
     *
     * // Regex search, using named backreferences in replacement string
     * var name = XRegExp('(?<first>\\w+) (?<last>\\w+)');
     * XRegExp.replace('John Smith', name, '${last}, ${first}');
     * // -> 'Smith, John'
     *
     * // Regex search, using named backreferences in replacement function
     * XRegExp.replace('John Smith', name, function(match) {
     *   return match.last + ', ' + match.first;
     * });
     * // -> 'Smith, John'
     *
     * // String search, with replace-all
     * XRegExp.replace('RegExp builds RegExps', 'RegExp', 'XRegExp', 'all');
     * // -> 'XRegExp builds XRegExps'
     */
    self.replace = function(str, search, replacement, scope) {
        // For a string `search`, `search.global` and `search.sticky` are `undefined`, so
        // `global` then depends on `scope` alone and `cacheFlags` is empty
        var isRegex = self.isRegExp(search),
            global = (search.global && scope !== 'one') || scope === 'all',
            cacheFlags = (global ? 'g' : '') + (search.sticky ? 'y' : ''),
            s2 = search,
            result;

        if (isRegex) {
            search[REGEX_DATA] = search[REGEX_DATA] || getBaseProps();

            // Shares cached copies with `XRegExp.exec`/`match`. Since a copy is used,
            // `search`'s `lastIndex` isn't updated *during* replacement iterations
            s2 = search[REGEX_DATA][cacheFlags || 'noGY'] || (
                search[REGEX_DATA][cacheFlags || 'noGY'] = copy(search, {
                    add: cacheFlags,
                    remove: scope === 'one' ? 'g' : ''
                })
            );
        } else if (global) {
            // Replace-all with a string search: build a global regex that matches the string
            // literally
            s2 = new RegExp(self.escape(String(search)), 'g');
        }

        // Fixed `replace` required for named backreferences, etc.
        result = fixed.replace.call(toObject(str), s2, replacement);

        if (isRegex && search.global) {
            // Fixes IE, Safari bug (last tested IE 9, Safari 5.1)
            search.lastIndex = 0;
        }

        return result;
    };

    /**
     * Performs batch processing of string replacements. Used like {@link #XRegExp.replace}, but
     * accepts an array of replacement details. Later replacements operate on the output of earlier
     * replacements.
Replacement details are accepted as an array with a regex or string to search for, * the replacement string or function, and an optional scope of 'one' or 'all'. Uses the XRegExp * replacement text syntax, which supports named backreference properties via `${name}`. * @memberOf XRegExp * @param {String} str String to search. * @param {Array} replacements Array of replacement detail arrays. * @returns {String} New string with all replacements. * @example * * str = XRegExp.replaceEach(str, [ * [XRegExp('(?a)'), 'z${name}'], * [/b/gi, 'y'], * [/c/g, 'x', 'one'], // scope 'one' overrides /g * [/d/, 'w', 'all'], // scope 'all' overrides lack of /g * ['e', 'v', 'all'], // scope 'all' allows replace-all for strings * [/f/g, function($0) { * return $0.toUpperCase(); * }] * ]); */ self.replaceEach = function(str, replacements) { var i, r; for (i = 0; i < replacements.length; ++i) { r = replacements[i]; str = self.replace(str, r[0], r[1], r[2]); } return str; }; /** * Splits a string into an array of strings using a regex or string separator. Matches of the * separator are not included in the result array. However, if `separator` is a regex that contains * capturing groups, backreferences are spliced into the result each time `separator` is matched. * Fixes browser bugs compared to the native `String.prototype.split` and can be used reliably * cross-browser. * @memberOf XRegExp * @param {String} str String to split. * @param {RegExp|String} separator Regex or string to use for separating the string. * @param {Number} [limit] Maximum number of items to include in the result array. * @returns {Array} Array of substrings. 
* @example * * // Basic use * XRegExp.split('a b c', ' '); * // -> ['a', 'b', 'c'] * * // With limit * XRegExp.split('a b c', ' ', 2); * // -> ['a', 'b'] * * // Backreferences in result array * XRegExp.split('..word1..', /([a-z]+)(\d+)/i); * // -> ['..', 'word', '1', '..'] */ self.split = function(str, separator, limit) { return fixed.split.call(toObject(str), separator, limit); }; /** * Executes a regex search in a specified string. Returns `true` or `false`. Optional `pos` and * `sticky` arguments specify the search start position, and whether the match must start at the * specified position only. The `lastIndex` property of the provided regex is not used, but is * updated for compatibility. Also fixes browser bugs compared to the native * `RegExp.prototype.test` and can be used reliably cross-browser. * @memberOf XRegExp * @param {String} str String to search. * @param {RegExp} regex Regex to search with. * @param {Number} [pos=0] Zero-based index at which to start the search. * @param {Boolean|String} [sticky=false] Whether the match must start at the specified position * only. The string `'sticky'` is accepted as an alternative to `true`. * @returns {Boolean} Whether the regex matched the provided value. * @example * * // Basic use * XRegExp.test('abc', /c/); // -> true * * // With pos and sticky * XRegExp.test('abc', /c/, 0, 'sticky'); // -> false */ self.test = function(str, regex, pos, sticky) { // Do this the easy way :-) return !!self.exec(str, regex, pos, sticky); }; /** * Uninstalls optional features according to the specified options. All optional features start out * uninstalled, so this is used to undo the actions of {@link #XRegExp.install}. * @memberOf XRegExp * @param {Object|String} options Options object or string. 
* @example * * // With an options object * XRegExp.uninstall({ * // Disables support for astral code points in Unicode addons * astral: true, * * // Restores native regex methods * natives: true * }); * * // With an options string * XRegExp.uninstall('astral natives'); */ self.uninstall = function(options) { options = prepareOptions(options); if (features.astral && options.astral) { setAstral(false); } if (features.natives && options.natives) { setNatives(false); } }; /** * Returns an XRegExp object that is the union of the given patterns. Patterns can be provided as * regex objects or strings. Metacharacters are escaped in patterns provided as strings. * Backreferences in provided regex objects are automatically renumbered to work correctly within * the larger combined pattern. Native flags used by provided regexes are ignored in favor of the * `flags` argument. * @memberOf XRegExp * @param {Array} patterns Regexes and strings to combine. * @param {String} [flags] Any combination of XRegExp flags. * @returns {RegExp} Union of the provided regexes and strings. 
* @example * * XRegExp.union(['a+b*c', /(dogs)\1/, /(cats)\1/], 'i'); * // -> /a\+b\*c|(dogs)\1|(cats)\2/i */ self.union = function(patterns, flags) { var parts = /(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*]/g, output = [], numCaptures = 0, numPriorCaptures, captureNames, pattern, rewrite = function(match, paren, backref) { var name = captureNames[numCaptures - numPriorCaptures]; // Capturing group if (paren) { ++numCaptures; // If the current capture has a name, preserve the name if (name) { return '(?<' + name + '>'; } // Backreference } else if (backref) { // Rewrite the backreference return '\\' + (+backref + numPriorCaptures); } return match; }, i; if (!(isType(patterns, 'Array') && patterns.length)) { throw new TypeError('Must provide a nonempty array of patterns to merge'); } for (i = 0; i < patterns.length; ++i) { pattern = patterns[i]; if (self.isRegExp(pattern)) { numPriorCaptures = numCaptures; captureNames = (pattern[REGEX_DATA] && pattern[REGEX_DATA].captureNames) || []; // Rewrite backreferences. Passing to XRegExp dies on octals and ensures patterns // are independently valid; helps keep this simple. Named captures are put back output.push(nativ.replace.call(self(pattern.source).source, parts, rewrite)); } else { output.push(self.escape(pattern)); } } return self(output.join('|'), flags); }; /* ============================== * Fixed/extended native methods * ============================== */ /** * Adds named capture support (with backreferences returned as `result.name`), and fixes browser * bugs in the native `RegExp.prototype.exec`. Calling `XRegExp.install('natives')` uses this to * override the native method. Use via `XRegExp.exec` without overriding natives. * @private * @param {String} str String to search. * @returns {Array} Match array with named backreference properties, or `null`. 
*/ fixed.exec = function(str) { var origLastIndex = this.lastIndex, match = nativ.exec.apply(this, arguments), name, r2, i; if (match) { // Fix browsers whose `exec` methods don't return `undefined` for nonparticipating // capturing groups. This fixes IE 5.5-8, but not IE 9's quirks mode or emulation of // older IEs. IE 9 in standards mode follows the spec if (!correctExecNpcg && match.length > 1 && indexOf(match, '') > -1) { r2 = copy(this, {remove: 'g'}); // Using `str.slice(match.index)` rather than `match[0]` in case lookahead allowed // matching due to characters outside the match nativ.replace.call(String(str).slice(match.index), r2, function() { var len = arguments.length, i; // Skip index 0 and the last 2 for (i = 1; i < len - 2; ++i) { if (arguments[i] === undefined) { match[i] = undefined; } } }); } // Attach named capture properties if (this[REGEX_DATA] && this[REGEX_DATA].captureNames) { // Skip index 0 for (i = 1; i < match.length; ++i) { name = this[REGEX_DATA].captureNames[i - 1]; if (name) { match[name] = match[i]; } } } // Fix browsers that increment `lastIndex` after zero-length matches if (this.global && !match[0].length && (this.lastIndex > match.index)) { this.lastIndex = match.index; } } if (!this.global) { // Fixes IE, Opera bug (last tested IE 9, Opera 11.6) this.lastIndex = origLastIndex; } return match; }; /** * Fixes browser bugs in the native `RegExp.prototype.test`. Calling `XRegExp.install('natives')` * uses this to override the native method. * @private * @param {String} str String to search. * @returns {Boolean} Whether the regex matched the provided value. */ fixed.test = function(str) { // Do this the easy way :-) return !!fixed.exec.call(this, str); }; /** * Adds named capture support (with backreferences returned as `result.name`), and fixes browser * bugs in the native `String.prototype.match`. Calling `XRegExp.install('natives')` uses this to * override the native method. 
* @private * @param {RegExp|*} regex Regex to search with. If not a regex object, it is passed to `RegExp`. * @returns {Array} If `regex` uses flag g, an array of match strings or `null`. Without flag g, * the result of calling `regex.exec(this)`. */ fixed.match = function(regex) { var result; if (!self.isRegExp(regex)) { // Use the native `RegExp` rather than `XRegExp` regex = new RegExp(regex); } else if (regex.global) { result = nativ.match.apply(this, arguments); // Fixes IE bug regex.lastIndex = 0; return result; } return fixed.exec.call(regex, toObject(this)); }; /** * Adds support for `${n}` tokens for named and numbered backreferences in replacement text, and * provides named backreferences to replacement functions as `arguments[0].name`. Also fixes * browser bugs in replacement text syntax when performing a replacement using a nonregex search * value, and the value of a replacement regex's `lastIndex` property during replacement iterations * and upon completion. Note that this doesn't support SpiderMonkey's proprietary third (`flags`) * argument. Calling `XRegExp.install('natives')` uses this to override the native method. Use via * `XRegExp.replace` without overriding natives. * @private * @param {RegExp|String} search Search pattern to be replaced. * @param {String|Function} replacement Replacement string or a function invoked to create it. * @returns {String} New string with one or all matches replaced. 
*/ fixed.replace = function(search, replacement) { var isRegex = self.isRegExp(search), origLastIndex, captureNames, result; if (isRegex) { if (search[REGEX_DATA]) { captureNames = search[REGEX_DATA].captureNames; } // Only needed if `search` is nonglobal origLastIndex = search.lastIndex; } else { search += ''; // Type-convert } // Don't use `typeof`; some older browsers return 'function' for regex objects if (isType(replacement, 'Function')) { // Stringifying `this` fixes a bug in IE < 9 where the last argument in replacement // functions isn't type-converted to a string result = nativ.replace.call(String(this), search, function() { var args = arguments, i; if (captureNames) { // Change the `arguments[0]` string primitive to a `String` object that can // store properties. This really does need to use `String` as a constructor args[0] = new String(args[0]); // Store named backreferences on the first argument for (i = 0; i < captureNames.length; ++i) { if (captureNames[i]) { args[0][captureNames[i]] = args[i + 1]; } } } // Update `lastIndex` before calling `replacement`. Fixes IE, Chrome, Firefox, // Safari bug (last tested IE 9, Chrome 17, Firefox 11, Safari 5.1) if (isRegex && search.global) { search.lastIndex = args[args.length - 2] + args[0].length; } // Should pass `undefined` as context; see // return replacement.apply(undefined, args); }); } else { // Ensure that the last value of `args` will be a string when given nonstring `this`, // while still throwing on `null` or `undefined` context result = nativ.replace.call(this == null ? this : String(this), search, function() { // Keep this function's `arguments` available through closure var args = arguments; return nativ.replace.call(String(replacement), replacementToken, function($0, $1, $2) { var n; // Named or numbered backreference with curly braces if ($1) { /* XRegExp behavior for `${n}`: * 1. Backreference to numbered capture, if `n` is an integer. Use `0` for * for the entire match. 
Any number of leading zeros may be used. * 2. Backreference to named capture `n`, if it exists and is not an * integer overridden by numbered capture. In practice, this does not * overlap with numbered capture since XRegExp does not allow named * capture to use a bare integer as the name. * 3. If the name or number does not refer to an existing capturing group, * it's an error. */ n = +$1; // Type-convert; drop leading zeros if (n <= args.length - 3) { return args[n] || ''; } // Groups with the same name is an error, else would need `lastIndexOf` n = captureNames ? indexOf(captureNames, $1) : -1; if (n < 0) { throw new SyntaxError('Backreference to undefined group ' + $0); } return args[n + 1] || ''; } // Else, special variable or numbered backreference without curly braces if ($2 === '$') { // $$ return '$'; } if ($2 === '&' || +$2 === 0) { // $&, $0 (not followed by 1-9), $00 return args[0]; } if ($2 === '`') { // $` (left context) return args[args.length - 1].slice(0, args[args.length - 2]); } if ($2 === "'") { // $' (right context) return args[args.length - 1].slice(args[args.length - 2] + args[0].length); } // Else, numbered backreference without curly braces $2 = +$2; // Type-convert; drop leading zero /* XRegExp behavior for `$n` and `$nn`: * - Backrefs end after 1 or 2 digits. Use `${..}` for more digits. * - `$1` is an error if no capturing groups. * - `$10` is an error if less than 10 capturing groups. Use `${1}0` instead. * - `$01` is `$1` if at least one capturing group, else it's an error. * - `$0` (not followed by 1-9) and `$00` are the entire match. * Native behavior, for comparison: * - Backrefs end after 1 or 2 digits. Cannot reference capturing group 100+. * - `$1` is a literal `$1` if no capturing groups. * - `$10` is `$1` followed by a literal `0` if less than 10 capturing groups. * - `$01` is `$1` if at least one capturing group, else it's a literal `$01`. * - `$0` is a literal `$0`. 
*/ if (!isNaN($2)) { if ($2 > args.length - 3) { throw new SyntaxError('Backreference to undefined group ' + $0); } return args[$2] || ''; } throw new SyntaxError('Invalid token ' + $0); }); }); } if (isRegex) { if (search.global) { // Fixes IE, Safari bug (last tested IE 9, Safari 5.1) search.lastIndex = 0; } else { // Fixes IE, Opera bug (last tested IE 9, Opera 11.6) search.lastIndex = origLastIndex; } } return result; }; /** * Fixes browser bugs in the native `String.prototype.split`. Calling `XRegExp.install('natives')` * uses this to override the native method. Use via `XRegExp.split` without overriding natives. * @private * @param {RegExp|String} separator Regex or string to use for separating the string. * @param {Number} [limit] Maximum number of items to include in the result array. * @returns {Array} Array of substrings. */ fixed.split = function(separator, limit) { if (!self.isRegExp(separator)) { // Browsers handle nonregex split correctly, so use the faster native method return nativ.split.apply(this, arguments); } var str = String(this), output = [], origLastIndex = separator.lastIndex, lastLastIndex = 0, lastLength; /* Values for `limit`, per the spec: * If undefined: pow(2,32) - 1 * If 0, Infinity, or NaN: 0 * If positive number: limit = floor(limit); if (limit >= pow(2,32)) limit -= pow(2,32); * If negative number: pow(2,32) - floor(abs(limit)) * If other: Type-convert, then use the above rules */ // This line fails in very strange ways for some values of `limit` in Opera 10.5-10.63, // unless Opera Dragonfly is open (go figure). It works in at least Opera 9.5-10.1 and 11+ limit = (limit === undefined ? 
-1 : limit) >>> 0; self.forEach(str, separator, function(match) { // This condition is not the same as `if (match[0].length)` if ((match.index + match[0].length) > lastLastIndex) { output.push(str.slice(lastLastIndex, match.index)); if (match.length > 1 && match.index < str.length) { Array.prototype.push.apply(output, match.slice(1)); } lastLength = match[0].length; lastLastIndex = match.index + lastLength; } }); if (lastLastIndex === str.length) { if (!nativ.test.call(separator, '') || lastLength) { output.push(''); } } else { output.push(str.slice(lastLastIndex)); } separator.lastIndex = origLastIndex; return output.length > limit ? output.slice(0, limit) : output; }; /* ============================== * Built-in syntax/flag tokens * ============================== */ add = self.addToken; /* Letter identity escapes that natively match literal characters: `\a`, `\A`, etc. These should be * SyntaxErrors but are allowed in web reality. XRegExp makes them errors for cross-browser * consistency and to reserve their syntax, but lets them be superseded by addons. */ add( /\\([ABCE-RTUVXYZaeg-mopqyz]|c(?![A-Za-z])|u(?![\dA-Fa-f]{4})|x(?![\dA-Fa-f]{2}))/, function(match, scope) { // \B is allowed in default scope only if (match[1] === 'B' && scope === defaultScope) { return match[0]; } throw new SyntaxError('Invalid escape ' + match[0]); }, {scope: 'all'} ); /* Empty character class: `[]` or `[^]`. This fixes a critical cross-browser syntax inconsistency. * Unless this is standardized (per the ES spec), regex syntax can't be accurately parsed because * character class endings can't be determined. */ add( /\[(\^?)]/, function(match) { // For cross-browser compatibility with ES3, convert [] to \b\B and [^] to [\s\S]. // (?!) should work like \b\B, but is unreliable in some versions of Firefox return match[1] ? '[\\s\\S]' : '\\b\\B'; } ); /* Comment pattern: `(?# )`. Inline comments are an alternative to the line comments allowed in * free-spacing mode (flag x). 
*/ add( /\(\?#[^)]*\)/, function(match, scope, flags) { // Keep tokens separated unless the following token is a quantifier return isQuantifierNext(match.input, match.index + match[0].length, flags) ? '' : '(?:)'; } ); /* Whitespace and line comments, in free-spacing mode (aka extended mode, flag x) only. */ add( /\s+|#.*/, function(match, scope, flags) { // Keep tokens separated unless the following token is a quantifier return isQuantifierNext(match.input, match.index + match[0].length, flags) ? '' : '(?:)'; }, {flag: 'x'} ); /* Dot, in dotall mode (aka singleline mode, flag s) only. */ add( /\./, function() { return '[\\s\\S]'; }, {flag: 's'} ); /* Named backreference: `\k`. Backreference names can use the characters A-Z, a-z, 0-9, _, * and $ only. Also allows numbered backreferences as `\k`. */ add( /\\k<([\w$]+)>/, function(match) { // Groups with the same name is an error, else would need `lastIndexOf` var index = isNaN(match[1]) ? (indexOf(this.captureNames, match[1]) + 1) : +match[1], endIndex = match.index + match[0].length; if (!index || index > this.captureNames.length) { throw new SyntaxError('Backreference to undefined group ' + match[0]); } // Keep backreferences separate from subsequent literal numbers return '\\' + index + ( endIndex === match.input.length || isNaN(match.input.charAt(endIndex)) ? '' : '(?:)' ); } ); /* Numbered backreference or octal, plus any following digits: `\0`, `\11`, etc. Octals except `\0` * not followed by 0-9 and backreferences to unopened capture groups throw an error. Other matches * are returned unaltered. IE < 9 doesn't support backreferences above `\99` in regex syntax. 
*/ add( /\\(\d+)/, function(match, scope) { if ( !( scope === defaultScope && /^[1-9]/.test(match[1]) && +match[1] <= this.captureNames.length ) && match[1] !== '0' ) { throw new SyntaxError('Cannot use octal escape or backreference to undefined group ' + match[0]); } return match[0]; }, {scope: 'all'} ); /* Named capturing group; match the opening delimiter only: `(?`. Capture names can use the * characters A-Z, a-z, 0-9, _, and $ only. Names can't be integers. Supports Python-style * `(?P` as an alternate syntax to avoid issues in recent Opera (which natively supports the * Python-style syntax). Otherwise, XRegExp might treat numbered backreferences to Python-style * named capture as octals. */ add( /\(\?P?<([\w$]+)>/, function(match) { // Disallow bare integers as names because named backreferences are added to match // arrays and therefore numeric properties may lead to incorrect lookups if (!isNaN(match[1])) { throw new SyntaxError('Cannot use integer as capture name ' + match[0]); } if (match[1] === 'length' || match[1] === '__proto__') { throw new SyntaxError('Cannot use reserved word as capture name ' + match[0]); } if (indexOf(this.captureNames, match[1]) > -1) { throw new SyntaxError('Cannot use same name for multiple groups ' + match[0]); } this.captureNames.push(match[1]); this.hasNamedCapture = true; return '('; } ); /* Capturing group; match the opening parenthesis only. Required for support of named capturing * groups. Also adds explicit capture mode (flag n). */ add( /\((?!\?)/, function(match, scope, flags) { if (flags.indexOf('n') > -1) { return '(?:'; } this.captureNames.push(null); return '('; }, {optionalFlags: 'n'} ); /* ============================== * Expose XRegExp * ============================== */ return self; }());