From 681487f82f55fba66f01f9913e4ff103e5b2ef4c Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Fri, 25 Dec 2020 13:16:56 +0100 Subject: [PATCH] all: weed out more backtracking string regexes Conflict:do not change test Reference:https://github.com/pygments/pygments/commit/681487f82f55fba66f01f9913e4ff103e5b2ef4c --- pygments/lexers/actionscript.py | 4 ++-- pygments/lexers/ambient.py | 2 +- pygments/lexers/boa.py | 6 ++--- pygments/lexers/configs.py | 2 +- pygments/lexers/d.py | 4 ++-- pygments/lexers/dotnet.py | 8 +++---- pygments/lexers/dsls.py | 4 ++-- pygments/lexers/go.py | 2 +- pygments/lexers/graphics.py | 4 ++-- pygments/lexers/haxe.py | 4 ++-- pygments/lexers/iolang.py | 2 +- pygments/lexers/jvm.py | 16 +++++++------- pygments/lexers/lisp.py | 12 +++++----- pygments/lexers/make.py | 4 ++-- pygments/lexers/parsers.py | 48 ++++++++++++++++++++-------------------- pygments/lexers/php.py | 4 ++-- pygments/lexers/prolog.py | 4 ++-- pygments/lexers/ruby.py | 34 ++++++++++++++-------------- pygments/lexers/scripting.py | 8 +++---- pygments/lexers/supercollider.py | 4 ++-- pygments/lexers/templates.py | 24 ++++++++++---------- pygments/lexers/textedit.py | 4 ++-- pygments/lexers/urbi.py | 4 ++-- pygments/lexers/webmisc.py | 4 ++-- pygments/lexers/x10.py | 2 +- 25 files changed, 107 insertions(+), 107 deletions(-) diff --git a/pygments/lexers/actionscript.py b/pygments/lexers/actionscript.py index f4b4964..7992358 100644 --- a/pygments/lexers/actionscript.py +++ b/pygments/lexers/actionscript.py @@ -37,7 +37,7 @@ class ActionScriptLexer(RegexLexer): (r'\s+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex), + (r'/(\\\\|\\[^\\]|[^/\\\n])*/[gim]*', String.Regex), (r'[~^*!%&<>|+=:;,/?\\-]+', Operator), (r'[{}\[\]();.]+', Punctuation), (words(( @@ -144,7 +144,7 @@ class ActionScript3Lexer(RegexLexer): bygroups(Keyword, Text, Keyword.Type, Text, Operator)), (r'//.*?\n', Comment.Single), 
(r'/\*.*?\*/', Comment.Multiline), - (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex), + (r'/(\\\\|\\[^\\]|[^\\\n])*/[gisx]*', String.Regex), (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)), (r'(case|default|for|each|in|while|do|break|return|continue|if|else|' r'throw|try|catch|with|new|typeof|arguments|instanceof|this|' diff --git a/pygments/lexers/ambient.py b/pygments/lexers/ambient.py index 7d42d12..d2cc06a 100644 --- a/pygments/lexers/ambient.py +++ b/pygments/lexers/ambient.py @@ -44,7 +44,7 @@ class AmbientTalkLexer(RegexLexer): (builtin, Name.Builtin), (r'(true|false|nil)\b', Keyword.Constant), (r'(~|lobby|jlobby|/)\.', Keyword.Constant, 'namespace'), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r'\|', Punctuation, 'arglist'), (r'<:|[*^!%&<>+=,./?-]|:=', Operator), (r"`[a-zA-Z_]\w*", String.Symbol), diff --git a/pygments/lexers/boa.py b/pygments/lexers/boa.py index a57c0e4..2425583 100644 --- a/pygments/lexers/boa.py +++ b/pygments/lexers/boa.py @@ -92,9 +92,9 @@ class BoaLexer(RegexLexer): (classes, Name.Classes), (words(operators), Operator), (r'[][(),;{}\\.]', Punctuation), - (r'"(\\\\|\\"|[^"])*"', String), - (r'`(\\\\|\\`|[^`])*`', String), - (words(string_sep), String.Delimeter), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"`(\\\\|\\[^\\]|[^`\\])*`", String.Backtick), + (words(string_sep), String.Delimiter), (r'[a-zA-Z_]+', Name.Variable), (r'[0-9]+', Number.Integer), (r'\s+?', Text), # Whitespace diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 0911b6e..0bae4eb 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -909,7 +909,7 @@ class TOMLLexer(RegexLexer): (r'\s+', Text), (r'#.*?$', Comment.Single), # Basic string - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # Literal string (r'\'\'\'(.*)\'\'\'', String), (r'\'[^\']*\'', String), diff --git a/pygments/lexers/d.py b/pygments/lexers/d.py index b14f7dc..f833e5e 100644 
--- a/pygments/lexers/d.py +++ b/pygments/lexers/d.py @@ -93,7 +93,7 @@ class DLexer(RegexLexer): # -- AlternateWysiwygString (r'`[^`]*`[cwd]?', String), # -- DoubleQuotedString - (r'"(\\\\|\\"|[^"])*"[cwd]?', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"[cwd]?', String), # -- EscapeSequence (r"\\(['\"?\\abfnrtv]|x[0-9a-fA-F]{2}|[0-7]{1,3}" r"|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|&\w+;)", @@ -224,7 +224,7 @@ class CrocLexer(RegexLexer): (r'@`(``|[^`])*`', String), (r"@'(''|[^'])*'", String), # -- DoubleQuotedString - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # Tokens (r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|-\>' r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)' diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index 458a9eb..c4d2077 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -88,7 +88,7 @@ class CSharpLexer(RegexLexer): (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), - (r'"(\\\\|\\"|[^"\n])*["\n]', String), + (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" 
r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), @@ -213,7 +213,7 @@ class NemerleLexer(RegexLexer): (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), - (r'"(\\\\|\\"|[^"\n])*["\n]', String), + (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number), @@ -315,8 +315,8 @@ class BooLexer(RegexLexer): (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), - (r'/(\\\\|\\/|[^/\s])/', String.Regex), - (r'@/(\\\\|\\/|[^/])*/', String.Regex), + (r'/(\\\\|\\[^\\]|[^/\\\s])/', String.Regex), + (r'@/(\\\\|\\[^\\]|[^/\\])*/', String.Regex), (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator), (r'(as|abstract|callable|constructor|destructor|do|import|' r'enum|event|final|get|interface|internal|of|override|' diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py index 0af3c6c..0e4ba40 100644 --- a/pygments/lexers/dsls.py +++ b/pygments/lexers/dsls.py @@ -632,7 +632,7 @@ class AlloyLexer(RegexLexer): (iden_rex, Name), (r'[:,]', Punctuation), (r'[0-9]+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r'\n', Text), ] } @@ -827,7 +827,7 @@ class FlatlineLexer(RegexLexer): (r'0x-?[a-f\d]+', Number.Hex), # strings, symbols and characters - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"\\(.|[a-z]+)", String.Char), # expression template placeholder diff --git a/pygments/lexers/go.py b/pygments/lexers/go.py index f6bb7fc..ebb34b5 100644 --- a/pygments/lexers/go.py +++ b/pygments/lexers/go.py @@ -90,7 +90,7 @@ class GoLexer(RegexLexer): # -- raw_string_lit (r'`[^`]*`', String), # -- interpreted_string_lit - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # Tokens (r'(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\|' r'|<-|\+\+|--|==|!=|:=|\.\.\.|[+\-*/%&])', Operator), diff --git 
a/pygments/lexers/graphics.py b/pygments/lexers/graphics.py index b0b9145..61031a4 100644 --- a/pygments/lexers/graphics.py +++ b/pygments/lexers/graphics.py @@ -425,7 +425,7 @@ class AsymptoteLexer(RegexLexer): ], 'statements': [ # simple string (TeX friendly) - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # C style string (with character escapes) (r"'", String, 'string'), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), @@ -775,7 +775,7 @@ class PovrayLexer(RegexLexer): (r'[0-9]+\.[0-9]*', Number.Float), (r'\.[0-9]+', Number.Float), (r'[0-9]+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r'\s+', Text), ] } diff --git a/pygments/lexers/haxe.py b/pygments/lexers/haxe.py index b357508..e85e61c 100644 --- a/pygments/lexers/haxe.py +++ b/pygments/lexers/haxe.py @@ -467,7 +467,7 @@ class HaxeLexer(ExtendedRegexLexer): (r'"', String.Double, ('#pop', 'expr-chain', 'string-double')), # EReg - (r'~/(\\\\|\\/|[^/\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')), + (r'~/(\\\\|\\[^\\]|[^/\\\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')), # Array (r'\[', Punctuation, ('#pop', 'expr-chain', 'array-decl')), @@ -722,7 +722,7 @@ class HaxeLexer(ExtendedRegexLexer): (r'"', String.Double, ('#pop', 'string-double')), # EReg - (r'~/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex, '#pop'), + (r'~/(\\\\|\\[^\\]|[^/\\\n])*/[gim]*', String.Regex, '#pop'), # Array (r'\[', Operator, ('#pop', 'array-decl')), diff --git a/pygments/lexers/iolang.py b/pygments/lexers/iolang.py index f33c871..b108939 100644 --- a/pygments/lexers/iolang.py +++ b/pygments/lexers/iolang.py @@ -37,7 +37,7 @@ class IoLexer(RegexLexer): (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), (r'/\+', Comment.Multiline, 'nestedcomment'), # DoubleQuotedString - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # Operators (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}', Operator), diff 
--git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 7f6d166..f008c1d 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -285,12 +285,12 @@ class ScalaLexer(RegexLexer): (r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'), (r'(type)(\s+)', bygroups(Keyword, Text), 'type'), (r'""".*?"""(?!")', String), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), (u"'%s" % idrest, Text.Symbol), (r'[fs]"""', String, 'interptriplestring'), # interpolated strings (r'[fs]"', String, 'interpstring'), # interpolated strings - (r'raw"(\\\\|\\"|[^"])*"', String), # raw strings + (r'raw"(\\\\|\\[^\\]|[^"\\])*"', String), # raw strings # (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator, # Name.Attribute)), (idrest, Name), @@ -612,7 +612,7 @@ class IokeLexer(RegexLexer): # Symbols (r':[\w!:?]+', String.Symbol), (r'[\w!:?]+:(?![\w!?])', String.Other), - (r':"(\\\\|\\"|[^"])*"', String.Symbol), + (r':"(\\\\|\\[^\\]|[^"\\])*"', String.Symbol), # Documentation (r'((?<=fn\()|(?<=fnx\()|(?<=method\()|(?<=macro\()|(?<=lecro\()' @@ -830,7 +830,7 @@ class ClojureLexer(RegexLexer): (r'0x-?[abcdef\d]+', Number.Hex), # strings, symbols and characters - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'" + valid_name, String.Symbol), (r"\\(.|[a-z]+)", String.Char), @@ -973,7 +973,7 @@ class CeylonLexer(RegexLexer): (r'(class|interface|object|alias)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char), (r'(\.)([a-z_]\w*)', bygroups(Operator, Name.Attribute)), @@ -1049,7 +1049,7 @@ class KotlinLexer(RegexLexer): (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Punctuation), (r'@"(""|[^"])*"', String), - (r'"(\\\\|\\"|[^"\n])*["\n]', String), + 
(r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFL]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), @@ -1646,8 +1646,8 @@ class SarlLexer(RegexLexer): (r'(agent|annotation|artifact|behavior|capacity|class|enum|event|interface|skill|space)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), - (r'"(\\\\|\\"|[^"])*"', String), - (r"'(\\\\|\\'|[^'])*'", String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r'[a-zA-Z_]\w*:', Name.Label), (r'[a-zA-Z_$]\w*', Name), (r'[~^*!%&\[\](){}<>\|+=:;,./?-]', Operator), diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py index 601d5a5..9e9b9ee 100644 --- a/pygments/lexers/lisp.py +++ b/pygments/lexers/lisp.py @@ -119,7 +119,7 @@ class SchemeLexer(RegexLexer): # (r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number), # strings, symbols and characters - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'" + valid_name, String.Symbol), (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char), @@ -403,7 +403,7 @@ class HyLexer(RegexLexer): (r'0[xX][a-fA-F0-9]+', Number.Hex), # strings, symbols and characters - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'" + valid_name, String.Symbol), (r"\\(.|[a-z]+)", String.Char), (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), @@ -1490,7 +1490,7 @@ class NewLispLexer(RegexLexer): (r'\s+', Text), # strings, symbols and characters - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # braces (r'\{', String, "bracestring"), @@ -2385,7 +2385,7 @@ class CPSALexer(SchemeLexer): # (r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number), # strings, symbols and characters - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'" + valid_name, String.Symbol), (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char), @@ -2598,7 
+2598,7 @@ class XtlangLexer(RegexLexer): (r'(#b|#o|#x)[\d.]+', Number), # strings - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # true/false constants (r'(#t|#f)', Name.Constant), @@ -2669,7 +2669,7 @@ class FennelLexer(RegexLexer): (r'-?\d+\.\d+', Number.Float), (r'-?\d+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'(\\\\|\\'|[^'])*'", String), # these are technically strings, but it's worth visually diff --git a/pygments/lexers/make.py b/pygments/lexers/make.py index f67f109..8b9477d 100644 --- a/pygments/lexers/make.py +++ b/pygments/lexers/make.py @@ -93,8 +93,8 @@ class BaseMakefileLexer(RegexLexer): (r'([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), # strings - (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), - (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), # targets (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), 'block-header'), diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py index 8bcbfc5..bada329 100644 --- a/pygments/lexers/parsers.py +++ b/pygments/lexers/parsers.py @@ -65,10 +65,10 @@ class RagelLexer(RegexLexer): (r'[+-]?[0-9]+', Number.Integer), ], 'literals': [ - (r'"(\\\\|\\"|[^"])*"', String), # double quote string - (r"'(\\\\|\\'|[^'])*'", String), # single quote string - (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals - (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), + (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String), # square bracket literals + (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex), # regular expressions ], 'identifiers': [ (r'[a-zA-Z_]\w*', Name.Variable), @@ -107,15 +107,15 @@ class RagelLexer(RegexLexer): r'[^\\]\\[{}]', # 
allow escaped { or } # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string + r'"(\\\\|\\[^\\]|[^"\\])*"', + r"'(\\\\|\\[^\\]|[^'\\])*'", r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'\#.*$\n?', # ruby comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. - r'/(?!\*)(\\\\|\\/|[^/])*/', + r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # / is safe now that we've handled regex and javadoc comments r'/', @@ -148,12 +148,12 @@ class RagelEmbeddedLexer(RegexLexer): r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string + r'"(\\\\|\\[^\\]|[^"\\])*"', + r"'(\\\\|\\[^\\]|[^'\\])*'", r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment - r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression + r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # regular expression # / is safe now that we've handled regex and javadoc comments r'/', @@ -183,7 +183,7 @@ class RagelEmbeddedLexer(RegexLexer): # specifically allow regex followed immediately by * # so it doesn't get mistaken for a comment - r'/(?!\*)(\\\\|\\/|[^/])*/\*', + r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*', # allow / as long as it's not followed by another / or by a * r'/(?=[^/*]|$)', @@ -194,9 +194,9 @@ class RagelEmbeddedLexer(RegexLexer): )) + r')+', # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal + r'"(\\\\|\\[^\\]|[^"\\])*"', + r"'(\\\\|\\[^\\]|[^'\\])*'", + r"\[(\\\\|\\[^\\]|[^\]\\])*\]", # square bracket literal r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment 
r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment @@ -422,8 +422,8 @@ class AntlrLexer(RegexLexer): (r':', Punctuation), # literals - (r"'(\\\\|\\'|[^'])*'", String), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r'<<([^>]|>[^>])>>', String), # identifiers # Tokens start with capital letter. @@ -462,14 +462,14 @@ class AntlrLexer(RegexLexer): r'[^${}\'"/\\]+', # exclude unsafe characters # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string + r'"(\\\\|\\[^\\]|[^"\\])*"', + r"'(\\\\|\\[^\\]|[^'\\])*'", r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. - r'/(?!\*)(\\\\|\\/|[^/])*/', + r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # backslashes are okay, as long as we are not backslashing a % r'\\(?!%)', @@ -489,14 +489,14 @@ class AntlrLexer(RegexLexer): r'[^$\[\]\'"/]+', # exclude unsafe characters # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string + r'"(\\\\|\\[^\\]|[^"\\])*"', + r"'(\\\\|\\[^\\]|[^'\\])*'", r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. - r'/(?!\*)(\\\\|\\/|[^/])*/', + r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # Now that we've handled regex and javadoc comments # it's safe to let / through. 
@@ -736,8 +736,8 @@ class TreetopBaseLexer(RegexLexer): 'rule': [ include('space'), include('end'), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)), (r'[A-Za-z_]\w*', Name), (r'[()]', Punctuation), diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py index bd4a237..8b49de7 100644 --- a/pygments/lexers/php.py +++ b/pygments/lexers/php.py @@ -79,8 +79,8 @@ class ZephirLexer(RegexLexer): (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), ] } diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py index 7078362..ee5bf2e 100644 --- a/pygments/lexers/prolog.py +++ b/pygments/lexers/prolog.py @@ -227,7 +227,7 @@ class LogtalkLexer(RegexLexer): # Existential quantifier (r'\^', Operator), # Strings - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # Punctuation (r'[()\[\],.|]', Text), # Atoms @@ -277,7 +277,7 @@ class LogtalkLexer(RegexLexer): (r"[a-z][a-zA-Z0-9_]*", Text), (r"'", String, 'quoted_atom'), # Strings - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # End of entity-opening directive (r'([)]\.)', Text, 'root'), # Scope operator diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py index 8bcbde6..ac79a58 100644 --- a/pygments/lexers/ruby.py +++ b/pygments/lexers/ruby.py @@ -108,7 +108,7 @@ class RubyLexer(ExtendedRegexLexer): # easy ones (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol), (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol), - (r":'(\\\\|\\'|[^'])*'", String.Symbol), + (r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol), 
(r"'(\\\\|\\'|[^'])*'", String.Single), (r':"', String.Symbol, 'simple-sym'), (r'([a-zA-Z_]\w*)(:)(?!:)', @@ -451,26 +451,26 @@ class FancyLexer(RegexLexer): tokens = { # copied from PerlLexer: 'balanced-regex': [ - (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'), - (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'), + (r'/(\\\\|\\[^\\]|[^/\\])*/[egimosx]*', String.Regex, '#pop'), + (r'!(\\\\|\\[^\\]|[^!\\])*![egimosx]*', String.Regex, '#pop'), (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'), - (r'\{(\\\\|\\\}|[^}])*\}[egimosx]*', String.Regex, '#pop'), - (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'), - (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'), - (r'\((\\\\|\\\)|[^)])*\)[egimosx]*', String.Regex, '#pop'), - (r'@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex, '#pop'), - (r'%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex, '#pop'), - (r'\$(\\\\|\\\$|[^$])*\$[egimosx]*', String.Regex, '#pop'), + (r'\{(\\\\|\\[^\\]|[^}\\])*\}[egimosx]*', String.Regex, '#pop'), + (r'<(\\\\|\\[^\\]|[^>\\])*>[egimosx]*', String.Regex, '#pop'), + (r'\[(\\\\|\\[^\\]|[^\]\\])*\][egimosx]*', String.Regex, '#pop'), + (r'\((\\\\|\\[^\\]|[^)\\])*\)[egimosx]*', String.Regex, '#pop'), + (r'@(\\\\|\\[^\\]|[^@\\])*@[egimosx]*', String.Regex, '#pop'), + (r'%(\\\\|\\[^\\]|[^%\\])*%[egimosx]*', String.Regex, '#pop'), + (r'\$(\\\\|\\[^\\]|[^$\\])*\$[egimosx]*', String.Regex, '#pop'), ], 'root': [ (r'\s+', Text), # balanced delimiters (copied from PerlLexer): - (r's\{(\\\\|\\\}|[^}])*\}\s*', String.Regex, 'balanced-regex'), - (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'), - (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'), - (r's\((\\\\|\\\)|[^)])*\)\s*', String.Regex, 'balanced-regex'), - (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex), + (r's\{(\\\\|\\[^\\]|[^}\\])*\}\s*', String.Regex, 'balanced-regex'), + (r's<(\\\\|\\[^\\]|[^>\\])*>\s*', String.Regex, 'balanced-regex'), + (r's\[(\\\\|\\[^\\]|[^\]\\])*\]\s*', String.Regex, 
'balanced-regex'), + (r's\((\\\\|\\[^\\]|[^)\\])*\)\s*', String.Regex, 'balanced-regex'), - (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex), + (r'm?/(\\\\|\\[^\\]|[^/\\\n])*/[gcimosx]*', String.Regex), (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'), # Comments @@ -478,9 +478,9 @@ class FancyLexer(RegexLexer): # Symbols (r'\'([^\'\s\[\](){}]+|\[\])', String.Symbol), # Multi-line DoubleQuotedString - (r'"""(\\\\|\\"|[^"])*"""', String), + (r'"""(\\\\|\\[^\\]|[^\\])*?"""', String), # DoubleQuotedString - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # keywords (r'(def|class|try|catch|finally|retry|return|return_local|match|' r'case|->|=>)\b', Keyword), diff --git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py index a340f8e..90701e7 100644 --- a/pygments/lexers/scripting.py +++ b/pygments/lexers/scripting.py @@ -284,7 +284,7 @@ class ChaiscriptLexer(RegexLexer): (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), (r'"', String.Double, 'dqstring'), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), ], 'dqstring': [ (r'\$\{[^"}]+?\}', String.Interpol), @@ -690,7 +690,7 @@ class AppleScriptLexer(RegexLexer): (r'\b(%s)s?\b' % '|'.join(StudioClasses), Name.Builtin), (r'\b(%s)\b' % '|'.join(StudioCommands), Name.Builtin), (r'\b(%s)\b' % '|'.join(References), Name.Builtin), - (r'"(\\\\|\\"|[^"])*"', String.Double), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), (r'\b(%s)\b' % Identifiers, Name.Variable), (r'[-+]?(\d+\.\d*|\d*\.\d+)(E[-+][0-9]+)?', Number.Float), (r'[-+]?\d+', Number.Integer), @@ -834,7 +834,7 @@ class MOOCodeLexer(RegexLexer): # Numbers (r'(0|[1-9][0-9_]*)', Number.Integer), # Strings - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), # exceptions (r'(E_PERM|E_DIV)', Name.Exception), # db-refs @@ -925,7 +925,7 @@ class HybrisLexer(RegexLexer): 'Runnable', 'CGI', 'ClientSocket', 'Socket', 'ServerSocket', 'File', 'Console', 'Directory', 'Exception'), suffix=r'\b'), Keyword.Type), - 
(r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py index d0d033a..ed4218a 100644 --- a/pygments/lexers/supercollider.py +++ b/pygments/lexers/supercollider.py @@ -84,7 +84,7 @@ class SuperColliderLexer(RegexLexer): (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), ] } diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py index d909662..6cfed37 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -179,8 +179,8 @@ class SmartyLexer(RegexLexer): (r'(true|false|null)\b', Keyword.Constant), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r'[a-zA-Z_]\w*', Name.Attribute) ] } @@ -252,8 +252,8 @@ class VelocityLexer(RegexLexer): (r'\$!?\{?', Punctuation, 'variable'), (r'\s+', Text), (r'[,:]', Punctuation), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"\b[0-9]+\b", Number), (r'(true|false|null)\b', Keyword.Constant), @@ -373,8 +373,8 @@ class DjangoLexer(RegexLexer): (r'(loop|block|super|forloop)\b', Name.Builtin), (r'[a-zA-Z_][\w-]*', Name.Variable), (r'\.\w+', Name.Variable), - (r':?"(\\\\|\\"|[^"])*"', String.Double), - (r":?'(\\\\|\\'|[^'])*'", String.Single), + 
(r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r'([{}()\[\]+\-*/%,:~]|[><=]=?|!=)', Operator), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), @@ -1852,8 +1852,8 @@ class HandlebarsLexer(RegexLexer): include('variable'), # borrowed from DjangoLexer - (r':?"(\\\\|\\"|[^"])*"', String.Double), - (r":?'(\\\\|\\'|[^'])*'", String.Single), + (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), ] @@ -2165,8 +2165,8 @@ class TwigLexer(RegexLexer): (_ident_inner, Name.Variable), (r'\.' + _ident_inner, Name.Variable), (r'\.[0-9]+', Number), - (r':?"(\\\\|\\"|[^"])*"', String.Double), - (r":?'(\\\\|\\'|[^'])*'", String.Single), + (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r'([{}()\[\]+\-*/,:~%]|\.\.|\?|:|\*\*|\/\/|!=|[><=]=?)', Operator), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), @@ -2245,8 +2245,8 @@ class Angular2Lexer(RegexLexer): # Literals (r':?(true|false)', String.Boolean), - (r':?"(\\\\|\\"|[^"])*"', String.Double), - (r":?'(\\\\|\\'|[^'])*'", String.Single), + (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), diff --git a/pygments/lexers/textedit.py b/pygments/lexers/textedit.py index 3c6fb57..04c8015 100644 --- a/pygments/lexers/textedit.py +++ b/pygments/lexers/textedit.py @@ -69,8 +69,8 @@ class AwkLexer(RegexLexer): (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), ] } diff --git 
a/pygments/lexers/urbi.py b/pygments/lexers/urbi.py index 72349cb..28b358d 100644 --- a/pygments/lexers/urbi.py +++ b/pygments/lexers/urbi.py @@ -117,11 +117,11 @@ class UrbiscriptLexer(ExtendedRegexLexer): ], 'string.double': [ (r'((?:\\\\|\\"|[^"])*?)(\\B\((\d+)\)\()', blob_callback), - (r'(\\\\|\\"|[^"])*?"', String.Double, '#pop'), + (r'(\\\\|\\[^\\]|[^"\\])*?"', String.Double, '#pop'), ], 'string.single': [ (r"((?:\\\\|\\'|[^'])*?)(\\B\((\d+)\)\()", blob_callback), - (r"(\\\\|\\'|[^'])*?'", String.Single, '#pop'), + (r"(\\\\|\\[^\\]|[^'\\])*?'", String.Single, '#pop'), ], # from http://pygments.org/docs/lexerdevelopment/#changing-states 'comment': [ diff --git a/pygments/lexers/webmisc.py b/pygments/lexers/webmisc.py index b39334b..a1a73f4 100644 --- a/pygments/lexers/webmisc.py +++ b/pygments/lexers/webmisc.py @@ -855,8 +855,8 @@ class QmlLexer(RegexLexer): (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), ] } diff --git a/pygments/lexers/x10.py b/pygments/lexers/x10.py index eac87b1..317c66c 100644 --- a/pygments/lexers/x10.py +++ b/pygments/lexers/x10.py @@ -62,7 +62,7 @@ class X10Lexer(RegexLexer): (r'\b(%s)\b' % '|'.join(types), Keyword.Type), (r'\b(%s)\b' % '|'.join(values), Keyword.Constant), (r'\b(%s)\b' % '|'.join(modifiers), Keyword.Declaration), - (r'"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), (r'.', Text) ], -- 1.8.3.1