# Seq Pygments lexer based on Python's import re from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ default, words, combined, do_insertions from pygments.util import get_bool_opt, shebang_matches from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic, Other, Error from pygments import unistring as uni __all__ = ['SeqLexer'] line_re = re.compile('.*?\n') class SeqLexer(RegexLexer): name = 'Seq' aliases = ['seq'] filenames = ['*.seq'] mimetypes = [] flags = re.MULTILINE | re.UNICODE uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue) def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (still valid in Py3) (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsux%]', String.Interpol), # the new style '{}'.format(...) string formatting (r'\{' '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name '(\![sra])?' # conversion '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' '\}', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%{\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%|(\{{1,2})', ttype) # newlines are an error (use "nl" state) ] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), include('keywords'), (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'), (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'), include('builtins'), include('magicfuncs'), include('magicvars'), include('backtick'), ('([skprR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), ("([skprR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), ('([skprR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), ("([skprR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'funcname': [ include('magicfuncs'), ('[a-zA-Z_]\w*', Name.Function, '#pop'), default('#pop'), ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings-single': innerstring_rules(String.Single), 'strings-double': innerstring_rules(String.Double), 'dqs': [ (r'"', String.Double, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings include('strings-double') ], 'sqs': [ (r"'", String.Single, '#pop'), (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings include('strings-single') ], 'tdqs': [ (r'"""', String.Double, '#pop'), include('strings-double'), (r'\n', String.Double) ], 'tsqs': [ (r"'''", String.Single, '#pop'), include('strings-single'), (r'\n', String.Single) ], } tokens['keywords'] = [ (words(( 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as', 'with', 'match', 'case', 'pydef', 'type', 'extend', 'print', 'cimport', 'pyimport'), suffix=r'\b'), Keyword), (words(( 'True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), ] seqwords = ['__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray', 'bytes', 'chr', 'classmethod', 'cmp', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter', 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview', 'min', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'vars', 'zip', 'seq', 'byte', 'ptr', 'array', 'Kmer', 'Int', 'UInt', 'optional'] tokens['builtins'] = [ (words(seqwords, prefix=r'(?