-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use Pygments' highlighting for PTX and GCN #659
base: master
Are you sure you want to change the base?
Conversation
Codecov ReportAttention: Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## master #659 +/- ##
==========================================
- Coverage 72.84% 71.89% -0.96%
==========================================
Files 24 24
Lines 3300 3369 +69
==========================================
+ Hits 2404 2422 +18
- Misses 896 947 +51 ☔ View full report in Codecov by Sentry. |
36f743c
to
eb857fb
Compare
Your PR no longer requires formatting changes. Thank you for your contribution! |
eb857fb
to
01fc0fd
Compare
01fc0fd
to
157ad9f
Compare
157ad9f
to
8355d2e
Compare
8355d2e
to
e13d4b5
Compare
e13d4b5
to
31dbfaf
Compare
31dbfaf
to
95fe459
Compare
95fe459
to
9a5a25a
Compare
Thanks for the PR (and sorry for abusing it to debug the formatter). I'm not particularly fond of relying on Pygments for highlighting, but since we already do, we may as well improve the situation. Using Highlights.jl would probably be preferable. Here are lexers proposed by LLMs: @lexer PTXLexer let
# Building blocks for the PTX token patterns.
#
# These are kept as plain (raw) strings and are only turned into a `Regex` at
# the point of use. Concatenating a `String` with a `Regex` via `*` makes Julia
# match the string operand verbatim (it is quoted with \Q...\E), which would
# turn the parentheses, character classes and `\s*` below into literal text.
string_literal = raw"\"[^\"]*?\""
identifier = "(" * raw"[-a-zA-Z$._][\w\-$.]*|" * string_literal * ")"
block_label = "(" * identifier * raw"|(\d+))"

# PTX instruction opcodes (bare words, no leading dot).
instruction_keywords = [
    "abs", "discard", "min", "shf", "vadd",
    "activemask", "div", "mma", "shfl", "vadd2",
    "add", "dp2a", "mov", "shl", "vadd4",
    "addc", "dp4a", "movmatrix", "shr", "vavrg2",
    "alloca", "elect", "mul", "sin", "vavrg4",
    "and", "ex2", "mul24", "slct", "vmad",
    "applypriority", "exit", "multimem", "sqrt", "vmax",
    "atom", "fence", "nanosleep", "st", "vmax2",
    "bar", "fma", "neg", "stackrestore", "vmax4",
    "barrier", "fns", "not", "stacksave", "vmin",
    "bfe", "getctarank", "or", "stmatrix", "vmin2",
    "bfi", "griddepcontrol", "pmevent", "sub", "vmin4",
    "bfind", "isspacep", "popc", "subc", "vote",
    "bmsk", "istypep", "prefetch", "suld", "vset",
    "bra", "ld", "prefetchu", "suq", "vset2",
    "brev", "ldmatrix", "prmt", "sured", "vset4",
    "brkpt", "ldu", "rcp", "sust", "vshl",
    "brx", "lg2", "red", "szext", "vshr",
    "call", "lop3", "redux", "tanh", "vsub",
    "clz", "mad", "rem", "testp", "vsub2",
    "cnot", "mad24", "ret", "tex", "vsub4",
    "copysign", "madc", "rsqrt", "tld4", "wgmma",
    "cos", "mapa", "sad", "trap", "wmma",
    "cp", "match", "selp", "txq", "xor",
    "createpolicy", "max", "set", "vabsdiff", "cvt",
    "mbarrier", "setmaxnreg", "vabsdiff2", "cvta",
    "membar", "setp", "vabsdiff4"
]

# Dot-prefixed qualifiers highlighted as pseudo keywords.
# The first entry was previously "reg" without its leading dot.
state_spaces = [
    ".reg", ".sreg", ".const", ".global",
    ".local", ".param", ".shared", ".tex",
    ".wide", ".loc"
]

# Dot-prefixed assembler directives.
directives = [
    ".address_size", ".explicitcluster", ".maxnreg", ".section",
    ".alias", ".extern", ".maxntid", ".shared",
    ".align", ".file", ".minnctapersm", ".sreg",
    ".branchtargets", ".func", ".noreturn", ".target",
    ".callprototype", ".global", ".param", ".tex",
    ".calltargets", ".loc", ".pragma", ".version",
    ".common", ".local", ".reg", ".visible",
    ".const", ".maxclusterrank", ".reqnctapercluster", ".weak",
    ".entry", ".maxnctapersm", ".reqntid"
]

# Dot-prefixed fundamental type specifiers.
types = [
    ".s8", ".s16", ".s32", ".s64",
    ".u8", ".u16", ".u32", ".u64",
    ".f16", ".f16x2", ".f32", ".f64",
    ".b8", ".b16", ".b32", ".b64",
    ".pred"
]

Dict(
    :name => "PTX",
    :description => "A lexer for NVIDIA PTX (Parallel Thread Execution) source code.",
    :aliases => ["ptx"],
    :filenames => ["*.ptx"],
    :mimetypes => ["text/x-ptx"],
    :tokens => Dict(
        :root => [
            (r"\s+", TEXT),
            (r"//.*?\n", COMMENT_SINGLE),
            # Compile the label pattern once, from plain strings, so that
            # every part of it keeps its regex meaning.
            (Regex(block_label * raw"\s*:"), NAME_LABEL),
            (words(instruction_keywords; prefix="\\b", suffix="\\b"), KEYWORD),
            # No `\b` prefix for the dot-prefixed lists: `\b` in front of "."
            # only holds when the preceding character is a word character, so
            # ".reg" after whitespace or at the start of a line never matched.
            # NOTE(review): assumes `words` escapes regex metacharacters such
            # as "." in its entries; confirm against the helper.
            (words(state_spaces; prefix="", suffix="\\b"), KEYWORD_PSEUDO),
            (words(directives; prefix="", suffix="\\b"), KEYWORD_RESERVED),
            (words(types; prefix="", suffix="\\b"), KEYWORD_TYPE),
            (r"%[-a-zA-Z$._][\w\-$.]*", NAME_VARIABLE),
            (r"%\d+", NAME_VARIABLE),
            (r"c?\"[^\"]*?\"", STRING),
            (r"[-a-zA-Z$._][\w\-$.]*", NAME_VARIABLE),
            (r"0[xX][a-fA-F0-9]+", NUMBER_HEX),
            (r"-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?", NUMBER_FLOAT),
            (r"[=<>{}\[\]()*.,!]|x\b", PUNCTUATION),
            # "-" goes first so it is a literal; the former `[*+-/]` contained
            # the range "+" through "/", which also covers "," and ".".
            (r"[-+*/]", OPERATOR),
            (r";", PUNCTUATION),
        ],
    ),
)
end @lexer AMDGPULexer Dict(
# Lexer definition for AMD GPU (GCN) assembly listings.
# Rules are listed in priority order, so the more specific mnemonic patterns
# come before the generic scalar/vector instruction and register patterns.
:name => "AMDGPU",
:description => "A lexer for AMD GPU assembly code.",
:aliases => ["amdgpu"],
:filenames => ["*.isa"],
:tokens => Dict(
    :root => [
        (r"\s+", WHITESPACE),
        # NOTE(review): `\s+` above already consumes "\r" and "\n", so this
        # rule looks unreachable; kept to preserve the existing token mapping.
        (r"[\r\n]+", TEXT),
        (r"([a-z_0-9])*:([a-z_0-9])*", NAME_ATTRIBUTE),
        (r"[\[\]\(\),:\&]", TEXT),
        # Group the comment markers so the rest of the line belongs to the
        # comment for ";" and "#" as well; the ungrouped `[;#]|//.*?\n` only
        # extended to the end of the line for "//".
        (r"(?:[;#]|//).*?\n", COMMENT_SINGLE),
        (r"((s_)?(scratch|ds|buffer|flat|image)_[a-z0-9_]+)", KEYWORD_RESERVED),
        (r"(_lo|_hi)", NAME_VARIABLE),
        (r"(vmcnt|lgkmcnt|expcnt)", NAME_ATTRIBUTE),
        (r"(attr[0-9]\.[a-z])", NAME_ATTRIBUTE),
        # The suffix is passed as a string, matching how the PTX lexer in the
        # same comment calls `words`.
        (words([
            "op", "vaddr", "vdata", "off", "soffset", "srsrc", "format",
            "offset", "offen", "idxen", "glc", "dlc", "slc", "tfe", "lds",
            "lit", "unorm"
        ], suffix="\\b"), NAME_ATTRIBUTE),
        (r"(label_[a-z0-9]+)", KEYWORD),
        (r"(_L[0-9]*)", NAME_VARIABLE),
        (r"(s|v)_[a-z0-9_]+", KEYWORD),
        (r"(v[0-9.]+|vcc|exec|v)", NAME_VARIABLE),
        (r"s[0-9.]+|s", NAME_VARIABLE),
        # Digits on both sides of the decimal point; the former `[^0-9]+`
        # required non-digits after the point, so "1.5" was never a float.
        (r"[0-9]+\.[0-9]+", NUMBER_FLOAT),
        (r"(0[xX][a-z0-9]+)|([0-9]+)", NUMBER_INTEGER)
    ]
)
) They don't render beautifully in the terminal though, so they probably need some tweaking (or a theme to accompany the |
As mentioned in #655, Pygments supports PTX starting from version 2.16, and GCN starting from version 2.8.
Before any highlighting is done, we check the version of Pygments and print a message if it is not installed or if a lexer isn't supported.
I added some very basic tests, but I do not know if Pygments is installed by default on GitHub's runners.
Upon testing on my Windows machine, I encountered some issues with pipes and Python (which occurred even before this PR). I am using some additional language packs, so this might be the reason why. I went with the safe path of using temp files instead of pipes on Windows.
In the end it still takes about 0.2 to 0.5 seconds to run the highlighter.
It would be a good idea to have a solution in pure Julia, and Highlights.jl seems the closest to Pygments, but it supports only HTML or LaTeX output (even though there is an ANSI formatter in the docs).