Skip to content

Commit

Permalink
feat: added new bib parser in lua
Browse files Browse the repository at this point in the history
This is similar to the Vimscript parser ("vim"), but since it is in Lua
it is much faster and comparable to the current fastest parser
("bibtex").

refer: #2786
  • Loading branch information
lervag committed Oct 25, 2023
1 parent e626412 commit 6b42188
Show file tree
Hide file tree
Showing 15 changed files with 358 additions and 53 deletions.
13 changes: 13 additions & 0 deletions .luarc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"$schema": "https://raw.githubusercontent.com/LuaLS/vscode-lua/master/setting/schema.json",
"runtime": {
"version": "LuaJIT"
},
"workspace": {
"library": [
"$VIMRUNTIME",
"${3rd}/luv/library"
],
"checkThirdParty": false
}
}
5 changes: 5 additions & 0 deletions .stylua.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
column_width = 80
indent_type = "Spaces"
indent_width = 2
quote_style = "AutoPreferDouble"
call_parentheses = "None"
11 changes: 7 additions & 4 deletions autoload/vimtex/context/cite.vim
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ function! s:handler.get_actions() abort dict " {{{1
call vimtex#paths#pushd(b:vimtex.root)
let l:entries = []
for l:file in vimtex#bib#files()
let l:entries += vimtex#parser#bib(l:file, {'backend': 'vim'})
let l:entries += vimtex#parser#bib(
\ l:file,
\ {'backend': has('nvim') ? 'lua' : 'vim'}
\)
endfor
call vimtex#paths#popd()

Expand Down Expand Up @@ -119,7 +122,7 @@ function! s:actions.show() abort dict " {{{1
\ ['Normal', ','],
\])

for l:x in ['key', 'type', 'vimtex_lnum', 'vimtex_file']
for l:x in ['key', 'type', 'source_lnum', 'source_file']
if has_key(l:entry, l:x)
call remove(l:entry, l:x)
endif
Expand All @@ -144,10 +147,10 @@ endfunction

" }}}1
function! s:actions.edit() abort dict " {{{1
execute 'edit' self.entry.vimtex_file
execute 'edit' self.entry.source_file
filetype detect

call vimtex#pos#set_cursor(self.entry.vimtex_lnum, 0)
call vimtex#pos#set_cursor(self.entry.source_lnum, 0)
normal! zv
endfunction

Expand Down
4 changes: 3 additions & 1 deletion autoload/vimtex/options.vim
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,9 @@ function! vimtex#options#init() abort " {{{1
call s:init_option('vimtex_lint_chktex_ignore_warnings',
\ '-n1 -n3 -n8 -n25 -n36')

call s:init_option('vimtex_parser_bib_backend', 'bibtex')
call s:init_option('vimtex_parser_bib_backend',
\ has('nvim') ? 'lua' : 'bibtex'
\)
call s:init_option('vimtex_parser_cmd_separator_check',
\ 'vimtex#cmd#parser_separator_check')

Expand Down
18 changes: 15 additions & 3 deletions autoload/vimtex/parser/bib.vim
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,18 @@ endfunction

" }}}1

function! s:parse_with_lua(file) abort " {{{1
  " Delegate parsing to the Lua module "vimtex.bibparser". The file path is
  " handed to Lua through luaeval()'s _A argument.
  if has('nvim')
    return luaeval('require("vimtex.bibparser").parse(_A)', a:file)
  endif

  " Outside of neovim this backend is rejected: log an error and return no
  " entries.
  call vimtex#log#error(
        \ 'bib parser backend "lua" only works with neovim!')
  return []
endfunction

" }}}1

function! s:parse_with_vim(file) abort " {{{1
" Adheres to the format description found here:
" http://www.bibtex.org/Format/
Expand Down Expand Up @@ -297,8 +309,8 @@ function! s:parse_type(file, lnum, line, current, strings, entries) abort " {{{1

let a:current.level = 1
let a:current.body = ''
let a:current.vimtex_file = a:file
let a:current.vimtex_lnum = a:lnum
let a:current.source_file = a:file
let a:current.source_lnum = a:lnum

if l:type ==# 'string'
return s:parse_string(l:matches[2], a:current, a:strings)
Expand Down Expand Up @@ -420,7 +432,7 @@ function! s:get_value_string(body, head, strings) abort " {{{1
elseif a:body[a:head] ==# '"'
let l:index = match(a:body, '\\\@<!"', a:head+1)
if l:index < 0
return ['s:get_value_string failed', '']
return ['s:get_value_string failed', -1]
endif

let l:value = a:body[a:head+1:l:index-1]
Expand Down
18 changes: 12 additions & 6 deletions doc/vimtex.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1310,8 +1310,14 @@ OPTIONS *vimtex-options*
This option sets the desired default backend for parsing bibliographies.
This is used e.g. for gathering completion candidates. Possible values:

`bibtex`: The fastest, but most hacky solution. Should work well in most
cases.
`bibtex`: The fastest, but most "hacky" solution. Still, time has proved
that this works well!

`vim`: The slowest but perhaps most robust solution, as it does not
require any external utilities.

`lua`: A Lua implementation of the Vim backend. About as fast as the
`bibtex` parser, but this only works on Neovim.

`bibparse`: Also fast, but might be more robust.

Expand All @@ -1335,17 +1341,17 @@ OPTIONS *vimtex-options*
(see |if_pyth| and |py3|) and that the `bibtexparser`
Python module is installed and available.

`vim`: The slowest but perhaps most robust solution, as it does not
require any external utilities.

Some people may want to conditionally change this option if a backend is
available. For example: >vim

if executable('bibparse')
let g:vimtex_parser_bib_backend = 'bibparse'
endif
<
Default value: `bibtex`
Default value:

Vim: `bibtex`
Neovim: `lua`

*g:vimtex_parser_cmd_separator_check*
This option specifies the policy for deciding whether successive groups of
Expand Down
235 changes: 235 additions & 0 deletions lua/vimtex/bibparser.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
-- VimTeX - LaTeX plugin for Vim
--
-- Maintainer: Karl Yngve Lervåg
-- Email: [email protected]
--

---Fold one more line into an entry that is still being read.
---
---The brace depth stored in `item.level` is adjusted by the number of `{` and
---`}` characters found in the line. While the depth stays positive the whole
---line is appended to `item.body`. Once the depth drops to zero or below, only
---the text before the last `}` of the line is appended and `item.parsed` is
---set to true.
---@param item table The current entry (its `level` and `body` are updated)
---@param line string The new line to parse
---@return table item The same entry table, updated in place
local function parse_tail(item, line)
  -- string.gsub returns the number of substitutions as its second result,
  -- which is used here purely as a character counter.
  local _, opened = line:gsub("{", "")
  local _, closed = line:gsub("}", "")
  local depth = item.level + opened - closed
  item.level = depth

  if depth <= 0 then
    -- The entry is closed on this line: keep only what precedes the final "}"
    item.body = item.body .. vim.fn.matchstr(line, [[.*\ze}]])
    item.parsed = true
    return item
  end

  item.body = item.body .. line
  return item
end

---Parse the first line of an entry, i.e. a line of the form `@type{...`.
---
---A line that does not start an entry yields an empty table, and so do
---`@preamble` and `@comment` entries (the type is compared in lower case).
---Otherwise a fresh entry is created at brace level 1 and the text following
---the opening brace is handed to `parse_tail`.
---@param file string The path to the bibtex file
---@param lnum integer The line number for the entry
---@param line string The line content of the entry
---@return table item The new entry, or an empty table if none was started
local function parse_head(file, lnum, line)
  local found = vim.fn.matchlist(line, [[\v^\@(\w+)\s*\{\s*(.*)]])
  if next(found) == nil then
    return {}
  end

  -- Named `entry_type` to avoid shadowing Lua's builtin `type` function.
  local entry_type = found[2]:lower()
  local is_ignored = entry_type == "preamble" or entry_type == "comment"
  if is_ignored then
    return {}
  end

  local item = {
    level = 1,
    body = "",
    source_file = file,
    source_lnum = lnum,
    type = entry_type,
  }
  return parse_tail(item, found[3])
end

---Parse the value part of a bib entry tag until separating comma or end.
---
---The value is either a brace-delimited group, a double-quoted string, or a
---bare word that is looked up among the abbreviations defined by @string
---entries (an unknown abbreviation `foo` becomes the literal `@(foo)`). Values
---joined with `#` are concatenated by recursing on the remainder.
---
---All positions (`head` and the returned head) are 0-based byte offsets as
---used by the Vim functions `match()`/`matchend()`; Lua string indices are
---1-based, hence the `+ 1` adjustments when calling `string.sub`.
---@param body string Raw text of the entry after the citation key
---@param head integer 0-based offset where the value starts
---@param strings table<string, string> Abbreviations from @string entries
---@param pre_value string Already parsed part of a concatenated value
---@return string value The parsed value (or an error message on failure)
---@return integer head New head position; -1 when there is nothing more to
---  parse or when parsing failed
local function get_tag_value_concat(body, head, strings, pre_value)
  local value = ""
  local new_head = head
  local first = body:sub(head + 1, head + 1)

  if first == "{" then
    -- Scan forward until the opening brace at `head` is balanced. `i` is the
    -- 0-based offset of the next character to inspect.
    local depth = 1
    local i = head + 1
    local n = #body

    while depth > 0 and i <= n do
      local char = body:sub(i + 1, i + 1)
      if char == "{" then
        depth = depth + 1
      elseif char == "}" then
        depth = depth - 1
      end

      i = i + 1
    end

    -- Strip the outer braces from the value
    value = body:sub(head + 2, i - 1)
    new_head = vim.fn.matchend(body, [[^\s*]], i)
  elseif first == [["]] then
    -- Find the next double quote that is not preceded by a backslash
    local index = vim.fn.match(body, [[\\\@<!"]], head + 1)
    if index < 0 then
      return "bibparser.lua: get_tag_value_concat failed", -1
    end

    value = body:sub(head + 2, index)
    new_head = vim.fn.matchend(body, [[^\s*]], index + 1)
  elseif vim.fn.match(body, [[^\w]], head) >= 0 then
    local name = vim.fn.matchstr(body, [[^\w[0-9a-zA-Z_-]*]], head)
    new_head = vim.fn.matchend(body, [[^\s*]], head + #name)
    -- Plain Lua lookup: avoids converting the whole `strings` table to a
    -- Vimscript value for every abbreviation.
    value = strings[name] or ("@(" .. name .. ")")
  end

  if body:sub(new_head + 1, new_head + 1) == "#" then
    new_head = vim.fn.matchend(body, [[^\s*]], new_head + 1)
    return get_tag_value_concat(body, new_head, strings, pre_value .. value)
  end

  return pre_value .. value, vim.fn.matchend(body, [[^,\s*]], new_head)
end

---Parse the value part of a bib entry tag until separating comma or end.
---
---A value that starts with a digit is read as a plain run of digits. Any
---other value is delegated to `get_tag_value_concat`.
---
---`head` and the returned head are 0-based byte offsets (Vim convention).
---@param body string Raw text of the entry after the citation key
---@param head integer 0-based offset where the value starts
---@param strings table<string, string> Abbreviations from @string entries
---@return string value The parsed value
---@return integer head New head position; -1 when no separating comma
---  follows the value
local function get_tag_value(body, head, strings)
  -- First check if the value is simply a number. An anchored Lua pattern is
  -- used so that no Vim regex has to be compiled for every single value; with
  -- an init position, "^" anchors the match at that position.
  local number = body:match("^%d+", head + 1)
  if number then
    local new_head = vim.fn.matchend(body, [[^\s*,\s*]], head + #number)
    return number, new_head
  end

  return get_tag_value_concat(body, head, strings, "")
end

---Read a tag name (e.g. author, title, etc) followed by `=` from the body.
---
---The name is returned in lower case. `head` and the returned head are 0-based
---byte offsets; the new head points just past the `=` and any whitespace
---around it.
---@param body string Raw text in which to find tag
---@param head integer Where to start search for tag
---@return string tag_name The parsed tag, or "" if no tag was found
---@return integer head New head position, or -1 if no tag was found
local function get_tag_name(body, head)
  local found =
    vim.fn.matchlist(body, [[^\v([-_:0-9a-zA-Z]+)\s*\=\s*]], head)
  if next(found) == nil then
    return "", -1
  end

  -- found[1] is the whole match, found[2] the captured tag name
  local consumed = vim.fn.strlen(found[1])
  local name = found[2]
  return name:lower(), head + consumed
end

---Turn a raw entry (as collected line by line) into its final form.
---
---The body is split into the citation key and the remaining text. The
---bookkeeping fields `level`, `parsed` and `body` are removed, and every
---`tag = value` pair found in the remaining text is stored on the entry.
---@param item table Raw entry with a `body` field
---@param strings table<string, string> Abbreviations from @string entries
---@return table|nil item The updated entry, or nil if it has no key
local function parse_item(item, strings)
  local parts = vim.fn.matchlist(item.body, [[\v^([^, ]*)\s*,\s*(.*)]])

  local key = parts[2]
  item.key = key
  if key == nil or key == "" then
    return nil
  end

  item.level, item.parsed, item.body = nil, nil, nil

  -- Alternate between reading a tag name and its value. A negative head means
  -- that nothing more could be parsed.
  local body = parts[3]
  local head = 0
  while head >= 0 do
    local tag
    tag, head = get_tag_name(body, head)
    if head < 0 then
      break
    end

    local value
    value, head = get_tag_value(body, head, strings)
    item[tag] = value
  end

  return item
end

---Parse the body of a @string entry of the form `key = "value"` or
---`key = {value}`.
---
---The quoted form is tried first, then the braced form. If neither matches,
---or the value is empty, a pair of empty strings is returned.
---@param raw_string string
---@return string key
---@return string value
local function parse_string(raw_string)
  local patterns = {
    [[\v^\s*(\S+)\s*\=\s*"(.*)"\s*$]],
    [[\v^\s*(\S+)\s*\=\s*\{(.*)\}\s*$]],
  }

  for _, pattern in ipairs(patterns) do
    local found = vim.fn.matchlist(raw_string, pattern)
    -- found[2] is the key and found[3] the value
    if vim.fn.empty(found[3]) == 0 then
      return found[2], found[3]
    end
  end

  return "", ""
end

local M = {}

---Parse the specified bibtex file
---
---The parser adheres to the format description found here:
---http://www.bibtex.org/Format/
---
---The file is processed in two passes. The first pass collects the raw
---entries line by line and records every @string abbreviation. The second
---pass parses the tags of each entry, so that abbreviations are available
---regardless of where in the file they are defined.
---@param file string Path to the bibtex file
---@return table[] entries The parsed entries; empty if `file` is nil or
---  not readable
M.parse = function(file)
  -- vim.fn.filereadable() returns the number 0 or 1. The number 0 is truthy
  -- in Lua, so the result must be compared explicitly instead of negated.
  if file == nil or vim.fn.filereadable(file) == 0 then
    return {}
  end

  local items = {}
  local strings = {}

  -- An empty `item` table means that no entry is currently being read
  local item = {}
  local lines = vim.fn.readfile(file)
  for lnum = 1, #lines do
    local line = lines[lnum]

    if next(item) == nil then
      item = parse_head(file, lnum, line)
    else
      item = parse_tail(item, line)
    end

    if item.parsed then
      if item.type == "string" then
        local key, value = parse_string(item.body)
        if key ~= "" then
          strings[key] = value
        end
      else
        items[#items + 1] = item
      end
      item = {}
    end
  end

  local result = {}
  for _, entry in ipairs(items) do
    -- parse_item returns nil for entries without a citation key; skip those
    local parsed = parse_item(entry, strings)
    if parsed ~= nil then
      result[#result + 1] = parsed
    end
  end
  return result
end

return M
3 changes: 2 additions & 1 deletion test/test-completion-bibtex-speed/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
MYVIM ?= nvim --clean --headless

test:
@INMAKE=1 $(MYVIM) -u bibspeed.vim
@INMAKE=1 BACKEND=bibtex $(MYVIM) -u bibspeed.vim
@INMAKE=1 BACKEND=vim $(MYVIM) -u bibspeed.vim
@INMAKE=1 BACKEND=lua $(MYVIM) -u bibspeed.vim
@#INMAKE=1 BACKEND=bibparse $(MYVIM) -u bibspeed.vim
@#INMAKE=1 BACKEND=bibtexparser $(MYVIM) -u bibspeed.vim
@rm -f nvim_servernames.log
Loading

0 comments on commit 6b42188

Please sign in to comment.