-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
Copy pathbib-file-generator.py
45 lines (36 loc) · 1.79 KB
/
bib-file-generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Creates "generated-large-library.bib" with 100k entries.
# The file is written in UTF8 and makes use of the unicode character U+0304 (https://www.compart.com/en/unicode/U+0304)
# to create an overline on large Roman numerals using the technique "Vinculum" (https://en.wikipedia.org/wiki/Roman_numerals#Vinculum).
# The numbers are used in the journal title.
# To pseudonymize BibTeX files, org.jabref.logic.pseudonymization.PseudonymizationTest#pseudonymizeLibraryFile can be used.
# Number of BibTeX entries written to the generated library file.
number_of_entries = 100_000
# Adapted from: https://stackoverflow.com/a/50012689/873282
def int_to_roman(num: int) -> str:
    """Return *num* written as a Roman numeral with vinculum notation.

    Values up to 1000 use the plain letters (``I`` ... ``M``).  Larger table
    values reuse the same letters, each followed by the combining macron
    U+0304, which draws an overline that multiplies the letter by 1000
    (so 4000 becomes ``I̅V̅`` and 1,000,000 becomes ``M̅``).

    Args:
        num: The number to convert.

    Returns:
        The Roman numeral string.  Zero and negative numbers yield an
        empty string.  Any fractional remainder below 1 is ignored.
    """
    overline = "\u0304"
    # Descending (value, letters) pairs; the greedy pass below relies on
    # this order to always consume the largest fitting value first.
    numerals = (
        (1000000, "M"), (900000, "CM"), (500000, "D"), (400000, "CD"),
        (100000, "C"), (90000, "XC"), (50000, "L"), (40000, "XL"),
        (10000, "X"), (9000, "IX"), (5000, "V"), (4000, "IV"),
        (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
        (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
        (10, "X"), (9, "IX"), (5, "V"), (4, "IV"),
        (1, "I"),
    )

    parts = []
    remaining = num
    for value, letters in numerals:
        if remaining < value:
            continue
        if value > 1000:
            # Overline every letter of the symbol, not just the last one.
            symbol = "".join(letter + overline for letter in letters)
        else:
            symbol = letters
        # Once a value no longer fits, no larger one can fit again, so a
        # single pass suffices and always terminates (even when a remainder
        # smaller than 1 is left over).
        while remaining >= value:
            parts.append(symbol)
            remaining -= value
    return "".join(parts)
# Stream all generated @article entries into the output library, one
# write per entry.  Publication years cycle through 1900..2024.
year_cycle = 2025 - 1900
with open("generated-large-library.bib", 'w', encoding='utf-8') as bib_file:
    for index in range(1, number_of_entries + 1):
        publication_year = 1900 + (index - 1) % year_cycle
        # The journal title carries the entry number as a Roman numeral.
        journal_number = int_to_roman(index)
        bib_file.write(
            f"@article{{id{index:06d},\n"
            f"title = {{This is my title{index}}},\n"
            f"author = {{FirstnameA{index} LastnameA{index} and FirstnameB{index} LastnameB{index} and FirstnameC{index} LastnameC{index}}},\n"
            f"journal = {{Journal Title {journal_number}}},\n"
            f"volume = {{{index}}},\n"
            f"year = {{{publication_year}}},\n"
            "}\n"
        )