-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDokuwikiXMLExport.php
289 lines (246 loc) · 8.77 KB
/
DokuwikiXMLExport.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
<?php
/**
* This is the Dokuwiki export for FINDOLOGIC.
* If any bugs occur, please submit a new issue
* @see https://github.com/findologic/dokuwiki-plugin-findologic-xml-export/issues/new
* @author Dominik Brader <[email protected]>
*/
// Define DOKU_INC only when nothing has defined it before this file is loaded.
// The fallback is the resolved path three directory levels above this file, with a trailing slash;
// it is used right below as the base path for loading inc/init.php.
if (!defined('DOKU_INC')) {
define('DOKU_INC', realpath(dirname(__FILE__) . '/../../../') . '/');
}
require_once(DOKU_INC . 'inc/init.php');
require_once(__DIR__ . '/PageGetter.php');
require(__DIR__ . '/vendor/autoload.php');
use FINDOLOGIC\Export\Exporter;
use FINDOLOGIC\Export\Data\Ordernumber;
use FINDOLOGIC\Export\Data\Attribute;
use FINDOLOGIC\Export\Data\Keyword;
/**
 * Builds the FINDOLOGIC XML export from DokuWiki page metadata.
 *
 * The list of exportable page ids is collected once in the constructor; every call to
 * generateXMLExport() then serializes a window (offset + count) of that list.
 */
class DokuwikiXMLExport
{
    /**
     * Default value for a price. DokuWiki pages do not have a price and this is just a placeholder.
     * FINDOLOGIC requires the price attribute, so this is the reason why it is exported.
     */
    const PRICE_PLACEHOLDER = 0.0;

    /**
     * This value is needed to tell FINDOLOGIC this is a category.
     */
    const CATEGORY_KEY = 'cat';

    /**
     * Delimiter for category depth.
     */
    const CATEGORY_DELIMITER = '_';

    /**
     * In DokuWiki, the keyword separator is a space.
     * To be able to have tags consisting of multiple words, the words are joined with an '_'.
     */
    const KEYWORD_SPACE = '_';

    /**
     * DokuWiki saves keywords/tags in the subject of the page.
     * The subject is an array with all keywords/tags from the page in it.
     */
    const KEYWORD_KEY = 'subject';

    /**
     * The default usergroup is an empty string.
     */
    const DEFAULT_USERGROUP = '';

    /**
     * @var array $conf DokuWiki configuration.
     */
    protected $conf;

    /**
     * @var array $pages Ids of all pages that may be exported (see getPageIds()).
     */
    protected $pages;

    /**
     * DokuwikiXMLExport constructor.
     *
     * Stores the configuration and immediately collects the exportable page ids.
     *
     * @param $conf array DokuWiki configuration array.
     */
    public function __construct($conf)
    {
        $this->conf = $conf;
        $this->pages = $this->getPageIds();
    }

    /**
     * Returns all pageIds, excluding those who were set in the configuration.
     *
     * Only pages whose metadata has a non-empty title and a non-empty description are kept.
     *
     * @return array pageIds, re-indexed from 0.
     */
    private function getPageIds()
    {
        $indexer = new Doku_Indexer();
        $pagesAndDeletedPages = $indexer->getPages();

        // Keep only pages that have both a description and a title set.
        // The metadata is fetched once per page and reused for both checks.
        $pages = array_filter($pagesAndDeletedPages, function ($page) {
            $metadata = p_get_metadata($page);

            return !empty($metadata['description']) && !empty($metadata['title']);
        });

        // The exclude setting may be missing from the configuration; treat that as "exclude nothing".
        $excludeConfig = isset($this->conf['plugin']['findologicxmlexport']['excludePages'])
            ? $this->conf['plugin']['findologicxmlexport']['excludePages']
            : '';
        $excludedPages = $this->splitConfigToArray($excludeConfig);

        // array_filter/array_diff preserve keys, so re-index to get a plain list.
        return array_values(array_diff($pages, $excludedPages));
    }

    /**
     * Formats Config string to an array.
     *
     * Entries are separated by commas. Whitespace around entries (including at the very start and
     * end of the string) is dropped, and empty entries are skipped, so an empty setting yields [].
     *
     * @param string $config Excluded pages in a string.
     * @return array Returns the pages that should be excluded as array.
     */
    private function splitConfigToArray($config)
    {
        return preg_split('/\s*,\s*/', trim((string) $config), -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Generate the entire XML Export based on the DokuWiki metadata.
     *
     * The stored page list is not modified, so this method can be called repeatedly
     * with different windows on the same instance.
     *
     * @param $start integer Determines the first item (offset) to be exported.
     * @param $submittedCount integer Determines the interval size / number of items to be exported.
     * @return string Returns the XML as string.
     */
    public function generateXMLExport($start, $submittedCount)
    {
        $exporter = Exporter::create(Exporter::TYPE_XML, $submittedCount);

        $total = count($this->pages);
        $count = min($total, $submittedCount); // The count can't be higher than the total number of pages.

        // Slice into a local variable: overwriting $this->pages here would make every
        // later call operate on the already-sliced list.
        $pagesToExport = array_slice($this->pages, $start, $count);

        $items = [];
        foreach ($pagesToExport as $key => $page) {
            // array_slice re-indexes from 0, so offset + key is the position in the full list.
            $item = $exporter->createItem($start + $key);
            $this->fillDataToItem($page, $item);
            $items[] = $item;
        }

        return $exporter->serializeItems($items, $start, $submittedCount, $total);
    }

    /**
     * Gets the Name of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the Name/Title of the page.
     */
    private function getName($pageId)
    {
        $metadata = p_get_metadata($pageId);

        return $metadata['title'];
    }

    /**
     * Gets the Summary of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the Summary (the 'abstract' entry of the description metadata) of the page.
     */
    private function getSummary($pageId)
    {
        $metadata = p_get_metadata($pageId);

        return $metadata['description']['abstract'];
    }

    /**
     * Gets the Description of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the Description of the page, as returned by rawWiki().
     */
    private function getDescription($pageId)
    {
        return rawWiki($pageId);
    }

    /**
     * Gets the Url of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the Url of the page.
     */
    private function getUrl($pageId)
    {
        return wl($pageId, '', true);
    }

    /**
     * Gets the creation date of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return DateTime Returns a DateTime object set to the page's 'created' timestamp.
     *                  When the metadata has no creation timestamp, timestamp 0 is used.
     */
    private function getDateAdded($pageId)
    {
        $metadata = p_get_metadata($pageId);

        // Guard against pages without a creation timestamp instead of passing null on.
        $created = isset($metadata['date']['created']) ? (int) $metadata['date']['created'] : 0;

        $date = new DateTime();
        $date->setTimestamp($created);

        return $date;
    }

    /**
     * Returns the id of a given page.
     * Note: This function is trivial, but is used for legibility reasons.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the pageId.
     */
    private function getPageId($pageId)
    {
        return $pageId;
    }

    /**
     * Gets the Category Attribute of the current page.
     *
     * Formats DokuWiki IDs to categories (FINDOLOGIC scheme) in three steps:
     * underscores become spaces, colons become underscores, and the first letter
     * of every underscore-separated part is capitalized.
     *
     * Examples:
     *
     * "customer_account:synonyms" -> "customer account:synonyms" -> "customer account_synonyms" -> "Customer account_Synonyms"
     * "plugin:dokuwiki-plugin-findologic-xml-export" -> (unchanged) -> "plugin_dokuwiki-plugin-findologic-xml-export" -> "Plugin_Dokuwiki-plugin-findologic-xml-export"
     * "wiki:syntax" -> (unchanged) -> "wiki_syntax" -> "Wiki_Syntax"
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return array Returns the category attribute based on the export scheme (a single-element array).
     */
    private function getAttributesCategory($pageId)
    {
        $attribute = str_replace(self::CATEGORY_DELIMITER, ' ', $pageId); // Replace underscores with spaces
        $attribute = str_replace(':', self::CATEGORY_DELIMITER, $attribute); // Replace colons with underscores
        $attribute = ucwords($attribute, self::CATEGORY_DELIMITER); // Capitalize each category

        return [$attribute];
    }

    /**
     * Gets the Keywords of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return array Returns all Keywords for the given page, keyed by usergroup;
     *               an empty array when the page has no keywords.
     */
    private function getKeywords($pageId)
    {
        $metadata = p_get_metadata($pageId);

        // empty() also covers a missing 'subject' entry without raising a notice.
        if (empty($metadata[self::KEYWORD_KEY])) {
            return [];
        }

        $keywords = [];
        foreach ($metadata[self::KEYWORD_KEY] as $keyword) {
            // Keywords with multiple words are separated by an underscore.
            // To export them correctly, those underscores will be replaced by spaces.
            $keywords[] = new Keyword(str_replace(self::KEYWORD_SPACE, ' ', $keyword));
        }

        return [self::DEFAULT_USERGROUP => $keywords];
    }

    /**
     * Fills all exported fields of an item with the data of the given page.
     *
     * @param $page string Id of the DokuWiki page.
     * @param $item FINDOLOGIC\Export\Data\Item Item without data.
     *
     * @return FINDOLOGIC\Export\Data\Item Item with filled data (the same instance that was passed in).
     */
    public function fillDataToItem($page, $item)
    {
        $item->addName($this->getName($page));
        $item->addSummary($this->getSummary($page));
        $item->addDescription($this->getDescription($page));
        $item->addPrice(self::PRICE_PLACEHOLDER);
        $item->addUrl($this->getUrl($page));
        $item->addDateAdded($this->getDateAdded($page));
        $item->addOrdernumber(new Ordernumber($this->getPageId($page)));

        $keywordsData = $this->getKeywords($page);
        $item->setAllKeywords($keywordsData);

        $attributeCategory = new Attribute(self::CATEGORY_KEY, $this->getAttributesCategory($page));
        $item->addAttribute($attributeCategory);

        return $item;
    }
}