-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcheck_parser.js
133 lines (107 loc) · 3.35 KB
/
check_parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// cd /d D:\USB\cgi-bin\program\wiki && node check_parser.js
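/*
Test whether the wikitext parser works properly: each page is parsed and serialized back, and the result must equal the original text.
2015/10/2 20:19:48 see [[w:zh:WikiProject:错误检查]], https://checkwiki.toolforge.org/checkwiki.cgi?project=enwiki&view=high
2015/10/11 17:54:33 Initial version, trial run.
*/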
'use strict';

// Flag set before the loader runs.
// NOTE(review): presumably read by the loader to suppress a task-date warning; confirm.
globalThis.no_task_date_warning = true;

// Load CeJS library and modules.
require('./wiki loader.js');

// ---------------------------------------------------------------------//

// Create/prepare this task's own directory first, so that all derived
// files (log files, caches, etc.) can later be placed under it.
prepare_directory(base_directory);

// Disabled manual smoke test: parse one wikitext sample, serialize it back
// and report whether the round trip reproduced the input.
if (false) {
	var t = "{{Tl|a<ref>[http://a.a.a b|c {{!}} {{CURRENTHOUR}}]</ref>}}";
	var p = CeL.wiki.parser(t).parse();
	var ts = p.toString();
	CeL.log(p);
	CeL.log(ts + '\n' + (t === ts ? 'OK' : 'NG!!!'));
	throw 0;
}

// Consulted by check_page before it checks a page.
var skip_exists = false;
// Number of pages whose round trip matched.
var check_OK = 0;
// Number of pages whose round trip differed.
var check_error = 0;
/**
 * Round-trip check for a single page: parse its wikitext, serialize the
 * parse result back to a string and compare it with the original text.
 *
 * A match is logged as "[[title]]: OK" and counted in check_OK. A mismatch
 * is warned about, counted in check_error, and both texts are dumped to
 * base_directory + 'page/' + <title>_original.txt / <title>_parsed.txt.
 *
 * @param {Object} page_data
 *            Page data (presumably as delivered by CeL.wiki.cache). Its
 *            .title is read directly; the content is obtained through
 *            CeL.wiki.content_of().
 * @throws {Error} "check_page: Too many errors" on the 11th mismatch.
 */
function check_page(page_data) {
	if (!CeL.wiki.content_of.page_exists(page_data))
		return;

	var title = page_data.title,
	// Normalize the file name: "/" in a title would otherwise act as a
	// path separator.
	file_name_prefix = base_directory + 'page/' + title.replace(/\//g, '_');

	if (skip_exists) {
		// Continue only if <prefix>.json is an existing regular file; a
		// failing stat (e.g. missing file) also skips the page.
		// NOTE(review): the flag name suggests the opposite condition;
		// confirm the intent before enabling it.
		try {
			if (!node_fs.statSync(file_name_prefix + '.json').isFile())
				return;
		} catch (e) {
			return;
		}
	}

	file_name_prefix += '_';

	var content = CeL.wiki.content_of(page_data),
	//
	wiki_page = CeL.wiki.parser(content).parse({
		end_mark : '»',
		postfix : function(wikitext) {
			// Disabled debug aid: dump the intermediate text handed to this
			// hook, with NUL characters made visible.
			if (false)
				node_fs.writeFileSync(file_name_prefix + 'text.txt',
				//
				wikitext.replace(/\0/g, '«'));
		}
	}),
	//
	parsed_String = wiki_page.toString();

	// Disabled debug aid: list every non-blank text token, then the
	// serialized result.
	if (false) {
		wiki_page.each_text(function(token) {
			if (token = token.trim())
				CeL.log(token);
		});
		CeL.log('-'.repeat(70) + '' + parsed_String);
	}

	if (content === parsed_String) {
		CeL.log('[[' + title + ']]: OK');
		check_OK++;
		return;
	}

	CeL.warn('[[' + title + ']]: different contents!');
	// Write synchronously: fs.writeFile() without a callback throws a
	// TypeError on Node.js >= 10, and the dumps must be on disk before the
	// error limit below may abort the run.
	node_fs.writeFileSync(file_name_prefix + 'original.txt', content);
	node_fs.writeFileSync(file_name_prefix + 'parsed.txt', parsed_String);
	// The post-increment means the limit trips on the 11th mismatch.
	if (check_error++ > 9)
		throw new Error('check_page: Too many errors');
}
/**
 * Queue one page for checking: hands CeL.wiki.cache a single 'page'
 * operation for the title, with check_page as the operator.
 *
 * @param {String} title
 *            Page title to check.
 */
function process_page(title) {
	var page_operation = {
		type : 'page',
		list : title,
		prefix : base_directory,
		operator : check_page
	};

	CeL.wiki.cache([ page_operation ]);
}
// Node.js file system module, used for the mismatch dumps and for listing
// the cached list files.
var node_fs = require('fs');

// CheckWiki "bots" view for one project; check_index appends the error id.
var checkwiki_api_URL = [
		'https://tools.wmflabs.org/checkwiki/cgi-bin/checkwiki.cgi?project=',
		// project to check
		'zhwiki',
		'&view=bots&offset=0&id=' ].join('');
/**
 * Load the CheckWiki page list for one error id (through
 * CeL.get_URL_cache, cached as 'WPCHECK/list_<index>.json') and run
 * process_page on every listed title.
 *
 * @param {String|Number} index
 *            CheckWiki error id; appended to checkwiki_api_URL and used in
 *            the cache file name.
 */
function check_index(index) {
	CeL.get_URL_cache(checkwiki_api_URL + index, function(page_list) {
		// The list is stored as a JSON array of lines. An empty response is
		// stored as [""] (splitting '' yields one empty string), so blank
		// entries are dropped here; otherwise the emptiness guard below
		// could never fire and an empty title would be processed.
		var titles = JSON.parse(page_list).filter(function(title) {
			return typeof title === 'string' && title.trim() !== '';
		});
		// CeL.set_debug(3);
		if (titles.length === 0)
			return;
		// process pages
		// titles = 'Microsoft Surface Pro|本-古里安国际机场|麒麟之翼'.split('|');
		titles.forEach(process_page);
	}, {
		file_name : 'WPCHECK/list_' + index + '.json',
		// Converts the raw response to the JSON array of lines consumed by
		// the callback above.
		postprocessor : function(data) {
			data = data.toString();
			if (data.startsWith('<')) {
				// Only keep the data between <pre> and </pre>.
				// (String#between is not standard JavaScript; presumably
				// provided by CeJS.)
				data = data.between('<pre>', '</pre>');
			}
			data = data.trim().split(/\r?\n/);
			return JSON.stringify(data);
		}
	});
}
// Driver: run check_index for every entry of base_directory whose name
// contains "list_<digits>.json", passing the captured digits as the index.
var lists = node_fs.readdirSync(base_directory);
for (const filename of lists) {
	const matched = /list_(\d+)\.json/.exec(filename);
	if (matched) {
		check_index(matched[1]);
	}
}