Skip to content

Commit

Permalink
[refactor]: optimize scan, put TODO note
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Balashov <[email protected]>
  • Loading branch information
0x009922 committed Nov 22, 2023
1 parent c01f77d commit 21e0050
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions etc/validate-links.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,22 @@ async function findFiles(root: string): Promise<string[]> {
return globby(path.join(root, '**/*.html'))
}

// Selector compiled once at module load: any element carrying an `id` attribute inside `<main>`.
const ANCHORS_QUERY = cssSelect.compile('main [id]')

// Selector compiled once at module load: any `<a>` element carrying an `href` attribute inside `<main>`.
const LINKS_QUERY = cssSelect.compile('main a[href]')

/**
* TODO: Here we only look into `<main>`. There are also links in `<aside>` and `<header>`, but unlike the links in
* `<main>`, they repeat from page to page. The current scan-validate logic doesn't handle such repetition, so
* including them would make the report cumbersome.
*/
function scanLinksAndAnchorsInHTML(html: string): {
links: string[]
anchors: Set<string>
} {
const doc = htmlparser.parseDocument(html)

const links = cssSelect.selectAll('main a[href]', doc.children).map((elem) => {
const links = cssSelect.selectAll(LINKS_QUERY, doc.children).map((elem) => {
return match(elem)
.with({ name: 'a', attribs: { href: P.select(P.string) } }, (href) => href)
.otherwise(() => {
Expand All @@ -171,7 +180,7 @@ function scanLinksAndAnchorsInHTML(html: string): {
})

const anchors = new Set(
cssSelect.selectAll('main [id]', doc.children).map((elem) => {
cssSelect.selectAll(ANCHORS_QUERY, doc.children).map((elem) => {
return match(elem)
.with({ attribs: { id: P.select(P.string) } }, (id) => id)
.otherwise(() => {
Expand Down

0 comments on commit 21e0050

Please sign in to comment.