From 4e683786d866dd908e14ebb238efa2547efcc8d6 Mon Sep 17 00:00:00 2001
From: John Turner <john@turnerscience.us>
Date: Fri, 6 Sep 2024 12:25:36 -0400
Subject: [PATCH] Remove workflow scripts from this repo and call the
 reusable workflows in NIGMS-Sandbox instead

---
 .github/workflows/check-links.yaml   |  31 +----
 .github/workflows/check_links.py     | 168 ---------------------------
 .github/workflows/lint.py            |  53 ---------
 .github/workflows/notebook-lint.yaml |  37 +-----
 .github/workflows/requirements.txt   |   1 -
 5 files changed, 9 insertions(+), 281 deletions(-)
 delete mode 100644 .github/workflows/check_links.py
 delete mode 100644 .github/workflows/lint.py
 delete mode 100644 .github/workflows/requirements.txt
diff --git a/.github/workflows/check-links.yaml b/.github/workflows/check-links.yaml
index 20ba4b8..166bb1c 100644
--- a/.github/workflows/check-links.yaml
+++ b/.github/workflows/check-links.yaml
@@ -1,34 +1,13 @@
 name: 'Check Links'
 on:
-  workflow_call:
-    inputs:
-      directory:
-        required: false
-        type: string
-      repo_link_ignore_list:
-        required: true
-        type: string
-    secrets:
-      PAT:
-        required: false
   push:
   pull_request:
+  workflow_dispatch:
+
 
 jobs:
   link_check:
     name: 'Link Check'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Link Check
-        run: |
-          echo ${{jobs.link_check.uses}}
-
-      - name: Link Check
-        run: |
-          python3 ${{github.action_path}}/check_links.py
-        env:
-          LINK_IGNORE_LIST: https://www.sciencedirect.com,https://portlandpress.com
-          PAT: ${{ secrets.PAT }}
\ No newline at end of file
+    uses: NIGMS/NIGMS-Sandbox/.github/workflows/check-links.yaml@main
+    with:
+      repo_link_ignore_list: ""
\ No newline at end of file
diff --git a/.github/workflows/check_links.py b/.github/workflows/check_links.py
deleted file mode 100644
index 967fe7f..0000000
--- a/.github/workflows/check_links.py
+++ /dev/null
@@ -1,168 +0,0 @@
-import http.client
-import urllib.request, urllib.error
-import os
-import sys
-import re
-
-
-
-# set some default variables
-remove_characters = ['**', '\\n']
-
-# text that tends to be at the end of the url that we need truncate everything past them
-end_characters = [')',",","'",'`',"\"",'</a>','</div>',"\\",">","]"]
-
-big_regex = re.compile('|'.join(map(re.escape, remove_characters)))
-
-# if there are any URLs to ignore add here
-link_ignore_list = []
-link_ignore_list_env = os.getenv("LINK_IGNORE_LIST")
-if link_ignore_list_env and len(link_ignore_list_env) > 0:
-    link_ignore_list = link_ignore_list_env.split(',')
-
-# Add any repo specific ignores
-link_ignore_list_env_2 = os.getenv("inputs.repo_link_ignore_list")
-if link_ignore_list_env_2 and len(link_ignore_list_env_2) > 0:
-    link_ignore_list.extend(link_ignore_list_env.split(','))
-
-print_valid = os.getenv("print_valid_links") is not None
-
-# If we are given a directory then use it, otherwise assume path is current directory
-path = "."
-if len(sys.argv) >1  and os.path.exists(sys.argv[1]):
-    path = sys.argv[1]
-
-# directory environment overrides the system arguments and default.
-directory_env = os.getenv("inputs.directory")
-if directory_env and len(directory_env) > 0:
-    path = directory_env
-
-pat_env = os.getenv("INPUT_PAT")
-if directory_env and len(directory_env) > 0:
-    path = directory_env
-
-# list which stores all links to check
-links_to_check = []
-link_file_map = {}
-# Get the response code of the url to see if it exists
-def getResponseCode(url):
-    content = None
-    try:
-        req = urllib.request.Request(url,
-                                     headers={'User-Agent': 'Mozilla/5.0'})
-        conn = urllib.request.urlopen(req)
-        # Only get HTML if we have a potential anchor link
-        if "#" in url and "pdf" not in url:
-            content = conn.read().decode("utf-8")
-    except urllib.error.HTTPError as e:
-        return [e.code, content]
-    except urllib.error.URLError as e:
-        return [404, content]
-    except http.client.InvalidURL:
-        return [200, content]
-    return [conn.getcode(), content]
-
-def clean_link(link):
-    if link.endswith("."):
-        link = link[:link.rfind(".")]
-    if link.endswith("'"):
-        link = link[:link.rfind("'")]
-    if link.endswith("\""):
-        link = link[:link.rfind("\"")]
-    link_stripped = big_regex.sub("", link.strip())
-    for end_c in end_characters:
-        end_index = link_stripped.find(end_c)
-        if end_index != -1:
-            link_stripped = link_stripped[:end_index]
-    return link_stripped
-
-def add_link(loc,link):
-    # this is a command being ran so difficult to validate in this script, skip it
-    if '$(uname' in link:
-        return False
-
-    # get just from the http portion if there was more in from of the string we grabbed
-    link = link[link.find("http"):]
-
-    # if there is a period at the end, truncate to that period. Other periods may be valid
-    # strip various characters that may be in the string
-    link_stripped = clean_link(link)
-    while link_stripped != link:
-        link = link_stripped
-        link_stripped = clean_link(link)
-
-    # add link to be checked
-    links_to_check.append(link_stripped)
-
-    # store where the link is so we can fix it
-    link_file_map[link_stripped] = loc
-def check_link(link):
-    # try and get the url, if its 404 or 500 then its invalid, let us know and trigger the error flag
-    code = getResponseCode(link)
-    loc =link_file_map[link]
-    if code[0] in [404, 403, 500]:
-
-        # If the link failed, but we are ignoring it then just mention that
-        for ignored_link in link_ignore_list:
-            if ignored_link in link:
-                print(
-                    loc + ", " + link + ", Ignored")
-                return False
-
-        # print(file+" Code:"+str(code[0])+" Line "+str(line_num)+"("+str(char)+"):"+item_stripped)
-        print(
-            loc + ", " + link + ", Failed")
-        return True
-
-    # check for missing anchors
-    elif "#" in link and \
-        code[1] is not None \
-        and 'href=\"' + link[link.find("#"):] + '\"' not in \
-        code[1]:
-        print(
-            loc + ", " + link + ", Failed - Anchor")
-    # print(file + " Missing Anchor Line " + str(
-    #     line_num) + "(" + str(
-    #     char) + "):" + item_stripped)
-    elif print_valid:
-        print(
-            loc + ", " + link + ", Valid")
-    return True
-
-
-if __name__ == "__main__":
-    err = 0
-    print("Directory is "+path)
-    # Loop through all files in path
-    for root, dirs, files in os.walk(path):
-        for file in files:
-            #  only read file that match template ( txt, md or python notebook)
-            if file.endswith(".md") or file.endswith(".txt") or file.endswith(
-                ".ipynb"):
-
-                # get content and separate into lines and then separate by spaces
-                raw_content = open(os.path.join(root, file), "r").read()
-                content = raw_content.split("\n")
-                content = [x.split(" ") for x in content]
-                loc = os.path.join(root, file)
-                # have an incrementer for line number later export
-                for line in content:
-                    for item in line:
-
-                        if "https://" in item or "http://" in item:
-                            if "](" in item:
-                                add_link(loc,item[item.find("]"):])
-                                # if we get any error  then add it
-                                if item[item.find("("):] == item[item.find("]"):]:
-                                    continue
-                                add_link(loc,item[item.find("("):])
-                            else:
-                                add_link(loc,item)
-
-    for link in set(links_to_check):
-        # if we get any error  then add to err variable
-        err = check_link(link) + err
-    # if the error is > 1 then set it to 1 to error as 1
-    if err > 1:
-        err = 1
-    exit(err)
diff --git a/.github/workflows/lint.py b/.github/workflows/lint.py
deleted file mode 100644
index 5808266..0000000
--- a/.github/workflows/lint.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-import shutil
-import nbformat
-from nbformat.v4 import new_notebook
-
-def clean_notebook(file_path):
-    with open(file_path, 'r', encoding='utf-8') as f:
-        notebook = nbformat.read(f, as_version=4)
-
-    # Clean cells
-    for cell in notebook.cells:
-        if 'outputs' in cell:
-            cell['outputs'] = []
-        if 'execution_count' in cell:
-            cell['execution_count'] = None
-        if 'metadata' in cell:
-            cell['metadata'] = {}
-
-    # Clean notebook metadata
-    if 'metadata' in notebook:
-        notebook['metadata'] = {}
-
-    with open(file_path, 'w', encoding='utf-8') as f:
-        nbformat.write(notebook, f)
-
-def delete_checkpoints_dirs(root_dir):
-    # Walk through the directory tree
-    for dirpath, dirnames, filenames in os.walk(root_dir):
-        for dirname in dirnames:
-            # Check if the directory name is 'checkpoints'
-            if dirname == '.ipynb_checkpoints':
-                # Construct the full path to the directory
-                dir_to_delete = os.path.join(dirpath, dirname)
-                # Delete the directory
-                shutil.rmtree(dir_to_delete)
-                print(f'Deleted {dir_to_delete}')
-                print('Consider adding .ipynb_checkpoints to your .gitignore file!')
-
-
-if __name__ == "__main__":
-    # Change this to the directory containing your notebooks
-    notebook_dir = '../../'
-
-    for root, dirs, files in os.walk(notebook_dir):
-        for file in files:
-            if file.endswith('.ipynb'):
-                file_path = os.path.join(root, file)
-                clean_notebook(file_path)
-                print(f'Cleaned {file_path}')
-
-    # Delete all 'checkpoints' directories
-    delete_checkpoints_dirs(notebook_dir)
-
diff --git a/.github/workflows/notebook-lint.yaml b/.github/workflows/notebook-lint.yaml
index 4af3fd9..2038a72 100644
--- a/.github/workflows/notebook-lint.yaml
+++ b/.github/workflows/notebook-lint.yaml
@@ -1,11 +1,7 @@
 name: 'Lint Notebook'
 on:
-  workflow_call:
-    inputs:
-      directory:
-        required: false
-        type: string
   push:
+  workflow_dispatch:
 permissions:
   contents: write
   id-token: write
@@ -13,31 +9,6 @@ permissions:
 jobs:
   lint:
     name: 'Linting'
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install requirements.txt
-        run: |
-          python3 -m pip install --upgrade pip
-          pip3 install nbformat
-
-      - name: Notebook Linting
-        working-directory: .github/workflows
-        run: |
-          python3 lint.py
-
-      - name: Commit changes
-        uses: EndBug/add-and-commit@v9
-        with:
-          author_name: github-action
-          author_email: cbiit-github-action@github.com
-          message: 'Github Action: Refresh stats'
+    uses: NIGMS/NIGMS-Sandbox/.github/workflows/notebook-lint.yaml@main
+    with:
+      directory: .
diff --git a/.github/workflows/requirements.txt b/.github/workflows/requirements.txt
deleted file mode 100644
index d0537a8..0000000
--- a/.github/workflows/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-nbformat==5.10.4