From 4ad12f204ad0b85580fc32137c647baaff044e95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Krassowski?= <5832902+krassowski@users.noreply.github.com> Date: Wed, 17 Jan 2024 21:58:35 +0000 Subject: [PATCH] Merge pull request from GHSA-4qhp-652w-c22x * Add auth decorators, add traversal guard * Fix mocks resolving most test failures; `test_listeners` still fails not sure how to fix it * Address review comments * add tests for (un)authn'd REST and WebSocket handlers * Restore old import for 1.x compat, remove a log * handle advertised jupyter-server 1.x version * Lint (isort any mypy) * More tests for paths --------- Co-authored-by: Nicholas Bollweg --- .../jupyter_lsp/jupyter_lsp/handlers.py | 61 ++++++++- .../jupyter_lsp/jupyter_lsp/paths.py | 14 ++- .../jupyter_lsp/serverextension.py | 10 +- .../jupyter_lsp/jupyter_lsp/tests/conftest.py | 5 +- .../jupyter_lsp/tests/test_auth.py | 117 ++++++++++++++++++ .../jupyter_lsp/tests/test_paths.py | 41 +++++- .../tests/test_virtual_documents_shadow.py | 17 ++- .../jupyter_lsp/virtual_documents_shadow.py | 7 +- requirements/utest.txt | 1 - scripts/bump_versions.py | 1 - 10 files changed, 260 insertions(+), 14 deletions(-) create mode 100644 python_packages/jupyter_lsp/jupyter_lsp/tests/test_auth.py diff --git a/python_packages/jupyter_lsp/jupyter_lsp/handlers.py b/python_packages/jupyter_lsp/jupyter_lsp/handlers.py index 0ff6ff0b8..75f2eb4cf 100644 --- a/python_packages/jupyter_lsp/jupyter_lsp/handlers.py +++ b/python_packages/jupyter_lsp/jupyter_lsp/handlers.py @@ -2,14 +2,32 @@ """ from typing import Optional, Text +from jupyter_core.utils import ensure_async from jupyter_server.base.handlers import APIHandler -from jupyter_server.base.zmqhandlers import WebSocketHandler, WebSocketMixin from jupyter_server.utils import url_path_join as ujoin +from tornado import web +from tornado.websocket import WebSocketHandler + +try: + from jupyter_server.auth.decorator import authorized +except ImportError: + + def authorized(method): # type: ignore + """A no-op fallback for `jupyter_server 1.x`""" + return method + + +try: + from jupyter_server.base.websocket import WebSocketMixin +except ImportError: + from jupyter_server.base.zmqhandlers import WebSocketMixin from .manager import LanguageServerManager from .schema import SERVERS_RESPONSE from .specs.utils import censored_spec +AUTH_RESOURCE = "lsp" + class BaseHandler(APIHandler): manager = None # type: LanguageServerManager @@ -21,10 +39,43 @@ def initialize(self, manager: LanguageServerManager): class LanguageServerWebSocketHandler( # type: ignore WebSocketMixin, WebSocketHandler, BaseHandler ): - """Setup tornado websocket to route to language server sessions""" + """Setup tornado websocket to route to language server sessions. + + The logic of `get` and `pre_get` methods is derived from jupyter-server ws handlers, + and should be kept in sync to follow best practice established by upstream; see: + https://github.com/jupyter-server/jupyter_server/blob/v2.12.5/jupyter_server/services/kernels/websocket.py#L36 + """ + + auth_resource = AUTH_RESOURCE language_server: Optional[Text] = None + async def pre_get(self): + """Handle a pre_get.""" + # authenticate first + # authenticate the request before opening the websocket + user = self.current_user + if user is None: + self.log.warning("Couldn't authenticate WebSocket connection") + raise web.HTTPError(403) + + if not hasattr(self, "authorizer"): + return + + # authorize the user. + is_authorized = await ensure_async( + self.authorizer.is_authorized(self, user, "execute", AUTH_RESOURCE) + ) + if not is_authorized: + raise web.HTTPError(403) + + async def get(self, *args, **kwargs): + """Get an event socket.""" + await self.pre_get() + res = super().get(*args, **kwargs) + if res is not None: + await res + async def open(self, language_server): await self.manager.ready() self.language_server = language_server @@ -47,11 +98,11 @@ class LanguageServersHandler(BaseHandler): Response should conform to schema in schema/servers.schema.json """ + auth_resource = AUTH_RESOURCE validator = SERVERS_RESPONSE - def initialize(self, *args, **kwargs): - super().initialize(*args, **kwargs) - + @web.authenticated + @authorized async def get(self): """finish with the JSON representations of the sessions""" await self.manager.ready() diff --git a/python_packages/jupyter_lsp/jupyter_lsp/paths.py b/python_packages/jupyter_lsp/jupyter_lsp/paths.py index 5c9cb43e6..625226dc1 100644 --- a/python_packages/jupyter_lsp/jupyter_lsp/paths.py +++ b/python_packages/jupyter_lsp/jupyter_lsp/paths.py @@ -1,6 +1,7 @@ import os -import pathlib import re +from pathlib import Path +from typing import Union from urllib.parse import unquote, urlparse RE_PATH_ANCHOR = r"^file://([^/]+|/[A-Z]:)" @@ -12,7 +13,7 @@ def normalized_uri(root_dir): Special care must be taken around windows paths: the canonical form of windows drives and UNC paths is lower case """ - root_uri = pathlib.Path(root_dir).expanduser().resolve().as_uri() + root_uri = Path(root_dir).expanduser().resolve().as_uri() root_uri = re.sub( RE_PATH_ANCHOR, lambda m: "file://{}".format(m.group(1).lower()), root_uri ) @@ -33,3 +34,12 @@ def file_uri_to_path(file_uri): else: result = file_uri_path_unquoted # pragma: no cover return result + + +def is_relative(root: Union[str, Path], path: Union[str, Path]) -> bool: + """Return if path is relative to root""" + try: + Path(path).resolve().relative_to(Path(root).resolve()) + return True + except ValueError: + return False diff --git a/python_packages/jupyter_lsp/jupyter_lsp/serverextension.py b/python_packages/jupyter_lsp/jupyter_lsp/serverextension.py index 2640e429f..194e9efec 100644 --- a/python_packages/jupyter_lsp/jupyter_lsp/serverextension.py +++ b/python_packages/jupyter_lsp/jupyter_lsp/serverextension.py @@ -4,6 +4,7 @@ from pathlib import Path import traitlets +from tornado import ioloop from .handlers import add_handlers from .manager import LanguageServerManager @@ -73,4 +74,11 @@ def load_jupyter_server_extension(nbapp): page_config.update(rootUri=root_uri, virtualDocumentsUri=virtual_documents_uri) add_handlers(nbapp) - nbapp.io_loop.call_later(0, initialize, nbapp, virtual_documents_uri) + + if hasattr(nbapp, "io_loop"): + io_loop = nbapp.io_loop + else: + # handle jupyter_server 1.x + io_loop = ioloop.IOLoop.current() + + io_loop.call_later(0, initialize, nbapp, virtual_documents_uri) diff --git a/python_packages/jupyter_lsp/jupyter_lsp/tests/conftest.py b/python_packages/jupyter_lsp/jupyter_lsp/tests/conftest.py index fd8e0b93a..71109a947 100644 --- a/python_packages/jupyter_lsp/jupyter_lsp/tests/conftest.py +++ b/python_packages/jupyter_lsp/jupyter_lsp/tests/conftest.py @@ -5,6 +5,7 @@ from jupyter_server.serverapp import ServerApp from pytest import fixture +from tornado.httpserver import HTTPRequest from tornado.httputil import HTTPServerRequest from tornado.queues import Queue from tornado.web import Application @@ -141,9 +142,11 @@ def send_ping(self): class MockHandler(LanguageServersHandler): _payload = None + _jupyter_current_user = "foo" # type:ignore[assignment] def __init__(self): - pass + self.request = HTTPRequest("GET") + self.application = Application() def finish(self, payload): self._payload = payload diff --git a/python_packages/jupyter_lsp/jupyter_lsp/tests/test_auth.py b/python_packages/jupyter_lsp/jupyter_lsp/tests/test_auth.py new file mode 100644 index 000000000..0c0b2e885 --- /dev/null +++ b/python_packages/jupyter_lsp/jupyter_lsp/tests/test_auth.py @@ -0,0 +1,117 @@ +"""Integration tests of authorization running under jupyter-server.""" +import json +import os +import socket +import subprocess +import time +import uuid +from typing import Generator, Tuple +from urllib.error import HTTPError, URLError +from urllib.request import urlopen + +import pytest + +from .conftest import KNOWN_SERVERS + +LOCALHOST = "127.0.0.1" +REST_ROUTES = ["/lsp/status"] +WS_ROUTES = [f"/lsp/ws/{ls}" for ls in KNOWN_SERVERS] + + +@pytest.mark.parametrize("route", REST_ROUTES) +def test_auth_rest(route: str, a_server_url_and_token: Tuple[str, str]) -> None: + """Verify a REST route only provides access to an authenticated user.""" + base_url, token = a_server_url_and_token + + verify_response(base_url, route) + + url = f"{base_url}{route}" + + with urlopen(f"{url}?token={token}") as response: + raw_body = response.read().decode("utf-8") + + decode_error = None + + try: + json.loads(raw_body) + except json.decoder.JSONDecodeError as err: + decode_error = err + assert not decode_error, f"the response for {url} was not JSON" + + +@pytest.mark.parametrize("route", WS_ROUTES) +def test_auth_websocket(route: str, a_server_url_and_token: Tuple[str, str]) -> None: + """Verify a WebSocket does not provide access to an unauthenticated user.""" + verify_response(a_server_url_and_token[0], route) + + +@pytest.fixture(scope="module") +def a_server_url_and_token( + tmp_path_factory: pytest.TempPathFactory, +) -> Generator[Tuple[str, str], None, None]: + """Start a temporary, isolated jupyter server.""" + token = str(uuid.uuid4()) + port = get_unused_port() + + root_dir = tmp_path_factory.mktemp("root_dir") + home = tmp_path_factory.mktemp("home") + server_conf = home / "etc/jupyter/jupyter_config.json" + + server_conf.parent.mkdir(parents=True) + extensions = {"jupyter_lsp": True, "jupyterlab": False, "nbclassic": False} + app = {"jpserver_extensions": extensions, "token": token} + config_data = {"ServerApp": app, "IdentityProvider": {"token": token}} + + server_conf.write_text(json.dumps(config_data), encoding="utf-8") + args = ["jupyter-server", f"--port={port}", "--no-browser"] + env = dict(os.environ) + env.update( + HOME=str(home), + USERPROFILE=str(home), + JUPYTER_CONFIG_DIR=str(server_conf.parent), + ) + proc = subprocess.Popen(args, cwd=str(root_dir), env=env, stdin=subprocess.PIPE) + url = f"http://{LOCALHOST}:{port}" + retries = 20 + while retries: + time.sleep(1) + try: + urlopen(f"{url}/favicon.ico") + break + except URLError: + print(f"[{retries} / 20] ...", flush=True) + retries -= 1 + continue + yield url, token + proc.terminate() + proc.communicate(b"y\n") + proc.wait() + assert proc.returncode is not None, "jupyter-server probably still running" + + +def get_unused_port(): + """Get an unused port by trying to listen to any random port. + + Probably could introduce race conditions if inside a tight loop. + """ + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind((LOCALHOST, 0)) + sock.listen(1) + port = sock.getsockname()[1] + sock.close() + return port + + +def verify_response(base_url: str, route: str, expect: int = 403): + """Verify that a response returns the expected error.""" + error = None + body = None + url = f"{base_url}{route}" + try: + with urlopen(url) as res: + body = res.read() + except HTTPError as err: + error = err + assert error, f"no HTTP error for {url}: {body}" + http_code = error.getcode() + assert http_code == expect, f"{url} HTTP code was unexpected: {body}" diff --git a/python_packages/jupyter_lsp/jupyter_lsp/tests/test_paths.py b/python_packages/jupyter_lsp/jupyter_lsp/tests/test_paths.py index 3360da4e0..6c91d16d3 100644 --- a/python_packages/jupyter_lsp/jupyter_lsp/tests/test_paths.py +++ b/python_packages/jupyter_lsp/jupyter_lsp/tests/test_paths.py @@ -4,7 +4,7 @@ import pytest -from ..paths import file_uri_to_path, normalized_uri +from ..paths import file_uri_to_path, is_relative, normalized_uri WIN = platform.system() == "Windows" HOME = pathlib.Path("~").expanduser() @@ -17,6 +17,45 @@ def test_normalize_posix_path_home(root_dir, expected_root_uri): # pragma: no c assert normalized_uri(root_dir) == expected_root_uri +@pytest.mark.skipif(WIN, reason="can't test POSIX paths on Windows") +@pytest.mark.parametrize( + "root, path", + [["~", "~/a"], ["~", "~/a/../b/"], ["/", "/"], ["/a", "/a/b"], ["/a", "/a/b/../c"]], +) +def test_is_relative_ok(root, path): + assert is_relative(root, path) + + +@pytest.mark.skipif(WIN, reason="can't test POSIX paths on Windows") +@pytest.mark.parametrize( + "root, path", + [ + ["~", "~/.."], + ["~", "/"], + ["/a", "/"], + ["/a/b", "/a"], + ["/a/b", "/a/b/.."], + ["/a", "/a/../b"], + ["/a", "a//"], + ], +) +def test_is_relative_not_ok(root, path): + assert not is_relative(root, path) + + +@pytest.mark.skipif(not WIN, reason="can't test Windows paths on POSIX") +@pytest.mark.parametrize( + "root, path", + [ + ["c:\\Users\\user1", "c:\\Users\\"], + ["c:\\Users\\user1", "d:\\"], + ["c:\\Users", "c:\\Users\\.."], + ], +) +def test_is_relative_not_ok_win(root, path): + assert not is_relative(root, path) + + @pytest.mark.skipif(PY35, reason="can't test non-existent paths on py35") @pytest.mark.skipif(WIN, reason="can't test POSIX paths on Windows") @pytest.mark.parametrize( diff --git a/python_packages/jupyter_lsp/jupyter_lsp/tests/test_virtual_documents_shadow.py b/python_packages/jupyter_lsp/jupyter_lsp/tests/test_virtual_documents_shadow.py index 5cce1ac6b..0d4c4b53f 100644 --- a/python_packages/jupyter_lsp/jupyter_lsp/tests/test_virtual_documents_shadow.py +++ b/python_packages/jupyter_lsp/jupyter_lsp/tests/test_virtual_documents_shadow.py @@ -210,6 +210,21 @@ def run_shadow(message): ) +@pytest.mark.asyncio +async def test_shadow_traversal(shadow_path, manager): + file_beyond_shadow_root_uri = (Path(shadow_path) / ".." / "test.py").as_uri() + + shadow = setup_shadow_filesystem(Path(shadow_path).as_uri()) + + def run_shadow(message): + return shadow("client", message, "python-lsp-server", manager) + + with pytest.raises( + ShadowFilesystemError, match="is not relative to shadow filesystem root" + ): + await run_shadow(did_open(file_beyond_shadow_root_uri, "content")) + + @pytest.fixture def forbidden_shadow_path(tmpdir): path = Path(tmpdir) / "no_permission_dir" @@ -238,7 +253,7 @@ def send_change(): # no message should be emitted during the first two attempts assert caplog.text == "" - # a wargning should be emitted on third failure + # a warning should be emitted on third failure with caplog.at_level(logging.WARNING): assert await send_change() is None assert "initialization of shadow filesystem failed three times" in caplog.text diff --git a/python_packages/jupyter_lsp/jupyter_lsp/virtual_documents_shadow.py b/python_packages/jupyter_lsp/jupyter_lsp/virtual_documents_shadow.py index abdf22ae5..b78a3130d 100644 --- a/python_packages/jupyter_lsp/jupyter_lsp/virtual_documents_shadow.py +++ b/python_packages/jupyter_lsp/jupyter_lsp/virtual_documents_shadow.py @@ -8,7 +8,7 @@ from tornado.gen import convert_yielded from .manager import lsp_message_listener -from .paths import file_uri_to_path +from .paths import file_uri_to_path, is_relative from .types import LanguageServerManagerAPI # TODO: make configurable @@ -171,6 +171,11 @@ async def shadow_virtual_documents(scope, message, language_server, manager): initialized = True path = file_uri_to_path(uri) + if not is_relative(shadow_filesystem, path): + raise ShadowFilesystemError( + f"Path {path} is not relative to shadow filesystem root" + ) + editable_file = EditableFile(path) await editable_file.read() diff --git a/requirements/utest.txt b/requirements/utest.txt index 726fccae4..9a8dd4813 100644 --- a/requirements/utest.txt +++ b/requirements/utest.txt @@ -6,4 +6,3 @@ pytest-cov pytest-flake8 pytest-runner python-lsp-server -pluggy<1.0,>=0.12 # Python 3.5 CI Travis, may need update with new pytest releases, see issue 259 diff --git a/scripts/bump_versions.py b/scripts/bump_versions.py index a70c73f38..28eef97cb 100755 --- a/scripts/bump_versions.py +++ b/scripts/bump_versions.py @@ -47,7 +47,6 @@ def maybe_change_version(self, dry: bool): self.change_version(new_version=version, dry=dry) def change_version(self, new_version: str, dry: bool): - changelog = CHANGELOG.read_text(encoding="utf-8") if new_version not in changelog: raise Exception(