Skip to content

Commit

Permalink
ls-url: add support for --tree/--level (#10664)
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry authored Jan 9, 2025
1 parent a43c1b3 commit 2c6b917
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 28 deletions.
5 changes: 1 addition & 4 deletions dvc/commands/ls/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,7 @@ def _build_tree_structure(

num_entries = len(entries)
for i, (name, entry) in enumerate(entries.items()):
# show full path for root, otherwise only show the name
if _depth > 0:
entry["path"] = name

entry["path"] = name
is_last = i >= num_entries - 1
tree_part = ""
if _depth > 0:
Expand Down
40 changes: 36 additions & 4 deletions dvc/commands/ls_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,45 @@
from dvc.cli.utils import DictAction, append_doc_link
from dvc.log import logger

from .ls import show_entries
from .ls import show_entries, show_tree

logger = logger.getChild(__name__)


class CmdListUrl(CmdBaseNoRepo):
def run(self):
from dvc.config import Config
def _show_tree(self, config):
from dvc.fs import parse_external_url
from dvc.repo.ls import _ls_tree

fs, fs_path = parse_external_url(
self.args.url, fs_config=self.args.fs_config, config=config
)
entries = _ls_tree(fs, fs_path, maxdepth=self.args.level)
show_tree(entries, with_color=True, with_size=self.args.size)
return 0

def _show_list(self, config):
from dvc.repo import Repo

entries = Repo.ls_url(
self.args.url,
recursive=self.args.recursive,
maxdepth=self.args.level,
fs_config=self.args.fs_config,
config=Config.from_cwd(),
config=config,
)
if entries:
show_entries(entries, with_color=True, with_size=self.args.size)
return 0

def run(self):
from dvc.config import Config

config = Config.from_cwd()
if self.args.tree:
return self._show_tree(config=config)
return self._show_list(config=config)


def add_parser(subparsers, parent_parser):
LS_HELP = "List directory contents from URL."
Expand All @@ -40,6 +59,19 @@ def add_parser(subparsers, parent_parser):
lsurl_parser.add_argument(
"-R", "--recursive", action="store_true", help="Recursively list files."
)
lsurl_parser.add_argument(
"-T",
"--tree",
action="store_true",
help="Recurse into directories as a tree.",
)
lsurl_parser.add_argument(
"-L",
"--level",
metavar="depth",
type=int,
help="Limit the depth of recursion.",
)
lsurl_parser.add_argument("--size", action="store_true", help="Show sizes.")
lsurl_parser.add_argument(
"--fs-config",
Expand Down
31 changes: 20 additions & 11 deletions dvc/repo/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ def ls_tree(
path = path or ""
fs: DVCFileSystem = repo.dvcfs
fs_path = fs.from_os_path(path)
return _ls_tree(fs, fs_path, dvc_only, maxdepth)
return _ls_tree(
fs, fs_path, maxdepth=maxdepth, dvc_only=dvc_only, dvcfiles=True
)


def _ls(
Expand Down Expand Up @@ -145,27 +147,34 @@ def _ls(
return ret_list


def _ls_tree(
fs, path, dvc_only: bool = False, maxdepth: Optional[int] = None, _info=None
):
ret = {}
def _ls_tree(fs, path, maxdepth=None, _info=None, **fs_kwargs):
info = _info or fs.info(path)
if _info is None:
# preserve the original path name
name = path
if not name:
name = os.curdir if fs.protocol == "local" else fs.root_marker
path = info["name"]
else:
name = path.rsplit(fs.sep, 1)[-1]

path = info["name"].rstrip(fs.sep) or os.curdir
name = path.rsplit("/", 1)[-1]
ret = {}
ls_info = _adapt_info(info)
ls_info["path"] = path

recurse = maxdepth is None or maxdepth > 0
if recurse and info["type"] == "directory":
infos = fs.ls(path, dvcfiles=True, dvc_only=dvc_only, detail=True)
try:
infos = fs.ls(path, detail=True, **fs_kwargs)
except FileNotFoundError:
# broken symlink?
infos = []

infos.sort(key=lambda f: f["name"])
maxdepth = maxdepth - 1 if maxdepth is not None else None
contents = {}
for info in infos:
d = _ls_tree(
fs, info["name"], dvc_only=dvc_only, maxdepth=maxdepth, _info=info
)
d = _ls_tree(fs, info["name"], maxdepth=maxdepth, _info=info, **fs_kwargs)
contents.update(d)
ls_info["contents"] = contents

Expand Down
21 changes: 16 additions & 5 deletions dvc/repo/ls_url.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
from fsspec.implementations.local import LocalFileSystem as _LocalFileSystem

from dvc.exceptions import URLMissingError
from dvc.fs import parse_external_url
from dvc.fs import LocalFileSystem, parse_external_url


def ls_url(url, *, fs_config=None, recursive=False, config=None):
def ls_url(url, *, fs_config=None, recursive=False, maxdepth=None, config=None):
fs, fs_path = parse_external_url(url, fs_config=fs_config, config=config)
try:
info = fs.info(fs_path)
except FileNotFoundError as exc:
raise URLMissingError(url) from exc
if info["type"] != "directory":
if maxdepth == 0 or info["type"] != "directory":
return [{"path": info["name"], "isdir": False}]

if isinstance(fs, LocalFileSystem):
# dvc's LocalFileSystem does not support maxdepth yet
walk = _LocalFileSystem().walk
else:
walk = fs.walk

ret = []
for _, dirs, files in fs.walk(fs_path, detail=True):
if not recursive:
for root, dirs, files in walk(fs_path, detail=True, maxdepth=maxdepth):
parts = fs.relparts(root, fs_path)
if parts == (".",):
parts = ()
if not recursive or (maxdepth and len(parts) >= maxdepth - 1):
files.update(dirs)

for info in files.values():
Expand Down
33 changes: 33 additions & 0 deletions dvc/testing/workspace_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,39 @@ def test_recursive(self, cloud):
],
)

result = ls_url(
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=0
)
match_files(
fs,
result,
[{"path": (cloud / "dir").fs_path, "isdir": False}],
)

result = ls_url(
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=1
)
match_files(
fs,
result,
[
{"path": "foo", "isdir": False},
{"path": "subdir", "isdir": True},
],
)

result = ls_url(
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=2
)
match_files(
fs,
result,
[
{"path": "foo", "isdir": False},
{"path": "subdir/bar", "isdir": False},
],
)

def test_nonexistent(self, cloud):
with pytest.raises(URLMissingError):
ls_url(str(cloud / "dir"), fs_config=cloud.config)
Expand Down
64 changes: 63 additions & 1 deletion tests/func/test_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@

import pytest

from dvc.fs import MemoryFileSystem
from dvc.repo import Repo
from dvc.repo.ls import ls_tree
from dvc.repo.ls import _ls_tree, ls_tree
from dvc.scm import CloneError

FS_STRUCTURE = {
Expand Down Expand Up @@ -998,3 +999,64 @@ def test_ls_tree_maxdepth(M, tmp_dir, scm, dvc):
"structure.xml.dvc": None,
}
}


def test_fs_ls_tree():
fs = MemoryFileSystem(global_store=False)
fs.pipe({f: content.encode() for f, content in FS_STRUCTURE.items()})
root = fs.root_marker

files = _ls_tree(fs, "README.md")
assert _simplify_tree(files) == {"README.md": None}
files = _ls_tree(fs, root)
expected = {
root: {
".gitignore": None,
"README.md": None,
"model": {
"script.py": None,
"train.py": None,
},
}
}
assert _simplify_tree(files) == expected

files = _ls_tree(fs, "model")
assert _simplify_tree(files) == {
"model": {
"script.py": None,
"train.py": None,
}
}


def test_fs_ls_tree_maxdepth():
fs = MemoryFileSystem(global_store=False)
fs.pipe({f: content.encode() for f, content in FS_STRUCTURE.items()})

files = _ls_tree(fs, "/", maxdepth=0)
assert _simplify_tree(files) == {"/": None}

files = _ls_tree(fs, "/", maxdepth=1)
assert _simplify_tree(files) == {
"/": {
".gitignore": None,
"README.md": None,
"model": None,
}
}

files = _ls_tree(fs, "/", maxdepth=2)
assert _simplify_tree(files) == {
"/": {
".gitignore": None,
"README.md": None,
"model": {
"script.py": None,
"train.py": None,
},
}
}

files = _ls_tree(fs, "README.md", maxdepth=3)
assert _simplify_tree(files) == {"README.md": None}
22 changes: 19 additions & 3 deletions tests/unit/command/test_ls_url.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dvc.cli import parse_args
from dvc.commands.ls_url import CmdListUrl
from dvc.config import Config
from dvc.fs import LocalFileSystem


def test_ls_url(mocker, M):
Expand All @@ -12,18 +13,33 @@ def test_ls_url(mocker, M):
assert cmd.run() == 0

m.assert_called_once_with(
"src", recursive=False, fs_config=None, config=M.instance_of(Config)
"src",
recursive=False,
maxdepth=None,
fs_config=None,
config=M.instance_of(Config),
)


def test_recursive(mocker, M):
cli_args = parse_args(["ls-url", "-R", "src"])
cli_args = parse_args(["ls-url", "-R", "-L", "2", "src"])
assert cli_args.func == CmdListUrl
cmd = cli_args.func(cli_args)
m = mocker.patch("dvc.repo.Repo.ls_url", autospec=True)

assert cmd.run() == 0

m.assert_called_once_with(
"src", recursive=True, fs_config=None, config=M.instance_of(Config)
"src", recursive=True, maxdepth=2, fs_config=None, config=M.instance_of(Config)
)


def test_tree(mocker, M):
cli_args = parse_args(["ls-url", "--tree", "--level", "2", "src"])
assert cli_args.func == CmdListUrl
cmd = cli_args.func(cli_args)
m = mocker.patch("dvc.repo.ls._ls_tree", autospec=True)

assert cmd.run() == 0

m.assert_called_once_with(M.instance_of(LocalFileSystem), "src", maxdepth=2)

0 comments on commit 2c6b917

Please sign in to comment.