draft for path and path_list #513

muescha · 2024-01-04T06:44:31Z

Just playing around with how to implement --path-list.

TODO:

muescha · 2024-01-04T06:45:24Z

this is working:

echo "/abc/def/gh.txt:/xyz/uvw/ab.app" \
| jc --path-list -p

[
  {
    "url": "/abc/def/gh.txt",
    "scheme": null,
    "netloc": null,
    "path": "/abc/def/gh.txt",
    "parent": "/abc/def",
    "filename": "gh.txt",
    "stem": "gh",
    "extension": "txt",
    "path_list": [
      "abc",
      "def",
      "gh.txt"
    ],
    "query": null,
    "query_obj": null,
    "fragment": null,
    "username": null,
    "password": null,
    "hostname": null,
    "port": null,
    "encoded": {
      "url": "/abc/def/gh.txt",
      "scheme": null,
      "netloc": null,
      "path": "/abc/def/gh.txt",
      "parent": "/abc/def",
      "filename": "gh.txt",
      "stem": "gh",
      "extension": "txt",
      "path_list": [
        "abc",
        "def",
        "gh.txt"
      ],
      "query": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null
    },
    "decoded": {
      "url": "/abc/def/gh.txt",
      "scheme": null,
      "netloc": null,
      "path": "/abc/def/gh.txt",
      "parent": "/abc/def",
      "filename": "gh.txt",
      "stem": "gh",
      "extension": "txt",
      "path_list": [
        "abc",
        "def",
        "gh.txt"
      ],
      "query": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null
    }
  },
  {
    "url": "/xyz/uvw/ab.app",
    "scheme": null,
    "netloc": null,
    "path": "/xyz/uvw/ab.app",
    "parent": "/xyz/uvw",
    "filename": "ab.app",
    "stem": "ab",
    "extension": "app",
    "path_list": [
      "xyz",
      "uvw",
      "ab.app"
    ],
    "query": null,
    "query_obj": null,
    "fragment": null,
    "username": null,
    "password": null,
    "hostname": null,
    "port": null,
    "encoded": {
      "url": "/xyz/uvw/ab.app",
      "scheme": null,
      "netloc": null,
      "path": "/xyz/uvw/ab.app",
      "parent": "/xyz/uvw",
      "filename": "ab.app",
      "stem": "ab",
      "extension": "app",
      "path_list": [
        "xyz",
        "uvw",
        "ab.app"
      ],
      "query": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null
    },
    "decoded": {
      "url": "/xyz/uvw/ab.app",
      "scheme": null,
      "netloc": null,
      "path": "/xyz/uvw/ab.app",
      "parent": "/xyz/uvw",
      "filename": "ab.app",
      "stem": "ab",
      "extension": "app",
      "path_list": [
        "xyz",
        "uvw",
        "ab.app"
      ],
      "query": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null
    }
  }
]

With --slurp, this is also working:

echo "/abc/def/gh.txt:/xyz/uvw/ab.app\n/def/hij/klm.txt:/efe/app.txt" \
| jc --path-list -p -s | jq ".[][] .path"
"/abc/def/gh.txt"
"/xyz/uvw/ab.app"
"/def/hij/klm.txt"
"/efe/app.txt"

echo "/abc/def/gh.txt:/xyz/uvw/ab.app\n/def/hij/klm.txt:/efe/app.txt" \
| jc --path-list -p -s

[
  [
    {
      "url": "/abc/def/gh.txt",
      "scheme": null,
      "netloc": null,
      "path": "/abc/def/gh.txt",
      "parent": "/abc/def",
      "filename": "gh.txt",
      "stem": "gh",
      "extension": "txt",
      "path_list": [
        "abc",
        "def",
        "gh.txt"
      ],
      "query": null,
      "query_obj": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null,
      "encoded": {
        "url": "/abc/def/gh.txt",
        "scheme": null,
        "netloc": null,
        "path": "/abc/def/gh.txt",
        "parent": "/abc/def",
        "filename": "gh.txt",
        "stem": "gh",
        "extension": "txt",
        "path_list": [
          "abc",
          "def",
          "gh.txt"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      },
      "decoded": {
        "url": "/abc/def/gh.txt",
        "scheme": null,
        "netloc": null,
        "path": "/abc/def/gh.txt",
        "parent": "/abc/def",
        "filename": "gh.txt",
        "stem": "gh",
        "extension": "txt",
        "path_list": [
          "abc",
          "def",
          "gh.txt"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      }
    },
    {
      "url": "/xyz/uvw/ab.app",
      "scheme": null,
      "netloc": null,
      "path": "/xyz/uvw/ab.app",
      "parent": "/xyz/uvw",
      "filename": "ab.app",
      "stem": "ab",
      "extension": "app",
      "path_list": [
        "xyz",
        "uvw",
        "ab.app"
      ],
      "query": null,
      "query_obj": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null,
      "encoded": {
        "url": "/xyz/uvw/ab.app",
        "scheme": null,
        "netloc": null,
        "path": "/xyz/uvw/ab.app",
        "parent": "/xyz/uvw",
        "filename": "ab.app",
        "stem": "ab",
        "extension": "app",
        "path_list": [
          "xyz",
          "uvw",
          "ab.app"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      },
      "decoded": {
        "url": "/xyz/uvw/ab.app",
        "scheme": null,
        "netloc": null,
        "path": "/xyz/uvw/ab.app",
        "parent": "/xyz/uvw",
        "filename": "ab.app",
        "stem": "ab",
        "extension": "app",
        "path_list": [
          "xyz",
          "uvw",
          "ab.app"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      }
    }
  ],
  [
    {
      "url": "/def/hij/klm.txt",
      "scheme": null,
      "netloc": null,
      "path": "/def/hij/klm.txt",
      "parent": "/def/hij",
      "filename": "klm.txt",
      "stem": "klm",
      "extension": "txt",
      "path_list": [
        "def",
        "hij",
        "klm.txt"
      ],
      "query": null,
      "query_obj": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null,
      "encoded": {
        "url": "/def/hij/klm.txt",
        "scheme": null,
        "netloc": null,
        "path": "/def/hij/klm.txt",
        "parent": "/def/hij",
        "filename": "klm.txt",
        "stem": "klm",
        "extension": "txt",
        "path_list": [
          "def",
          "hij",
          "klm.txt"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      },
      "decoded": {
        "url": "/def/hij/klm.txt",
        "scheme": null,
        "netloc": null,
        "path": "/def/hij/klm.txt",
        "parent": "/def/hij",
        "filename": "klm.txt",
        "stem": "klm",
        "extension": "txt",
        "path_list": [
          "def",
          "hij",
          "klm.txt"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      }
    },
    {
      "url": "/efe/app.txt",
      "scheme": null,
      "netloc": null,
      "path": "/efe/app.txt",
      "parent": "/efe",
      "filename": "app.txt",
      "stem": "app",
      "extension": "txt",
      "path_list": [
        "efe",
        "app.txt"
      ],
      "query": null,
      "query_obj": null,
      "fragment": null,
      "username": null,
      "password": null,
      "hostname": null,
      "port": null,
      "encoded": {
        "url": "/efe/app.txt",
        "scheme": null,
        "netloc": null,
        "path": "/efe/app.txt",
        "parent": "/efe",
        "filename": "app.txt",
        "stem": "app",
        "extension": "txt",
        "path_list": [
          "efe",
          "app.txt"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      },
      "decoded": {
        "url": "/efe/app.txt",
        "scheme": null,
        "netloc": null,
        "path": "/efe/app.txt",
        "parent": "/efe",
        "filename": "app.txt",
        "stem": "app",
        "extension": "txt",
        "path_list": [
          "efe",
          "app.txt"
        ],
        "query": null,
        "fragment": null,
        "username": null,
        "password": null,
        "hostname": null,
        "port": null
      }
    }
  ]
]

muescha · 2024-01-04T06:50:42Z

I know the raw_output type and the _process type are wrong.
Maybe I don't need _process - I just carried it over because there is an empty _process in url.py.

muescha · 2024-01-04T07:39:51Z

Would it make sense to have a path parser which uses the url parser and then uses _process to strip the result down to only the fields that are needed for a path?

Do we need encoded and decoded?

    [
      {
        "path": "/abc/def/gh.txt",
        "parent": "/abc/def",
        "filename": "gh.txt",
        "stem": "gh",
        "extension": "txt",
        "path_list": [
          "abc",
          "def",
          "gh.txt"
        ],
        "encoded": {
          "path": "/abc/def/gh.txt",
          "parent": "/abc/def",
          "filename": "gh.txt",
          "stem": "gh",
          "extension": "txt",
          "path_list": [
            "abc",
            "def",
            "gh.txt"
          ]
        },
        "decoded": {
          "path": "/abc/def/gh.txt",
          "parent": "/abc/def",
          "filename": "gh.txt",
          "stem": "gh",
          "extension": "txt",
          "path_list": [
            "abc",
            "def",
            "gh.txt"
          ]
        }
      }
    ]

muescha · 2024-01-04T07:40:23Z

Edge case:

I think on Windows the path delimiter must be a ; and a path can have a drive name, like C:\Program Files;C:\Winnt;C:\Winnt\System32?

muescha · 2024-01-04T12:55:39Z

73bbfac: added _process to remove some fields (with --raw all the fields are still visible)

muescha · 2024-01-04T13:04:15Z

Edge Case:

there are also paths like ./path/to/file.txt?

echo "./path/to/file.txt" | jc --path-list -p

Note: the path_list now has a .path entry:

[
  {
    "path": "./path/to/file.txt",
    "parent": "path/to",
    "filename": "file.txt",
    "stem": "file",
    "extension": "txt",
    "path_list": [
      ".path",
      "to",
      "file.txt"
    ]
  }
]

the same with ~ and ..:

echo "~/path/to/file.txt" | jc --path-list -p

[
  {
    "path": "~/path/to/file.txt",
    "parent": "~/path/to",
    "filename": "file.txt",
    "stem": "file",
    "extension": "txt",
    "path_list": [
      "~path",
      "to",
      "file.txt"
    ]
  }
]

muescha · 2024-01-04T13:14:33Z

jc/parsers/path_list.py

+    raw_output: List[Dict] = []
+    if jc.utils.has_data(data):
+        for line in data.split(":"):
+            parsed_line = url.parse(
+                line,
+                raw=raw,
+                quiet=quiet
+            )
+            raw_output.append(parsed_line)


Suggestion from ChatGPT - it is shorter, but is it more readable?

Suggested change

-    raw_output: List[Dict] = []
-    if jc.utils.has_data(data):
-        for line in data.split(":"):
-            parsed_line = url.parse(
-                line,
-                raw=raw,
-                quiet=quiet
-            )
-            raw_output.append(parsed_line)
+    raw_output = [
+        url.parse(line, raw=raw, quiet=quiet)
+        for line in data.split(":")
+        if jc.utils.has_data(data)
+    ]

muescha · 2024-01-04T14:00:32Z

The more I consider it, the more it seems preferable to create a path parser using pathlib rather than repurposing the url parser for paths. pathlib offers all the necessary functions for handling various edge cases effortlessly.

kellyjonbrazil · 2024-01-04T17:03:48Z

The more I consider it, the more it seems preferable to create a path parser using pathlib rather than repurposing the url parser for paths. pathlib offers all the necessary functions for handling various edge cases effortlessly.

I agree. I'm also curious how this parser would/could work with slurp. If you have a list of path lists, then they could get bundled into an array of arrays. Another approach would be to have slurp use extend rather than append so it stays an array of objects.

muescha · 2024-01-04T22:28:11Z

I anticipate that the --slurp option should generate arrays of arrays when the initial command produces an array.

I'm uncertain whether a --slurp-flat command should be created.

A workaround involves eliminating the nested array using jq "[ .[] .[] ]":

echo "/abc/def/gh.txt:/xyz/uvw/ab.app\n/def/hij/klm.txt:/efe/app.txt" \
| jc --path-list -p -s | jq "[ .[] .[] ]"

[
  {
    "path": "/abc/def/gh.txt",
    "parent": "/abc/def",
    "filename": "gh.txt",
    "stem": "gh",
    "extension": "txt",
    "path_list": [
      "abc",
      "def",
      "gh.txt"
    ]
  },
  {
    "path": "/xyz/uvw/ab.app",
    "parent": "/xyz/uvw",
    "filename": "ab.app",
    "stem": "ab",
    "extension": "app",
    "path_list": [
      "xyz",
      "uvw",
      "ab.app"
    ]
  },
  {
    "path": "/def/hij/klm.txt",
    "parent": "/def/hij",
    "filename": "klm.txt",
    "stem": "klm",
    "extension": "txt",
    "path_list": [
      "def",
      "hij",
      "klm.txt"
    ]
  },
  {
    "path": "/efe/app.txt",
    "parent": "/efe",
    "filename": "app.txt",
    "stem": "app",
    "extension": "txt",
    "path_list": [
      "efe",
      "app.txt"
    ]
  }
]

…path-list

muescha · 2024-01-06T16:51:46Z

I'd use PurePosixPath as I did for the URL parser. I ran into the same issue and have not yet investigated why there is different behavior on different platforms. I think we can document this parser is for POSIX compliant paths only.

On Windows systems, paths are parsed using Path as a Windows path, resulting in the use of \\ as the path delimiter instead of /. However, with PurePosixPath, the path is parsed following the conventions of *nix systems.

muescha · 2024-01-06T17:02:02Z

POSIX

I was not familiar with POSIX before - would it be better to call it a Unix-style path instead of a POSIX-style path?

muescha · 2024-01-06T17:05:19Z

I anticipate that the --slurp option should generate arrays of arrays when the initial command produces an array.

Makes sense - I can see how you might want to treat each array of paths separately instead of having them all in one bucket.

Also, if a parser returns a List instead of a Dict, this segment of code won't function as expected and may result in unexpected behavior.

I backed out the slurp flattening code.

👍

muescha · 2024-01-06T20:07:21Z

I added some windows path checks in 66830e4
(debug prints still there to see what the windows CI produces)

Note: On macOS I need to use single quotes, otherwise sequences like \\x or \\f get escaped...

echo "C:\\\\Windows\\\\Program Files\\\\xfolder\\\\file.txt" | jc --path -p
echo 'C:\\Windows\\Program Files\\xfolder\\file.txt' | jc --path -p

These are the drive and root values for Windows:

 path: /Library/Application Support/Script Editor/Templates/Cocoa-AppleScript Applet.app/Contents/Info.plist
drive: 
 root: /
 
 path: C:\Windows\Program Files\xfolder\file.txt
drive: C:
 root: \

muescha · 2024-01-06T20:09:38Z

Fun fact: PureWindowsPath and PurePosixPath both see the \n at the end of the input, on the command line and in tests

muescha · 2024-01-08T05:48:41Z

fun fact after this PR

find . -type f -name '*.json' | wc -l
    1000

🎉

kellyjonbrazil · 2024-01-29T01:51:00Z

Will you be ready for me to merge this soon?

…path-list

muescha · 2024-01-31T01:46:39Z

yes it is ready to merge

muescha · 2024-01-31T02:06:11Z

I expect the docs updates to README.md, path.md and path_list.md to be generated by docgen.sh, so they should not be included in this PR.

kellyjonbrazil · 2024-01-31T04:04:36Z

Looks nice, thanks!

* draft for path_list * updaate doc * add input check * fix types * fix schema: add missing properties * add _process * fix _process docs * refactor: extract path.py parser * swap order of names alphabetically * documentation and comments * path parser: add early return for nodata * path and path-list parser: add test and fixtures * typo in file name * add early return for nodata * add test and fixtures * typo in file name * rename fixtures * rename fixtures * refactor to pathlib.Path * failing on windows - use PurePosixPath * changed the way to strip dot from suffix * add POSIX to path * test commit to see results on windows is failing * test commit to see results on windows is failing * add windows path detection * somehow Path not like the newline from input line * add test with more items * remove debug print * wrap test loops into into subTest * remove print statements * add path and path-list to CHANGELOG --------- Co-authored-by: Kelly Brazil <[email protected]>

muescha added 2 commits January 4, 2024 07:28

draft for path_list

562c5d5

updaate doc

f0272b7

muescha added 5 commits January 4, 2024 12:01

add input check

dfcf56e

fix types

61d84d4

fix schema: add missing properties

47e1f60

add _process

73bbfac

fix _process docs

440a0b9

muescha commented Jan 4, 2024

View reviewed changes

muescha added 2 commits January 4, 2024 23:52

refactor: extract path.py parser

001c356

swap order of names alphabetically

fb3be80

muescha changed the title ~~draft for path_list~~ draft for path and path_list Jan 4, 2024

muescha added 10 commits January 5, 2024 00:17

documentation and comments

b3679ba

path parser: add early return for nodata

183f989

path and path-list parser: add test and fixtures

0ce8030

typo in file name

1df0012

add early return for nodata

0d35ab6

add test and fixtures

bc8f06b

typo in file name

74c49dc

Merge remote-tracking branch 'origin/feature/path-list' into feature/…

24ac8bd

…path-list

rename fixtures

9a56b30

rename fixtures

4ca43ce

add POSIX to path

8e2959b

muescha added 3 commits January 6, 2024 18:10

test commit to see results on windows is failing

c3944dd

test commit to see results on windows is failing

6f47b2a

add windows path detection

66830e4

muescha added 3 commits January 8, 2024 04:46

Merge branch 'dev' into feature/path-list

5efd5b8

somehow Path not like the newline from input line

6f97ee4

add test with more items

473eb52

muescha added 2 commits January 8, 2024 06:53

remove debug print

38ea8e5

Merge branch 'dev' into feature/path-list

ff20459

kellyjonbrazil and others added 4 commits January 29, 2024 01:51

Merge branch 'dev' into feature/path-list

845b758

wrap test loops into into subTest

182183f

Merge remote-tracking branch 'origin/feature/path-list' into feature/…

36ea468

…path-list

remove print statements

c3e8592

muescha marked this pull request as ready for review January 31, 2024 01:45

muescha added 2 commits January 31, 2024 02:53

Merge branch 'dev' into feature/path-list

3c1fb5d

add path and path-list to CHANGELOG

fdb948a

kellyjonbrazil merged commit d65d2af into kellyjonbrazil:dev Jan 31, 2024
21 checks passed

muescha deleted the feature/path-list branch February 1, 2024 17:22

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

draft for path and path_list #513

draft for path and path_list #513

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024

muescha commented Jan 4, 2024 •

edited

Loading

muescha Jan 4, 2024

muescha commented Jan 4, 2024

kellyjonbrazil commented Jan 4, 2024

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 6, 2024

muescha commented Jan 6, 2024

muescha commented Jan 6, 2024

muescha commented Jan 6, 2024 •

edited

Loading

muescha commented Jan 6, 2024 •

edited

Loading

muescha commented Jan 8, 2024

kellyjonbrazil commented Jan 29, 2024

muescha commented Jan 31, 2024

muescha commented Jan 31, 2024

kellyjonbrazil commented Jan 31, 2024

draft for path and path_list #513

draft for path and path_list #513

Conversation

muescha commented Jan 4, 2024 • edited Loading

muescha commented Jan 4, 2024 • edited Loading

muescha commented Jan 4, 2024

muescha commented Jan 4, 2024 • edited Loading

muescha commented Jan 4, 2024 • edited Loading

muescha commented Jan 4, 2024

muescha commented Jan 4, 2024 • edited Loading

muescha Jan 4, 2024

Choose a reason for hiding this comment

muescha commented Jan 4, 2024

kellyjonbrazil commented Jan 4, 2024

muescha commented Jan 4, 2024 • edited Loading

muescha commented Jan 6, 2024

muescha commented Jan 6, 2024

muescha commented Jan 6, 2024

muescha commented Jan 6, 2024 • edited Loading

muescha commented Jan 6, 2024 • edited Loading

muescha commented Jan 8, 2024

kellyjonbrazil commented Jan 29, 2024

muescha commented Jan 31, 2024

muescha commented Jan 31, 2024

kellyjonbrazil commented Jan 31, 2024

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 4, 2024 •

edited

Loading

muescha commented Jan 6, 2024 •

edited

Loading

muescha commented Jan 6, 2024 •

edited

Loading