Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support parsing application/json-seq #110

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 49 additions & 25 deletions jq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import json
import threading

from cpython.bytes cimport PyBytes_AsString
from cpython.bytes cimport PyBytes_AsStringAndSize
from libc.float cimport DBL_MAX
from libc.math cimport INFINITY, modf

Expand All @@ -18,6 +19,11 @@ cdef extern from "jv.h":
JV_KIND_ARRAY,
JV_KIND_OBJECT

ctypedef enum:
JV_PARSE_SEQ,
JV_PARSE_STREAMING,
JV_PARSE_STREAM_ERRORS

ctypedef struct jv:
pass

Expand Down Expand Up @@ -48,6 +54,7 @@ cdef extern from "jv.h":
jv_parser* jv_parser_new(int)
void jv_parser_free(jv_parser*)
void jv_parser_set_buf(jv_parser*, const char*, int, int)
int jv_parser_remaining(jv_parser*)
jv jv_parser_next(jv_parser*)

jv jv_parse(const char*)
Expand Down Expand Up @@ -246,27 +253,32 @@ cdef class _Program(object):
self._program_bytes = program_bytes
self._jq_state_pool = _JqStatePool(program_bytes, args=args)

def input(self, value=_NO_VALUE, text=_NO_VALUE):
def input(self, value=_NO_VALUE, text=_NO_VALUE, *, slurp=False):
if (value is _NO_VALUE) == (text is _NO_VALUE):
raise ValueError("Either the value or text argument should be set")

if text is not _NO_VALUE:
return self.input_text(text)
return self.input_text(text, slurp=slurp)
else:
return self.input_value(value)
return self.input_value(value, slurp=slurp)

def input_value(self, value):
return self.input_text(json.dumps(value))
def input_value(self, value, *, slurp=False):
return self.input_text(json.dumps(value), slurp=slurp)

def input_values(self, values):
def input_values(self, values, *, slurp=False):
fileobj = io.StringIO()
for value in values:
json.dump(value, fileobj)
fileobj.write("\n")
return self.input_text(fileobj.getvalue())
return self.input_text(fileobj.getvalue(), slurp=slurp)

def input_text(self, text, *, slurp=False):
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"), slurp=slurp)
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"),
slurp=slurp, seq=False)

def input_text_sequence(self, text, *, slurp=False):
    """Bind this program to *text* parsed as a JSON text sequence.

    The text is encoded as UTF-8 and handed to ``_ProgramWithInput`` with
    ``seq=True``; ``slurp`` is forwarded unchanged.
    """
    encoded_text = text.encode("utf8")
    return _ProgramWithInput(
        self._jq_state_pool,
        encoded_text,
        slurp=slurp,
        seq=True,
    )

@property
def program_string(self):
Expand All @@ -290,17 +302,20 @@ cdef class _ProgramWithInput(object):
cdef _JqStatePool _jq_state_pool
cdef object _bytes_input
cdef bint _slurp
cdef bint _seq

def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp):
def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp, bint seq):
self._jq_state_pool = jq_state_pool
self._bytes_input = bytes_input
self._slurp = slurp
self._seq = seq

def __iter__(self):
return self._make_iterator()

cdef _ResultIterator _make_iterator(self):
return _ResultIterator(self._jq_state_pool, self._bytes_input, slurp=self._slurp)
return _ResultIterator(self._jq_state_pool, self._bytes_input,
slurp=self._slurp, seq=self._seq)

def text(self):
# Performance testing suggests that using _jv_to_python (within the
Expand All @@ -309,6 +324,9 @@ cdef class _ProgramWithInput(object):
# See: https://github.com/mwilliamson/jq.py/pull/50
return "\n".join(json.dumps(v) for v in self)

def text_sequence(self):
    """Return the program's results serialised as a JSON text sequence.

    Every result is written as RS (0x1e), its JSON encoding, then LF, so
    each record (including the last) is terminated by a line feed and an
    empty result set yields an empty string rather than a lone RS.
    """
    return "".join("\x1e" + json.dumps(v) + "\n" for v in self)

def all(self):
return list(self)

Expand All @@ -328,15 +346,18 @@ cdef class _ResultIterator(object):
self._jq_state_pool.release(self._jq)
jv_parser_free(self._parser)

def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *, bint slurp):
def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *,
bint slurp, bint seq):
self._jq_state_pool = jq_state_pool
self._jq = jq_state_pool.acquire()
self._bytes_input = bytes_input
self._slurp = slurp
self._ready = False
cdef jv_parser* parser = jv_parser_new(0)
cdef char* cbytes_input = PyBytes_AsString(bytes_input)
jv_parser_set_buf(parser, cbytes_input, len(bytes_input), 0)
cdef jv_parser* parser = jv_parser_new(JV_PARSE_SEQ if seq else 0)
cdef char* cbytes_input
cdef ssize_t clen_input
PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
jv_parser_set_buf(parser, cbytes_input, clen_input, 0)
self._parser = parser

def __iter__(self):
Expand Down Expand Up @@ -381,17 +402,20 @@ cdef class _ResultIterator(object):
return 0

cdef inline jv _parse_next_input(self) except *:
cdef jv value = jv_parser_next(self._parser)
if jv_is_valid(value):
return value
elif jv_invalid_has_msg(jv_copy(value)):
error_message = jv_invalid_get_msg(value)
message = jv_string_to_py_string(error_message)
jv_free(error_message)
raise ValueError(u"parse error: " + message)
else:
jv_free(value)
raise StopIteration()
cdef jv value
while True:
value = jv_parser_next(self._parser)
if jv_is_valid(value):
return value
elif jv_invalid_has_msg(jv_copy(value)):
error_message = jv_invalid_get_msg(value)
message = jv_string_to_py_string(error_message)
jv_free(error_message)
raise ValueError(u"parse error: " + message)
else:
if not jv_parser_remaining(self._parser):
jv_free(value)
raise StopIteration()


def all(program, value=_NO_VALUE, text=_NO_VALUE):
Expand Down
51 changes: 51 additions & 0 deletions tests/jq_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,57 @@ def test_unicode_strings_can_be_used_as_input():
)


def test_record_separator_character_accepted_in_input():
    """Check how RS (0x1e) framing in sequence input maps to parsed values.

    Each case pairs an input text with the values the identity program
    ``.`` is expected to yield when the text goes through
    ``input_text_sequence``.
    """
    cases = [
        ('\x1e', []),
        ('\x1e\x1e', []),
        ('\x1e{}', [{}]),
        ('\x1e\x1e{}', [{}]),
        ('{}\x1e', []),
        ('{}\x1e\x1e', []),
        ('\x1e{}\x1e', [{}]),
        ('{}\x1e[]', [[]]),
        ('{}\x1e\x1e[]', [[]]),
        ('\x1e{}\x1e[]', [{}, []]),
        ('{}\x1e[]\x1e', [[]]),
        ('\x1e{}\x1e[]\x1e', [{}, []]),
    ]
    for text, expected in cases:
        program = jq.compile(".")
        assert_equal(expected, list(program.input_text_sequence(text)))


def test_unicode_strings_can_be_used_as_programs():
assert_equal(
"Dragon‽",
Expand Down
Loading