From 4ec74ff2fc1ca0e062a291e75039c31c9f205d2e Mon Sep 17 00:00:00 2001
From: Nikolai Kondrashov
Date: Tue, 8 Sep 2020 16:08:06 +0300
Subject: [PATCH] Unpack jv values into Python values directly

Instead of dumping and parsing JSON, convert JQ's "jv" structures into
Python values directly by recursively walking them.

Strings are decoded by their byte length, so embedded NUL characters are
kept, and integral numbers of any magnitude are returned as integers.

The naive implementation is still twice as fast.
---
 jq.pyx | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 84 insertions(+), 13 deletions(-)

diff --git a/jq.pyx b/jq.pyx
index 083faa7..cb64602 100644
--- a/jq.pyx
+++ b/jq.pyx
@@ -26,6 +26,15 @@ cdef extern from "jv.h":
     int jv_invalid_has_msg(jv)
     char* jv_string_value(jv)
     jv jv_dump_string(jv, int flags)
+    int jv_string_length_bytes(jv)
+    double jv_number_value(jv)
+    int jv_array_length(jv)
+    jv jv_array_get(jv, int)
+    int jv_object_iter(jv)
+    int jv_object_iter_next(jv, int)
+    int jv_object_iter_valid(jv, int)
+    jv jv_object_iter_key(jv, int)
+    jv jv_object_iter_value(jv, int)
 
     cdef struct jv_parser:
         pass
@@ -51,6 +60,76 @@ cdef extern from "jq.h":
     void jq_get_error_cb(jq_state *, jq_err_cb *, void **)
 
 
+cdef unicode _jv_string_to_python(jv v):
+    """Decode a jv string into a Python string, keeping embedded NULs.
+
+    ``v`` must be of kind JV_KIND_STRING; it is not freed here.
+    """
+    # jv_string_length_bytes() consumes its argument, so hand it a copy.
+    cdef int length = jv_string_length_bytes(jv_copy(v))
+    # Slice by byte length: a bare char* conversion stops at the first NUL.
+    return jv_string_value(v)[:length].decode("utf-8")
+
+
+cdef object _jv_to_python(jv v):
+    """Unpack a jv value into a Python object.
+
+    Walks the value recursively: null, false/true, numbers, strings,
+    arrays and objects become None, bool, int or float, str, list and
+    dict. ``v`` is not freed here; that is left to the caller.
+
+    Raises ValueError for an invalid value or an unknown value kind.
+    """
+    cdef jv_kind kind = jv_get_kind(v)
+    cdef int i
+    cdef jv ik
+    cdef jv iv
+    cdef object num
+    if kind == JV_KIND_INVALID:
+        raise ValueError("Invalid value")
+    elif kind == JV_KIND_NULL:
+        return None
+    elif kind == JV_KIND_FALSE:
+        return False
+    elif kind == JV_KIND_TRUE:
+        return True
+    elif kind == JV_KIND_NUMBER:
+        num = jv_number_value(v)
+        # Test integrality in Python rather than with jv_is_integer(),
+        # which in jq 1.6 rejects anything outside the C int range and
+        # would turn e.g. 10000000000 into a float.
+        if num.is_integer():
+            return int(num)
+        else:
+            return num
+    elif kind == JV_KIND_STRING:
+        return _jv_string_to_python(v)
+    elif kind == JV_KIND_ARRAY:
+        arr = []
+        for i in range(0, jv_array_length(jv_copy(v))):
+            iv = jv_array_get(jv_copy(v), i)
+            try:
+                arr.append(_jv_to_python(iv))
+            finally:
+                jv_free(iv)
+        return arr
+    elif kind == JV_KIND_OBJECT:
+        obj = {}
+        i = jv_object_iter(v)
+        while jv_object_iter_valid(v, i):
+            ik = jv_object_iter_key(v, i)
+            iv = jv_object_iter_value(v, i)
+            try:
+                obj[_jv_string_to_python(ik)] = _jv_to_python(iv)
+            finally:
+                jv_free(ik)
+                jv_free(iv)
+            i = jv_object_iter_next(v, i)
+        return obj
+    else:
+        raise ValueError("Invalid value kind: " + str(kind))
+
+
 def compile(object program):
     cdef object program_bytes = program.encode("utf8")
     return _Program(program_bytes)
@@ -199,13 +278,7 @@ cdef class _ProgramWithInput(object):
         return _ResultIterator(self._jq_state_pool, self._bytes_input)
 
     def text(self):
-        iterator = self._make_iterator()
-        results = []
-        while True:
-            try:
-                results.append(iterator._next_string())
-            except StopIteration:
-                return "\n".join(results)
+        return "\n".join(json.dumps(v) for v in self)
 
     def all(self):
         return list(self)
@@ -239,9 +312,6 @@ cdef class _ResultIterator(object):
         return self
 
     def __next__(self):
-        return json.loads(self._next_string())
-
-    cdef unicode _next_string(self):
         cdef int dumpopts = 0
         while True:
             if not self._ready:
@@ -250,9 +320,10 @@ cdef class _ResultIterator(object):
 
             result = jq_next(self._jq)
             if jv_is_valid(result):
-                dumped = jv_dump_string(result, dumpopts)
-                value = jv_string_value(dumped).decode("utf8")
-                jv_free(dumped)
+                try:
+                    value = _jv_to_python(result)
+                finally:
+                    jv_free(result)
                 return value
             elif jv_invalid_has_msg(jv_copy(result)):
                 error_message = jv_invalid_get_msg(result)