Skip to content

Commit

Permalink
refactor: mp4 module (#652)
Browse files: browse the repository at this point in the history.
* wip

* more naming refactoring

* simplify Sample and RawSample

* rename: import simple_mp4_parser as sparser

* refactor mp4 sample parser

* refactor camm parser

* refactor gpmf parser

* rename parse_ to extract_

* remove deprecated functions

* rename composition_timedelta to composition_offset

* move mp4 to a separate module

* add the missing __init__.py

* fix import

* update setup.py
  • Loading branch information
ptpt authored Nov 28, 2024
1 parent b37895e commit 68e5e31
Show file tree
Hide file tree
Showing 21 changed files with 411 additions and 506 deletions.
8 changes: 4 additions & 4 deletions mapillary_tools/geotag/blackvue_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pynmea2

from .. import geo
from . import simple_mp4_parser
from ..mp4 import simple_mp4_parser as sparser


LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -55,8 +55,8 @@ def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]:

def extract_camera_model(fp: T.BinaryIO) -> str:
try:
cprt_bytes = simple_mp4_parser.parse_mp4_data_first(fp, [b"free", b"cprt"])
except simple_mp4_parser.ParsingError:
cprt_bytes = sparser.parse_mp4_data_first(fp, [b"free", b"cprt"])
except sparser.ParsingError:
return ""

if cprt_bytes is None:
Expand Down Expand Up @@ -91,7 +91,7 @@ def extract_camera_model(fp: T.BinaryIO) -> str:


def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]:
gps_data = simple_mp4_parser.parse_mp4_data_first(fp, [b"free", b"gps "])
gps_data = sparser.parse_mp4_data_first(fp, [b"free", b"gps "])
if gps_data is None:
return None

Expand Down
6 changes: 4 additions & 2 deletions mapillary_tools/geotag/camm_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
import typing as T

from .. import geo, types
from ..mp4 import (
construct_mp4_parser as cparser,
mp4_sample_parser as sample_parser,
)

from . import (
camm_parser,
construct_mp4_parser as cparser,
mp4_sample_parser as sample_parser,
simple_mp4_builder as builder,
)
from .simple_mp4_builder import BoxDict
Expand Down
115 changes: 41 additions & 74 deletions mapillary_tools/geotag/camm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,8 @@

import construct as C

from . import (
construct_mp4_parser as cparser,
geo,
mp4_sample_parser as sample_parser,
simple_mp4_parser as parser,
)
from . import geo
from ..mp4 import simple_mp4_parser as sparser, mp4_sample_parser as sample_parser


LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -82,12 +78,12 @@ class CAMMType(Enum):
def _parse_point_from_sample(
fp: T.BinaryIO, sample: sample_parser.Sample
) -> T.Optional[geo.Point]:
fp.seek(sample.offset, io.SEEK_SET)
data = fp.read(sample.size)
fp.seek(sample.raw_sample.offset, io.SEEK_SET)
data = fp.read(sample.raw_sample.size)
box = CAMMSampleData.parse(data)
if box.type == CAMMType.MIN_GPS.value:
return geo.Point(
time=sample.time_offset,
time=sample.exact_time,
lat=box.data[0],
lon=box.data[1],
alt=box.data[2],
Expand All @@ -97,7 +93,7 @@ def _parse_point_from_sample(
# Not using box.data.time_gps_epoch as the point timestamp
# because it is from another clock
return geo.Point(
time=sample.time_offset,
time=sample.exact_time,
lat=box.data.latitude,
lon=box.data.longitude,
alt=box.data.altitude,
Expand Down Expand Up @@ -148,15 +144,8 @@ def elst_entry_to_seconds(
return (media_time, duration)


def _extract_camm_samples(
s: T.BinaryIO,
maxsize: int = -1,
) -> T.Generator[sample_parser.Sample, None, None]:
samples = sample_parser.parse_samples_from_trak(s, maxsize=maxsize)
camm_samples = (
sample for sample in samples if sample.description["format"] == b"camm"
)
yield from camm_samples
def _is_camm_description(description: T.Dict) -> bool:
return description["format"] == b"camm"


def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]:
Expand All @@ -166,59 +155,37 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]:
"""

points = None
movie_timescale = None
media_timescale = None
elst_entries = None

for h, s in parser.parse_path(fp, [b"moov", [b"mvhd", b"trak"]]):
if h.type == b"trak":
trak_start_offset = s.tell()

descriptions = sample_parser.parse_descriptions_from_trak(
s, maxsize=h.maxsize
moov = sample_parser.MovieBoxParser.parse_stream(fp)
for track in moov.extract_tracks():
descriptions = track.extract_sample_descriptions()
if any(_is_camm_description(d) for d in descriptions):
maybe_points = (
_parse_point_from_sample(fp, sample)
for sample in track.extract_samples()
if _is_camm_description(sample.description)
)
camm_descriptions = [d for d in descriptions if d["format"] == b"camm"]
if camm_descriptions:
s.seek(trak_start_offset, io.SEEK_SET)
camm_samples = _extract_camm_samples(s, h.maxsize)

points_with_nones = (
_parse_point_from_sample(fp, sample)
for sample in camm_samples
if sample.description["format"] == b"camm"
)

points = [p for p in points_with_nones if p is not None]
if points:
s.seek(trak_start_offset)
elst_data = parser.parse_box_data_first(
s, [b"edts", b"elst"], maxsize=h.maxsize
)
if elst_data is not None:
elst_entries = cparser.EditBox.parse(elst_data)["entries"]

s.seek(trak_start_offset)
mdhd_data = parser.parse_box_data_firstx(
s, [b"mdia", b"mdhd"], maxsize=h.maxsize
)
mdhd = cparser.MediaHeaderBox.parse(mdhd_data)
media_timescale = mdhd["timescale"]
else:
assert h.type == b"mvhd"
if not movie_timescale:
mvhd = cparser.MovieHeaderBox.parse(s.read(h.maxsize))
movie_timescale = mvhd["timescale"]

# exit when both found
if movie_timescale is not None and points:
break

if points and movie_timescale and media_timescale and elst_entries:
segments = [
elst_entry_to_seconds(entry, movie_timescale, media_timescale)
for entry in elst_entries
]
points = list(filter_points_by_elst(points, segments))
points = [p for p in maybe_points if p is not None]
if points:
elst_boxdata = track.extract_elst_boxdata()
if elst_boxdata is not None:
elst_entries = elst_boxdata["entries"]
if elst_entries:
# media_timescale
mdhd_boxdata = track.extract_mdhd_boxdata()
media_timescale = mdhd_boxdata["timescale"]
# movie_timescale
mvhd_boxdata = moov.extract_mvhd_boxdata()
movie_timescale = mvhd_boxdata["timescale"]
segments = [
elst_entry_to_seconds(
entry,
movie_timescale=movie_timescale,
media_timescale=media_timescale,
)
for entry in elst_entries
]
points = list(filter_points_by_elst(points, segments))

return points

Expand All @@ -238,15 +205,15 @@ def parse_gpx(path: pathlib.Path) -> T.List[geo.Point]:
)


def _decode_quietly(data: bytes, h: parser.Header) -> str:
def _decode_quietly(data: bytes, h: sparser.Header) -> str:
try:
return data.decode("utf-8")
except UnicodeDecodeError:
LOG.warning("Failed to decode %s: %s", h, data[:512])
return ""


def _parse_quietly(data: bytes, h: parser.Header) -> bytes:
def _parse_quietly(data: bytes, h: sparser.Header) -> bytes:
try:
parsed = MakeOrModel.parse(data)
except C.ConstructError:
Expand All @@ -256,7 +223,7 @@ def _parse_quietly(data: bytes, h: parser.Header) -> bytes:


def extract_camera_make_and_model(fp: T.BinaryIO) -> T.Tuple[str, str]:
header_and_stream = parser.parse_path(
header_and_stream = sparser.parse_path(
fp,
[
b"moov",
Expand Down Expand Up @@ -296,7 +263,7 @@ def extract_camera_make_and_model(fp: T.BinaryIO) -> T.Tuple[str, str]:
# quit when both found
if make and model:
break
except parser.ParsingError:
except sparser.ParsingError:
pass

if make:
Expand Down
8 changes: 4 additions & 4 deletions mapillary_tools/geotag/geotag_videos_from_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
camm_parser,
gpmf_gps_filter,
gpmf_parser,
simple_mp4_parser as parser,
utils as video_utils,
)
from ..mp4 import simple_mp4_parser as sparser
from .geotag_from_generic import GeotagVideosFromGeneric

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,7 +77,7 @@ def _extract_video_metadata(
with video_path.open("rb") as fp:
try:
points = camm_parser.extract_points(fp)
except parser.ParsingError:
except sparser.ParsingError:
points = None

if points is not None:
Expand All @@ -100,7 +100,7 @@ def _extract_video_metadata(
with video_path.open("rb") as fp:
try:
points_with_fix = gpmf_parser.extract_points(fp)
except parser.ParsingError:
except sparser.ParsingError:
points_with_fix = None

if points_with_fix is not None:
Expand All @@ -123,7 +123,7 @@ def _extract_video_metadata(
with video_path.open("rb") as fp:
try:
points = blackvue_parser.extract_points(fp)
except parser.ParsingError:
except sparser.ParsingError:
points = None

if points is not None:
Expand Down
Loading

0 comments on commit 68e5e31

Please sign in to comment.