Skip to content

Commit

Permalink
Work in progress on tabular adapters (#142)
Browse files Browse the repository at this point in the history
  • Loading branch information
joshsh committed Dec 21, 2024
1 parent f509245 commit 4d42df8
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
90 changes: 90 additions & 0 deletions hydra-ext/src/main/haskell/Hydra/Tools/Csv.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
-- | A collection of tools for working with comma-separated values (CSVs).
-- Note: no error handling yet; e.g.

module Hydra.Tools.Csv where

import Hydra.Kernel
import Hydra.Ext.Tabular
import Hydra.Tools.Tabular

import qualified Control.Monad as CM
import qualified Data.List as L
import qualified Data.Maybe as Y


-- | Encodes a single DataRow as a line of a CSV, and vice versa.
-- No support for optional cells; only empty strings. No attempt is made to check that rows are of a consistent width.
dataRowCsvCoder :: Coder s1 s2 (DataRow String) String
dataRowCsvCoder = Coder encode decode
where
encode (DataRow cells) = pure $ encodeCsvLine $ Y.catMaybes cells
decode line = pure $ DataRow $ fmap Just $ decodeCsvLine line

decodeCsvCell :: String -> String
decodeCsvCell ('"':xs) = decodeCsvQuotes (init xs) -- Remove surrounding quotes
decodeCsvCell cell = cell -- If not quoted, return as is

decodeCsvLine :: String -> [String]
decodeCsvLine xs = fmap decodeCsvCell (splitCsvLine xs)

decodeCsvQuotes :: String -> String
decodeCsvQuotes [] = []
decodeCsvQuotes ('"':'"':ys) = '"' : decodeCsvQuotes ys -- Handle escaped quotes
decodeCsvQuotes (y:ys) = y : decodeCsvQuotes ys

encodeCsvCell :: String -> String
encodeCsvCell cell
| any (`elem` cell) [',', '\n', '"'] = '"' : escapeCsvQuotes cell ++ "\""
| otherwise = cell

encodeCsvLine :: [String] -> String
encodeCsvLine = L.intercalate "," . fmap encodeCsvCell

escapeCsvQuotes :: String -> String
escapeCsvQuotes [] = []
escapeCsvQuotes (x:xs)
| x == '"' = '"' : '"' : escapeCsvQuotes xs
| otherwise = x : escapeCsvQuotes xs

splitCsvLine :: String -> [String]
splitCsvLine [] = []
splitCsvLine ('"':xs) = ('"':quoted ++ "\"") : splitCsvLine next
where
(quoted, rest) = span (/= '"') xs
afterQuote = drop 1 rest -- Skip closing quote
next = dropWhile (== ',') afterQuote -- Skip comma
splitCsvLine xs = cell : splitCsvLine (drop 1 rest) -- Skip comma
where
(cell, rest) = break (== ',') xs

tableCsvCoder :: Bool -> Coder s1 s2 (Table String) [String]
tableCsvCoder hasHeader = Coder encode decode
where
encode (Table mheader rows) = do
hrows <- headerRows
drows <- CM.mapM (coderEncode dataRowCsvCoder) rows
return $ hrows ++ drows
where
headerRows = if hasHeader
then case mheader of
Just (HeaderRow names) -> pure [encodeCsvLine names]
Nothing -> fail "missing header"
else pure []
decode rows = do
(mheader, rest) <- if hasHeader
then if L.null rows
then fail "missing header"
else pure (Just $ HeaderRow $ decodeCsvLine $ head rows, tail rows)
else pure (Nothing, rows)
drows <- CM.mapM (coderDecode dataRowCsvCoder) rest
return $ Table mheader drows



-- TODO: temporary
tryCsvEncoding :: String -> String
tryCsvEncoding = decodeCsvCell . encodeCsvCell

-- TODO: temporary
tryCsvLineEncoding :: String -> String
tryCsvLineEncoding = encodeCsvLine . decodeCsvLine
55 changes: 55 additions & 0 deletions hydra-ext/src/main/haskell/Hydra/Tools/Tabular.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
-- | A loose collection of tools for working with tabular data, including CSVs.

module Hydra.Tools.Tabular (
referenceCoder,
tabularAdapter,
) where

import Hydra.Kernel
import Hydra.Ext.Tabular
import Hydra.Lib.Io

import qualified Control.Monad as CM
import qualified Data.List as L
import qualified Data.Map as M


-- | Consumes a map of field names to value coders, producing a record coder.
-- The record coder maps records with concrete values on the left, to records with references on the right, and vice versa.
-- Fields not designated in the map are unaffected; they are simply copied as-is in either direction.
-- For fields designated in the map, terms on the right *must* be variables.
referenceCoder :: (M.Map Name (Coder s1 s2 Term Name)) -> Coder s1 s2 Record Record
referenceCoder fieldCoders = Coder encode decode
where
encode (Record tname fields) = Record tname <$> CM.sequence (encodeField <$> fields)
where
encodeField field@(Field fname fterm) = case M.lookup fname fieldCoders of
Just coder -> Field fname <$> (TermVariable <$> coderEncode coder fterm)
Nothing -> return field
decode (Record tname fields) = Record tname <$> CM.sequence (decodeField <$> fields)
where
decodeField field@(Field fname fterm) = case M.lookup fname fieldCoders of
Just coder -> case fterm of
TermVariable v -> Field fname <$> coderDecode coder v
_ -> unexpected "variable" (showTerm fterm)
Nothing -> return field

-- | Consumes a row type and a cell-level coder, producing a record coder.
-- The record coder maps data rows on the left to records on the right, and vice versa.
tabularAdapter :: RowType -> (Type -> Coder s1 s2 (Maybe v) Term) -> Coder s1 s2 (DataRow v) Record
tabularAdapter (RowType typeName fieldTypes) cellAdapter = Coder encode decode
where
cellCoders = cellAdapter <$> (fieldTypeType <$> fieldTypes)
encode (DataRow cells) = do
values <- CM.zipWithM coderEncode cellCoders cells
let fields = L.zipWith Field (fieldTypeName <$> fieldTypes) values
return $ Record typeName fields
decode (Record tname fields) = do
if (tname /= typeName)
then unexpected ("record of type " ++ unName typeName) ("record of type " ++ unName tname)
else DataRow <$> CM.sequence (fieldDecoders <*> fields)
fieldDecoders = L.zipWith decodeField fieldTypes cellCoders
where
decodeField fieldType coder field = if (fieldName field /= fieldTypeName fieldType)
then unexpected ("field " ++ unName (fieldTypeName fieldType)) ("field " ++ unName (fieldName field))
else coderDecode coder (fieldTerm field)

0 comments on commit 4d42df8

Please sign in to comment.