-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Work in progress on tabular adapters (#142)
- Loading branch information
Showing
2 changed files
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
-- | A collection of tools for working with comma-separated values (CSVs). | ||
-- Note: no error handling yet; e.g. | ||
|
||
module Hydra.Tools.Csv where | ||
|
||
import Hydra.Kernel | ||
import Hydra.Ext.Tabular | ||
import Hydra.Tools.Tabular | ||
|
||
import qualified Control.Monad as CM | ||
import qualified Data.List as L | ||
import qualified Data.Maybe as Y | ||
|
||
|
||
-- | Encodes a single DataRow as a line of a CSV, and vice versa. | ||
-- No support for optional cells; only empty strings. No attempt is made to check that rows are of a consistent width. | ||
dataRowCsvCoder :: Coder s1 s2 (DataRow String) String | ||
dataRowCsvCoder = Coder encode decode | ||
where | ||
encode (DataRow cells) = pure $ encodeCsvLine $ Y.catMaybes cells | ||
decode line = pure $ DataRow $ fmap Just $ decodeCsvLine line | ||
|
||
decodeCsvCell :: String -> String | ||
decodeCsvCell ('"':xs) = decodeCsvQuotes (init xs) -- Remove surrounding quotes | ||
decodeCsvCell cell = cell -- If not quoted, return as is | ||
|
||
decodeCsvLine :: String -> [String] | ||
decodeCsvLine xs = fmap decodeCsvCell (splitCsvLine xs) | ||
|
||
decodeCsvQuotes :: String -> String | ||
decodeCsvQuotes [] = [] | ||
decodeCsvQuotes ('"':'"':ys) = '"' : decodeCsvQuotes ys -- Handle escaped quotes | ||
decodeCsvQuotes (y:ys) = y : decodeCsvQuotes ys | ||
|
||
encodeCsvCell :: String -> String | ||
encodeCsvCell cell | ||
| any (`elem` cell) [',', '\n', '"'] = '"' : escapeCsvQuotes cell ++ "\"" | ||
| otherwise = cell | ||
|
||
encodeCsvLine :: [String] -> String | ||
encodeCsvLine = L.intercalate "," . fmap encodeCsvCell | ||
|
||
escapeCsvQuotes :: String -> String | ||
escapeCsvQuotes [] = [] | ||
escapeCsvQuotes (x:xs) | ||
| x == '"' = '"' : '"' : escapeCsvQuotes xs | ||
| otherwise = x : escapeCsvQuotes xs | ||
|
||
splitCsvLine :: String -> [String] | ||
splitCsvLine [] = [] | ||
splitCsvLine ('"':xs) = ('"':quoted ++ "\"") : splitCsvLine next | ||
where | ||
(quoted, rest) = span (/= '"') xs | ||
afterQuote = drop 1 rest -- Skip closing quote | ||
next = dropWhile (== ',') afterQuote -- Skip comma | ||
splitCsvLine xs = cell : splitCsvLine (drop 1 rest) -- Skip comma | ||
where | ||
(cell, rest) = break (== ',') xs | ||
|
||
tableCsvCoder :: Bool -> Coder s1 s2 (Table String) [String] | ||
tableCsvCoder hasHeader = Coder encode decode | ||
where | ||
encode (Table mheader rows) = do | ||
hrows <- headerRows | ||
drows <- CM.mapM (coderEncode dataRowCsvCoder) rows | ||
return $ hrows ++ drows | ||
where | ||
headerRows = if hasHeader | ||
then case mheader of | ||
Just (HeaderRow names) -> pure [encodeCsvLine names] | ||
Nothing -> fail "missing header" | ||
else pure [] | ||
decode rows = do | ||
(mheader, rest) <- if hasHeader | ||
then if L.null rows | ||
then fail "missing header" | ||
else pure (Just $ HeaderRow $ decodeCsvLine $ head rows, tail rows) | ||
else pure (Nothing, rows) | ||
drows <- CM.mapM (coderDecode dataRowCsvCoder) rest | ||
return $ Table mheader drows | ||
|
||
|
||
|
||
-- TODO: temporary | ||
tryCsvEncoding :: String -> String | ||
tryCsvEncoding = decodeCsvCell . encodeCsvCell | ||
|
||
-- TODO: temporary | ||
tryCsvLineEncoding :: String -> String | ||
tryCsvLineEncoding = encodeCsvLine . decodeCsvLine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
-- | A loose collection of tools for working with tabular data, including CSVs. | ||
|
||
module Hydra.Tools.Tabular ( | ||
referenceCoder, | ||
tabularAdapter, | ||
) where | ||
|
||
import Hydra.Kernel | ||
import Hydra.Ext.Tabular | ||
import Hydra.Lib.Io | ||
|
||
import qualified Control.Monad as CM | ||
import qualified Data.List as L | ||
import qualified Data.Map as M | ||
|
||
|
||
-- | Consumes a map of field names to value coders, producing a record coder. | ||
-- The record coder maps records with concrete values on the left, to records with references on the right, and vice versa. | ||
-- Fields not designated in the map are unaffected; they are simply copied as-is in either direction. | ||
-- For fields designated in the map, terms on the right *must* be variables. | ||
referenceCoder :: (M.Map Name (Coder s1 s2 Term Name)) -> Coder s1 s2 Record Record | ||
referenceCoder fieldCoders = Coder encode decode | ||
where | ||
encode (Record tname fields) = Record tname <$> CM.sequence (encodeField <$> fields) | ||
where | ||
encodeField field@(Field fname fterm) = case M.lookup fname fieldCoders of | ||
Just coder -> Field fname <$> (TermVariable <$> coderEncode coder fterm) | ||
Nothing -> return field | ||
decode (Record tname fields) = Record tname <$> CM.sequence (decodeField <$> fields) | ||
where | ||
decodeField field@(Field fname fterm) = case M.lookup fname fieldCoders of | ||
Just coder -> case fterm of | ||
TermVariable v -> Field fname <$> coderDecode coder v | ||
_ -> unexpected "variable" (showTerm fterm) | ||
Nothing -> return field | ||
|
||
-- | Consumes a row type and a cell-level coder, producing a record coder. | ||
-- The record coder maps data rows on the left to records on the right, and vice versa. | ||
tabularAdapter :: RowType -> (Type -> Coder s1 s2 (Maybe v) Term) -> Coder s1 s2 (DataRow v) Record | ||
tabularAdapter (RowType typeName fieldTypes) cellAdapter = Coder encode decode | ||
where | ||
cellCoders = cellAdapter <$> (fieldTypeType <$> fieldTypes) | ||
encode (DataRow cells) = do | ||
values <- CM.zipWithM coderEncode cellCoders cells | ||
let fields = L.zipWith Field (fieldTypeName <$> fieldTypes) values | ||
return $ Record typeName fields | ||
decode (Record tname fields) = do | ||
if (tname /= typeName) | ||
then unexpected ("record of type " ++ unName typeName) ("record of type " ++ unName tname) | ||
else DataRow <$> CM.sequence (fieldDecoders <*> fields) | ||
fieldDecoders = L.zipWith decodeField fieldTypes cellCoders | ||
where | ||
decodeField fieldType coder field = if (fieldName field /= fieldTypeName fieldType) | ||
then unexpected ("field " ++ unName (fieldTypeName fieldType)) ("field " ++ unName (fieldName field)) | ||
else coderDecode coder (fieldTerm field) |