From 41bdd08f47d16db5d6055576a6ec075bebf151f7 Mon Sep 17 00:00:00 2001 From: Jordan Millar Date: Sun, 4 Nov 2018 13:04:45 -0400 Subject: [PATCH] Add parser that parses any data declaration (with or without records) and implement property test to verify this --- app/Main.hs | 21 +++-- package.yaml | 3 +- src/Parser.hs | 231 +++++++++++++++------------------------------ stack.yaml | 2 +- test/Test/Gen.hs | 30 +++++- test/Test/Props.hs | 22 +++-- 6 files changed, 133 insertions(+), 176 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 05b307a..c93d56a 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -3,16 +3,18 @@ module Main where --import Conduit --import qualified Data.Text as T --import System.FilePath (takeExtension) -import Data.Either (isRight) +import Data.Either (isRight) import Language.Haskell.Exts.Parser (fromParseResult, parseModuleWithMode) -import SrcManipulation (getDataDecls, printDeclarations, - returnListDecl, getNewTypeDecls, dataDecHasRecordAccessor) +import SrcManipulation (dataDecHasRecordAccessor, + getDataDecls, getNewTypeDecls, + printDeclarations, + returnListDecl) import Parser (defaultParseMode', - finalNormalDataTypeParser, - removeNewLines, - finalNewTypeParser) + finalNewTypeParser, + mixedDatatypeParser', + removeNewLines) {- main :: IO () main = @@ -64,8 +66,11 @@ main = do let recAccDataList = filter (isRight . dataDecHasRecordAccessor) (returnListDecl moduleSrsSpan) let dataRecAcc = removeNewLines $ printDeclarations recAccDataList - let dataStrings = map finalNormalDataTypeParser . removeNewLines $ printDeclarations dataList - let ntStrings = map finalNewTypeParser . removeNewLines $ printDeclarations newTypeList + let dataStrings = removeNewLines $ printDeclarations dataList + let ntStrings = removeNewLines $ printDeclarations newTypeList --print dataStrings --print ntStrings print dataRecAcc + print $ map mixedDatatypeParser' dataRecAcc + print $ map mixedDatatypeParser' dataStrings + print $ map finalNewTypeParser ntStrings diff --git a/package.yaml b/package.yaml index 38dae1d..5d92e9c 100644 --- a/package.yaml +++ b/package.yaml @@ -35,7 +35,7 @@ library: exposed-modules: - Parser - SrcManipulation - ghc-options: -Wall + ghc-options: -Weverything executables: repochart-exe: @@ -58,5 +58,6 @@ tests: - -threaded - -rtsopts - -with-rtsopts=-N + - -Weverything dependencies: - repochart diff --git a/src/Parser.hs b/src/Parser.hs index ba440a7..63f09ba 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -1,26 +1,10 @@ module Parser - ( dataTypeName - , dataTypeParser - , defaultParseMode' - , firstConstructor - , justType - , newTypeParser - , parseDataDeclarations - , parseNewTypes - , recordDTParser + ( defaultParseMode' , removeNewLines - , singleProductDTParser - , strictAndParentheses - , strictOnly - , sumDTParser , unParseDataDec - , parseBeforePipe - , equals - , dataTypeParser' , dataTypeParserConstructorsWithSubTypes , finalDataTypeParser - , normalDatatypeParser , unParseNormalDataDec , finalNormalDataTypeParser @@ -33,18 +17,25 @@ module Parser , recordAccessorDataTypeParser , unparseRecord , parseRecord + , unparseMixedDataType , parseMultipleRecords , unparseMultipleRecords + , mixedDatatypeParser' + , recordParse + , normalSubtypeParse + , unparseRecordsOrConstructors ) where -import Data.List (intercalate, intersperse, - unwords) +import Data.Char (isSpace) +import Data.List (dropWhileEnd, intercalate, + intersperse, replicate, + unwords, unzip, zipWith) import Language.Haskell.Exts.Extension (Extension (..), Language (..)) import Language.Haskell.Exts.Fixity (preludeFixities) import Language.Haskell.Exts.Parser (ParseMode (..)) import Text.Parsec (ParseError, Parsec, alphaNum, - anyChar, between, char, - eof, many, manyTill, newline, + anyChar, between, char, eof, + many, manyTill, newline, noneOf, parse, sepBy, sepBy1, sepEndBy, skipMany, space, string, try, (<|>)) @@ -54,113 +45,11 @@ import Text.Parsec (ParseError, Parsec, alphaNum, -- | Removes all newline characters. removeNewLines :: [String] -> [String] removeNewLines [] = [] -removeNewLines (x:xs) = do +removeNewLines (x:xs) = case parse (skipMany newline *> try (many $ noneOf ['\n'] <* skipMany newline)) "" x of Left parseError -> error $ show parseError Right str -> [str] ++ removeNewLines xs --- | Data type parsers - --- | Parses data type definitions. Parsers in this package are --- not interested in data constructors, only types existing in --- other types. --- TODO: Create a test for data type parsers via hedgehog --- Prob need generators to create sum types and product types. -parseDataDeclarations :: [String] -> [(String, [String])] -parseDataDeclarations [] = [] -parseDataDeclarations (x:xs) = - case parse (try dataTypeParser <|> sumDTParser) "" x of - Left _ -> parseDataDeclarations xs - Right result -> [(head $ words result, tail $ words result)] ++ parseDataDeclarations xs - - -dataTypeName :: Parsec String () String -dataTypeName = string "data" *> many space *> many alphaNum <* many space - -dataTypeParser :: Parsec String () String -dataTypeParser = ((try singleProductDTParser) <|> recordDTParser) - -firstConstructor :: Parsec String () String -firstConstructor = char '=' *> many space *> many alphaNum <* many space - -singleProductDTParser :: Parsec String () String -singleProductDTParser = do - typeName <- dataTypeName - _ <- firstConstructor - first <- firstParse - rest <- secondParse - pure $ typeName ++ " " ++ concat (intersperse " " ([first] ++ rest)) - where - firstParse = char '{' *> many space *> recordAccessorTypeParse - recordAccessorTypeParse = try (char '_') - *> (many alphaNum) - *> many (char ':') `sepBy` space - *> ((try strictAndParentheses) <|> strictOnly) - secondParse = many space *> manyTill (char ',' *> many space *> recordAccessorTypeParse) (many space *> char '}') - -strictAndParentheses :: Parsec String () String -strictAndParentheses = char '!' - *> char '(' - *> (concat <$> (sepBy1 (many alphaNum) (char ' '))) <* char ')' - -strictOnly :: Parsec String () String -strictOnly = char '!' *> many alphaNum <* many space - -justType :: Parsec String () String -justType = many alphaNum <* many space - -sumDTParser :: Parsec String () String -sumDTParser = do - typeName <- dataTypeName - firstTypeOfFirstConst <- char '=' *> many space *> many alphaNum *> many space *> many alphaNum <* many space - secondConstWithType <- concat <$> manyTill (char '|' *> many space *> (many alphaNum *> many space *> many alphaNum) `sepBy` (char ' ')) eof - pure $ typeName ++ " " ++ concat (intersperse " " ([firstTypeOfFirstConst] ++ secondConstWithType)) - - -recordDTParser :: Parsec String () String -recordDTParser = do - typeName <- dataTypeName - _ <- firstConstructor - first <- firstParse - rest <- restRecordAccessorType - pure $ typeName ++ " " ++ concat (intersperse " " ([first] ++ rest)) - where - firstParse = char '{' *> many space *> firstRecordAccessorType - firstRecordAccessorType = many space - *> many alphaNum *> many (char ':') `sepBy` space - *> ((try strictAndParentheses) <|> strictOnly <|> justType) - restRecordAccessorType = many space *> manyTill (char ',' *> many space *> firstRecordAccessorType) (many space *> char '}') - --- | Newtype parsers - --- | Parses newtype definitions. -parseNewTypes :: [String] -> [(String,[String])] -parseNewTypes [] = [] -parseNewTypes (x:xs) = - case parse newTypeParser "" x of - Left _ -> parseNewTypes xs - --TODO: You need a "super" parser to switch between "sub parsers" (type,data,newtype etc) so you can get your errors - -- (Data constructor , type constructors) - Right result -> [(head $ words result, tail $ words result)] ++ parseNewTypes xs - -newTypeParser :: Parsec String () String -newTypeParser = - string "newtype" *> (many alphaNum) `sepBy` space *> char '=' *> ((try recordNT) <|> derivingNT <|> normalNT) - -derivingNT :: Parsec String () String -derivingNT = manyTill anyChar (string "deriving") - -normalNT :: Parsec String () String -normalNT = many space *> many anyChar - -recordNT:: Parsec String () String -recordNT = do - ntName <- many space *> many alphaNum <* many space - ntSubtype <- between - (char '{') - (char '}') - ((many alphaNum) `sepBy` space *> many (char ':') *> many space *> many alphaNum <* many space) - pure $ ntName ++ " " ++ ntSubtype -------------- MISC ---------------- @@ -175,32 +64,6 @@ defaultParseMode' = ParseMode { ignoreFunctionArity = False } - --- RESTART -- - --- data MyType = Nullary | --- data MyType = Product One Two Three | -equals :: Parsec String () String -equals = string "=" *> many space - --- DataDecName, Constructors, Types -parseBeforePipe :: Parsec String () (String, [[String]]) -parseBeforePipe = do - dTName <- dataTypeName - _ <- equals - -- Gets constructors and potentially type constructors - -- before pipe - constructors <- (many alphaNum `sepBy` space) `sepBy` string "| " - pure (dTName, constructors) - - ------------- NEW PARSERS ------------ - - - --- you currently parse datatypes and newtypes --- TODO: record accessors - -- DATA DECLARATION PARSERS finalDataTypeParser :: String -> Either ParseError (String, [String]) @@ -303,13 +166,12 @@ recordAccessorDataTypeParser = do _ <- string "=" _ <- many space consName <- many alphaNum - _ <- many space accessorsAndConstructors <- betweenParser pure (typeName, consName, accessorsAndConstructors) betweenParser :: Parsec String () [(String, String)] betweenParser = do - _ <- string "{ " + _ <- string " { " first <- recordAccParser rest <- manyTill (string ", " *> recordAccParser) (string "}") pure (first : rest) @@ -333,9 +195,70 @@ unparseMultipleRecords (typeName, constructorName, records) = "data " ++ typeName ++ " = " ++ constructorName ++ " " ++ "{ " ++ helper records where helper :: [(String, String)] -> String - helper [] = [] - helper [x] = unparseRecord x ++ " }" + helper [] = [] + helper [x] = unparseRecord x ++ " }" helper (x : xs) = unparseRecord x ++ " , " ++ helper xs unparseRecord :: (String, String) -> String unparseRecord (recAccessor, recType) = concat [recAccessor, " :: ", recType] + +mixedDatatypeParser' :: String -> Either ParseError (String, [(String, [(String, String)])]) +mixedDatatypeParser' = parse mixedDatatypeParser "" + +mixedDatatypeParser :: Parsec String () (String, [(String, [(String, String)])]) +mixedDatatypeParser = do + _ <- string "data" + _ <- many space + typeName <- many alphaNum + _ <- many space + _ <- string "=" + _ <- many space + constructors <- constructorsParse `sepBy` string "|" + pure (typeName, constructors) + +constructorsParse :: Parsec String () (String, [(String, String)]) +constructorsParse = do + _ <- many space + consName <- many alphaNum + _ <- many space + constructors <- try recordParse <|> normalSubtypeParse + pure (consName, constructors) + +recordParse :: Parsec String () [(String, String)] +recordParse = do + _ <- string "{" + _ <- many space + rest <- try ((: []) <$> recordAccParser <* string "}") <|> multiRecodsParse + _ <- many space + pure rest + +multiRecodsParse :: Parsec String () [(String, String)] +multiRecodsParse = do + first <- recordAccParser + rest <- manyTill (string ", " *> recordAccParser) (string "}") + pure (first : rest) + +normalSubtypeParse :: Parsec String () [(String, String)] +normalSubtypeParse = do + subTypes <- try (many alphaNum `sepEndBy` space) + let filler = replicate (length subTypes) "" + pure $ zip filler subTypes + +unparseMixedDataType :: (String, [(String, [(String, String)])]) -> String +unparseMixedDataType (typeName, constructors) = do + let rest = intercalate " | " $ map unparseRecordsOrConstructors constructors + concat ["data ", typeName, " = ", rest ] + +unparseRecordsOrConstructors :: (String, [(String, String)]) -> String +unparseRecordsOrConstructors (_, []) = "" +unparseRecordsOrConstructors (const, [("", "")]) = + const +unparseRecordsOrConstructors (const, ("", subType) : rest) = do + let removeEmpties = filter (\x -> snd x /= "") rest + dropWhileEnd isSpace $ const ++ " " ++ subType ++ " " ++ unwords (map snd removeEmpties) +unparseRecordsOrConstructors (const, recordTypes) = do + let (records, assocTypes) = unzip recordTypes + let zipRecords = zipWith (\ a b -> a ++ " :: " ++ b) records assocTypes + let finalRecords = intercalate " , " zipRecords + concat [const, " { ", finalRecords, " }"] + diff --git a/stack.yaml b/stack.yaml index 361a2b2..a62e3bf 100644 --- a/stack.yaml +++ b/stack.yaml @@ -18,7 +18,7 @@ # # resolver: ./custom-snapshot.yaml # resolver: https://example.com/snapshots/2018-01-01.yaml -resolver: lts-12.10 +resolver: lts-12.16 # User packages to be built. # Various formats can be used as shown in the example below. diff --git a/test/Test/Gen.hs b/test/Test/Gen.hs index ad07f86..4236b5c 100644 --- a/test/Test/Gen.hs +++ b/test/Test/Gen.hs @@ -2,11 +2,12 @@ module Test.Gen ( genConstructorWithSubtypes , genDataDeclation , genDeclarationName + , genMixedDatatype , genNewTypeDeclaration , genNullaryDataDeclation , genConstructor , genRecord - , genRecordAccessorConstructor + , genNonPartialRecordAccessorDataType ) where import Data.List (intercalate, intersperse) import Hedgehog (Gen) @@ -24,13 +25,19 @@ genConstructorWithSubtypes = do constructs <- Gen.list (Range.constant 1 5) genDeclarationName pure . concat $ intersperse " " constructs +genConstructorWithRecords :: Gen String +genConstructorWithRecords = do + const <- genConstructor + records <- Gen.list (Range.constant 1 5) genRecord + pure $ concat [const, " { ", intercalate " , " records, " }"] + genNullaryDataDeclation :: Gen String genNullaryDataDeclation = do dta <- Gen.constant "data " dName <- genDeclarationName + equal <- Gen.constant " = " nullaryConstructors <- Gen.list (Range.constant 1 5) genDeclarationName pipe <- Gen.constant " | " - equal <- Gen.constant " = " let allConstructors = intersperse pipe nullaryConstructors pure . concat $ [dta, dName, equal] ++ allConstructors @@ -38,9 +45,9 @@ genDataDeclation :: Gen String genDataDeclation = do dta <- Gen.constant "data " dName <- genDeclarationName + equal <- Gen.constant " = " constructs <- Gen.list (Range.constant 1 5) genConstructorWithSubtypes pipe <- Gen.constant " | " - equal <- Gen.constant " = " let allConstructors = intersperse pipe constructs pure . concat $ [dta, dName, equal] ++ allConstructors @@ -68,11 +75,24 @@ genRecord = do typeConst <- genConstructor pure $ concat [record,doubleColon,typeConst] -genRecordAccessorConstructor :: Gen String -genRecordAccessorConstructor = do +genNonPartialRecordAccessorDataType :: Gen String +genNonPartialRecordAccessorDataType = do dta <- Gen.constant "data " dName <- genDeclarationName equal <- Gen.constant " = " const <- genConstructor records <- Gen.list (Range.constant 2 5) genRecord pure $ concat [dta, dName, equal, const, " { ", intercalate " , " records, " }"] + +-- Best representation of data declarations you are likely +-- to encounter in the wild. +genMixedDatatype :: Gen String +genMixedDatatype = do + dta <- Gen.constant "data " + dName <- genDeclarationName + equal <- Gen.constant " = " + let randomConstructor = Gen.choice [ genConstructorWithSubtypes + , genConstructorWithRecords + ] + constructors <- Gen.list (Range.linear 1 10) randomConstructor + pure $ concat [dta, dName, equal, intercalate " | " constructors] \ No newline at end of file diff --git a/test/Test/Props.hs b/test/Test/Props.hs index 29abea4..e0e80a0 100644 --- a/test/Test/Props.hs +++ b/test/Test/Props.hs @@ -12,19 +12,19 @@ import Hedgehog.Internal.TH (discover) import Parser (finalDataTypeParser, finalNewTypeParser, finalNormalDataTypeParser, + mixedDatatypeParser', newTypeUnparse, normalDatatypeParser, - parseDataDeclarations, parseMultipleRecords, parseRecord, unParseDataDec, unParseNormalDataDec, unparseMultipleRecords, - unparseRecord) + unparseRecord, unparseMixedDataType) -import Test.Gen (genDataDeclation, +import Test.Gen (genDataDeclation, genMixedDatatype, genNewTypeDeclaration, - genNullaryDataDeclation, genRecord, - genRecordAccessorConstructor) + genNonPartialRecordAccessorDataType, + genNullaryDataDeclation, genRecord) -- Potentially unnecessary as the parser tested in the last 2 properties -- works for nullary constructor only datatypes @@ -68,14 +68,22 @@ prop_parseUnparseSingleRecord = property $ do Right str -> unparseRecord str === record -- Only tests this string format: --- { auianfjlv :: RVETBtok , dcyqnvbgd :: DoYQ , eylexcij :: HQRUyN , hjmahiwegm :: GnvcHvVxJ } +-- data SomeType = SomeConstructor { auianfjlv :: RVETBtok , dcyqnvbgd :: DoYQ , eylexcij :: HQRUyN , hjmahiwegm :: GnvcHvVxJ } prop_parseUnparseMultipleRecords :: Property prop_parseUnparseMultipleRecords = property $ do - records <- forAll genRecordAccessorConstructor + records <- forAll genNonPartialRecordAccessorDataType case parseMultipleRecords records of Left err -> failWith Nothing $ show err Right str -> unparseMultipleRecords str === records +-- Only tests this string format: +-- data SomeType = SomeConstructor { eylexcij :: HQRUyN , hjmahiwegm :: GnvcHvVxJ } | AnotherConstructor +prop_parseUnparseDataDecl :: Property +prop_parseUnparseDataDecl = property $ do + records <- forAll genMixedDatatype + case mixedDatatypeParser' records of + Left err -> failWith Nothing $ show err + Right str -> unparseMixedDataType str === records tests :: IO Bool tests =