Skip to content

Commit 6d66351

Browse files
committed
Remove Maybe indirection in PatternSet (+ LambdaCase)
Instead of `Maybe . Set` just have `Set`, collapsing the two null values `Nothing` and `Just mempty`. The distinction of these wasn't used anywhere. We drop GHC 7.4 in favor of using LambdaCase.
1 parent e7398f4 commit 6d66351

File tree

5 files changed

+99
-50
lines changed

5 files changed

+99
-50
lines changed

.github/workflows/haskell-ci.yml

-5
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,6 @@ jobs:
9292
compilerVersion: 7.6.3
9393
setup-method: hvr-ppa
9494
allow-failure: false
95-
- compiler: ghc-7.4.2
96-
compilerKind: ghc
97-
compilerVersion: 7.4.2
98-
setup-method: hvr-ppa
99-
allow-failure: false
10095
fail-fast: false
10196
steps:
10297
- name: apt

lib/Text/Regex/TDFA/Pattern.hs

+62-21
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,31 @@
55
-- the parsed form of a Regular Expression.
66

77
module Text.Regex.TDFA.Pattern
8-
(Pattern(..)
9-
,PatternSet(..)
10-
,PatternSetCharacterClass(..)
11-
,PatternSetCollatingElement(..)
12-
,PatternSetEquivalenceClass(..)
13-
,GroupIndex
14-
,DoPa(..)
15-
,showPattern
16-
-- ** Internal use
17-
,starTrans
18-
-- ** Internal use, Operations to support debugging under ghci
19-
,starTrans',simplify',dfsPattern
8+
( Pattern(..)
9+
, PatternSet(..)
10+
, patternSetChars
11+
, patternSetCharacterClasses
12+
, patternSetCollatingElements
13+
, patternSetEquivalenceClasses
14+
, PatternSetCharacterClass(..)
15+
, PatternSetCollatingElement(..)
16+
, PatternSetEquivalenceClass(..)
17+
, GroupIndex
18+
, DoPa(..)
19+
, showPattern
20+
-- ** Internal use
21+
, starTrans
22+
-- ** Internal use, Operations to support debugging under ghci
23+
, starTrans', simplify', dfsPattern
2024
) where
2125

2226
{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}
2327

2428
import Data.List(intersperse,partition)
2529
import qualified Data.Set as Set(toAscList,toList)
2630
import Data.Set(Set) -- XXX EnumSet
31+
32+
import Utils
2733
import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error)
2834

2935
err :: String -> a
@@ -92,19 +98,54 @@ showPattern pIn =
9298
-}
9399
paren s = ('(':s)++")"
94100

95-
data PatternSet = PatternSet (Maybe (Set Char))
96-
(Maybe (Set PatternSetCharacterClass))
97-
(Maybe (Set PatternSetCollatingElement))
98-
(Maybe (Set PatternSetEquivalenceClass))
99-
deriving (Eq)
101+
-- | Processed content of a bracket expression.
102+
data PatternSet = PatternSet
103+
{ _patternSetChars :: Set Char
104+
-- ^ Characters included in the pattern.
105+
, _patternSetCharacterClasses :: Set PatternSetCharacterClass
106+
-- ^ POSIX character classes included in the pattern.
107+
, _patternSetCollatingElements :: Set PatternSetCollatingElement
108+
-- ^ Collating elements included in the pattern.
109+
, _patternSetEquivalenceClasses :: Set PatternSetEquivalenceClass
110+
-- ^ Equivalence classes included in the pattern.
111+
}
112+
deriving (Eq)
113+
114+
instance Semigroup PatternSet where
115+
PatternSet a b c d <> PatternSet a' b' c' d' =
116+
PatternSet (a <> a') (b <> b') (c <> c') (d <> d')
117+
118+
instance Monoid PatternSet where
119+
mempty = PatternSet mempty mempty mempty mempty
120+
mappend = (<>)
121+
122+
-- | Lens for '_patternSetChars'.
123+
patternSetChars :: Lens' PatternSet (Set Char)
124+
patternSetChars f ps =
125+
f (_patternSetChars ps) <&> \ i -> ps{ _patternSetChars = i }
126+
127+
-- | Lens for '_patternSetCharacterClasses'.
128+
patternSetCharacterClasses :: Lens' PatternSet (Set PatternSetCharacterClass)
129+
patternSetCharacterClasses f ps =
130+
f (_patternSetCharacterClasses ps) <&> \ i -> ps{ _patternSetCharacterClasses = i }
131+
132+
-- | Lens for '_patternSetCollatingElements'.
133+
patternSetCollatingElements :: Lens' PatternSet (Set PatternSetCollatingElement)
134+
patternSetCollatingElements f ps =
135+
f (_patternSetCollatingElements ps) <&> \ i -> ps{ _patternSetCollatingElements = i }
136+
137+
-- | Lens for '_patternSetEquivalenceClasses'.
138+
patternSetEquivalenceClasses :: Lens' PatternSet (Set PatternSetEquivalenceClass)
139+
patternSetEquivalenceClasses f ps =
140+
f (_patternSetEquivalenceClasses ps) <&> \ i -> ps{ _patternSetEquivalenceClasses = i }
100141

101142
instance Show PatternSet where
102143
showsPrec i (PatternSet s scc sce sec) =
103-
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
144+
let (special,normal) = partition (`elem` "]-") $ Set.toAscList s
104145
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
105-
scc' = maybe "" ((concatMap show) . Set.toList) scc
106-
sce' = maybe "" ((concatMap show) . Set.toList) sce
107-
sec' = maybe "" ((concatMap show) . Set.toList) sec
146+
scc' = concatMap show $ Set.toList scc
147+
sce' = concatMap show $ Set.toList sce
148+
sec' = concatMap show $ Set.toList sec
108149
in shows charSpec
109150
. showsPrec i scc' . showsPrec i sce' . showsPrec i sec'
110151
. if '-' `elem` special then showChar '-' else id

lib/Text/Regex/TDFA/ReadRegex.hs

+28-16
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ import Text.ParserCombinators.Parsec((<|>), (<?>),
1414
try, runParser, many, getState, setState, CharParser, ParseError,
1515
sepBy1, option, notFollowedBy, many1, lookAhead, eof, between,
1616
string, noneOf, digit, char, anyChar)
17+
import Utils
1718

1819
import Control.Monad (liftM, guard)
1920

20-
import Data.Foldable (asum)
21-
import qualified Data.Set as Set(fromList)
21+
import Data.Foldable (asum, foldl')
22+
import qualified Data.Set as Set
2223

2324
-- | An element inside @[...]@, denoting a character class.
2425
data BracketElement
@@ -132,20 +133,31 @@ p_bracket :: P Pattern
132133
p_bracket = (char '[') >> ( (char '^' >> p_set True) <|> (p_set False) )
133134

134135
p_set :: Bool -> P Pattern
135-
p_set invert = do initial <- option "" (char ']' >> return "]")
136-
values <- if null initial then many1 p_set_elem else many p_set_elem
137-
_ <- char ']'
138-
ci <- char_index
139-
let chars = maybe'set $ concat $
140-
initial :
141-
[ c | BEChar c <- values ] :
142-
[ [start..end] | BERange start end <- values ]
143-
colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ]
144-
equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values]
145-
class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values]
146-
maybe'set x = if null x then Nothing else Just (Set.fromList x)
147-
sets = PatternSet chars class's colls equivs
148-
sets `seq` return $ if invert then PAnyNot ci sets else PAny ci sets
136+
p_set invert = do
137+
-- A ] as first character after the opening [ is treated as alternative ']'
138+
-- rather than the closing bracket.
139+
initial <- option mempty $ Set.singleton <$> char ']'
140+
-- Parse remaining content of bracket expression.
141+
values <- if Set.null initial then many1 p_set_elem else many p_set_elem
142+
_ <- char ']'
143+
ci <- char_index
144+
-- Process the content of bracket expression into a PatternSet.
145+
let !sets = foldl' (flip addBracketElement) (mempty{ _patternSetChars = initial }) values
146+
return $ if invert then PAnyNot ci sets else PAny ci sets
147+
148+
addBracketElement :: BracketElement -> PatternSet -> PatternSet
149+
addBracketElement = \case
150+
BEChar c ->
151+
over patternSetChars $ Set.insert c
152+
BERange start end ->
153+
over patternSetChars $ (`Set.union` Set.fromDistinctAscList [start..end])
154+
-- Set.union is left-biased, [start..end] is considered the smaller set
155+
BEClass s ->
156+
over patternSetCharacterClasses $ Set.insert $ PatternSetCharacterClass s
157+
BEColl s ->
158+
over patternSetCollatingElements $ Set.insert $ PatternSetCollatingElement s
159+
BEEquiv s ->
160+
over patternSetEquivalenceClasses $ Set.insert $ PatternSetEquivalenceClass s
149161

150162
-- From here down the code is the parser and functions for pattern [ ] set things
151163

lib/Text/Regex/TDFA/TNFA.hs

+6-6
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ import Data.IntSet.EnumSet2(EnumSet)
5151
import qualified Data.IntSet.EnumSet2 as Set(singleton,toList,insert)
5252
import Data.Maybe(catMaybes,isNothing)
5353
import Data.Monoid as Mon(Monoid(..))
54-
import qualified Data.Set as S(Set,insert,toAscList,empty)
54+
import qualified Data.Set as S
5555

5656
import Text.Regex.TDFA.Common(QT(..),QNFA(..),QTrans,TagTask(..),TagUpdate(..),DoPa(..)
5757
,CompOption(..)
@@ -793,11 +793,11 @@ ADD ORPHAN ID check and make this a fatal error while testing
793793
-- | @decodePatternSet@ cannot handle collating element and treats
794794
-- equivalence classes as just their definition and nothing more.
795795
decodePatternSet :: PatternSet -> S.Set Char
796-
decodePatternSet (PatternSet msc mscc _ msec) =
797-
let baseMSC = maybe S.empty id msc
798-
withMSCC = foldl (flip S.insert) baseMSC (maybe [] (concatMap decodeCharacterClass . S.toAscList) mscc)
799-
withMSEC = foldl (flip S.insert) withMSCC (maybe [] (concatMap unSEC . S.toAscList) msec)
800-
in withMSEC
796+
decodePatternSet (PatternSet chars ccs _ eqcs) = S.unions
797+
[ chars
798+
, foldMap (S.fromList . decodeCharacterClass) ccs
799+
, foldMap (S.fromList . unSEC) eqcs
800+
]
801801

802802
-- | This returns the strictly ascending list of characters
803803
-- represented by @[: :]@ POSIX character classes.

regex-tdfa.cabal

+3-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ tested-with:
3737
GHC == 7.10.3
3838
GHC == 7.8.4
3939
GHC == 7.6.3
40-
GHC == 7.4.2
4140

4241
source-repository head
4342
type: git
@@ -99,7 +98,8 @@ library
9998
build-depends: fail == 4.9.*
10099
, semigroups == 0.18.* || == 0.19.*
101100
build-depends: array >= 0.4 && < 0.6
102-
, base >= 4.5 && < 5
101+
, base >= 4.6 && < 5
102+
-- GHC 7.6 required for LambdaCase
103103
, bytestring >= 0.9.2 && < 0.12
104104
, containers >= 0.4.2 && < 0.7
105105
, mtl >= 2.1.3 && < 2.4
@@ -114,6 +114,7 @@ library
114114
FlexibleInstances
115115
ForeignFunctionInterface
116116
FunctionalDependencies
117+
LambdaCase
117118
MagicHash
118119
MultiParamTypeClasses
119120
NondecreasingIndentation

0 commit comments

Comments
 (0)