{-# LANGUAGE UndecidableInstances #-} module Shrun.Configuration.Data.CommandLogging.ReadSize ( ReadSize (..), parseReadSize, ) where import Shrun.Configuration.Default (Default (def)) import Shrun.Prelude import Shrun.Utils qualified as U -- | Read size for command logs. newtype ReadSize = MkReadSize {ReadSize -> Bytes 'B Natural unReadSize :: Bytes B Natural} deriving stock (ReadSize -> ReadSize -> Bool (ReadSize -> ReadSize -> Bool) -> (ReadSize -> ReadSize -> Bool) -> Eq ReadSize forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a $c== :: ReadSize -> ReadSize -> Bool == :: ReadSize -> ReadSize -> Bool $c/= :: ReadSize -> ReadSize -> Bool /= :: ReadSize -> ReadSize -> Bool Eq, Int -> ReadSize -> ShowS [ReadSize] -> ShowS ReadSize -> String (Int -> ReadSize -> ShowS) -> (ReadSize -> String) -> ([ReadSize] -> ShowS) -> Show ReadSize forall a. (Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a $cshowsPrec :: Int -> ReadSize -> ShowS showsPrec :: Int -> ReadSize -> ShowS $cshow :: ReadSize -> String show :: ReadSize -> String $cshowList :: [ReadSize] -> ShowS showList :: [ReadSize] -> ShowS Show) instance ( k ~ An_Iso, a ~ Bytes B Natural, b ~ Bytes B Natural ) => LabelOptic "unReadSize" k ReadSize ReadSize a b where labelOptic :: Optic k NoIx ReadSize ReadSize a b labelOptic = (ReadSize -> a) -> (b -> ReadSize) -> Iso ReadSize ReadSize a b forall s a b t. (s -> a) -> (b -> t) -> Iso s t a b iso (\(MkReadSize Bytes 'B Natural x) -> a Bytes 'B Natural x) b -> ReadSize Bytes 'B Natural -> ReadSize MkReadSize instance Default ReadSize where -- NOTE: [Command log splitting] -- -- (Streamed) command logs can be split based on the values of poll-interval -- and read-size. For instance, suppose we are running a command that -- prints a total of 3 logs, at a rate of 1 log per second: -- -- "foo" -- "bar" -- "baz" -- -- Further, suppose that poll-interval is the usual 10,000 (1/100 of a -- second), and that read-size is 5 b. The timeline for what happens looks -- like (HH-MM-SS): -- -- [00:00:00.00] cmd prints "foo" -- process buffer has "foo" -- [00:00:00.01] shrun reads and prints "foo" -- [00:00:01.00] cmd prints "bar" -- process buffer has "bar" -- [00:00:01.01] shrun reads and prints "bar" -- [00:00:02.00] cmd prints "baz" -- process buffer has "baz" -- [00:00:02.01] shrun reads and prints "baz" -- -- Now, suppose that poll-interval is 5 seconds. The breakdown is: -- -- [00:00:00.00] cmd prints "foo" -- process buffer has "foo" -- [00:00:01.00] cmd prints "bar" -- process buffer has "foobar" -- [00:00:02.00] cmd prints "baz" -- process buffer has "foobarbaz" -- [00:00:05.00] shrun reads and prints "fooba" -- [00:00:10.00] shrun reads and prints "rbaz" -- -- That is, because cmd was printing logs faster than shrun was reading them, -- the buffer was able to grow larger than the read-size, so logs were cut -- off. How can we avoid this? Several options: -- -- 1. Make poll-interval faster so that we are less likely to accumulate -- multiple logs and thus break the read-size. The downside here is that -- the faster we make poll-interval, the higher the CPU usage. The current -- 1/100 a second (poll-interval := 10,000) was chosen since the CPU usage --- is low, and any faster starts to get high. -- -- 2. Increase the read-size. Experimentally, building GHC with a '1 kb' -- limit still sees logs get split. '16 kb', on the other hand, didn't -- show any log splitting (AFAICT; the log file is massive and not -- exhaustively checked). I tried with a limit of '1 mb' and that also -- seemed fine, though probably unnecessary. I really don't have any -- intuition for what a "good value" should be, other than -- trial-and-error. -- -- 3. Implement a more complicated "read line" scheme. There are two ways -- we could try this: -- -- i. Use the usual hGetNonBlocking, but only take up to the first new -- line. Save any remaining data and combine with the next read. -- I attempted this once but abandoned it after the logic became -- tricky. But maybe this is worth revisiting. -- -- ii. Implement our own primitive hGetLineNonBlocking e.g. read all -- bytes up to the first newline, but do not block if we don't have -- any data. I have not tried this as these low-level details do not -- excite me, but perhaps this is worth investigating. -- -- For now we choose option 2, increase the read-size as it is simple and -- appears to work well. We may choose to increase this in the future. -- -- UPDATE: There is another way logs can be split. In some cases, a command -- may output a "partial" line, with the expectation to be completed later. -- For instance, our test framework outputs lines like: -- -- Some test desc: OK (0.05s) -- -- But the line is outputted in two steps. First, the text description is -- printed. Second, the OK (0.05s) happens _after_ the test completes. -- If the time elapsed is long enough to outstrip the poll-interval, then -- this log will be broken. Increasing the poll-interval is an option, but -- that has other problems. And increasing the read-size will not help here. -- -- The only real solution to this is to implement the more complicated -- "read line" scheme described above. Of course this comes with its own -- complications and trade-offs. def :: ReadSize def = Bytes 'B Natural -> ReadSize MkReadSize (Bytes 'B Natural -> ReadSize) -> Bytes 'B Natural -> ReadSize forall a b. (a -> b) -> a -> b $ Natural -> Bytes 'B Natural forall (s :: Size) n. n -> Bytes s n MkBytes Natural 16_000 instance DecodeTOML ReadSize where tomlDecoder :: Decoder ReadSize tomlDecoder = Decoder Text -> Decoder ReadSize forall (m :: Type -> Type). MonadFail m => m Text -> m ReadSize parseReadSize Decoder Text forall a. DecodeTOML a => Decoder a tomlDecoder parseReadSize :: (MonadFail m) => m Text -> m ReadSize parseReadSize :: forall (m :: Type -> Type). MonadFail m => m Text -> m ReadSize parseReadSize m Text getTxt = do Text byteTxt <- m Text getTxt case Text -> Either Text (Bytes 'B Natural) U.parseByteText Text byteTxt of Right Bytes 'B Natural b -> ReadSize -> m ReadSize forall a. a -> m a forall (f :: Type -> Type) a. Applicative f => a -> f a pure (ReadSize -> m ReadSize) -> ReadSize -> m ReadSize forall a b. (a -> b) -> a -> b $ Bytes 'B Natural -> ReadSize MkReadSize Bytes 'B Natural b Left Text err -> String -> m ReadSize forall a. String -> m a forall (m :: Type -> Type) a. MonadFail m => String -> m a fail (String -> m ReadSize) -> String -> m ReadSize forall a b. (a -> b) -> a -> b $ String "Could not parse --command-log-read-size size: " String -> ShowS forall a. Semigroup a => a -> a -> a <> Text -> String unpack Text err