{-# LANGUAGE UndecidableInstances #-}

module Shrun.Configuration.Data.CommandLogging.ReadSize
  ( ReadSize (..),
    parseReadSize,
  )
where

import Shrun.Configuration.Default (Default (def))
import Shrun.Prelude
import Shrun.Utils qualified as U

-- | Read size for command logs.
newtype ReadSize = MkReadSize {ReadSize -> Bytes 'B Natural
unReadSize :: Bytes B Natural}
  deriving stock (ReadSize -> ReadSize -> Bool
(ReadSize -> ReadSize -> Bool)
-> (ReadSize -> ReadSize -> Bool) -> Eq ReadSize
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: ReadSize -> ReadSize -> Bool
== :: ReadSize -> ReadSize -> Bool
$c/= :: ReadSize -> ReadSize -> Bool
/= :: ReadSize -> ReadSize -> Bool
Eq, Int -> ReadSize -> ShowS
[ReadSize] -> ShowS
ReadSize -> String
(Int -> ReadSize -> ShowS)
-> (ReadSize -> String) -> ([ReadSize] -> ShowS) -> Show ReadSize
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> ReadSize -> ShowS
showsPrec :: Int -> ReadSize -> ShowS
$cshow :: ReadSize -> String
show :: ReadSize -> String
$cshowList :: [ReadSize] -> ShowS
showList :: [ReadSize] -> ShowS
Show)

instance
  ( k ~ An_Iso,
    a ~ Bytes B Natural,
    b ~ Bytes B Natural
  ) =>
  LabelOptic
    "unReadSize"
    k
    ReadSize
    ReadSize
    a
    b
  where
  labelOptic :: Optic k NoIx ReadSize ReadSize a b
labelOptic = (ReadSize -> a) -> (b -> ReadSize) -> Iso ReadSize ReadSize a b
forall s a b t. (s -> a) -> (b -> t) -> Iso s t a b
iso (\(MkReadSize Bytes 'B Natural
x) -> a
Bytes 'B Natural
x) b -> ReadSize
Bytes 'B Natural -> ReadSize
MkReadSize

instance Default ReadSize where
  -- NOTE: [Command log splitting]
  --
  -- (Streamed) command logs can be split based on the values of poll-interval
  -- and read-size. For instance, suppose we are running a command that
  -- prints a total of 3 logs, at a rate of 1 log per second:
  --
  -- "foo"
  -- "bar"
  -- "baz"
  --
  -- Further, suppose that poll-interval is the usual 10,000 (1/100 of a
  -- second), and that read-size is 5 b. The timeline for what happens looks
  -- like (HH-MM-SS):
  --
  -- [00:00:00.00] cmd prints "foo" -- process buffer has "foo"
  -- [00:00:00.01] shrun reads and prints "foo"
  -- [00:00:01.00] cmd prints "bar" -- process buffer has "bar"
  -- [00:00:01.01] shrun reads and prints "bar"
  -- [00:00:02.00] cmd prints "baz" -- process buffer has "baz"
  -- [00:00:02.01] shrun reads and prints "baz"
  --
  -- Now, suppose that poll-interval is 5 seconds. The breakdown is:
  --
  -- [00:00:00.00] cmd prints "foo" -- process buffer has "foo"
  -- [00:00:01.00] cmd prints "bar" -- process buffer has "foobar"
  -- [00:00:02.00] cmd prints "baz" -- process buffer has "foobarbaz"
  -- [00:00:05.00] shrun reads and prints "fooba"
  -- [00:00:10.00] shrun reads and prints "rbaz"
  --
  -- That is, because cmd was printing logs faster than shrun was reading them,
  -- the buffer was able to grow larger than the read-size, so logs were cut
  -- off. How can we avoid this? Several options:
  --
  -- 1. Make poll-interval faster so that we are less likely to accumulate
  --    multiple logs and thus break the read-size. The downside here is that
  --    the faster we make poll-interval, the higher the CPU usage. The current
  --    1/100 a second (poll-interval := 10,000) was chosen since the CPU usage
  ---   is low, and any faster starts to get high.
  --
  -- 2. Increase the read-size. Experimentally, building GHC with a '1 kb'
  --    limit still sees logs get split. '16 kb', on the other hand, didn't
  --    show any log splitting (AFAICT; the log file is massive and not
  --    exhaustively checked). I tried with a limit of '1 mb' and that also
  --    seemed fine, though probably unnecessary. I really don't have any
  --    intuition for what a "good value" should be, other than
  --    trial-and-error.
  --
  -- 3. Implement a more complicated "read line" scheme. There are two ways
  --    we could try this:
  --
  --      i. Use the usual hGetNonBlocking, but only take up to the first new
  --         line. Save any remaining data and combine with the next read.
  --         I attempted this once but abandoned it after the logic became
  --         tricky. But maybe this is worth revisiting.
  --
  --      ii. Implement our own primitive hGetLineNonBlocking e.g. read all
  --          bytes up to the first newline, but do not block if we don't have
  --          any data. I have not tried this as these low-level details do not
  --          excite me, but perhaps this is worth investigating.
  --
  -- For now we choose option 2, increase the read-size as it is simple and
  -- appears to work well. We may choose to increase this in the future.
  --
  -- UPDATE: There is another way logs can be split. In some cases, a command
  -- may output a "partial" line, with the expectation to be completed later.
  -- For instance, our test framework outputs lines like:
  --
  --    Some test desc:               OK (0.05s)
  --
  -- But the line is outputted in two steps. First, the text description is
  -- printed. Second, the OK (0.05s) happens _after_ the test completes.
  -- If the time elapsed is long enough to outstrip the poll-interval, then
  -- this log will be broken. Increasing the poll-interval is an option, but
  -- that has other problems. And increasing the read-size will not help here.
  --
  -- The only real solution to this is to implement the more complicated
  -- "read line" scheme described above. Of course this comes with its own
  -- complications and trade-offs.
  def :: ReadSize
def = Bytes 'B Natural -> ReadSize
MkReadSize (Bytes 'B Natural -> ReadSize) -> Bytes 'B Natural -> ReadSize
forall a b. (a -> b) -> a -> b
$ Natural -> Bytes 'B Natural
forall (s :: Size) n. n -> Bytes s n
MkBytes Natural
16_000

instance DecodeTOML ReadSize where
  tomlDecoder :: Decoder ReadSize
tomlDecoder = Decoder Text -> Decoder ReadSize
forall (m :: Type -> Type). MonadFail m => m Text -> m ReadSize
parseReadSize Decoder Text
forall a. DecodeTOML a => Decoder a
tomlDecoder

parseReadSize :: (MonadFail m) => m Text -> m ReadSize
parseReadSize :: forall (m :: Type -> Type). MonadFail m => m Text -> m ReadSize
parseReadSize m Text
getTxt = do
  Text
byteTxt <- m Text
getTxt
  case Text -> Either Text (Bytes 'B Natural)
U.parseByteText Text
byteTxt of
    Right Bytes 'B Natural
b -> ReadSize -> m ReadSize
forall a. a -> m a
forall (f :: Type -> Type) a. Applicative f => a -> f a
pure (ReadSize -> m ReadSize) -> ReadSize -> m ReadSize
forall a b. (a -> b) -> a -> b
$ Bytes 'B Natural -> ReadSize
MkReadSize Bytes 'B Natural
b
    Left Text
err -> String -> m ReadSize
forall a. String -> m a
forall (m :: Type -> Type) a. MonadFail m => String -> m a
fail (String -> m ReadSize) -> String -> m ReadSize
forall a b. (a -> b) -> a -> b
$ String
"Could not parse --command-log-read-size size: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
unpack Text
err