Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 46 additions & 5 deletions src/App.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ import Universum hiding (force)
import System.Environment (setEnv, lookupEnv, getEnvironment)
import System.Process (CreateProcess (..), StdStream (CreatePipe, UseHandle), proc, waitForProcess, createPipe, readCreateProcess, withCreateProcess)
import System.IO
( openBinaryFile, hSetBuffering, BufferMode(..), hFlush )
( openBinaryFile, hSetBuffering, BufferMode(..), hFlush, openTempFile )
import qualified System.FilePath as FilePath
import System.FilePath ((</>))
import System.Directory ( createDirectoryIfMissing, doesFileExist, getCurrentDirectory, createDirectory )
import System.Directory ( createDirectoryIfMissing, doesFileExist, getCurrentDirectory, createDirectory, removeFile )
import qualified Data.ByteString.Char8 as B8
import Control.Concurrent.Async (async, wait, cancel)
import Control.Exception.Base (handle)
Expand Down Expand Up @@ -44,6 +44,7 @@ import Control.Monad.EarlyReturn (withEarlyReturn, earlyReturn)
import Data.Time.Format.ISO8601 (iso8601Show)
import Data.Time.Clock (getCurrentTime)
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Trace

getSettings :: IO Settings
getSettings = do
Expand Down Expand Up @@ -78,14 +79,21 @@ getSettings = do
, force = False
, quietMode
, githubTokenRefreshThresholdSeconds
, trace = False
, traceFiles = False
}

main :: IO ()
main = do
(args :: CliArgs) <- getCliArgs
settings' <- getSettings
let f = args.force
let settings = (settings' :: Settings) { force = f }
-- Only trace when explicitly requested via CLI, not when inherited via env.
-- The parent's fsatrace already traces the entire process tree via LD_PRELOAD.
let traceExplicit = args.trace || args.traceFiles
let settings = (settings' :: Settings) { force = f, trace = traceExplicit, traceFiles = args.traceFiles }

when traceExplicit Trace.checkFsatrace

let jobName = fromMaybe (FilePath.takeFileName args.cmd) args.name

Expand Down Expand Up @@ -127,6 +135,17 @@ main = do
responsePipeReadFd <- handleToFd responsePipeRead
hSetBuffering responsePipeWrite LineBuffering

m_traceFile <- if settings.trace then do
(fp, h) <- openTempFile settings.stateDirectory "trace.log"
hClose h
pure (Just fp)
else
pure Nothing

let (actualCmd, actualArgs) = case m_traceFile of
Just traceFile -> Trace.wrapWithFsatrace traceFile args.cmd args.args
Nothing -> (args.cmd, args.args)

-- Recursive: AppState is used before process is started (mostly for logging)
rec

Expand All @@ -146,13 +165,15 @@ main = do
-- TODO: should we use delegate_ctlc or DIY? See https://hackage.haskell.org/package/process-1.6.20.0/docs/System-Process.html#g:4
-- -> We should DIY because we need to flush stream etc.
(Nothing, Just stdoutPipe, Just stderrPipe, processHandle) <- Process.createProcess
(proc args.cmd args.args) { std_in = UseHandle devnull, std_out = CreatePipe
(proc actualCmd actualArgs) { std_in = UseHandle devnull, std_out = CreatePipe
, std_err = CreatePipe
, env=Just $ nubOrdOn fst $
[ ("BASH_FUNC_snapshot%%", "() {\n" <> $(embedStringFile "src/snapshot.sh") <> "\n}")
, ("_taskrunner_request_pipe", show requestPipeWriteFd)
, ("_taskrunner_response_pipe", show responsePipeReadFd)
] <> parentEnv
]
<> (if settings.trace then [("_taskrunner_trace", "1")] else [])
<> parentEnv
}

logDebug appState $ "Running command: " <> show (args.cmd : args.args)
Expand Down Expand Up @@ -183,6 +204,26 @@ main = do
logDebug appState $ "Command " <> show (args.cmd : args.args) <> " exited with code " <> show exitCode
logDebugParent m_parentRequestPipe $ "Subtask " <> toText jobName <> " finished with " <> show exitCode

whenJust m_traceFile \traceFile -> do
traceExists <- doesFileExist traceFile
if traceExists then do
traceContent <- Text.readFile traceFile
let entries = Trace.parseTraceOutput traceContent
let filtered = Trace.filterTraceEntries settings.rootDirectory entries
let format = if settings.traceFiles then Trace.formatFileReport else Trace.formatDirectoryReport
let report = format settings.rootDirectory filtered
Text.hPutStr toplevelStderr report

m_snapshotArgs' <- readIORef appState.snapshotArgsRef
whenJust m_snapshotArgs' \snapshotArgs -> do
let discrepancies = Trace.findDiscrepancies settings.rootDirectory cwd snapshotArgs.fileInputs filtered
unless (null discrepancies) do
Text.hPutStr toplevelStderr (Trace.formatDiscrepancies discrepancies)

removeFile traceFile
else
logWarn appState "Trace file not found after execution; fsatrace may have failed to start."

m_hashToSave <- readIORef appState.hashToSaveRef

when (skipped && isNothing m_hashToSave && appState.isToplevel) do
Expand Down
8 changes: 8 additions & 0 deletions src/CliArgs.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import Options.Applicative
data CliArgs = CliArgs
{ name :: Maybe String -- Optional name argument
, force :: Bool -- Skip cache
, trace :: Bool -- Trace file system access (requires fsatrace)
, traceFiles :: Bool -- Show individual files in trace (instead of directory summary)
, cmd :: String -- The command to run
, args :: [String] -- List of arguments for the command
} deriving (Show)
Expand All @@ -21,6 +23,12 @@ commandParser = CliArgs
( long "force"
<> short 'f'
<> help "Skip cache and fuzzy cache" )
<*> switch
( long "trace"
<> help "Trace file system access during task execution (requires fsatrace)" )
<*> switch
( long "trace-files"
<> help "Like --trace but show individual files instead of directory summary" )
<*> argument str
( metavar "CMD"
<> help "The command to run" )
Expand Down
180 changes: 180 additions & 0 deletions src/Trace.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
-- | File system tracing via fsatrace.
-- The approach of using fsatrace (LD_PRELOAD-based file system call interception)
-- to automatically discover file dependencies is taken from Rattle
-- (https://github.com/ndmitchell/rattle), a build system that uses it to
-- automatically track which files are read/written by commands rather than
-- requiring manual dependency declarations.
module Trace
( checkFsatrace
, wrapWithFsatrace
, parseTraceOutput
, TraceEntry(..)
, TraceOp(..)
, filterTraceEntries
, formatFileReport
, formatDirectoryReport
, findDiscrepancies
, formatDiscrepancies
) where

import Universum

import qualified Data.Text as Text
import qualified Data.Map.Strict as Map
import System.Directory (findExecutable)
import System.FilePath (makeRelative, isRelative, (</>), takeFileName)
import Data.List (nub)
import Utils (bail)

data TraceOp = TraceRead | TraceWrite | TraceMove | TraceDelete | TraceTouch | TraceQuery
deriving (Show, Eq, Ord)

data TraceEntry = TraceEntry
{ op :: TraceOp
, path :: FilePath
} deriving (Show, Eq, Ord)

checkFsatrace :: IO ()
checkFsatrace = do
m <- findExecutable "fsatrace"
when (isNothing m) $
bail "fsatrace is not installed. Install it from https://github.com/jacereda/fsatrace and ensure it is on your PATH."

wrapWithFsatrace :: FilePath -> String -> [String] -> (String, [String])
wrapWithFsatrace traceFile cmd args =
("fsatrace", ["rwmd", traceFile, "--", cmd] ++ args)

parseTraceOutput :: Text -> [TraceEntry]
parseTraceOutput content =
mapMaybe parseLine (Text.lines content)
where
parseLine line =
case Text.uncons line of
Just (opChar, rest) | Text.isPrefixOf "|" rest ->
case charToOp opChar of
Just op -> Just TraceEntry { op, path = toString (Text.drop 1 rest) }
Nothing -> Nothing
_ -> Nothing

charToOp 'r' = Just TraceRead
charToOp 'w' = Just TraceWrite
charToOp 'm' = Just TraceMove
charToOp 'd' = Just TraceDelete
charToOp 't' = Just TraceTouch
charToOp 'q' = Just TraceQuery
charToOp _ = Nothing

filterTraceEntries :: FilePath -> [TraceEntry] -> [TraceEntry]
filterTraceEntries rootDir entries =
nub $ filter isProjectFile entries
where
allSystemPrefixes = ["/usr", "/lib", "/lib64", "/etc", "/proc", "/dev", "/sys", "/tmp", "/nix", "/var"]
-- Don't exclude system prefixes that are ancestors of the root directory
systemPrefixes = filter (\sp -> not (sp `isPrefixOf` rootDir)) allSystemPrefixes

-- Paths within the project that should be excluded (not meaningful inputs)
excludedRelPrefixes = [".git/", ".taskrunner/"]
excludedExact = [".git", ".gitignore"]
excludedFileNames = [".gitignore"]

isProjectFile entry =
let p = entry.path
rel = makeRelative rootDir p
in rootDir `isPrefixOf` p
&& isRelative rel
&& not (any (`isPrefixOf` p) systemPrefixes)
&& not (any (`isPrefixOf` rel) excludedRelPrefixes)
&& rel `notElem` excludedExact
&& takeFileName rel `notElem` excludedFileNames

-- | Format trace report showing individual files (--trace-files)
formatFileReport :: FilePath -> [TraceEntry] -> Text
formatFileReport rootDir entries =
let reads_ = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceRead]
writes = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceWrite]

section :: Text -> [FilePath] -> Text
section _ [] = ""
section title paths = title <> "\n" <> Text.unlines (map (\p -> " " <> toText p) paths)

in "\n=== File System Trace Report ===\n\n"
<> section "Files read:" reads_
<> (if not (null reads_) && not (null writes) then "\n" else "")
<> section "Files written:" writes

-- | Format trace report showing directory-level summary (--trace, default)
formatDirectoryReport :: FilePath -> [TraceEntry] -> Text
formatDirectoryReport rootDir entries =
let reads_ = nub [makeRelative rootDir e.path | e <- entries, e.op == TraceRead]
writes = nub [makeRelative rootDir e.path | e <- entries, e.op == TraceWrite]

dirSummary :: [FilePath] -> [(FilePath, Int)]
dirSummary = sortOn fst . Map.toList . foldl' countDir Map.empty
where
countDir acc fp =
let dir = topLevelDir fp
in Map.insertWith (+) dir (1 :: Int) acc

topLevelDir :: FilePath -> FilePath
topLevelDir fp = case break (== '/') fp of
(_, '/':_) -> takeWhile (/= '/') fp <> "/"
_ -> fp -- file at root level, show as-is

section :: Text -> [(FilePath, Int)] -> Text
section _ [] = ""
section title dirs = title <> "\n" <> Text.unlines
(map (\(d, n) -> " " <> toText d <> " (" <> show n <> " files)") dirs)

in "\n=== File System Trace Report ===\n\n"
<> section "Directories read:" (dirSummary reads_)
<> (if not (null reads_) && not (null writes) then "\n" else "")
<> section "Directories written:" (dirSummary writes)

-- | Resolve snapshot input pathspecs to directories relative to rootDirectory.
-- Pathspecs: "." = cwd, ":/path" = from root, "relative" = relative to cwd
resolveInputPaths :: FilePath -> FilePath -> [FilePath] -> [FilePath]
resolveInputPaths rootDir cwd = map resolve
where
cwdRel = makeRelative rootDir cwd

resolve (':':'/':rest) = rest -- ":/libs/ps" -> "libs/ps"
resolve "." = cwdRel -- "." -> cwd relative to root
resolve p
| "/" `isPrefixOf` p = makeRelative rootDir p -- absolute path
| otherwise = cwdRel </> p -- relative to cwd

-- | Check if a file path is covered by any of the resolved input directories.
isCoveredBy :: FilePath -> [FilePath] -> Bool
isCoveredBy file inputs = any covers inputs
where
covers "." = True -- "." means repo root, covers everything
covers input
| input == file = True
| otherwise = (input <> "/") `isPrefixOf` file

-- | Find files that were read but not covered by declared snapshot inputs.
findDiscrepancies :: FilePath -> FilePath -> [FilePath] -> [TraceEntry] -> [FilePath]
findDiscrepancies rootDir cwd snapshotInputs entries =
let resolvedInputs = resolveInputPaths rootDir cwd snapshotInputs
reads_ = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceRead]
-- Exclude the scripts themselves and taskrunner internals
excludePrefixes = [".taskrunner/", "scripts/"]
in filter (\f -> not (isCoveredBy f resolvedInputs)
&& not (any (`isPrefixOf` f) excludePrefixes))
reads_

-- | Format discrepancy warnings
formatDiscrepancies :: [FilePath] -> Text
formatDiscrepancies files =
let dirSummary = sortOn fst . Map.toList . foldl' countDir Map.empty $ files
where
countDir acc fp =
let dir = case break (== '/') fp of
(_, '/':_) -> takeWhile (/= '/') fp <> "/"
_ -> fp
in Map.insertWith (+) dir (1 :: Int) acc

in "\n=== Snapshot Discrepancies ===\n"
<> "Files read but NOT covered by snapshot inputs:\n"
<> Text.unlines (map (\(d, n) -> " " <> toText d <> " (" <> show n <> " files)") dirSummary)
<> "\n" <> Text.unlines (map (\f -> " " <> toText f) files)
2 changes: 2 additions & 0 deletions src/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ data Settings = Settings
, force :: Bool
, quietMode :: Bool
, githubTokenRefreshThresholdSeconds :: Int
, trace :: Bool
, traceFiles :: Bool
} deriving (Show)

type JobName = String
Expand Down
3 changes: 2 additions & 1 deletion taskrunner.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 2.2

-- This file has been generated from package.yaml by hpack version 0.36.0.
-- This file has been generated from package.yaml by hpack version 0.37.0.
--
-- see: https://github.com/sol/hpack

Expand Down Expand Up @@ -31,6 +31,7 @@ library
Control.Monad.EarlyReturn
RemoteCache
SnapshotCliArgs
Trace
Types
Utils
other-modules:
Expand Down
Loading
Loading