Recent changes to this wiki:
comment
diff --git a/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment b/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment new file mode 100644 index 0000000000..8ba801115d --- /dev/null +++ b/doc/bugs/Compiling_20250925__44___variable_not_in_scope_error/comment_3_2818941822cf1c1563c420e4d055dd4b._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2026-01-01T18:33:42Z" + content=""" +@caleb from what I can see there is no current version of git-annex +packaged in Arch, at least <https://aur.archlinux.org/packages?O=0&SeB=nd&K=git-annex&outdated=&SB=p&SO=d&PP=50&submit=Go> +only has old stuff. Where did your package go? +"""]]
Remove support for building with old versions of persistent-sqlite
Old versions of persistent-sqlite don't properly support non-ascii
paths when run in a non-unicode locale. So this both simplifies the code
and avoids buggy behavior.
Old versions of persistent-sqlite don't properly support non-ascii
paths when run in a non-unicode locale. So this both simplifies the code
and avoids buggy behavior.
diff --git a/BuildFlags.hs b/BuildFlags.hs
index d4a3a4f73e..60f240c368 100644
--- a/BuildFlags.hs
+++ b/BuildFlags.hs
@@ -80,7 +80,6 @@ dependencyVersions = map fmt $ sortBy (comparing (CI.mk . fst))
, ("uuid", VERSION_uuid)
, ("bloomfilter", VERSION_bloomfilter)
, ("http-client", VERSION_http_client)
- , ("persistent-sqlite", VERSION_persistent_sqlite)
, ("crypton", VERSION_crypton)
, ("aws", VERSION_aws)
, ("DAV", VERSION_DAV)
diff --git a/CHANGELOG b/CHANGELOG
index 85aa0528a3..96d3df4f89 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,6 +3,7 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
* fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
* When displaying sqlite error messages, include the path to the database.
+ * Remove support for building with old versions of persistent-sqlite.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/Database/ContentIdentifier.hs b/Database/ContentIdentifier.hs
index 4fdfd5b292..e5a701ba3f 100644
--- a/Database/ContentIdentifier.hs
+++ b/Database/ContentIdentifier.hs
@@ -5,7 +5,6 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
-{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TypeOperators, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
@@ -50,13 +49,7 @@ import qualified Logs.ContentIdentifier as Log
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
-
-#if MIN_VERSION_persistent_sqlite(2,13,3)
import Database.RawFilePath
-#else
-import Database.Persist.Sqlite (runSqlite)
-import qualified Data.Text as T
-#endif
data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue Bool
@@ -103,13 +96,8 @@ openDb = do
runMigrationSilent migrateContentIdentifier
-- Migrate from old versions of database, which had buggy
-- and suboptimal uniqueness constraints.
-#if MIN_VERSION_persistent_sqlite(2,13,3)
else liftIO $ runSqlite' (fromOsPath db) $ void $
runMigrationSilent migrateContentIdentifier
-#else
- else liftIO $ runSqlite (T.pack (fromRawFilePath db)) $ void $
- runMigrationSilent migrateContentIdentifier
-#endif
h <- liftIO $ H.openDbQueue db "content_identifiers"
return $ ContentIdentifierHandle h isnew
diff --git a/Database/Handle.hs b/Database/Handle.hs
index f859467b8e..135811ca86 100644
--- a/Database/Handle.hs
+++ b/Database/Handle.hs
@@ -195,11 +195,7 @@ runSqliteRobustly tablename db a = do
| otherwise -> rethrow $ errmsg ("after successful sqlite database " ++ fromOsPath (safeOutput db) ++ " open") ex
opensettle retries ic = do
-#if MIN_VERSION_persistent_sqlite(2,13,3)
conn <- Sqlite.open' (fromOsPath db)
-#else
- conn <- Sqlite.open (T.pack (fromOsPath db))
-#endif
settle conn retries ic
settle conn retries ic = do
diff --git a/Database/Init.hs b/Database/Init.hs
index eab3a6f32d..c516c89c76 100644
--- a/Database/Init.hs
+++ b/Database/Init.hs
@@ -5,7 +5,7 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
-{-# LANGUAGE OverloadedStrings, CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
module Database.Init where
@@ -13,9 +13,7 @@ import Annex.Common
import Annex.Perms
import Utility.FileMode
import qualified Utility.RawFilePath as R
-#if MIN_VERSION_persistent_sqlite(2,13,3)
import Database.RawFilePath
-#endif
import Database.Persist.Sqlite
import Lens.Micro
@@ -36,11 +34,7 @@ initDb db migration = do
let tmpdb = tmpdbdir </> literalOsPath "db"
let tmpdb' = fromOsPath tmpdb
createAnnexDirectory tmpdbdir
-#if MIN_VERSION_persistent_sqlite(2,13,3)
liftIO $ runSqliteInfo' tmpdb' (enableWAL tmpdb) migration
-#else
- liftIO $ runSqliteInfo (enableWAL tmpdb) migration
-#endif
setAnnexDirPerm tmpdbdir
-- Work around sqlite bug that prevents it from honoring
-- less restrictive umasks.
diff --git a/Database/RawFilePath.hs b/Database/RawFilePath.hs
index e154b74a3a..fdedf65762 100644
--- a/Database/RawFilePath.hs
+++ b/Database/RawFilePath.hs
@@ -31,11 +31,10 @@
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-}
-{-# LANGUAGE OverloadedStrings, CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
module Database.RawFilePath where
-#if MIN_VERSION_persistent_sqlite(2,13,3)
import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
import Utility.RawFilePath (RawFilePath)
@@ -92,4 +91,3 @@ withSqliteConnInfo'
-> (SqlBackend -> m a)
-> m a
withSqliteConnInfo' db = withSqlConn . openWith' db const
-#endif
diff --git a/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn b/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn
index fe6098f754..3610ac03f3 100644
--- a/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn
+++ b/doc/bugs/Get_crashes_when_remote_contains_non-english_chars.mdwn
@@ -1,6 +1,7 @@
Hi,
### Please describe the problem.
+
I'm trying to set up a git-annex repo for my books/technical papers to have easy access to them on my desktop and laptop. I'm using a centralized server (following [this guide](https://git-annex.branchable.com/tips/centralized_git_repository_tutorial/on_your_own_server/)) to make it easy to sync between my machines.
The issue is however that sqlite crashes when I'm trying to get a file from my server. See the log further down for the error message. I'm suspecting it is due to the repo on my server is named `Böcker` (swedish name for books). It does work if I'm cloning it locally on my server. E.g.
@@ -105,3 +106,11 @@ I'm not giving up on this that easily. Worst case I'll just rename my repo on my
Thank you for all the hours developing this software!
+> This seems to be the same bug that was fixed in [[!commit 8a3beabf350899e369dcd57a72432930581fbc25]]
+> and released in version 10.20231227. While this bug actually has a fixed
+> version of git-annex, the version output shows it was built with too
+> old a version of persistent-sqlite to get the fix.
+>
+> I've now updated git-annex's build deps, so all future versions will
+> be with a sufficiently new persistent-sqlite to not have this problem.
+> [[done]] --[[Joey]]
diff --git a/git-annex.cabal b/git-annex.cabal
index f0fdb7c031..1006e7a59a 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -245,7 +245,7 @@ Executable git-annex
conduit,
time (>= 1.9.1),
old-locale,
- persistent-sqlite (>= 2.8.1),
+ persistent-sqlite (>= 2.13.3),
persistent (>= 2.8.1),
persistent-template (>= 2.8.0),
unliftio-core,
When displaying sqlite error messages, include the path to the database
diff --git a/CHANGELOG b/CHANGELOG
index 476c305d8f..85aa0528a3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@ git-annex (10.20251216) UNRELEASED; urgency=medium
* fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
+ * When displaying sqlite error messages, include the path to the database.
-- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
diff --git a/Database/Handle.hs b/Database/Handle.hs
index ff358f7588..f859467b8e 100644
--- a/Database/Handle.hs
+++ b/Database/Handle.hs
@@ -1,6 +1,6 @@
{- Persistent sqlite database handles.
-
- - Copyright 2015-2023 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -24,6 +24,7 @@ import Utility.Debug
import Utility.DebugLocks
import Utility.InodeCache
import Utility.OsPath
+import Utility.SafeOutput
import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
@@ -78,7 +79,7 @@ closeDb (DbHandle _db worker jobs _) = do
- it is able to run.
-}
queryDb :: DbHandle -> SqlPersistM a -> IO a
-queryDb (DbHandle _db _ jobs errvar) a = do
+queryDb (DbHandle db _ jobs errvar) a = do
res <- newEmptyMVar
putMVar jobs $ QueryJob $
debugLocks $ liftIO . putMVar res =<< tryNonAsync a
@@ -86,7 +87,7 @@ queryDb (DbHandle _db _ jobs errvar) a = do
Right r -> either throwIO return r
Left BlockedIndefinitelyOnMVar -> do
err <- takeMVar errvar
- giveup $ "sqlite worker thread crashed: " ++ err
+ giveup $ "sqlite worker thread for " ++ fromOsPath (safeOutput db) ++ " crashed: " ++ err
{- Writes a change to the database.
-
@@ -111,7 +112,7 @@ commitDb h@(DbHandle db _ _ errvar) wa =
robustly a
Left BlockedIndefinitelyOnMVar -> do
err <- takeMVar errvar
- giveup $ "sqlite worker thread crashed: " ++ err
+ giveup $ "sqlite worker thread for " ++ fromOsPath (safeOutput db) ++ " crashed: " ++ err
briefdelay = 100000 -- 1/10th second
@@ -191,7 +192,7 @@ runSqliteRobustly tablename db a = do
briefdelay
retryHelper "access" ex maxretries db retries ic $
go conn
- | otherwise -> rethrow $ errmsg "after successful open" ex
+ | otherwise -> rethrow $ errmsg ("after successful sqlite database " ++ fromOsPath (safeOutput db) ++ " open") ex
opensettle retries ic = do
#if MIN_VERSION_persistent_sqlite(2,13,3)
@@ -217,7 +218,7 @@ runSqliteRobustly tablename db a = do
if e == Sqlite.ErrorIO
then opensettle
else settle conn
- | otherwise -> rethrow $ errmsg "while opening database connection" ex
+ | otherwise -> rethrow $ errmsg ("while opening sqlite database " ++ fromOsPath (safeOutput db) ++ " connection") ex
-- This should succeed for any table.
nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1"
@@ -274,7 +275,7 @@ closeRobustly db conn = go maxretries emptyDatabaseInodeCache
| e == Sqlite.ErrorBusy -> do
threadDelay briefdelay
retryHelper "close" ex maxretries db retries ic go
- | otherwise -> rethrow $ errmsg "while closing database connection" ex
+ | otherwise -> rethrow $ errmsg ("while closing sqlite database " ++ fromOsPath (safeOutput db) ++ " connection") ex
briefdelay = 1000 -- 1/1000th second
@@ -312,7 +313,7 @@ retryHelper action err maxretries db retries ic a = do
databaseAccessStalledMsg :: Show err => String -> OsPath -> err -> String
databaseAccessStalledMsg action db err =
- "Repeatedly unable to " ++ action ++ " sqlite database " ++ fromOsPath db
+ "Repeatedly unable to " ++ action ++ " sqlite database " ++ fromOsPath (safeOutput db)
++ ": " ++ show err ++ ". "
++ "Perhaps another git-annex process is suspended and is "
++ "keeping this database locked?"
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed.mdwn b/doc/bugs/SQLite3_database_disk_image_malformed.mdwn
index ca25c815f4..6e38637d05 100644
--- a/doc/bugs/SQLite3_database_disk_image_malformed.mdwn
+++ b/doc/bugs/SQLite3_database_disk_image_malformed.mdwn
@@ -41,3 +41,5 @@ The only SQLite3 database I can find is in .git/annex/keysdb . I can open that u
I've been happily using git-annex for many many years, first time I've encountered an issue like this.
+> Calling this [[done]] since the sqlite error messages have been improved.
+> --[[Joey]]
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_6_e16f300193b36db6793d9d6e2808e56a._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_6_e16f300193b36db6793d9d6e2808e56a._comment
new file mode 100644
index 0000000000..c610260d53
--- /dev/null
+++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_6_e16f300193b36db6793d9d6e2808e56a._comment
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 6"""
+ date="2026-01-01T17:58:00Z"
+ content="""
+> A useful thing to display might be the path to the corrupted database file and advice to remove it?
+
+Good idea to display the path. I've made that change.
+
+I don't think I want to make git-annex suggest deleting sqlite databases
+anytime sqlite crashes for any reason. While they are safe to delete,
+that encourages users to shrug and move on and tends to normalize any
+problem with sqlite. In reality, problems with sqlite are very rare,
+and I'd like to hear about them and understand them.
+"""]]
response
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_5_2f6a291a2bb37000f6e3b757a00a0713._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_5_2f6a291a2bb37000f6e3b757a00a0713._comment
new file mode 100644
index 0000000000..4c90a0235e
--- /dev/null
+++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_5_2f6a291a2bb37000f6e3b757a00a0713._comment
@@ -0,0 +1,21 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 5"""
+ date="2026-01-01T17:29:54Z"
+ content="""
+Your previous problem with the sqlite database cannot have caused fsck to
+detect a checksum problem with your annexed file.
+
+It looks like you have somehow modified annex object files, eg files in
+`.git/annex/objects`. git-annex sets permissions that usually prevent such
+a thing from happening.
+
+There is no way to make git-annex accept a version of a file with a different
+checksum than the one recorded in git. Instead you need to `git-annex add` the
+new version of the files to the repository in place of the old version.
+
+Here is a bash script that will pull the files out of `.git/annex/bad/`
+and update the annexed files:
+
+ IFS=$'\n'; for x in $(git-annex find --format='${key}\n${file}\n'); do if [ "$l" ]; then f="$x"; l=; if [ -e ".git/annex/bad/$k" ]; then mv ".git/annex/bad/$k" "$f"; git-annex add "$f" ; fi; else k="$x"; l=1; fi; done
+"""]]
improve synopsis for fix
It operates on pointers, whether those are symlinks or unlocked pointer
files.
It operates on pointers, whether those are symlinks or unlocked pointer
files.
diff --git a/Command/Fix.hs b/Command/Fix.hs index 05292059e5..2852fac9a3 100644 --- a/Command/Fix.hs +++ b/Command/Fix.hs @@ -29,7 +29,7 @@ import Utility.Touch cmd :: Command cmd = noCommit $ withAnnexOptions [annexedMatchingOptions, jsonOptions] $ command "fix" SectionMaintenance - "fix up links to annexed content" + "fix up pointers to annexed content" paramPaths (withParams seek) seek :: CmdParams -> CommandSeek diff --git a/doc/git-annex-fix.mdwn b/doc/git-annex-fix.mdwn index 1ac2165c89..e1ec3fc771 100644 --- a/doc/git-annex-fix.mdwn +++ b/doc/git-annex-fix.mdwn @@ -1,6 +1,6 @@ # NAME -git-annex fix - fix up links to annexed content +git-annex fix - fix up pointers to annexed content # SYNOPSIS @@ -13,8 +13,9 @@ content. This is useful to run manually when you have been moving the symlinks around, but is done automatically when committing a change with git too. -Also, populates unlocked files with annexed content. Usually this happens -automatically, but some git commands can leave them as unpopulated. +Also, populates unlocked pointer files with annexed content. +Usually this happens automatically, but some git commands can leave them +unpopulated. Also, adjusts unlocked files to be copies or hard links as configured by annex.thin.
fix: handle unlocked pointer files
fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
populatePointerFile' is safe to use here because seeking has found the
key, and isPointerFile is checked just before calling it.
fix: Populate unlocked pointer files in situations where a git command,
like git reset or git stash, leaves them unpopulated.
populatePointerFile' is safe to use here because seeking has found the
key, and isPointerFile is checked just before calling it.
diff --git a/Annex/Content/PointerFile.hs b/Annex/Content/PointerFile.hs
index 51c431d5ad..4c0743c2b4 100644
--- a/Annex/Content/PointerFile.hs
+++ b/Annex/Content/PointerFile.hs
@@ -1,6 +1,6 @@
{- git-annex pointer files
-
- - Copyright 2010-2018 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -33,21 +33,29 @@ import System.PosixCompat.Files (fileMode)
populatePointerFile :: Restage -> Key -> OsPath -> OsPath -> Annex (Maybe InodeCache)
populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
where
- go (Just k') | k == k' = do
- destmode <- liftIO $ catchMaybeIO $
- fileMode <$> R.getFileStatus (fromOsPath f)
- (ic, populated) <- replaceWorkTreeFile f $ \tmp -> do
- ok <- linkOrCopy k obj tmp destmode >>= \case
- Just _ -> thawContent tmp >> return True
- Nothing -> liftIO (writePointerFile tmp k destmode) >> return False
- ic <- withTSDelta (liftIO . genInodeCache tmp)
- return (ic, ok)
- maybe noop (restagePointerFile restage f) ic
- if populated
- then return ic
- else return Nothing
+ go (Just k') | k == k' = populatePointerFile' restage k obj f
go _ = return Nothing
+{- Before calling, must verify that the pointer file is a pointer to the key.
+ -
+ - This returns Nothing when populating the pointer file fails due to eg,
+ - not enough disk space.
+ -}
+populatePointerFile' :: Restage -> Key -> OsPath -> OsPath -> Annex (Maybe InodeCache)
+populatePointerFile' restage k obj f = do
+ destmode <- liftIO $ catchMaybeIO $
+ fileMode <$> R.getFileStatus (fromOsPath f)
+ (ic, populated) <- replaceWorkTreeFile f $ \tmp -> do
+ ok <- linkOrCopy k obj tmp destmode >>= \case
+ Just _ -> thawContent tmp >> return True
+ Nothing -> liftIO (writePointerFile tmp k destmode) >> return False
+ ic <- withTSDelta (liftIO . genInodeCache tmp)
+ return (ic, ok)
+ maybe noop (restagePointerFile restage f) ic
+ if populated
+ then return ic
+ else return Nothing
+
{- Removes the content from a pointer file, replacing it with a pointer.
-
- Does not check if the pointer file is modified. -}
diff --git a/CHANGELOG b/CHANGELOG
index a034823796..476c305d8f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,10 @@
+git-annex (10.20251216) UNRELEASED; urgency=medium
+
+ * fix: Populate unlocked pointer files in situations where a git command,
+ like git reset or git stash, leaves them unpopulated.
+
+ -- Joey Hess <id@joeyh.name> Thu, 01 Jan 2026 12:20:29 -0400
+
git-annex (10.20251215) upstream; urgency=medium
* Added annex.trashbin configuration.
diff --git a/Command/Fix.hs b/Command/Fix.hs
index a12747ee49..05292059e5 100644
--- a/Command/Fix.hs
+++ b/Command/Fix.hs
@@ -1,6 +1,6 @@
{- git-annex command
-
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2026 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -14,6 +14,7 @@ import Config
import qualified Annex
import Annex.ReplaceFile
import Annex.Content
+import Annex.Content.PointerFile
import Annex.Perms
import Annex.Link
import qualified Database.Keys
@@ -54,11 +55,24 @@ start fixwhat si file key = do
fixby $ fixSymlink file wantlink
| otherwise -> stop
Nothing -> case fixwhat of
- FixAll -> fixthin
+ FixAll -> fixpointers
FixSymlinks -> stop
where
file' = fromOsPath file
+
fixby = starting "fix" (mkActionItem (key, file)) si
+
+ fixpointers =
+ ifM (isJust <$> liftIO (isPointerFile file))
+ ( stopUnless (inAnnex key) $ fixby $ do
+ obj <- calcRepo (gitAnnexLocation key)
+ populatePointerFile' QueueRestage key obj file >>= \case
+ Just ic -> Database.Keys.addInodeCaches key [ic]
+ Nothing -> giveup "not enough disk space to populate pointer file"
+ next $ return True
+ , fixthin
+ )
+
fixthin = do
obj <- calcRepo (gitAnnexLocation key)
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
@@ -71,7 +85,6 @@ start fixwhat si file key = do
(Just n, Just n', False) | n > 1 && n == n' ->
fixby $ breakHardLink file key obj
_ -> stop
-
breakHardLink :: OsPath -> Key -> OsPath -> CommandPerform
breakHardLink file key obj = do
replaceWorkTreeFile file $ \tmp -> do
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn
index caed1ef7e6..d4cc46a24a 100644
--- a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn
+++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn
@@ -112,3 +112,5 @@ git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471
```
[[!meta title="after git reset --hard, git-annex get of unlocked unpopulated pointer file does nothing"]]
+
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_3_09e90b656763e3a8452260f0abead168._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_3_09e90b656763e3a8452260f0abead168._comment
new file mode 100644
index 0000000000..d52acaf827
--- /dev/null
+++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_3_09e90b656763e3a8452260f0abead168._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2026-01-01T16:17:58Z"
+ content="""
+I think it makes sense for `git-annex fix` to deal with this situation.
+In both cases the user has run a git command that affects files in the
+working tree, and it has left the annexed content not accessible.
+"""]]
diff --git a/doc/git-annex-fix.mdwn b/doc/git-annex-fix.mdwn
index 5a670cd1a0..1ac2165c89 100644
--- a/doc/git-annex-fix.mdwn
+++ b/doc/git-annex-fix.mdwn
@@ -9,10 +9,12 @@ git annex fix `[path ...]`
# DESCRIPTION
Fixes up symlinks that have become broken to again point to annexed
-content.
+content. This is useful to run manually when you have been moving the
+symlinks around, but is done automatically when committing a change
+with git too.
-This is useful to run manually when you have been moving the symlinks
-around, but is done automatically when committing a change with git too.
+Also, populates unlocked files with annexed content. Usually this happens
+automatically, but some git commands can leave them as unpopulated.
Also, adjusts unlocked files to be copies or hard links as
configured by annex.thin.
diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn
index 6f6eba8140..7c44779641 100644
--- a/doc/git-annex-smudge.mdwn
+++ b/doc/git-annex-smudge.mdwn
@@ -47,8 +47,8 @@ it records which worktree files need to be updated, and
the content. That is run by several git hooks, including post-checkout
and post-merge. However, a few git commands, notably `git stash` and
`git cherry-pick`, do not run any hooks, so after using those commands
-you can manually run `git annex smudge --update` to update the working
-tree.
+you can manually run `git annex smudge --update` (or `git-annex fix`)
+to update the working tree.
# OPTIONS
Added a comment
diff --git a/doc/special_remotes/rclone/comment_10_edef3c4eb5f6d06e496c0e90329d8143._comment b/doc/special_remotes/rclone/comment_10_edef3c4eb5f6d06e496c0e90329d8143._comment new file mode 100644 index 0000000000..4908d5b5fe --- /dev/null +++ b/doc/special_remotes/rclone/comment_10_edef3c4eb5f6d06e496c0e90329d8143._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="nadir" + avatar="http://cdn.libravatar.org/avatar/2af9174cf6c06de802104d632dc40071" + subject="comment 10" + date="2026-01-01T11:27:39Z" + content=""" +That makes a lot of sense. So if I understood things right, the correct place to work on this is rclone. I think I'll try to ask what they think of this kind of use case. + +Thanks for the explanation +"""]]
Added a comment: Fixing a bit of a mess
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_4_8cd94b23828fa865c6f04b021b971b55._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_4_8cd94b23828fa865c6f04b021b971b55._comment new file mode 100644 index 0000000000..7d4e86b3a0 --- /dev/null +++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_4_8cd94b23828fa865c6f04b021b971b55._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="puck" + avatar="http://cdn.libravatar.org/avatar/06d3f4f0a82dd00a84f8f8fabc8e537d" + subject="Fixing a bit of a mess" + date="2026-01-01T09:07:11Z" + content=""" +While the database file was corrupt, I did some work (not realising it was corrupt) to fix up MP3 tags in my music collection. Now when I run git annex fsck I'm getting errors like: + + fsck music/Arlo_Guthrie/The_Best_Of_Arlo_Guthrie/01-Alices_Restaurant_Massacree.mp3 + music/Arlo_Guthrie/The_Best_Of_Arlo_Guthrie/01-Alices_Restaurant_Massacree.mp3: Bad file size (128 B larger); moved to .git/annex/bad/SHA256E-s17800671--1a992cda34a5ab52d42cd7a420114fc122458ff57672e468f8403faa77f209b0.mp3 + + ** No known copies exist of music/Arlo_Guthrie/The_Best_Of_Arlo_Guthrie/01-Alices_Restaurant_Massacree.mp3 + failed + +and + + fsck music/Arrow/misc/Hot_Hot_Hot.mp3 (checksum...) + music/Arrow/misc/Hot_Hot_Hot.mp3: Bad file content; moved to .git/annex/bad/SHA256E-s3444736--3178689ce4a69a0e94fe11afaf077b6471077fd2d5128a5a65a71dcf84272ed5.mp3 + + ** No known copies exist of music/Arrow/misc/Hot_Hot_Hot.mp3 + failed + +I've tried using git annex reinject, but that is refused as the checksum doesn't match. + +Can I tell git-annex to just accept the files that I have in my repository as being correct? +"""]]
Added a comment: More details in error message?
diff --git a/doc/bugs/SQLite3_database_disk_image_malformed/comment_3_9ae97b4f4cacefef77542a65455cc1d3._comment b/doc/bugs/SQLite3_database_disk_image_malformed/comment_3_9ae97b4f4cacefef77542a65455cc1d3._comment new file mode 100644 index 0000000000..6b40bdbb51 --- /dev/null +++ b/doc/bugs/SQLite3_database_disk_image_malformed/comment_3_9ae97b4f4cacefef77542a65455cc1d3._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="puck" + avatar="http://cdn.libravatar.org/avatar/06d3f4f0a82dd00a84f8f8fabc8e537d" + subject="More details in error message?" + date="2026-01-01T07:32:23Z" + content=""" +Hey, + +I just came back to this after trying to do something in my repository. Good to hear I can just the SQlite file, done that now, and it is busy running fsck now. + +A useful thing to display might be the path to the corrupted database file and advice to remove it? +"""]]
todo
diff --git a/doc/todo/support_more_backup_software_like_borg.mdwn b/doc/todo/support_more_backup_software_like_borg.mdwn new file mode 100644 index 0000000000..3f097babf4 --- /dev/null +++ b/doc/todo/support_more_backup_software_like_borg.mdwn @@ -0,0 +1,17 @@ +The borg special remote allows git-annex to treat borg backups of a +git-annex repository as just another remote. This could also be done for +other backup software. + +restic seems like a good candidate. What other commonly used backup +software might be good to support? Comments welcome with suggestions.. + +--- + +Currently, support for these has to be in git-annex, it cannot be an +external special remote. Just providing a way in the external special +remote interface to set `thirdPartyPopulated` might be enough to allow +using external special remotes for this. + +The borg implementation does have getImported which looks at the git-annex +branch, and is used in an optimisation. It would be good to factor that out +to a common optimisation for all `thirdPartyPopulated` remotes. --[[Joey]]
response
diff --git a/doc/forum/Find_never__40____33____41___used_files_in_annex__63__/comment_1_470f9ec8a18e2080558af8d5a568bc97._comment b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__/comment_1_470f9ec8a18e2080558af8d5a568bc97._comment new file mode 100644 index 0000000000..e40d60ca0e --- /dev/null +++ b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__/comment_1_470f9ec8a18e2080558af8d5a568bc97._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-31T18:58:44Z" + content=""" +`git-annex p2phttp` does update the git-annex branch itself when receiving +files. And generally, any time git-annex stores an object in a repository, +it updates the git-annex branch accordingly. + +So, you can fetch from the remote and learn about those objects, +and then `git-annex unused --from=$remote` will show you unused objects in +the remote. + +When running `git-annex unused` on the local repository, it does list all +objects in the local repository. So if an object somehow does get into the +repository without a branch update, it will still show as unused. + +There is no way to list all objects present in a remote. Special remotes +are not required to support enumeration at all. So, if an object got sent +to a special remote, and the git-annex branch record of that was lost, +there would be no way to find that unused object. +"""]]
response
diff --git a/doc/special_remotes/rclone/comment_9_d0c23b1d2c2267ef0e1e91e8b33385df._comment b/doc/special_remotes/rclone/comment_9_d0c23b1d2c2267ef0e1e91e8b33385df._comment new file mode 100644 index 0000000000..6effbb7263 --- /dev/null +++ b/doc/special_remotes/rclone/comment_9_d0c23b1d2c2267ef0e1e91e8b33385df._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""Re: passing additional flags to rclone""" + date="2025-12-31T18:38:21Z" + content=""" +Passing arbitrary parameters to rclone is not supported. It would possibly +be a security hole if it were supported, because if there were a parameter +say --deleteeverything, you could `initremote` a special remote with that +parameter, and then wait for someone else to `enableremote` and use that +special remote and have a bad day. + +The "*" in `initremote --whatelse` output is a placeholder. It is not +intended to mean that every possible thing is passed through, but that, +if rclone supports some additional parameters, and explicitly asks for +them (via GETCONFIG), they will be passed through to it. + +I think that currently, `rclone gitannex` does not request any parameters. +It would certainly be possible to make it support something like +"bwlimit=3000". +"""]]
comment
diff --git a/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_2_5fe65196b2f160c63305cc0274cf1530._comment b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_2_5fe65196b2f160c63305cc0274cf1530._comment new file mode 100644 index 0000000000..10f1ccfb42 --- /dev/null +++ b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_2_5fe65196b2f160c63305cc0274cf1530._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-31T18:18:50Z" + content=""" +It might well be possible to implement this for restic too. +The crucial thing needed is for git-annex to +be able to list the backups and find the annexed files. For borg, +it does that by using `borg list`. +"""]]
comment
diff --git a/doc/git-annex-preferred-content/comment_8_284cfcc5f9ea6c534687e4d1afa2420a._comment b/doc/git-annex-preferred-content/comment_8_284cfcc5f9ea6c534687e4d1afa2420a._comment new file mode 100644 index 0000000000..6e613eec1b --- /dev/null +++ b/doc/git-annex-preferred-content/comment_8_284cfcc5f9ea6c534687e4d1afa2420a._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""Re: Order the remotes are processed in""" + date="2025-12-31T18:08:50Z" + content=""" +The order depends on the particular command. + +This kind of problem generally means you need to rethink your preferred +content expressions. +"""]]
comments
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn index 8048b070e9..caed1ef7e6 100644 --- a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn @@ -111,3 +111,4 @@ There should be a more seamless way to recover, or should I generally always use git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 ``` +[[!meta title="after git reset --hard, git-annex get of unlocked unpopulated pointer file does nothing"]] diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_1_2b9108af318c95a6459fce0b6bb92abd._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_1_2b9108af318c95a6459fce0b6bb92abd._comment new file mode 100644 index 0000000000..fcb8d284b8 --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_1_2b9108af318c95a6459fce0b6bb92abd._comment @@ -0,0 +1,38 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-31T16:23:36Z" + content=""" +`git-annex get` is not doing anything because the content of the annexed +file is already present in the repository, as shown by the `git-annex +whereis`. All that `get` does is get objects that are not present in the +git-annex repository. It does not fix up after other problems. + +As far as I can tell, this will reproduce your situation: + + git-annex init + git config annex.addunlocked true + echo 1 > foo + git-annex add foo + git commit -m add + echo 2 > foo + git config annex.largefiles nothing + git annex add foo + git commit -m add + git config annex.largefiles anything + git reset --hard HEAD^ + +The command you could have run then, which would have fixed it right up, is +`git status`. That will populate the files with their annexed content. 
And +it will recommend running `git-annex restage` to fix up the index to +reflect those changes. (Running `git-annex restage` on its own is useless +though.) + +This all happens because `git reset --hard` does not run any git hooks. +So `git-annex smudge --update` does not get a chance to automatically +run like it usually would when a checkout or merge is made. +Until that `git status`. + +Running `git-annex smudge --update` after the `git reset` will also fix +things right up. +"""]] diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_2_03b5da4a5018086aa5df396d8c8559ee._comment b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_2_03b5da4a5018086aa5df396d8c8559ee._comment new file mode 100644 index 0000000000..c3d40ead0e --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_/comment_2_03b5da4a5018086aa5df396d8c8559ee._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-31T17:29:42Z" + content=""" +I can understand reaching for `git-annex get` in this situation. But I +don't know if it really makes sense to have that and every other command +that gets the content of a file also handle populating annex pointer +files that have been staged. That seems like it might very +well violate least surprise in other situations, or cause problems in other +situations. + +The best fix would be if `git reset`, `git stash`, `git cherry-pick` and +whatever else were made to run some hook. + +One could just as well complain about this: + + git-annex add foo + mkdir subdir + git mv foo subdir/ + cat subdir/foo + cat: subdir/foo: No such file or directory + git-annex get subdir/foo + cat subdir/foo + cat: subdir/foo: No such file or directory + +There `git-annex get` does not fix up the annex symlink for the new +location of the file. 
The situation will get resolved by `git-annex fix` +which gets run on `git commit` and is usually enough that the user doesn't +need to remember to run it. + +Not only is it out of scope for `git-annex get` to deal with that +situation, if it modified the symlink it would then leave a change in the +working tree. Which is surprising behavior. +"""]]
diff --git a/doc/forum/annex_forget_that_operates_on_the_master_branch__63__.mdwn b/doc/forum/annex_forget_that_operates_on_the_master_branch__63__.mdwn new file mode 100644 index 0000000000..1d13e92568 --- /dev/null +++ b/doc/forum/annex_forget_that_operates_on_the_master_branch__63__.mdwn @@ -0,0 +1,14 @@ +Greetings. + +I would like to be able to truncate the commit history of the master branch (or, ideally, arbitrarily rewrite history). + +At the moment I manually prune commit history via [this](https://gist.github.com/JuliaSprenger/05810e0f7fe04062a32b3951c6520904) guide which boils down to: +1. Squash commits of the master branch and ```git annex forget --drop-dead --force```; +2. Force-push changes to the master and git-annex branches for each repository; +3. Delete ```sync/...``` branches in each repository. + +The problem is that rewriting history this way requires all of them to be online at the same time. + +Can this be done in an asynchronous manner? + +Thanks.
removed
diff --git a/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment b/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment deleted file mode 100644 index 636780d2a6..0000000000 --- a/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment +++ /dev/null @@ -1,10 +0,0 @@ -[[!comment format=mdwn - username="MatusGoljer1" - avatar="http://cdn.libravatar.org/avatar/8152eed1d594c570563ed46e7fd8356f" - subject="Order the remotes are processed in" - date="2025-12-26T19:21:46Z" - content=""" -Hi. What is the order the remotes are processed in? I have 10 or so special remotes and often the files are uploaded to one and then dropped from another, where doing nothing would be fine. They all have the same group and preferred content. - -I think it might be some concurrency issue (I have cron jobs running sync and I also commit manually sometimes), but I want to be sure. -"""]]
Added a comment: Order the remotes are processed in
diff --git a/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment b/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment new file mode 100644 index 0000000000..636780d2a6 --- /dev/null +++ b/doc/git-annex-preferred-content/comment_8_97d3f90eb02d734428f68395e0b04fff._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="MatusGoljer1" + avatar="http://cdn.libravatar.org/avatar/8152eed1d594c570563ed46e7fd8356f" + subject="Order the remotes are processed in" + date="2025-12-26T19:21:46Z" + content=""" +Hi. What is the order the remotes are processed in? I have 10 or so special remotes and often the files are uploaded to one and then dropped from another, where doing nothing would be fine. They all have the same group and preferred content. + +I think it might be some concurrency issue (I have cron jobs running sync and I also commit manually sometimes), but I want to be sure. +"""]]
Added a comment: Order the remotes are processed in
diff --git a/doc/git-annex-preferred-content/comment_7_65842cfd8a1040fa374563e40203a197._comment b/doc/git-annex-preferred-content/comment_7_65842cfd8a1040fa374563e40203a197._comment new file mode 100644 index 0000000000..2453011448 --- /dev/null +++ b/doc/git-annex-preferred-content/comment_7_65842cfd8a1040fa374563e40203a197._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="MatusGoljer1" + avatar="http://cdn.libravatar.org/avatar/8152eed1d594c570563ed46e7fd8356f" + subject="Order the remotes are processed in" + date="2025-12-26T19:21:35Z" + content=""" +Hi. What is the order the remotes are processed in? I have 10 or so special remotes and often the files are uploaded to one and then dropped from another, where doing nothing would be fine. They all have the same group and preferred content. + +I think it might be some concurrency issue (I have cron jobs running sync and I also commit manually sometimes), but I want to be sure. +"""]]
reporting on difficulty of recovering content for unlocked file
diff --git a/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn new file mode 100644 index 0000000000..8048b070e9 --- /dev/null +++ b/doc/bugs/get_of_unlocked___34__absent__34___file_does_nothing_.mdwn @@ -0,0 +1,113 @@ +### Please describe the problem. + +situation is tricky as I forced it via `git reset --hard HEAD^` after a commit which managed to commit a file directly into git instead of git-annex due to changes in `.gitattributes` , whenever originally it was under annex unlocked. + +So now I am in situation where it is not obvious on how to "instantiate" that file in the tree since `annex get` does nothing although file is just a link: + +``` +❯ cat .strava-backup/config.toml +/annex/objects/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml +❯ git annex whereis .strava-backup/config.toml +whereis .strava-backup/config.toml (1 copy) + abe1a028-2aec-4c31-b48d-0db92e338292 -- yoh@bilena:~/proj/strava-backup-mine [here] +ok +❯ git annex get .strava-backup/config.toml +❯ git annex get --force .strava-backup/config.toml +❯ git annex restage .strava-backup/config.toml +git-annex: This command takes no parameters. +❯ git annex restage +restage ok +❯ cat .strava-backup/config.toml +/annex/objects/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml +``` + +I thought I could just "instantiate" its content from the key -- but now `git-annex` insists (well - via `git status`) that it is modified, even after I do `restage` and commit. + + +<details> +<summary></summary> + +```shell +❯ cat .git/annex/objects/F2/m8/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml/MD5E-s1235--04d86de671070073b0bade06fd5085c1.toml >| .strava-backup/config.toml +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." 
to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git annex restage +restage ok +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git diff +❯ git diff --cached +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git commit -m 'reinstantiated file' +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") +❯ git commit -m 'reinstantiated file' -a +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +nothing to commit, working tree clean +❯ git status +On branch master +Your branch is ahead of 'test-washoe-tmp/master' by 1 commit. + (use "git push" to publish your local commits) + +Changes not staged for commit: + (use "git add <file>..." 
to update what will be committed) + (use "git restore <file>..." to discard changes in working directory) + modified: .strava-backup/config.toml + +no changes added to commit (use "git add" and/or "git commit -a") + +``` +</details> + +only if I do `git reset --hard` it becomes unmodified . I had to do `annex fsck` which made it instantiated but modified, then "commit" which committed nothing but everything became kosher. + +There should be a more seamless way to recover, or should I generally always use `annex fsck` after `git reset --hard` when working with unlocked files? + + +### What version of git-annex are you using? On what operating system? + + +``` +❯ git annex version | head -n 1 +git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 +``` +
Added a comment: This is amazing
diff --git a/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_1_1e586618eadbbb70f4e274aee201c67a._comment b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_1_1e586618eadbbb70f4e274aee201c67a._comment new file mode 100644 index 0000000000..9b57812e60 --- /dev/null +++ b/doc/tips/using_borg_for_efficient_storage_of_old_annexed_files/comment_1_1e586618eadbbb70f4e274aee201c67a._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="nadir" + avatar="http://cdn.libravatar.org/avatar/2af9174cf6c06de802104d632dc40071" + subject="This is amazing" + date="2025-12-25T12:10:18Z" + content=""" +It (near) perfectly solves a problem I had. + +If something similar existed for restic, I could use my existing backup repositories with this. There's also the issue that borg does not support most rclone supported remotes, like restic does, so it reduces usable cloud providers pretty much to hetzner in my situation. + +Still great. Thanks! +"""]]
Added a comment: Directory remotes in offline drives for archiving?
diff --git a/doc/tips/offline_archive_drives/comment_8_21ad80b8ddb9ca0105fae302eb74e94f._comment b/doc/tips/offline_archive_drives/comment_8_21ad80b8ddb9ca0105fae302eb74e94f._comment new file mode 100644 index 0000000000..0954eaeadb --- /dev/null +++ b/doc/tips/offline_archive_drives/comment_8_21ad80b8ddb9ca0105fae302eb74e94f._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="wzhd" + avatar="http://cdn.libravatar.org/avatar/1795a91af84f4243a3bf0974bc8d79fe" + subject="Directory remotes in offline drives for archiving?" + date="2025-12-20T10:28:35Z" + content=""" +Using offline drives as remotes makes it easy to enable encryption. I can rely on git-annex to encrypt the annexed files instead of setting up block device or file system encryption. The git repo does not need to be cloned to the drive. + +Can I move annexed files out of my laptop and into archival drives only? +I have multiple drives plugged in via USB to store multiple copies. +But it seems git-annex doesn't consider it safe because copies can't be locked down with directory remotes? +I'm only moving files into these drives by invoking one instance of git-annex, I'm pretty sure files won't be concurrently removed from the drives. +Can I move the files without entirely disabling all safety checks? +"""]]
diff --git a/doc/forum/Find_never__40____33____41___used_files_in_annex__63__.mdwn b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__.mdwn new file mode 100644 index 0000000000..83bc7634ba --- /dev/null +++ b/doc/forum/Find_never__40____33____41___used_files_in_annex__63__.mdwn @@ -0,0 +1,5 @@ +It is possible to deposit files in a remote's annex, for example via a p2phttp request. In this case, the deposited file was never known to the git-annex branch metadata. It is my understanding that in this case all the "unused" tooling is not applicable. + +Does git-annex provide means to scan an annex for unexpected annex keys, and maybe for ingesting them such that they appear as unused? + +Thx!
comment
diff --git a/doc/todo/recover_from_export_of_corrupted_object/comment_2_7ce55f8dbe9372085508cebc977587bd._comment b/doc/todo/recover_from_export_of_corrupted_object/comment_2_7ce55f8dbe9372085508cebc977587bd._comment new file mode 100644 index 0000000000..b740f91970 --- /dev/null +++ b/doc/todo/recover_from_export_of_corrupted_object/comment_2_7ce55f8dbe9372085508cebc977587bd._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-17T18:30:06Z" + content=""" +In a non-export S3 bucket with versioning, fsck also cannot recover from a +corrupted object, due to the same problem with the versionId. The same +method should work to handle this case. +"""]]
comment
diff --git a/doc/todo/recover_from_export_of_corrupted_object/comment_1_33113d748bc7c35ef669c90f6b82d36a._comment b/doc/todo/recover_from_export_of_corrupted_object/comment_1_33113d748bc7c35ef669c90f6b82d36a._comment new file mode 100644 index 0000000000..28adbcfb41 --- /dev/null +++ b/doc/todo/recover_from_export_of_corrupted_object/comment_1_33113d748bc7c35ef669c90f6b82d36a._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-17T17:22:32Z" + content=""" +Note that it would also be possible for a valid object to be sent, but then +get corrupted in the remote storage. I don't think that's what happened here. + +If that did happen, a similar recovery process is also needed. + +Which I think says that focusing on a recovery process, rather than on +prevention, is more useful. +"""]]
update
diff --git a/doc/todo/recover_from_export_of_corrupted_object.mdwn b/doc/todo/recover_from_export_of_corrupted_object.mdwn index 706f88cc4d..9311547310 100644 --- a/doc/todo/recover_from_export_of_corrupted_object.mdwn +++ b/doc/todo/recover_from_export_of_corrupted_object.mdwn @@ -2,10 +2,16 @@ This is a case where a truncated file was exported as part of a tree to S3. In particular a bucket with `versioning=yes`. +Note that `git-annex export` does not verify checksums before sending, and +so it's possible for this to happen if a corrupted object has somehow +gotten into the local repository. It might be possible to improve this to +deal better with object corruption, including object corruption that occurs +while exporting. + Currently there is no good way for a user to recover from this. Exporting a tree that deletes the corrupted file, followed by a tree that adds back the -right version of the file will generally work. It will not work for a -versioned S3 bucket though, because removing an export from a versioned S3 +right version of the file will generally work. But it will not work for a +versioned S3 bucket, because removing an export from a versioned S3 bucket does not remove the recorded S3 versionId. While re-exporting the file will record the new versionId, the old one remains recorded, and when multiple versionIds are recorded for the same key, either may be used when
add
diff --git a/doc/todo/recover_from_export_of_corrupted_object.mdwn b/doc/todo/recover_from_export_of_corrupted_object.mdwn new file mode 100644 index 0000000000..706f88cc4d --- /dev/null +++ b/doc/todo/recover_from_export_of_corrupted_object.mdwn @@ -0,0 +1,33 @@ +<https://github.com/OpenNeuroOrg/openneuro/issues/3446#issuecomment-2892398583> +This is a case where a truncated file was exported as part of a tree to S3. +In particular a bucket with `versioning=yes`. + +Currently there is no good way for a user to recover from this. Exporting a +tree that deletes the corrupted file, followed by a tree that adds back the +right version of the file will generally work. It will not work for a +versioned S3 bucket though, because removing an export from a versioned S3 +bucket does not remove the recorded S3 versionId. While re-exporting the +file will record the new versionId, the old one remains recorded, and when +multiple versionIds are recorded for the same key, either may be used when +retrieving it. + +What needs to be done is to remove the old versionId. But it does not seem +right to generally do this when removing an exported file from a S3 bucket, +because usually, when it's not corrupted, that versionId is still valid, +and can still be used to retrieve that object. + +`git-annex fsck --from=s3` will detect the problem, but it is unable to do +anything to resolve it, since it can only try to drop the corrupted key, +and dropping by key is not supported with an exporttree=yes remote. + +Could fsck be extended to handle this? It should be possible for fsck to: + +1. removeExport the corrupted file, and update the export log to say that + the export of the tree to the special remote is incomplete. +2. Handle the special case of the versioned S3 bucket with eg, a new Remote + method that is used when a key on the remote is corrupted. In the case + of a versioned S3 bucket, that new method would remove the versionId. + +--[[Joey]] + +[[!tag projects/openneuro]]
Added a comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_4_cd1cc39065715a35924f6bdfb11cbbc5._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_4_cd1cc39065715a35924f6bdfb11cbbc5._comment new file mode 100644 index 0000000000..d144cc1818 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_4_cd1cc39065715a35924f6bdfb11cbbc5._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 4" + date="2025-12-16T20:33:56Z" + content=""" +yes -- that one is embargoed (can be seen by going to https://dandiarchive.org/dandiset/000675) + +> And when you replicated the problem from the backup, were you using it in the configuration where it cannot access those? + +if I got the question right and since I do not recall now -- judging from me using `( source .git/secrets.env; git-annex import master...` I think I was with credentials allowing to access them (hence no errors while importing) + +> Do you have annex.largefiles configured in this repository, and are all of the affected files non-annexed files? + +yes + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ grep largefiles .gitattributes +**/.git* annex.largefiles=nothing +* annex.largefiles=((mimeencoding=binary)and(largerthan=0)) +``` + +and it seems all go into git + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git annex list +here +|s3-dandiarchive (untrusted) +||web +|||bittorrent +|||| +``` +is empty + +"""]]
Added a comment
diff --git a/doc/bugs/assistant_does_not_add_some_of_the___40__renamed__41___files/comment_3_123b3b033b230140739a5e60ce2a8974._comment b/doc/bugs/assistant_does_not_add_some_of_the___40__renamed__41___files/comment_3_123b3b033b230140739a5e60ce2a8974._comment new file mode 100644 index 0000000000..80b11489fb --- /dev/null +++ b/doc/bugs/assistant_does_not_add_some_of_the___40__renamed__41___files/comment_3_123b3b033b230140739a5e60ce2a8974._comment @@ -0,0 +1,61 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 3" + date="2025-12-16T18:06:34Z" + content=""" +yes -- small files, go to git + +no, it is a small number of files created/renamed. In this case it is a set of 4 files [pre-created empty and closed](https://github.com/con/duct/blob/36944dafd4b555a59355ac56f93f550b375bb733/src/con_duct/duct_main.py#L831), and then 3 out of 4 opened for writing by duct and at the end of the process closed, and that original 1 (`_info.json`) is [reopened for writing to dump the record and closed](https://github.com/con/duct/blob/v0.8.0/src/con_duct/__main__.py#L1072-L1074). Then outside tool which ran it takes all of them and renames into the filename with end timestamp. git-annex manages to detect that original 0-sized `_info.json` one gets removed but does not pick up the new one which gets rapidly renamed into a longer name. 
+ +In git log looks like: + +``` +commit 65e9f13a882ef78d743fbe634c8e05f9dcb32c45 +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:44:30 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.30.29.570--.mkv.duct_info.json | 0 + Videos/2025/12/2025.12.16-09.30.29.570--2025.12.16-09.44.28.225.mkv | 1 + + Videos/2025/12/2025.12.16-09.30.29.570--2025.12.16-09.44.28.225.mkv.duct_usage.json | 1 + + Videos/2025/12/2025.12.16-09.30.29.570--2025.12.16-09.44.28.225.mkv.log | 1 + + 4 files changed, 3 insertions(+) + +commit 3fe4710fc058e7d1433637c9af538b3bb9e5ebed +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:30:31 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.30.29.570--.mkv.duct_info.json | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + +commit f6bb6137c81ef36387ded229a4d8592964530bc8 +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:30:23 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.29.32.681--.mkv.duct_info.json | 0 + Videos/2025/12/2025.12.16-09.29.32.681--2025.12.16-09.30.21.889.mkv | 1 + + Videos/2025/12/2025.12.16-09.29.32.681--2025.12.16-09.30.21.889.mkv.duct_usage.json | 1 + + Videos/2025/12/2025.12.16-09.29.32.681--2025.12.16-09.30.21.889.mkv.log | 1 + + 4 files changed, 3 insertions(+) + +commit 00444920167e17b429d10fa29df8f1947930152c +Author: ReproStim User <changeme@example.com> +Date: Tue Dec 16 09:29:34 2025 -0500 + + git-annex in reprostim@reproiner:/data/reprostim + + Videos/2025/12/2025.12.16-09.29.32.681--.mkv.duct_info.json | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + +``` + + +Here is a copy of current process: https://www.oneukrainian.com/tmp/daemon-20251216.log + +"""]]
Added a comment: passing additional flags to rclone
diff --git a/doc/special_remotes/rclone/comment_8_f97d711efbb38e122150557832a8aa2e._comment b/doc/special_remotes/rclone/comment_8_f97d711efbb38e122150557832a8aa2e._comment new file mode 100644 index 0000000000..964e5ea98e --- /dev/null +++ b/doc/special_remotes/rclone/comment_8_f97d711efbb38e122150557832a8aa2e._comment @@ -0,0 +1,31 @@ +[[!comment format=mdwn + username="nadir" + avatar="http://cdn.libravatar.org/avatar/2af9174cf6c06de802104d632dc40071" + subject="passing additional flags to rclone" + date="2025-12-15T20:42:38Z" + content=""" +I'm trying to pass additional flags to rclone, like `--bwlimit` for example. Not sure how to do that, though. The `--whatelse` flag tells me they should just be passed by default: + +``` +> git annex initremote hetzner type=rclone rcloneremotename=hetzner rcloneprefix=someprefix encryption=shared chunk=500MiB --whatelse +embedcreds + embed credentials into git repository + (yes or no) +onlyencryptcreds + only encrypt embedded credentials, not annexed files + (yes or no) +mac + how to encrypt filenames used on the remote + (HMACSHA1 or HMACSHA224 or HMACSHA256 or HMACSHA384 or HMACSHA512) +keyid + gpg key id +keyid+ + add additional gpg key +keyid- + remove gpg key +* + all other parameters are passed to rclone +``` + +I tried `--bwlimit 3000` and `bwlimit=3000`, but that gives me `invalid option` plus help text or `git-annex: Unexpected parameters: bwlimit` respectively. +"""]]
diff --git a/doc/forum/special_remote_to___34__batch_archive__34_____40__to_tapes__41____63__.mdwn b/doc/forum/special_remote_to___34__batch_archive__34_____40__to_tapes__41____63__.mdwn new file mode 100644 index 0000000000..f04be05401 --- /dev/null +++ b/doc/forum/special_remote_to___34__batch_archive__34_____40__to_tapes__41____63__.mdwn @@ -0,0 +1,8 @@ +Ultimate goal is to backup [dandiarchive](https://dandiarchive.org/) (currently about 800TB but grows) to [NESE tapes](https://nese.readthedocs.io/en/latest/user-docs.html#nese-tape). NESE tapes service expects transfer via globus and + +> Files stored on NESE Tape should ideally be between 1 GiB and 1 TiB. Please consider creating tarballs of these target sizes before sending data via Globus to NESE Tape. + +All our dandisets range in sizes of their files from KBs to GBs, and already present in git/git-annex'es at e.g. https://github.com/dandisets . Previously, we abused Dropbox via an rclone shared special remote, but that one is gone now. So I wonder what could be our setup here to most seamlessly and "automatically" batch archive across a range of git/git-annex repos into the same "shared" globus space tarballs. + +Any ideas on the setup would be appreciated. +
add news item for git-annex 10.20251215
diff --git a/doc/news/version_10.20250828.mdwn b/doc/news/version_10.20250828.mdwn deleted file mode 100644 index 9dead20771..0000000000 --- a/doc/news/version_10.20250828.mdwn +++ /dev/null @@ -1,38 +0,0 @@ -git-annex 10.20250828 released with [[!toggle text="these changes"]] -[[!toggleable text=""" * p2p: Added --enable option, which can be used to enable P2P networks - provided by external commands git-annex-p2p-<netname> - * Added git-remote-p2p-annex, which allows git pull and push to - P2P networks provided by commands git-annex-p2p-<netname> - * S3: Default to signature=v4 when using an AWS endpoint, since some - AWS regions need v4 and all support it. When host= is used to specify - a different S3 host, the default remains signature=v2. - * webapp: Support setting up S3 buckets in regions that need v4 - signatures. - * S3: When initremote is given the name of a bucket that already exists, - automatically set datacenter to the right value, rather than needing it - to be explicitly set. - * info: Added --show option to pick which parts of the info to calculate - and display. - * Improve behavior when there are special remotes configured with - autoenable=yes with names that conflict with other remotes. - * adjust: When another branch has been manually merged into the adjusted - branch, re-adjusting errors out, rather than losing that merge commit. - * sync: When another branch has been manually merged into an adjusted - branch, error out rather than only displaying a warning. - * initremote: New which can be used along with - embedcreds=yes, to only encrypt the embedded creds, without encrypting - the content of the special remote. Useful for exporttree/importtree - remotes. - * Don't allow the type of encryption of an existing special remote to be - changed. Fixes reversion introduced in version 7.20191230. - * tahoe: Support tahoe-lafs command versions newer than 1.16. 
- * tahoe: Fix bug that made initremote require an encryption= parameter, - despite git-annex encryption not being used with this special remote. - Fixes reversion introduced in version 7.20191230. - * Improved error message when yt-dlp is not installed and is needed to - get a file from the web. - * The annex.youtube-dl-command git config is no longer used, git-annex - always runs the yt-dlp command, rather than the old youtube-dl command. - * Removed support for git versions older than 2.22. - * Bump aws build dependency to 0.24.1. - * stack.yaml: Update to lts-24.2."""]] \ No newline at end of file diff --git a/doc/news/version_10.20251215.mdwn b/doc/news/version_10.20251215.mdwn new file mode 100644 index 0000000000..e6aa18e260 --- /dev/null +++ b/doc/news/version_10.20251215.mdwn @@ -0,0 +1,16 @@ +git-annex 10.20251215 released with [[!toggle text="these changes"]] +[[!toggleable text=""" * Added annex.trashbin configuration. + * Added --presentsince, --lackingsince, and --changedsince file + matching options. + * Added TRANSFER-RETRIEVE-URL extension to the external special remote + protocol. + * S3: Remote can be configured with an x-amz-tagging header. + (Needs aws-0.25) + * S3: Support Google Cloud Storage + (Needs aws-0.25.1) + * S3: Support restore=yes, when used with storageclass=DEEP\_ARCHIVE and + similar. This is equivilant to the now deprecated Amazon Glacier. + (Needs aws-0.25.2) + * Add a build warning when the version of aws being built against is + too old to support all features. + * stack.yaml: Use aws-0.25.2."""]] \ No newline at end of file
close non-bug
diff --git a/doc/bugs/Walrus_storage_backend.mdwn b/doc/bugs/Walrus_storage_backend.mdwn index c5b1168c15..f9ca1ebe60 100644 --- a/doc/bugs/Walrus_storage_backend.mdwn +++ b/doc/bugs/Walrus_storage_backend.mdwn @@ -18,3 +18,5 @@ The whole infrastructure, allow to build a decentralized annex cloud storage, wh ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) I love git annex, works like a charm. Using it for 5+ years + +> [[notabug|done]] --[[Joey]] diff --git a/doc/bugs/Walrus_storage_backend/comment_1_9f32df0a59959cc8fe570b65093d8e41._comment b/doc/bugs/Walrus_storage_backend/comment_1_9f32df0a59959cc8fe570b65093d8e41._comment new file mode 100644 index 0000000000..719d8c73cc --- /dev/null +++ b/doc/bugs/Walrus_storage_backend/comment_1_9f32df0a59959cc8fe570b65093d8e41._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-15T17:33:31Z" + content=""" +This is not a bug. While it could be moved to [[todo]], anyone can write an +external special remote to use this or any other storage system. + +So I am closing this bug report. +"""]]
respond and close
diff --git a/doc/bugs/S3_fails_with_v4_signing.mdwn b/doc/bugs/S3_fails_with_v4_signing.mdwn index 4713d454fe..37aecd8822 100644 --- a/doc/bugs/S3_fails_with_v4_signing.mdwn +++ b/doc/bugs/S3_fails_with_v4_signing.mdwn @@ -43,3 +43,9 @@ initremote: 1 failed ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) Yep. It's a great little tool; up till now always local network + rsync. + +> Pass signature=v4 to `git-annex initremotr` to use v4 signing. +> This has been supported for years. +> +> That actually became the default for S3 in version +> 10.20250828. [[done]] --[[Joey]]
formatting
diff --git a/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment b/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment index 1ef22c6293..294e7178f4 100644 --- a/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment +++ b/doc/todo/Delayed_drop_from_remote/comment_6_6c4078bb00d6c4cab4fbba0bc7f3ad29._comment @@ -6,7 +6,7 @@ Actually I have gone ahead an implemented some [[git-annex-matching-options]] that will be useful in finding content to drop from the trashbin: -`--presentsince --lackingsince `--changedsince` +`--presentsince --lackingsince --changedsince` You might use, for example:
--presentsince, --lackingsince, and --changedsince
There are some complications around vector clocks, which basically come
down to, when the clocks of a repository have been wrong, these won't
reflect actual real times. Which I think is too obvious to document to the
user.
The possibility of a distributed system updating the location log
unnecessarily does seem like something a user might get confused by. It
would be a fairly rare situation that caused it though. For example
`git-annex fsck --from remote` will update the location logs for changes
it finds, and when the git-annex branch is out of sync with the remote,
will make unnecessary updates. Once those get synced to the remote,
in that repository eg --presentsince=here:interval will only see the
most recent log entry, which is at a later point in time than when
the content was actually present in the repository. It would be good to
document this perhaps, but it's a rather complicated and unusual situation.
There are some complications around vector clocks, which basically come
down to, when the clocks of a repository have been wrong, these won't
reflect actual real times. Which I think is too obvious to document to the
user.
The possibility of a distributed system updating the location log
unnecessarily does seem like something a user might get confused by. It
would be a fairly rare situation that caused it though. For example
`git-annex fsck --from remote` will update the location logs for changes
it finds, and when the git-annex branch is out of sync with the remote,
will make unnecessary updates. Once those get synced to the remote,
in that repository eg --presentsince=here:interval will only see the
most recent log entry, which is at a later point in time than when
the content was actually present in the repository. It would be good to
document this perhaps, but it's a rather complicated and unusual situation.
diff --git a/CHANGELOG b/CHANGELOG
index b78fc604bb..4fb5f7d5cc 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,6 +11,8 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
(Needs aws-0.25.2)
* stack.yaml: Use aws-0.25.2.
* Added annex.trashbin configuration.
+ * Added --presentsince, --lackingsince, and --changedsince file
+ matching options.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/CmdLine/GitAnnex/Options.hs b/CmdLine/GitAnnex/Options.hs
index 4b44edda56..529935f237 100644
--- a/CmdLine/GitAnnex/Options.hs
+++ b/CmdLine/GitAnnex/Options.hs
@@ -381,6 +381,21 @@ keyMatchingOptions' =
<> help "match files accessed within a time interval"
<> hidden
)
+ , annexOption (setAnnexState . Limit.addPresentSince) $ strOption
+ ( long "presentsince" <> metavar paramValue
+ <> help "matches files present in a repository throughout a time interval"
+ <> hidden
+ )
+ , annexOption (setAnnexState . Limit.addLackingSince) $ strOption
+ ( long "lackingsince" <> metavar paramValue
+ <> help "matches files not present in a repository throughout a time interval"
+ <> hidden
+ )
+ , annexOption (setAnnexState . Limit.addChangedSince) $ strOption
+ ( long "changedsince" <> metavar paramValue
+ <> help "matches files whose presence changed during a time interval"
+ <> hidden
+ )
, annexOption (setAnnexState . Limit.addMimeType) $ strOption
( long "mimetype" <> metavar paramGlob
<> help "match files by mime type"
diff --git a/Limit.hs b/Limit.hs
index 1916a606d5..28ab1be653 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -910,6 +910,43 @@ addAccessedWithin duration = do
return $ delta <= secs
secs = fromIntegral (durationSeconds duration)
+addPresentSince :: String -> Annex ()
+addPresentSince = limitLocationDuration "presentsince"
+ (\k t -> loggedLocationsUnchangedSince k t (== InfoPresent))
+
+addLackingSince :: String -> Annex ()
+addLackingSince = limitLocationDuration "lackingsince"
+ (\k t -> loggedLocationsUnchangedSince k t (/= InfoPresent))
+
+addChangedSince :: String -> Annex ()
+addChangedSince = limitLocationDuration "changedsince"
+ (\k t -> loggedLocationsChangedAfter k t (const True))
+
+limitLocationDuration :: String -> (Key -> POSIXTime -> Annex [UUID]) -> String-> Annex ()
+limitLocationDuration desc getter s = do
+ u <- Remote.nameToUUID name
+ case parseDuration interval of
+ Left parseerr -> addLimit $ Left parseerr
+ Right duration ->
+ let check _notpresent key = do
+ now <- liftIO getPOSIXTime
+ let t = now - fromIntegral (durationSeconds duration)
+ us <- getter key t
+ return $ u `elem` us
+ in addLimit $ Right $ mkmatcher check
+ where
+ (name, interval) = separate (== ':') s
+ mkmatcher check = MatchFiles
+ { matchAction = const $ checkKey . check
+ , matchNeedsFileName = False
+ , matchNeedsFileContent = False
+ , matchNeedsKey = True
+ , matchNeedsLocationLog = True
+ , matchNeedsLiveRepoSize = False
+ , matchNegationUnstable = False
+ , matchDesc = desc =? s
+ }
+
lookupFileKey :: FileInfo -> Annex (Maybe Key)
lookupFileKey fi = case matchKey fi of
Just k -> return (Just k)
diff --git a/Logs/Location.hs b/Logs/Location.hs
index 2adcddd2e3..0f06cd738f 100644
--- a/Logs/Location.hs
+++ b/Logs/Location.hs
@@ -8,7 +8,7 @@
- Repositories record their UUID and the date when they --get or --drop
- a value.
-
- - Copyright 2010-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -23,6 +23,8 @@ module Logs.Location (
loggedLocations,
loggedPreviousLocations,
loggedLocationsHistorical,
+ loggedLocationsUnchangedSince,
+ loggedLocationsChangedAfter,
loggedLocationsRef,
isKnownKey,
checkDead,
@@ -53,6 +55,7 @@ import Git.Types (RefDate, Ref, Sha)
import qualified Annex
import Data.Time.Clock
+import Data.Time.Clock.POSIX
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import qualified Data.Set as S
@@ -74,6 +77,9 @@ logStatusAfter lu key a = ifM a
)
{- Log a change in the presence of a key's value in a repository.
+ -
+ - If the provided LogStatus is the same as what is currently in the log,
+ - the log is not updated.
-
- Cluster UUIDs are not logged. Instead, when a node of a cluster is
- logged to contain a key, loading the log will include the cluster's
@@ -98,7 +104,7 @@ loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations presentLogInfo
{- Returns a list of repository UUIDs that the location log indicates
- - used to have the vale of a key, but no longer do.
+ - used to have the value of a key, but no longer do.
-}
loggedPreviousLocations :: Key -> Annex [UUID]
loggedPreviousLocations = getLoggedLocations notPresentLogInfo
@@ -107,6 +113,44 @@ loggedPreviousLocations = getLoggedLocations notPresentLogInfo
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
+{- Returns a list of repository UUIDs that the location log indicates
+ - have had a matching LogStatus for a key that has not changed
+ - since the given time.
+ -
+ - This assumes that logs were written with a properly set clock.
+ -
+ - Note that, while logChange avoids updating the log with the same
+ - LogStatus that is already in it, there are distributed situations
+ - where the log for a repository does get updated redundantly,
+ - setting the same LogStatus that was already logged. When that has
+ - happened, this will treat it as the LogStatus having changed at the
+ - last time it was written.
+ -}
+loggedLocationsUnchangedSince :: Key -> POSIXTime -> (LogStatus -> Bool) -> Annex [UUID]
+loggedLocationsUnchangedSince key time matchstatus =
+ loggedLocationsMatchingTime key (<= time) matchstatus
+
+{- Similar to loggedLocationsSince, but lists repository UUIDs that
+ - have had a matching LogStatus recorded after the given time.
+ -}
+loggedLocationsChangedAfter :: Key -> POSIXTime -> (LogStatus -> Bool) -> Annex [UUID]
+loggedLocationsChangedAfter key time matchstatus =
+ loggedLocationsMatchingTime key (> time) matchstatus
+
+loggedLocationsMatchingTime :: Key -> (POSIXTime -> Bool) -> (LogStatus -> Bool) -> Annex [UUID]
+loggedLocationsMatchingTime key matchtime matchstatus = do
+ config <- Annex.getGitConfig
+ locs <- map (toUUID . fromLogInfo . info)
+ . filter (matchtime' . date)
+ . filter (matchstatus . status)
+ . compactLog
+ <$> readLog (locationLogFile config key)
+ clusters <- getClusters
+ return $ addClusterUUIDs clusters locs
+ where
+ matchtime' (VectorClock t) = matchtime t
+ matchtime' Unknown = False
+
{- Gets the locations contained in a git ref. -}
loggedLocationsRef :: Ref -> Annex [UUID]
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
diff --git a/doc/git-annex-matching-options.mdwn b/doc/git-annex-matching-options.mdwn
index cf964cc71d..5bfeb73f05 100644
--- a/doc/git-annex-matching-options.mdwn
+++ b/doc/git-annex-matching-options.mdwn
@@ -64,7 +64,7 @@ in either of two repositories.
The repository should be specified using the name of a configured remote,
or the UUID or description of a repository. For the current repository,
- use `--in=here`
+ use "here".
Note that this does not check remote repositories to verify that content
is still present on them. However, when checking the current repository,
@@ -224,8 +224,8 @@ in either of two repositories.
* `--accessedwithin=interval`
- Matches when the content was accessed recently, within the specified time
- interval.
(Diff truncated)
Added a comment
diff --git a/doc/install/FreeBSD/comment_5_6372a545cb3c4a5b20f713ef80a0b6d3._comment b/doc/install/FreeBSD/comment_5_6372a545cb3c4a5b20f713ef80a0b6d3._comment new file mode 100644 index 0000000000..709208745d --- /dev/null +++ b/doc/install/FreeBSD/comment_5_6372a545cb3c4a5b20f713ef80a0b6d3._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 5" + date="2025-12-12T10:46:12Z" + content=""" +FWIW, dynamically linked binary is no good either: + +``` +[yoh@dbic-mrinbox ~]$ wget https://downloads.kitenet.net/git-annex/linux/current/git-annex-standalone-amd64.tar.gz +[yoh@dbic-mrinbox ~]$ tar -xzvf git-annex-standalone-amd64.tar.gz +[yoh@dbic-mrinbox ~]$ cd git-annex.linux/ +[yoh@dbic-mrinbox ~/git-annex.linux]$ ls +LICENSE exe git-annex git-core git-remote-tor-annex lib logo_16x16.png templates +README extra git-annex-shell git-receive-pack git-shell lib64 magic trustedkeys.gpg +bin gconvdir git-annex-webapp git-remote-annex git-upload-pack libdirs runshell usr +buildid git git-annex.MANIFEST git-remote-p2p-annex i18n logo.svg shimmed +[yoh@dbic-mrinbox ~/git-annex.linux]$ ./git-annex +ELF binary type \"3\" not known. +exec: /usr/home/yoh/git-annex.linux/exe/git-annex: Exec format error + +``` + +I will try to assemble build commands later... +"""]]
note
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5027412b50..c325adc9c1 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1327,6 +1327,9 @@ repository, using [[git-annex-config]]. See its man page for a list.) will not see a progress display for their drop action. So this is best used with a fast remote. + And, if the remote is not accessible, or a file fails to be moved to it, + the file will not be dropped from the repository. + * `annex.url` When a remote has a http url, the first time git-annex uses the remote
annex.trashbin
Note that, in the unlikely event that the reasoning in commit
5a081fc246664e7b5c17023dddfb8d123eef64e5 is wrong and there is some
situation where Annex.remotelist is not filled at a time when this is used,
the user will get back the "annex.trashbin is set to the name of an unknown
remote" error for a remote that does exist.
Note that, in the unlikely event that the reasoning in commit
5a081fc246664e7b5c17023dddfb8d123eef64e5 is wrong and there is some
situation where Annex.remotelist is not filled at a time when this is used,
the user will get back the "annex.trashbin is set to the name of an unknown
remote" error for a remote that does exist.
diff --git a/Annex/Content.hs b/Annex/Content.hs
index c113620cc9..876f526785 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -96,7 +96,7 @@ import Annex.ReplaceFile
import Annex.AdjustedBranch (adjustedBranchRefresh)
import Annex.DirHashes
import Messages.Progress
-import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..))
+import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..), name, storeKey, uuid)
import Types.NumCopies
import Types.Key
import Types.Transfer
@@ -779,7 +779,8 @@ unlinkAnnex key = do
{- Removes a key's file from .git/annex/objects/ -}
removeAnnex :: Annex [Remote] -> ContentRemovalLock -> Annex ()
-removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file ->
+removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file -> do
+ putouttrash
cleanObjectLoc key $ do
secureErase file
liftIO $ removeWhenExistsWith removeFile file
@@ -800,6 +801,20 @@ removeAnnex remotelist (ContentRemovalLock key) = withObjectLoc key $ \file ->
-- removal process, so thaw it.
, void $ tryIO $ thawContent file
)
+
+ putouttrash = annexTrashbin <$> Annex.getGitConfig >>= \case
+ Nothing -> return ()
+ Just trashbin -> do
+ rs <- remotelist
+ putouttrash' trashbin rs
+
+ putouttrash' _ [] = giveup "annex.trashbin is set to the name of an unknown remote"
+ putouttrash' trashbin (r:rs)
+ | name r == trashbin = do
+ catchNonAsync (storeKey r key (AssociatedFile Nothing) Nothing nullMeterUpdate)
+ (\ex -> giveup $ "Failed to move to annex.trashbin remote; unable to drop " ++ show ex)
+ logChange NoLiveUpdate key (uuid r) InfoPresent
+ | otherwise = putouttrash' trashbin rs
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
- returns the file it was moved to. -}
diff --git a/CHANGELOG b/CHANGELOG
index 3de0bf454d..b78fc604bb 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,7 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
similar. This is equivilant to the now deprecated Amazon Glacier.
(Needs aws-0.25.2)
* stack.yaml: Use aws-0.25.2.
+ * Added annex.trashbin configuration.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 81d8201ed3..4303c09961 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -158,6 +158,7 @@ data GitConfig = GitConfig
, annexAdjustedBranchRefresh :: Integer
, annexSupportUnlocked :: Bool
, annexAssistantAllowUnlocked :: Bool
+ , annexTrashbin :: Maybe RemoteName
, coreSymlinks :: Bool
, coreSharedRepository :: SharedRepository
, coreQuotePath :: QuotePath
@@ -283,6 +284,7 @@ extractGitConfig configsource r = GitConfig
(getmayberead (annexConfig "adjustedbranchrefresh"))
, annexSupportUnlocked = getbool (annexConfig "supportunlocked") True
, annexAssistantAllowUnlocked = getbool (annexConfig "assistant.allowunlocked") False
+ , annexTrashbin = getmaybe "annex.trashbin"
, coreSymlinks = getbool "core.symlinks" True
, coreSharedRepository = getSharedRepository r
, coreQuotePath = QuotePath (getbool "core.quotepath" True)
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 747bb1eb7f..5027412b50 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -1318,6 +1318,15 @@ repository, using [[git-annex-config]]. See its man page for a list.)
After changing this config, you need to re-run `git-annex init` for it
to take effect.
+* `annex.trashbin`
+
+ When this is set to the name of a remote, files that are dropped from the
+ repository will be moved to that remote.
+
+ Note that, if it takes a long time to move a file to the remote, the user
+ will not see a progress display for their drop action. So this is best
+ used with a fast remote.
+
* `annex.url`
When a remote has a http url, the first time git-annex uses the remote
diff --git a/doc/todo/Delayed_drop_from_remote.mdwn b/doc/todo/Delayed_drop_from_remote.mdwn
index dd2d26bd4e..9c9e6b0ff3 100644
--- a/doc/todo/Delayed_drop_from_remote.mdwn
+++ b/doc/todo/Delayed_drop_from_remote.mdwn
@@ -9,3 +9,5 @@ The point is to have a fast path to recovery from over-eager dropping that might
Or maybe something like this exists already...
[[!tag projects/ICE4]]
+
+> [[done]] --[[Joey]]
diff --git a/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment b/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment
new file mode 100644
index 0000000000..cd55a59f8b
--- /dev/null
+++ b/doc/todo/Delayed_drop_from_remote/comment_5_94a46f515a4e6df7d8d7855e0bfb7de5._comment
@@ -0,0 +1,20 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 5"""
+ date="2025-12-11T19:25:23Z"
+ content="""
+annex.trashbin is implemented.
+
+I am going to close this todo; if it turns out there is some preferred
+content improvement that would help with cleaning out the trash, let's talk
+about that on a new todo. But I'm guessing you'll make do with `find`.
+
+> I think I would deliberately want this to be invisible to the user, since I wouldn't want anyone to actively start relying on it.
+
+With a private remote it's reasonably invisible. The very observant user
+might notice a drop time that scales with the size of the file being
+dropped and be able to guess this feature is being used. And, if there is
+some error when it tries to move the object to the remote, the drop will
+fail. The error message in that case cannot really obscure the fact that
+annex.trashbin is configured.
+"""]]
comments
diff --git a/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment b/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment new file mode 100644 index 0000000000..5f2a1c94ea --- /dev/null +++ b/doc/install/FreeBSD/comment_4_65c9fdcc54924ab064c78f9436924191._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-12-11T14:56:30Z" + content=""" +I don't know much about the static-annex builds, but you may have better +luck with the [[Linux_standalone]] builds due to their using a more +conventional libc. + +Building git-annex from source is not hard if you can get the stack tool +installed. It looks like the only currently supported way to do that as a +freebsd user is to install <https://www.haskell.org/ghcup/> which includes +stack. Then follow the [[fromsource]] section on "building from source with +stack". +"""]] diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment new file mode 100644 index 0000000000..6ee3bfde1a --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_4_7fe8f0b860a765f3bfb9da7f5d61f8c8._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-12-11T14:42:10Z" + content=""" +> IIRC user can just push `git-annex` branch directly after `git-annex` merging remote version locally, right? + +Sure, but my point was that they would have to change their workflow due to +a change on the server that might not be visible to them. Violating least +surprise. +"""]]
remove accidentially added file
diff --git a/doc/.git-annex.mdwn.swp b/doc/.git-annex.mdwn.swp deleted file mode 100644 index 704713d9ce..0000000000 Binary files a/doc/.git-annex.mdwn.swp and /dev/null differ
Added a comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment new file mode 100644 index 0000000000..3cfbce29d5 --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_3_a9c504e7cd8080158fd68b4bcaa90e26._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 3" + date="2025-12-11T13:33:27Z" + content=""" +> In that example, the git-annex branch is not pushed to origin after annexed files are sent to it. So how does git-annex on otherhost know that origin has those files? Well, git-annex-shell, when receiving the files, updates the git-annex branch in origin. + + +IIRC user can just push `git-annex` branch directly after `git-annex` merging remote version locally, right? + +> Making it read-only would somewhat limit the exposure to all these problems, but if it's read-only, how would any annex objects get into the remote repository in the first place? + +my use-case at hands: I manipulate git-annex repo on a linux box on an NFS mount and the original one is freebsd box with bare minimal installation. I have about 50 datasets in a hierarchy. I wanted to backup to another location and it would be more performant to talk to the original freebsd server directly instead of going through NFS mount. I [can't install git-annex on that freebsd box ATM](https://git-annex.branchable.com/install/FreeBSD/#comment-38d4cc2a1e1deb696447cc0a9e149e77). + +FWIW, on a second thought, given that I do have a workaround with `rsync` (verified that it works) and unless another more prominent usecase arrives, might be indeed not worth the hassle. + +"""]]
Added a comment
diff --git a/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment b/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment new file mode 100644 index 0000000000..a1d5697c9b --- /dev/null +++ b/doc/install/FreeBSD/comment_3_369afac17cc75bec4584f3525f0c2826._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 3" + date="2025-12-11T12:20:27Z" + content=""" +don't know much about freebsd but static builds from https://git.kyleam.com/static-annex do not work: + +```shell +[yoh@dbic-mrinbox ~/git-annex-10.20250828]$ bin/git-annex +ELF binary type \"0\" not known. +bash: bin/git-annex: cannot execute binary file: Exec format error +[yoh@dbic-mrinbox ~/git-annex-10.20250828]$ file bin/git-annex +bin/git-annex: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=a6f7f36778ade374ef6572c787cacf6ffa2ec78d, with debug_info, not stripped + +``` +"""]]
comment
diff --git a/doc/.git-annex.mdwn.swp b/doc/.git-annex.mdwn.swp new file mode 100644 index 0000000000..704713d9ce Binary files /dev/null and b/doc/.git-annex.mdwn.swp differ diff --git a/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment b/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment new file mode 100644 index 0000000000..cfbe1dde1e --- /dev/null +++ b/doc/install/FreeBSD/comment_2_36a9e11d3140b892c4ff334387567eab._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-10T18:15:59Z" + content=""" +Doesn't FreeBSD support emulating linux syscalls? I suspect that the linux +standalone tarball could be used to install git-annex on user-space on +FreeBSD and work that way. Have not tried it maybe there is a better way, +to install a FreeBSD port as a regular user. +"""]]
comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment new file mode 100644 index 0000000000..8191356794 --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_2_a8feba19f86aeb6d3b76266051b8bebb._comment @@ -0,0 +1,47 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-10T17:54:01Z" + content=""" +As for the idea that git-annex could access a remote without +git-annex-shell, I think that any efforts in this area are bound to end up +with some partial implementation of a quarter of git-annex-shell in shell +script, which is bound to not work as well as the real thing. + +Consider that this is a supported workflow: + + git push origin master + git-annex copy --to origin + + ssh otherhost + cd repo + git pull origin + git-annex get + +In that example, the git-annex branch is not pushed to origin after annexed +files are sent to it. So how does git-annex on otherhost know that origin +has those files? Well, git-annex-shell, when receiving the files, updates +the git-annex branch in origin. + +So, to support this workflow, the git-annex-shell reimplementation in shell +would need to update the git-annex branch. That's about 3000 lines of code +in git-annex, with complecations including concurrency, making it fast, +etc. + +Other complications include supporting different repository versions, +populating unlocked files, supporting configs like +annex.secure-erase-command, etc. And while any of these could be left out +an be documented as limitations of not having git-annex installed, I think +the real kicker is that this is behavior what would occur even if git-annex +is only *temporarily* not installed. So there's the risk that any user who +is having a bad PATH day suddenly gets a weird behavior. 
+ +Making it read-only would somewhat limit the exposure to all these +problems, but if it's read-only, how would any annex objects get into the +remote repository in the first place? + +Using a separate special remote seems much cleaner. Then it's only used if +you choose to use it. And it works like any other special remote. +The rsync special remote is close enough to work, but a more special-purpose +one could support things a bit better. +"""]]
comment
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment new file mode 100644 index 0000000000..8536bfecd0 --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_/comment_1_5f9c75b6aa0a50634ff4004b89c3fe12._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-10T17:39:01Z" + content=""" +It's actually possible to use a rsync special remote to fetch objects right +out of `.git/annex/objects/`. For example: + + git-annex initremote foo-rsync type=rsync encryption=none rsyncurl=example.com:/path/to/repo/.git/annex/objects/ --sameas=foo + +Since the default hash directory paths are different for rsync than for a +git-annex repository, getting an object will first try the wrong hash path, +which does lead to rsync complaining to stderr. But then it will fall back +to a hash path that works. + +Sending an object to the rsync special remote will store it in a hash path +different from the one that git-annex usually uses. So later switching to using +git-annex in that repository will result in some unusual behavior, since +it won't see some files that were put there. `git-annex fsck` will actually +recover from this too, eg: + + fsck newfile (normalizing object location) (checksum...) ok + +There are enough problems that I can't really recommend this, +it just seemed worth pointing out that it can be done. +"""]]
fix example output
diff --git a/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn b/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
index cac81d06b4..1a080c0a4b 100644
--- a/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
+++ b/doc/tips/using_Amazon_S3_with_DEEP_ARCHIVE_and_GLACIER.mdwn
@@ -38,7 +38,7 @@ Now the remote can be used like any other remote.
But, when you try to get a file out of S3, it'll start a restore:
# git annex get my_cool_big_file
- get my_cool_big_file (from s3...) (gpg)
+ get my_cool_big_file (from mys3...) (gpg)
Restore initiated, try again later.
failed
S3: support restore=yes
When used with GLACIER, this is similar to Amazon Glacier, which is
now deprecated by Amazon. It can also be used with other storage classes
like DEEP_ARCHIVE and lifecycle rules. Which is why it's a separate config.
Also added some associated git configs.
This needs aws-0.25.2.
Sponsored-by: Brock Spratlen on Patreon
When used with GLACIER, this is similar to Amazon Glacier, which is
now deprecated by Amazon. It can also be used with other storage classes
like DEEP_ARCHIVE and lifecycle rules. Which is why it's a separate config.
Also added some associated git configs.
This needs aws-0.25.2.
Sponsored-by: Brock Spratlen on Patreon
diff --git a/CHANGELOG b/CHANGELOG
index 97d64583f1..3de0bf454d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,9 +4,12 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
(Needs aws-0.25)
* Add a build warning when the version of aws being built against is
too old. 0.25.1 is needed to support Google Cloud Storage.
- * stack.yaml: Use aws-0.25.1.
* Added TRANSFER-RETRIEVE-URL extension to the external special remote
protocol.
+ * S3: Support restore=yes, when used with storageclass=DEEP_ARCHIVE and
+ similar. This is equivalent to the now deprecated Amazon Glacier.
+ (Needs aws-0.25.2)
+ * stack.yaml: Use aws-0.25.2.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Remote/S3.hs b/Remote/S3.hs
index e8401d80ef..002cdc1958 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -12,8 +12,8 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE CPP #-}
-#if ! MIN_VERSION_aws(0,25,1)
-#warning Building with an old version of the aws library. Recommend updating to 0.25.1, which fixes bugs and is needed for some features.
+#if ! MIN_VERSION_aws(0,25,2)
+#warning Building with an old version of the aws library. Recommend updating to 0.25.2, which fixes bugs and is needed for some features.
#endif
module Remote.S3 (remote, iaHost, configIA, iaItemUrl) where
@@ -96,6 +96,8 @@ remote = specialRemoteType $ RemoteType
(FieldDesc "part size for multipart upload (eg 1GiB)")
, optionalStringParser storageclassField
(FieldDesc "storage class, eg STANDARD or STANDARD_IA or ONEZONE_IA")
+ , yesNoParser restoreField (Just False)
+ (FieldDesc "enable restore of files not currently accessible in the bucket")
, optionalStringParser fileprefixField
(FieldDesc "prefix to add to filenames in the bucket")
, yesNoParser versioningField (Just False)
@@ -151,7 +153,10 @@ storageclassField = Accepted "storageclass"
fileprefixField :: RemoteConfigField
fileprefixField = Accepted "fileprefix"
-
+
+restoreField :: RemoteConfigField
+restoreField = Accepted "restore"
+
publicField :: RemoteConfigField
publicField = Accepted "public"
@@ -208,7 +213,7 @@ gen r u rc gc rs = do
where
new c cst info hdl magic = Just $ specialRemote c
(store hdl this info magic)
- (retrieve hdl rs c info)
+ (retrieve gc hdl rs c info)
(remove hdl this info)
(checkKey hdl rs c info)
this
@@ -432,14 +437,14 @@ storeHelper info h magic f object p = liftIO $ case partSize info of
{- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -}
-retrieve :: S3HandleVar -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever
-retrieve hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
+retrieve :: RemoteGitConfig -> S3HandleVar -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever
+retrieve gc hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
Right h ->
eitherS3VersionID info rs c k (T.pack $ bucketObject info k) >>= \case
Left failreason -> do
warning (UnquotedString failreason)
giveup "cannot download content"
- Right loc -> retrieveHelper info h loc f p iv
+ Right loc -> retrieveHelper gc info h loc f p iv
Left S3HandleNeedCreds ->
getPublicWebUrls' rs info c k >>= \case
Left failreason -> do
@@ -448,17 +453,44 @@ retrieve hv rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
Right us -> unlessM (withUrlOptions Nothing $ downloadUrl False k p iv us f) $
giveup "failed to download content"
-retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()
-retrieveHelper info h loc f p iv = retrieveHelper' h f p iv $
+retrieveHelper :: RemoteGitConfig -> S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()
+retrieveHelper gc info h loc f p iv = retrieveHelper' gc info h f p iv $
case loc of
Left o -> S3.getObject (bucket info) o
Right (S3VersionID o vid) -> (S3.getObject (bucket info) o)
{ S3.goVersionId = Just vid }
-retrieveHelper' :: S3Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> S3.GetObject -> Annex ()
-retrieveHelper' h f p iv req = liftIO $ runResourceT $ do
- S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle h req
+retrieveHelper' :: RemoteGitConfig -> S3Info -> S3Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> S3.GetObject -> Annex ()
+retrieveHelper' gc info h f p iv req = liftIO $ runResourceT $ do
+ S3.GetObjectResponse { S3.gorResponse = rsp } <- handlerestore $
+ sendS3Handle h req
Url.sinkResponseFile p iv zeroBytesProcessed f WriteMode rsp
+ where
+ needrestore st = restore info && statusCode st == 403
+ handlerestore a = catchJust (Url.matchStatusCodeException needrestore) a $ \_ -> do
+#if MIN_VERSION_aws(0,25,2)
+ let tier = case remoteAnnexS3RestoreTier gc of
+ Just "bulk" -> S3.RestoreObjectTierBulk
+ Just "expedited" -> S3.RestoreObjectTierExpedited
+ _ -> S3.RestoreObjectTierStandard
+ let days = case remoteAnnexS3RestoreDays gc of
+ Just n -> S3.RestoreObjectLifetimeDays n
+ Nothing -> S3.RestoreObjectLifetimeDays 1
+ let restorereq = S3.restoreObject
+ (S3.goBucket req)
+ (S3.goObjectName req)
+ tier
+ days
+ restoreresp <- sendS3Handle h $ restorereq
+ { S3.roVersionId = S3.goVersionId req
+ }
+ case restoreresp of
+ S3.RestoreObjectAccepted -> giveup "Restore initiated, try again later."
+ S3.RestoreObjectAlreadyInProgress -> giveup "Restore in progress, try again later."
+ S3.RestoreObjectAlreadyRestored -> a
+#else
+ giveup "git-annex is built with too old a version of the aws library to support restore=yes"
+#endif
remove :: S3HandleVar -> Remote -> S3Info -> Remover
remove hv r info _proof k = withS3HandleOrFail (uuid r) hv $ \h -> do
@@ -529,7 +561,7 @@ storeExportS3' hv r rs info magic f k loc p = withS3Handle hv $ \case
retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
retrieveExportS3 hv r info k loc f p = verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
withS3Handle hv $ \case
- Right h -> retrieveHelper info h (Left (T.pack exportloc)) f p iv
+ Right h -> retrieveHelper (gitconfig r) info h (Left (T.pack exportloc)) f p iv
Left S3HandleNeedCreds -> case getPublicUrlMaker info of
Just geturl -> either giveup return =<<
withUrlOptions Nothing
@@ -728,7 +760,7 @@ retrieveExportWithContentIdentifierS3 hv r rs info loc (cid:_) dest gk p =
where
go iv = withS3Handle hv $ \case
Right h -> do
- rewritePreconditionException $ retrieveHelper' h dest p iv $
+ rewritePreconditionException $ retrieveHelper' (gitconfig r) info h dest p iv $
limitGetToContentIdentifier cid $
S3.getObject (bucket info) o
k <- either return id gk
@@ -1036,6 +1068,7 @@ data S3Info = S3Info
, partSize :: Maybe Integer
, isIA :: Bool
, versioning :: Bool
+ , restore :: Bool
, publicACL :: Bool
, publicurl :: Maybe URLString
, host :: Maybe String
@@ -1060,6 +1093,8 @@ extractS3Info c = do
, isIA = configIA c
, versioning = fromMaybe False $
getRemoteConfigValue versioningField c
+ , restore = fromMaybe False $
+ getRemoteConfigValue restoreField c
, publicACL = fromMaybe False $
getRemoteConfigValue publicField c
, publicurl = getRemoteConfigValue publicurlField c
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index 156b88c32c..81d8201ed3 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -439,6 +439,8 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexTahoe :: Maybe FilePath
, remoteAnnexBupSplitOptions :: [String]
, remoteAnnexDirectory :: Maybe FilePath
+ , remoteAnnexS3RestoreTier :: Maybe String
+ , remoteAnnexS3RestoreDays :: Maybe Integer
, remoteAnnexAndroidDirectory :: Maybe FilePath
, remoteAnnexAndroidSerial :: Maybe String
, remoteAnnexGCrypt :: Maybe String
@@ -541,6 +543,8 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexTahoe = getmaybe TahoeField
, remoteAnnexBupSplitOptions = getoptions BupSplitOptionsField
, remoteAnnexDirectory = notempty $ getmaybe DirectoryField
+ , remoteAnnexS3RestoreTier = notempty $ getmaybe S3RestoreTierField
+ , remoteAnnexS3RestoreDays = getmayberead S3RestoreDaysField
, remoteAnnexAndroidDirectory = notempty $ getmaybe AndroidDirectoryField
, remoteAnnexAndroidSerial = notempty $ getmaybe AndroidSerialField
, remoteAnnexGCrypt = notempty $ getmaybe GCryptField
@@ -625,6 +629,8 @@ data RemoteGitConfigField
| TahoeField
| BupSplitOptionsField
| DirectoryField
+ | S3RestoreTierField
+ | S3RestoreDaysField
| AndroidDirectoryField
| AndroidSerialField
| GCryptField
@@ -697,6 +703,8 @@ remoteGitConfigField = \case
TahoeField -> uninherited True "tahoe"
BupSplitOptionsField -> uninherited True "bup-split-options"
(Diff truncated)
initial
request on making git-annex work without git-annex-shell to get files from remote ssh
diff --git a/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn new file mode 100644 index 0000000000..23f4fac3fd --- /dev/null +++ b/doc/todo/get_from_ssh_git_remote_without_git-annex_install_.mdwn @@ -0,0 +1,26 @@ +I thought I had an issue on this but failed to find :-/ + +ATM git-annex does not even bother to suggest or do anything about a remote git/git-annex repository if there is no git-annex (`git-annex-shell`) available there: + +``` +yoh@typhon:/mnt/DATA/data/dbic/QA$ git annex list + + Unable to parse git config from origin + + Remote origin does not have git-annex installed; setting annex-ignore + + This could be a problem with the git-annex installation on the remote. Please make sure that git-annex-shell is available in PATH when you ssh into the remote. Once you have fixed the git-annex installation, run: git annex enableremote origin +here +|datasets.datalad.org +||origin +|||web +||||bittorrent +||||| +_X___ .datalad/metadata/objects/06/cn-2c3eade47bd2d9052658c6a9d10a57.xz + +... +``` + +a workaround, it seems as [it was posted over a decade ago](https://superuser.com/questions/526705/hosting-a-git-annex-on-a-server-without-git-annex-installed) (and now even google ai suggests that) is to setup an additional `rsync` remote and use it to fetch. upon a quick try didn't work for me but could have been an operator error... + +As files are available over regular ssh/scp and even rsync over ssh - I really do not see a technical problem for git-annex to establish interoperability with such a remote, at least for reading from, without having remote git-annex-shell. That should make it possible to access git-annex'es on servers which might be running some odd setups where installation of git-annex in user-space would be tricky if not impossible.
Added a comment: Q: any way to "install" without having root/admin privileges
diff --git a/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment b/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment new file mode 100644 index 0000000000..44e4abbc83 --- /dev/null +++ b/doc/install/FreeBSD/comment_1_48c712af243119f9a525c55705edc536._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="Q: any way to "install" without having root/admin privileges" + date="2025-12-07T19:38:46Z" + content=""" +need to install on a box where I am not an admin, just to copy the files from it (odd that git-annex can't just get anything it needs since SSH is there and working fine!). +"""]]
Revert "update"
This reverts commit 550c6b482845ec978aa796191c9931fe19dbc369.
This reverts commit 550c6b482845ec978aa796191c9931fe19dbc369.
diff --git a/doc/thanks/list b/doc/thanks/list index 563a0b6b21..dfeda7a813 100644 --- a/doc/thanks/list +++ b/doc/thanks/list @@ -126,99 +126,3 @@ Lilia.Nanne, Dusty Mabe, mpol, Andrew Poelstra, -AlexS, -Amitai Schleier, -Andrew, -anon, -Anthony DeRobertis, -Anton Grensjö, -Art S, -Arthur Lutz, -Ben, -Boyd Stephen Smith, -Bruno BEAUFILS, -Caleb Allen, -Calvin Beck, -Chris Lamb, -Christian Diller, -Christopher Baines, -Christopher Goes, -Dave Pifke, -don haraway, -DuncanConstruction, -encryptio, -Eric Drechsel, -ers35, -Evgeni Ku, -Fernando Jimenez, -fiatjaf, -Francois Marier, -Gabriel Lee, -Greg Grossmeier, -HeartBreak KB Official, -Ignacio, -Ilya Baryshnikov, -James (purpleidea), -James Valleroy, -Jan, -Jason Woofenden, -Jeff Goeke-Smith, -Jim, -Jo, -Johannes Schlatow, -John Peloquin, -Jon D, -jose_d, -Josh Taylor, -Josh Tilles, -Lacnic, -Land Reaver, -Lee Hinman, -Lee-kai Wang, -Lukas Platz, -Lukas Waymann, -Madison McGaffin, -Maggie Hess, -Matthew Willcockson, -Matthias Urlichs, -Matthieu, -Mattias J, -Mica, -Michal Politowski, -Mika Pflüger, -mo, -Mohit Munjani, -Nahum Shalman, -NinjaTrappeur, -Ole-Morten Duesund, -Paul Tötterman, -Pedro Luz, -Peter, -Renaud Casenave-Péré, -rjbl, -Ryan Newton, -Rémi Vanicat, -Sergey Karpukhin, -Shane-o, -Shawn Butts, -Stan Yamane, -Stephan Burkhardt, -Stephan Meister, -SvenDowideit, -sww, -Teremu HAMBLIN, -Thom May, -Thomas Ferris Nicolaisen, -Thomas Hochstein, -Thomas Schwinge, -Tim Howes, -tj, -Trent Lloyd, -Tyler Cipriani, -Valeria_, -Walltime, -wawatcz, -Will Hughes, -Willard Korfhage, -wzhd, -Zoé Cassiopée Gauthier,
update
diff --git a/doc/thanks/list b/doc/thanks/list index dfeda7a813..563a0b6b21 100644 --- a/doc/thanks/list +++ b/doc/thanks/list @@ -126,3 +126,99 @@ Lilia.Nanne, Dusty Mabe, mpol, Andrew Poelstra, +AlexS, +Amitai Schleier, +Andrew, +anon, +Anthony DeRobertis, +Anton Grensjö, +Art S, +Arthur Lutz, +Ben, +Boyd Stephen Smith, +Bruno BEAUFILS, +Caleb Allen, +Calvin Beck, +Chris Lamb, +Christian Diller, +Christopher Baines, +Christopher Goes, +Dave Pifke, +don haraway, +DuncanConstruction, +encryptio, +Eric Drechsel, +ers35, +Evgeni Ku, +Fernando Jimenez, +fiatjaf, +Francois Marier, +Gabriel Lee, +Greg Grossmeier, +HeartBreak KB Official, +Ignacio, +Ilya Baryshnikov, +James (purpleidea), +James Valleroy, +Jan, +Jason Woofenden, +Jeff Goeke-Smith, +Jim, +Jo, +Johannes Schlatow, +John Peloquin, +Jon D, +jose_d, +Josh Taylor, +Josh Tilles, +Lacnic, +Land Reaver, +Lee Hinman, +Lee-kai Wang, +Lukas Platz, +Lukas Waymann, +Madison McGaffin, +Maggie Hess, +Matthew Willcockson, +Matthias Urlichs, +Matthieu, +Mattias J, +Mica, +Michal Politowski, +Mika Pflüger, +mo, +Mohit Munjani, +Nahum Shalman, +NinjaTrappeur, +Ole-Morten Duesund, +Paul Tötterman, +Pedro Luz, +Peter, +Renaud Casenave-Péré, +rjbl, +Ryan Newton, +Rémi Vanicat, +Sergey Karpukhin, +Shane-o, +Shawn Butts, +Stan Yamane, +Stephan Burkhardt, +Stephan Meister, +SvenDowideit, +sww, +Teremu HAMBLIN, +Thom May, +Thomas Ferris Nicolaisen, +Thomas Hochstein, +Thomas Schwinge, +Tim Howes, +tj, +Trent Lloyd, +Tyler Cipriani, +Valeria_, +Walltime, +wawatcz, +Will Hughes, +Willard Korfhage, +wzhd, +Zoé Cassiopée Gauthier,
typo
diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index b8fd29522c..5a1f9fa969 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -463,7 +463,7 @@ The two protocol versions are actually identical. Old versions of git-annex that supported only `VERSION 1` had a bug in their implementation of the part of the protocol documented in the [[export_and_import_appendix]]. -The bug could result in ontent being exported to the wrong file. +The bug could result in content being exported to the wrong file. External special remotes that implement that should use `VERSION 2` to avoid talking to the buggy old version of git-annex.
Added TRANSFER-RETRIEVE-URL extension to the external special remote protocol
Since retrieveKeyFileM has to use fileRetriever before it sees this
response, which uses tailVerify, it's unfortunately not possible to
stream the url download to do incremental verification. That would be
more efficient.
Similarly, watchFileSize does some extra work, but the progress meter is
updated as the content streams in.
The downloadFailed case is never reached I think, since the url list
provided to downloadUrl' is not empty.
Sponsored-by: Dartmouth College's OpenNeuro project
Since retrieveKeyFileM has to use fileRetriever before it sees this
response, which uses tailVerify, it's unfortunately not possible to
stream the url download to do incremental verification. That would be
more efficient.
Similarly, watchFileSize does some extra work, but the progress meter is
updated as the content streams in.
The downloadFailed case is never reached I think, since the url list
provided to downloadUrl' is not empty.
Sponsored-by: Dartmouth College's OpenNeuro project
diff --git a/Annex/Content.hs b/Annex/Content.hs
index e10329d8c2..edb1052d52 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -1,6 +1,6 @@
{- git-annex file content managing
-
- - Copyright 2010-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -47,6 +47,7 @@ module Annex.Content (
listKeys',
saveState,
downloadUrl,
+ downloadUrl',
preseedTmp,
dirKeys,
withObjectLoc,
@@ -881,13 +882,21 @@ saveState nocommit = doSideAction $ do
- that failed.
-}
downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> OsPath -> Url.UrlOptions -> Annex Bool
-downloadUrl listfailedurls k p iv urls file uo =
+downloadUrl listfailedurls k p iv urls file uo =
+ downloadUrl' listfailedurls k p iv urls file uo >>= \case
+ Right r -> return r
+ Left e -> do
+ warning $ UnquotedString e
+ return False
+
+downloadUrl' :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> OsPath -> Url.UrlOptions -> Annex (Either String Bool)
+downloadUrl' listfailedurls k p iv urls file uo =
-- Poll the file to handle configurations where an external
-- download command is used.
meteredFile file (Just p) k (go urls [])
where
go (u:us) errs p' = Url.download' p' iv u file uo >>= \case
- Right () -> return True
+ Right () -> return (Right True)
Left err -> do
-- If the incremental verifier was fed anything
-- while the download that failed ran, it's unable
@@ -899,14 +908,12 @@ downloadUrl listfailedurls k p iv urls file uo =
_ -> noop
Nothing -> noop
go us ((u, err) : errs) p'
- go [] [] _ = return False
- go [] errs@((_, err):_) _ = do
+ go [] [] _ = return (Right False)
+ go [] errs@((_, err):_) _ = return $ Left $
if listfailedurls
- then warning $ UnquotedString $
- unlines $ flip map errs $ \(u, err') ->
- u ++ " " ++ err'
- else warning $ UnquotedString err
- return False
+ then unlines $ flip map errs $ \(u, err') ->
+ u ++ " " ++ err'
+ else err
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}
diff --git a/CHANGELOG b/CHANGELOG
index 018efd7a3d..ac29f65722 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,8 @@ git-annex (10.20251118) UNRELEASED; urgency=medium
* Add a build warning when the version of aws being built against is
too old. 0.25.1 is needed to support Google Cloud Storage.
* stack.yaml: Use aws-0.25.1.
+ * Added TRANSFER-RETRIEVE-URL extension to the external special remote
+ protocol.
-- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
diff --git a/Remote/External.hs b/Remote/External.hs
index c392b3f31e..07d1272f24 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -1,6 +1,6 @@
{- External special remote interface.
-
- - Copyright 2013-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -93,7 +93,7 @@ gen rt externalprogram r u rc gc rs
let exportactions = if exportsupported
then ExportActions
{ storeExport = storeExportM external
- , retrieveExport = retrieveExportM external
+ , retrieveExport = retrieveExportM external gc
, removeExport = removeExportM external
, checkPresentExport = checkPresentExportM external
, removeExportDirectory = Just $ removeExportDirectoryM external
@@ -116,7 +116,7 @@ gen rt externalprogram r u rc gc rs
cheapexportsupported
return $ Just $ specialRemote c
(storeKeyM external)
- (retrieveKeyFileM external)
+ (retrieveKeyFileM external gc)
(removeKeyM external)
(checkPresentM external)
rmt
@@ -248,17 +248,19 @@ storeKeyM external = fileStorer $ \k f p ->
result (Left (respErrorMessage "TRANSFER" errmsg))
_ -> Nothing
-retrieveKeyFileM :: External -> Retriever
-retrieveKeyFileM external = fileRetriever $ \d k p ->
- either giveup return =<< watchFileSize d p (go d k)
+retrieveKeyFileM :: External -> RemoteGitConfig -> Retriever
+retrieveKeyFileM external gc = fileRetriever $ \dest k p ->
+ either giveup return =<< watchFileSize dest p (go dest k)
where
- go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromOsPath d)) k (Just p) $ \resp ->
+ go dest k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromOsPath dest)) k (Just p) $ \resp ->
case resp of
TRANSFER_SUCCESS Download k'
| k == k' -> result $ Right ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> result $ Left $
respErrorMessage "TRANSFER" errmsg
+ TRANSFER_RETRIEVE_URL k' url
+ | k == k' -> retrieveUrl' gc url dest k p
_ -> Nothing
removeKeyM :: External -> Remover
@@ -306,8 +308,8 @@ storeExportM external f k loc p = either giveup return =<< go
_ -> Nothing
req sk = TRANSFEREXPORT Upload sk (fromOsPath f)
-retrieveExportM :: External -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
-retrieveExportM external k loc dest p = do
+retrieveExportM :: External -> RemoteGitConfig -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
+retrieveExportM external gc k loc dest p = do
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv dest $
either giveup return =<< go
@@ -317,6 +319,8 @@ retrieveExportM external k loc dest p = do
| k == k' -> result $ Right ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> result $ Left $ respErrorMessage "TRANSFER" errmsg
+ TRANSFER_RETRIEVE_URL k' url
+ | k == k' -> retrieveUrl' gc url dest k p
UNSUPPORTED_REQUEST ->
result $ Left "TRANSFEREXPORT not implemented by external special remote"
_ -> Nothing
@@ -838,7 +842,18 @@ retrieveUrl :: RemoteGitConfig -> Retriever
retrieveUrl gc = fileRetriever' $ \f k p iv -> do
us <- getWebUrls k
unlessM (withUrlOptions (Just gc) $ downloadUrl True k p iv us f) $
- giveup "failed to download content"
+ giveup downloadFailed
+
+retrieveUrl' :: RemoteGitConfig -> URLString -> OsPath -> Key -> MeterUpdate -> Maybe (Annex (ResponseHandlerResult (Either String ())))
+retrieveUrl' gc url dest k p =
+ Just $ withUrlOptions (Just gc) $ \uo ->
+ downloadUrl' False k p Nothing [url] dest uo >>= return . \case
+ Left msg -> Result (Left msg)
+ Right True -> Result (Right ())
+ Right False -> Result (Left downloadFailed)
+
+downloadFailed :: String
+downloadFailed = "failed to download content"
checkKeyUrl :: RemoteGitConfig -> CheckPresent
checkKeyUrl gc k = do
diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs
index 58bbc9f656..f265d4a1bd 100644
--- a/Remote/External/Types.hs
+++ b/Remote/External/Types.hs
@@ -1,6 +1,6 @@
{- External special remote data types.
-
- - Copyright 2013-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -116,6 +116,7 @@ supportedExtensionList = ExtensionList
[ "INFO"
, "GETGITREMOTENAME"
, "UNAVAILABLERESPONSE"
+ , "TRANSFER-RETRIEVE-URL"
, asyncExtension
]
@@ -243,6 +244,7 @@ data Response
| PREPARE_FAILURE ErrorMsg
| TRANSFER_SUCCESS Direction Key
| TRANSFER_FAILURE Direction Key ErrorMsg
+ | TRANSFER_RETRIEVE_URL Key URLString
| CHECKPRESENT_SUCCESS Key
| CHECKPRESENT_FAILURE Key
| CHECKPRESENT_UNKNOWN Key ErrorMsg
@@ -281,6 +283,7 @@ instance Proto.Receivable Response where
(Diff truncated)
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment new file mode 100644 index 0000000000..89c69c47e5 --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_3_1c262c9459373bff638c87d838446ed5._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-05T17:23:24Z" + content=""" +TRANSFEREXPORT, in the "simple export interface" also +uses TRANSFER-SUCCESS/TRANSFER-FAILURE, and should +also support this extension. +"""]]
remove incorrect comment
tailVerify is already used so it does not re-read at end.
I don't think it will be possible to avoid using tailVerify with this
extension since it's already started by the time the response comes back.
tailVerify is already used so it does not re-read at end.
I don't think it will be possible to avoid using tailVerify with this
extension since it's already started by the time the response comes back.
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment deleted file mode 100644 index b3a27d4c7e..0000000000 --- a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment +++ /dev/null @@ -1,9 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 3""" - date="2025-12-05T16:58:21Z" - content=""" -An added benefit of this will be that git-annex can stream hash -while downloading, so it will avoid re-reading the file at the end to -verifiy it. -"""]]
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment new file mode 100644 index 0000000000..b3a27d4c7e --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_3_59842c14fecfadd4838a6832f291bcc5._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-05T16:58:21Z" + content=""" +An added benefit of this will be that git-annex can stream hash +while downloading, so it will avoid re-reading the file at the end to +verifiy it. +"""]]
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment b/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment new file mode 100644 index 0000000000..f0258a67d2 --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_2_31c4540e06d80021d8be57393c3fb817._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-05T16:35:05Z" + content=""" +One problem with this design is that there may be HTTP headers that are +used for authorization, rather than putting authentication in the url. + +I think we may have talked about this at the hackfest, and came down on the +side of simplicity, supporting only an url. Can't quite remember. + +It might also be possible to redirect to an url when storing an object. + +I think that protocol design should leave these possibilities open to be +implemented later. +"""]]
comment
diff --git a/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment new file mode 100644 index 0000000000..291ca824cd --- /dev/null +++ b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_/comment_1_6536200f3ff5e076f028eef77660bae3._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-05T16:31:47Z" + content=""" +Is the assistant running in the local repository, or on the remote? +"""]]
comment
diff --git a/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_2_9ad59f18725cd45d3ad2261f1c92f694._comment b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_2_9ad59f18725cd45d3ad2261f1c92f694._comment new file mode 100644 index 0000000000..149bf4feb2 --- /dev/null +++ b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_2_9ad59f18725cd45d3ad2261f1c92f694._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-05T16:29:39Z" + content=""" +Probably this. In any case, it's better to upgrade before filing a bug on something like this. + + git-annex (8.20211123) upstream; urgency=medium + + * Bugfix: When -J was enabled, getting files could leak an + ever-growing number of git cat-file processes. +"""]]
issue about unstaged files on push
diff --git a/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_.mdwn b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_.mdwn new file mode 100644 index 0000000000..c09c42564f --- /dev/null +++ b/doc/bugs/__34__assisted__34___refuses_to_push__58___unstaged_changes_.mdwn @@ -0,0 +1,42 @@ +### Please describe the problem. + +This is a repo under `git-annex assistant` control and was committing and pushing fine but then stopped... there were many other oddities (some files were `git rm`ed and never committed back etc), but now it is at the level of git on remote I guess, so likely going through `git-annex filter-process` -- claims to have unstaged changes when `git status` says none: + + +```shell +reprostim@reproiner:/data/reprostim$ git push typhon +Enumerating objects: 15079, done. +Counting objects: 100% (10929/10929), done. +Delta compression using up to 4 threads +Compressing objects: 100% (6763/6763), done. +Writing objects: 100% (6764/6764), 816.02 KiB | 16.00 MiB/s, done. +Total 6764 (delta 64), reused 6388 (delta 1), pack-reused 0 +remote: Resolving deltas: 100% (64/64), completed with 30 local objects. +To typhon:/data/repronim/dbic-reproflow/sourcedata/reprostim-reproiner + ! [remote rejected] master -> master (Working directory has unstaged changes) +error: failed to push some refs to 'typhon:/data/repronim/dbic-reproflow/sourcedata/reprostim-reproiner' +reprostim@reproiner:/data/reprostim$ ssh typhon git -C /data/repronim/dbic-reproflow/sourcedata/reprostim-reproiner status +On branch master +Your branch is ahead of 'reproiner/master' by 2906 commits. + (use "git push" to publish your local commits) + +nothing to commit, working tree clean + +``` + +how to troubleshoot in the future? + +workaround done: pushed as another branch which I then just "merged" (was fast-forward) locally (on typhon). + + + +### What version of git-annex are you using? On what operating system? 
+ + +``` +reprostim@reproiner:/data/reprostim$ git annex version | head -n 1 +git-annex version: 10.20251114-1~ndall+1 +reprostim@reproiner:/data/reprostim$ ssh typhon git annex version | head -n 1 +git-annex version: 10.20250416 + +```
Added a comment
diff --git a/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_1_014ab514d290dfaa729cce64138d7be0._comment b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_1_014ab514d290dfaa729cce64138d7be0._comment new file mode 100644 index 0000000000..0bc19cfebf --- /dev/null +++ b/doc/bugs/does_not_clean_sub_processes_until_freeze/comment_1_014ab514d290dfaa729cce64138d7be0._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="octvs" + avatar="http://cdn.libravatar.org/avatar/af90c1c7c98f0b5ed446a272685f29ab" + subject="comment 1" + date="2025-12-04T17:17:09Z" + content=""" +I'm currently testing it with version `10.20251114-....`. I will update/close issue according to the result. +"""]]
diff --git a/doc/bugs/does_not_clean_sub_processes_until_freeze.mdwn b/doc/bugs/does_not_clean_sub_processes_until_freeze.mdwn new file mode 100644 index 0000000000..3291e35910 --- /dev/null +++ b/doc/bugs/does_not_clean_sub_processes_until_freeze.mdwn @@ -0,0 +1,65 @@ +### Please describe the problem. + +Whenever I leave git-annex to do a long operation on a remote server, I came +back to its eventual death. I leave a `git-annex get ... -J 4` on a tmux at a +server it eventually freezes, so I tend to rely on my machine and +short/repetitive operations to be able to use git-annex. Before I thought the +parallelization might be the issue so I reduced it to 4 from 8/16/32, but it +didn't resolve the problem. + +This last time I wanted to investigate and I found out a lot of subprocess +hanging around. + +```sh +$ ps -u <me> | grep git +2460230 pts31 00:00:00 git <defunct> +2460231 pts31 00:00:00 git <defunct> +2460232 pts31 00:00:00 git <defunct> +2460233 pts31 00:00:00 git <defunct> +2460234 pts31 00:00:00 git <defunct> +2460235 pts31 00:00:00 git <defunct> +2460237 pts31 00:00:00 git <defunct> +... +$ ps -u <me> | grep git | wc -l +13193 +``` + +Which explains why lowering parallel jobs didn't resolve but delay the eventual +freeze. + +On this setup the version is old which might be the culprit. Although I can try +to get an updated version (via binary or conda) on my path, I would still like +to identify the problem. It also happened on another server setup which had a +relatively newer version but since I have no access to there anymore, I can't +provide more information on that (yet). + +### What steps will reproduce the problem? + +Run `$ git annex get * -J 4` and wait for eventual freeze + +### What version of git-annex are you using? On what operating system? 
+ +git-annex version: 8.20210223 +build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV +dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.3.0.1 ghc-8.8.4 http-client-0.6.4.1 persistent-sqlite-2.10.6.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.1.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X* +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external +operating system: linux x86_64 +supported repository versions: 8 +upgrade supported from repository versions: 0 1 2 3 4 5 6 7 + +### Please provide any additional information below. + +[[!format sh """ +# If you can, paste a complete transcript of the problem occurring here. +# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log + + +# End of transcript or log. +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +I use it for all kind of data I have both in private and at work. Amazing +piece of software. I'm sure my colleagues/IT are annoyed of me plugging it to +every possible discussion.
comment
diff --git a/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_3_261106ade035ed69fc6c9ee05db1eb48._comment b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_3_261106ade035ed69fc6c9ee05db1eb48._comment new file mode 100644 index 0000000000..6b2ff15ca4 --- /dev/null +++ b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_3_261106ade035ed69fc6c9ee05db1eb48._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-03T19:08:42Z" + content=""" +With the separate autoenabled remote for PRs, the UX could look like this: + + > git-annex add myfile + add myfile ok + > git commit -m foo + > git push origin HEAD:refs/for/main -o topic="add myfile" + > git-annex push origin-PRs + copy myfile (to origin-PRs) ... ok + +Or with a small git-annex improvement, even: + + > git-annex assist -o topic="add myfile" + add myfile ok + copy myfile (to origin-PRs) ... ok + +For this, origin-PRs would want all files not in origin, +and origin would want all files not in origin-PRs. And origin-PRs would +need to have a lower cost than origin so that it doesn't first try, and +fail, to copy the file to origin. +"""]]
fix
diff --git a/doc/projects/FZJ.mdwn b/doc/projects/FZJ.mdwn index d076a07189..1ef8eb3c27 100644 --- a/doc/projects/FZJ.mdwn +++ b/doc/projects/FZJ.mdwn @@ -35,5 +35,5 @@ Bugs Forum posts =========== -[[!inline pages="forum/* (tagged(projects/INM7) or tagged(projects/ICE4))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist template=buglist]] +[[!inline pages="forum/* and (tagged(projects/INM7) or tagged(projects/ICE4))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist template=buglist]]
add tagged forum posts since we have one
diff --git a/doc/projects/FZJ.mdwn b/doc/projects/FZJ.mdwn index c2953d4600..d076a07189 100644 --- a/doc/projects/FZJ.mdwn +++ b/doc/projects/FZJ.mdwn @@ -31,3 +31,9 @@ Bugs (tagged(projects/INM7) or tagged(projects/ICE4))" feeds=no actions=yes archive=yes show=0 template=buglist]] </details> + +Forum posts +=========== + +[[!inline pages="forum/* (tagged(projects/INM7) or tagged(projects/ICE4))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist template=buglist]] +
comments
diff --git a/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_1_6740265d7182747ca1a9e6abbc0bd62b._comment b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_1_6740265d7182747ca1a9e6abbc0bd62b._comment new file mode 100644 index 0000000000..53f2f69329 --- /dev/null +++ b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_1_6740265d7182747ca1a9e6abbc0bd62b._comment @@ -0,0 +1,56 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-03T17:54:21Z" + content=""" +> Obviously annexed objects copied to the Forgejo-aneksajo instance +> via this path should only be available in the context of that PR in some way. +> +> The fundamental issue seems to be that annexed objects always belong to the entire repository, and are not scoped to any branch. + +Hmm.. git objects also don't really belong to any particular branch. +git only fetches objects referenced by the branches you clone. + +Similarly, git-annex can only ever get annex objects that are listed +in the git-annex branch. Even with `--all`, it will not know about objects +not listed there. + +So, seems to me you may only need to keep the PR's git-annex branch separate from +the main git-annex branch, so that the main git-annex branch does not list +objects from the PR. I see two problems that would need to be solved to do +that: + +1. If git-annex is able to see the PR's git-annex branch as eg + (refs/foo/git-annex), it will auto-merge it into the main git-annex branch, and + then --all will operate on objects from the PR as well. So the PR's + git-annex branch would need to be named to avoid that. + + This could be just `git push origin git-annex:refs/for/git-annex/topic-branch` + Maybe `git-annex sync` could be made to support that for its pushes? + +2. When git-annex receives an object into the repository, the receiving side + updates the git-annex branch to indicate it now has a copy of that object. 
So, + you would need a way to make objects sent to a PR update the PR's git-annex branch, + rather than the main git-annex branch. + + This could be something similar to `git push -o topic` in + git-annex. Which would need to be a P2P protocol extension. Or maybe + some trick with the repository UUID? + +When the PR is merged, you would then also merge its git-annex branch. + +If the PR is instead rejected, and you want to delete the objects +associated with it, you would first delete the PR's other branches, and +then run `git-annex unused`, arranging (how?) for it to see only the PR's +git-annex branch and not any other git-annex branches. That would find any +objects that were sent as part of the PR, that don't also happen to be used +in other branches (including other PRs). + +---- + +I do wonder, if this were implemeted, would the git-annex +workflow for the user be any better than if there were a per-PR +remote for them to use? If every git-annex command that pushes the +git-annex branch or sends objects to forjejo needs `-o topic` +to be given, then it might be a worse user experience. +"""]] diff --git a/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_2_bb6e73189471420640ff563530d79253._comment b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_2_bb6e73189471420640ff563530d79253._comment new file mode 100644 index 0000000000..c4049195e9 --- /dev/null +++ b/doc/forum/Git-annex_in___34__AGit-Flow__34__/comment_2_bb6e73189471420640ff563530d79253._comment @@ -0,0 +1,36 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-03T18:44:56Z" + content=""" +> A per-user special remote that is assumed to contain the annexed files for all of the users AGit-PRs. If git recognizes remote +> configs in the users' global git config then it could be possible to get away with configuring things once, but I am not sure of the +> behavior of git in that case. 
+ +I think git will do that (have not checked), but a special remote needs +information to be written to the git-annex branch, not just git config, +so there's no way to globally configure a special remote to be accessible +in every git-annex repository. + +Along similar lines, forgejo could set up an autoenabled remote +that contains annexed files for all AGit-PRs, and that wants any files +not in the main git repository. (This could be a special remote, or a +git-annex repository that just doesn't allow any ref pushes to it. The +latter might be easier to deal with since `git-annex p2phttp` could serve +it as just another git-annex repository.) + +That would solve the second problem I discussed in the comment above, +because when the user copies objects to that separate remote, it will not +cause git-annex in the forgejo repository to update the main git-annex +branch to list those objects. + +When merging a PR, forgejo would move the objects over from that remote +to the main git repository. + +You would be left with a bit of an problem in deleting objects from that +remote when a PR is rejected. Since the user may never have pushed +their git-annex branch after sending an object to it, +and so you would not know what PR that object belongs to. I suppose this +could be handled by finding all objects that are in active PRs and deleting +ones that are not after some amount of time. +"""]]
close
diff --git a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn index ae708bb1b3..e2ab131f7b 100644 --- a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn +++ b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn @@ -51,3 +51,6 @@ rsync exited 23 git-annex solves a lot of problems for both work and privately. I use it on a daily basis since a decade, and it is just great and very reliable. Great software, please keep up developing and maintaining it. + +> [[closing|done]] as this must have been a bug in the older version. +> --[[Joey]]
remove incorrect comment
Re-sending a restore request while a restore is ongoing does not change
the restoration period.
Re-sending a restore request while a restore is ongoing does not change
the restoration period.
diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment deleted file mode 100644 index a0581ba9e8..0000000000 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment +++ /dev/null @@ -1,12 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 3""" - date="2025-12-03T17:28:28Z" - content=""" -If one `git-annex get` starts a restore, then a while later, -but before the restore is done, `git-annex get` is run again, -sending RestoreObject again will change the lifetime of the restored -object. This seems like something to at least warn users about, -since it could cost money to leave restored objects in the S3 bucket -longer than necessary. -"""]]
update
diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn index 10c1beae5f..8f671c2943 100644 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn @@ -6,4 +6,4 @@ Basically, the files moved by AWS from S3 to Glacier are not available under the I suppose DELETE needs special care as well. -> [[meta title="wishlist: Restore s3 files moved to Glacier or Deep Archive"]] +[[!meta title="wishlist: Restore s3 files moved to Glacier or Deep Archive"]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment index f11f584e75..be2c97df2e 100644 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment @@ -13,10 +13,19 @@ to move objects to deep archive, git-annex won't be able to retrieve objects stored in deep archive. To support that, the S3 special remote would need to send a request to S3 to -RESTORE an object from deep archive. Then later (on a subsequent `git-annex` run) -GET the object from S3. +restore an object from deep archive. Then later (on a subsequent `git-annex` run) +it can download the object from S3. -And then after getting the object, it would be left in the S3 bucket rather -than in deep archive, so something would need to deal with that to move it -back into deep archive. Either git-annex could do it, or a lifecycle rule could. +This is the API: +<https://docs.aws.amazon.com/AmazonS3/latest/API/API_RestoreObject.html> + +It includes a Tier tag which controls whether the restore is +expedited. 
There would probably need to be a git config for that, since +the user may want to get a file fast or pay less for a slower retrieval. + +And there is a Days tag, which controls how long the object should be left +accessible in S3. This would also make sense to have a git config. + +I have opened this issue, which is a prerequisite to implementing this +<https://github.com/aristidb/aws/issues/297> """]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment new file mode 100644 index 0000000000..a0581ba9e8 --- /dev/null +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_3_3b5722a679268a46c0e6ffd004a25821._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-03T17:28:28Z" + content=""" +If one `git-annex get` starts a restore, then a while later, +but before the restore is done, `git-annex get` is run again, +sending RestoreObject again will change the lifetime of the restored +object. This seems like something to at least warn users about, +since it could cost money to leave restored objects in the S3 bucket +longer than necessary. +"""]]
fix link
diff --git a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment index 66d9ae9427..5c435b0e85 100644 --- a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment +++ b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment @@ -6,6 +6,6 @@ I don't think `storageclass=DEEP_ARCHIVE` will currently work, git-annex is not able to request that the object be restored. -See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier]] +See <https://git-annex.branchable.com/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/> for a todo which would solve this. """]]
link
diff --git a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment index 766263cb59..66d9ae9427 100644 --- a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment +++ b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment @@ -6,6 +6,6 @@ I don't think `storageclass=DEEP_ARCHIVE` will currently work, git-annex is not able to request that the object be restored. -See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier/]] +See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier]] for a todo which would solve this. """]]
comments
diff --git a/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment new file mode 100644 index 0000000000..766263cb59 --- /dev/null +++ b/doc/forum/Does_DEEP__95__ARCHIVE_replace_glacier_special_remote__63__/comment_1_a7012b4751f5def04101a7bf33e3daee._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-12-03T16:58:52Z" + content=""" +I don't think `storageclass=DEEP_ARCHIVE` will currently work, +git-annex is not able to request that the object be restored. + +See [[todo/wishlist__58___Restore_s3_files_moved_to_Glacier/]] +for a todo which would solve this. +"""]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn index 85fc2785c4..10c1beae5f 100644 --- a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier.mdwn @@ -5,3 +5,5 @@ This is different from the adding a Glacier remote to git annex because of the r Basically, the files moved by AWS from S3 to Glacier are not available under the normal Glacier API. In fact, the moved S3 files are listed as available but under the `GLACIER` storage class and need a RESTORE request before they can be GET like other S3 files. Trying to GET an S3 file that has been moved to Glacier will not restore it from Glacier and will result in an 403 error. I suppose DELETE needs special care as well. 
+ +> [[meta title="wishlist: Restore s3 files moved to Glacier or Deep Archive"]] diff --git a/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment new file mode 100644 index 0000000000..f11f584e75 --- /dev/null +++ b/doc/todo/wishlist__58___Restore_s3_files_moved_to_Glacier/comment_2_2b40301c4f2f85877a7eedb226e7407d._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-03T17:01:50Z" + content=""" +Glacier is in the process of being deprecated, instead there is +the Deep Archive S3 storage class. +<https://aws.amazon.com/blogs/aws/new-amazon-s3-storage-class-glacier-deep-archive/> + +While it is possible to configure a S3 special remote +with `storageclass=DEEP_ARCHIVE`, or configure a bucket with lifecycle rules +to move objects to deep archive, git-annex won't be able to retrieve objects +stored in deep archive. + +To support that, the S3 special remote would need to send a request to S3 to +RESTORE an object from deep archive. Then later (on a subsequent `git-annex` run) +GET the object from S3. + +And then after getting the object, it would be left in the S3 bucket rather +than in deep archive, so something would need to deal with that to move it +back into deep archive. Either git-annex could do it, or a lifecycle rule could. +"""]]
comments
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_2_c89990dc457eaa0f09c68482f17f77eb._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_2_c89990dc457eaa0f09c68482f17f77eb._comment new file mode 100644 index 0000000000..3afae49b58 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_2_c89990dc457eaa0f09c68482f17f77eb._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-12-03T16:16:14Z" + content=""" +Are the `000675/draft/` files you show it importing the ones that are +access restricted? + +And when you replicated the problem from the backup, were you using it in +the configuration where it cannot access those? + +I notice that all the files affected seem to be probably smallish text +files (yaml, jsonld). Do you have annex.largefiles configured in this +repository, and are all of the affected files non-annexed files? +If so, it would be worth retrying from the backup with the config changed +so those files get annexed and see if that avoids the problem. +"""]] diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_3_b36a8529cafbb0a4020af5723cd8eda9._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_3_b36a8529cafbb0a4020af5723cd8eda9._comment new file mode 100644 index 0000000000..0403c5c9b0 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_3_b36a8529cafbb0a4020af5723cd8eda9._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-12-03T16:37:11Z" + content=""" +> please advise on how to mitigate (`git reset --hard` the `s3-dandiarchive/master` to prior state before yesterday and reimport with newer git-annex or ... ?) 
+ +Simply resetting the remote tracking branch and re-importing won't cause an +import to necessarily happen again. This is because git-annex tracks +internally what has been imported from the remote. Running an import again +when it's already imported files won't re-download those same files. +And it will regenerate the same remote tracking branch. + +So running in a clone from a backup is a better way to re-run the import. +"""]]
Added a comment
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_1_3545d2b6408f21a1ad6c5ff3c0255d57._comment b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_1_3545d2b6408f21a1ad6c5ff3c0255d57._comment new file mode 100644 index 0000000000..54695f9577 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs/comment_1_3545d2b6408f21a1ad6c5ff3c0255d57._comment @@ -0,0 +1,35 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 1" + date="2025-12-02T19:22:10Z" + content=""" +I have now tried with most recent release 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 while operating on a copy from the backup. + +and looking at the fact that it starts with the latter, likely the \"access restricted ones\" + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ ( source .git/secrets.env; git-annex import master --from s3-dandiarchive && git merge s3-dandiarchive/master ) +list s3-dandiarchive ok +import s3-dandiarchive 000675/draft/assets.jsonld +ok +import s3-dandiarchive 000675/draft/assets.yaml +ok +... +``` + +while still making commits to earlier folders + +``` +(venv-annex) dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git log --stat s3-dandiarchive/master -- 000029/draft || echo $? +fatal: unable to read f7c097994e60c2b58dae464633583b65a6691415 +commit ce60e6d1 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Dec 02 14:16:10 -0500 + + import from s3-dandiarchive + +128 +``` +I suspect it just somehow \"manufactures\" them for public ones without fetching their keys? +"""]]
broken git with import from s3
diff --git a/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn new file mode 100644 index 0000000000..938d68ba57 --- /dev/null +++ b/doc/bugs/s3_imported_branch_is___34__git_buggy__34____58____bad_blobs.mdwn @@ -0,0 +1,117 @@ +### Please describe the problem. + +we have dandiarchive s3 bucket with versioning turned on. Currently, after I changed signature from anonymous and added region it looks like + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git show git-annex:remote.log +09b87154-c650-46d1-a036-6e03c56c0b1a bucket=dandiarchive datacenter=US encryption=none fileprefix=dandisets/ host=s3.amazonaws.com importtree=yes name=s3-dandiarchive port=80 publicurl=https://dandiarchive.s3.amazonaws.com/ region=us-east-2 signature=v4 storageclass=STANDARD type=S3 timestamp=1764626152s +``` + +Bucket has ["trailing delete"](https://github.com/dandi/dandi-archive/blob/master/doc/design/s3-trailing-delete.md) enabled since awhile (years). + +Originally it was all open and we were importing on cron, the last merge was + +``` +Date: 2025 Aug 27 21:23:09 -0400 + + Merge remote-tracking branch 's3-dandiarchive/master' +``` + +Recently-ish (sep/oct) policy got updated so some keys on s3 became protected and require authentication. We had a good number of failing due to 403 runs, including ones where I already specified AWS credentials but still had `signature=anonymous` and no region specified. Then (yesterday) I specified signature to be v4, had a run where it complained about region needing to be us-east-2 instead of us-east-1 (not sure why could not deduce automagically), so I specified it too. And then the `import` run seem to proceeded fine! 
+ +But `git merge` then failed: + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git merge s3-dandiarchive/master +error: unable to read sha1 file of 000029/draft/dandiset.jsonld (f7c097994e60c2b58dae464633583b65a6691415) +error: unable to read sha1 file of 000029/draft/dandiset.yaml (1fa7abf602b540507c1a31e20da3d687e83ebfe6) +error: unable to read sha1 file of 000338/draft/assets.jsonld (4ad13ca757df0b39f2c20af47e5d3c9140ccfc7b) +error: unable to read sha1 file of 000338/draft/assets.yaml (08cca54d889faffc76c7911f5c700eb09c22e628) +error: unable to read sha1 file of 000338/draft/collection.jsonld (cf60b31aca7826a8d4993828e439af1f808cb17e) +... +``` + +and `git fsck` fails loudly with many blobs missing etc + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ head .duct/logs/2025.12.02T08.19.22-3737239_stdout +broken link from tree 8c233f531c125ef0edbba48300d7c2ca914c1dac + to blob 513d0a3ba28460f1c7db74b2f4b4905a9942d903 +broken link from tree 8c233f531c125ef0edbba48300d7c2ca914c1dac + to blob 2d3e42dc7935b136141f81f3113a6eac247aa570 +broken link from tree 8c233f531c125ef0edbba48300d7c2ca914c1dac + to blob e88e9ef106f8c7cdce43378079416ab353593335 +... 
+``` +and also similar errors while trying to git log a sample file there: + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git log s3-dandiarchive/master -- 000029/draft/dandiset.jsonld +commit 2fc1ff12 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Dec 01 16:56:17 -0500 + + import from s3-dandiarchive + +commit 65c4ea5b +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Apr 24 16:23:07 -0400 + + import from s3-dandiarchive + +commit 832893d3 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Apr 24 13:21:10 -0400 + + import from s3-dandiarchive +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git log -p s3-dandiarchive/master -- 000029/draft/dandiset.jsonld +fatal: unable to read f7c097994e60c2b58dae464633583b65a6691415 +commit 2fc1ff12 +Author: DANDI Team <team@dandiarchive.org> +Date: 2025 Dec 01 16:56:17 -0500 + + import from s3-dandiarchive +``` + +as the fail on the recently imported version, suggests that it is git-annex not importing correctly somehow? + +I believe this was done with this version: + +``` +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ source ~/git-annexes/static-10.20250416.sh +dandi@drogon:/mnt/backup/dandi/dandiset-manifests$ git annex version | head +git-annex version: 10.20250416-static1 +build flags: Pairing DBus DesktopNotify TorrentParser MagicMime Servant Benchmark Feeds Testsuite S3 WebDAV +dependency versions: aws-0.24.4 bloomfilter-2.0.1.2 crypton-1.0.4 DAV-1.3.4 feed-1.3.2.1 ghc-9.8.4 http-client-0.7.19 persistent-sqlite-2.13.3.0 torrent-10000.1.3 uuid-1.3.16 +... +``` + +please advise on how to mitigate (`git reset --hard` the `s3-dandiarchive/master` to prior state before yesterday and reimport with newer git-annex or ... ?) + +[[!meta author=yoh]] +[[!tag projects/dandi]] + + + + +Originally all keys in the bucket + +### What steps will reproduce the problem? + + +### What version of git-annex are you using? On what operating system? 
+ + +### Please provide any additional information below. + +[[!format sh """ +# If you can, paste a complete transcript of the problem occurring here. +# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log + + +# End of transcript or log. +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +
Added a comment: git annex and starship 2
diff --git a/doc/bugs/git-annex__58_____60__stdout__62____58___hPutBuf__58___resource_vanished/comment_4_b1e20a50159d3dceb1397e6ae57cd241._comment b/doc/bugs/git-annex__58_____60__stdout__62____58___hPutBuf__58___resource_vanished/comment_4_b1e20a50159d3dceb1397e6ae57cd241._comment new file mode 100644 index 0000000000..84761e8c3e --- /dev/null +++ b/doc/bugs/git-annex__58_____60__stdout__62____58___hPutBuf__58___resource_vanished/comment_4_b1e20a50159d3dceb1397e6ae57cd241._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="caribou" + avatar="http://cdn.libravatar.org/avatar/914e725d4ec32ad668501d14ade2e52f" + subject="git annex and starship 2" + date="2025-12-02T10:16:41Z" + content=""" +Hello, +Thank you for the pointers from the previous comments it seems to be indeed connected to starship in my case also. +However for me, increasing the timeout up to 2000ms wasn't enough. The workaround that worked for me was to ignore_submodules... + +[git_status] +ignore_submodules = true +"""]]
scaling
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 7982bf19f4..f17dc5e749 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -37,7 +37,7 @@ contributed good bug reports and great ideas. <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> <img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=150> -<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=75> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=70> git-annex development is supported in large part by:
scaling
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 47e4baaea2..7982bf19f4 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -37,7 +37,7 @@ contributed good bug reports and great ideas. <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> <img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=150> -<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=50> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=75> git-annex development is supported in large part by:
scaling
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 2d3d8dd444..47e4baaea2 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -36,8 +36,8 @@ contributed good bug reports and great ideas. <img alt="OpenNeuro logo" src="https://raw.githubusercontent.com/OpenNeuroOrg/openneuro/1c1e0d3b2a2032729727702eb65b1b563eadce1d/packages/openneuro-components/src/assets/on-dark.svg" width=100> <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> -<img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=200> -<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=200> +<img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=150> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=50> git-annex development is supported in large part by:
more logos
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index 4226420a06..2d3d8dd444 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -36,6 +36,8 @@ contributed good bug reports and great ideas. <img alt="OpenNeuro logo" src="https://raw.githubusercontent.com/OpenNeuroOrg/openneuro/1c1e0d3b2a2032729727702eb65b1b563eadce1d/packages/openneuro-components/src/assets/on-dark.svg" width=100> <img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> +<img alt="DANDI logo" src="https://dandiarchive.org/assets/logo-DbYqjGgV.svg" width=200> +<img alt="ReproNim logo" src="https://repronim.org/images/square-512T2.png" width=200> git-annex development is supported in large part by:
thanksgiving update
diff --git a/doc/thanks.mdwn b/doc/thanks.mdwn index d99a2b43be..4226420a06 100644 --- a/doc/thanks.mdwn +++ b/doc/thanks.mdwn @@ -34,7 +34,7 @@ contributed good bug reports and great ideas. ## financial support, 2024-2025 <img alt="OpenNeuro logo" src="https://raw.githubusercontent.com/OpenNeuroOrg/openneuro/1c1e0d3b2a2032729727702eb65b1b563eadce1d/packages/openneuro-components/src/assets/on-dark.svg" width=100> -<img alt="Standford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> +<img alt="Stanford wordmark" src="https://poldracklab.org/images/stanford-line2-8.png" width=200> <img alt="Jülich Forschungszentrum logo" src="https://www.fz-juelich.de/static/media/Logo.2ceb35fc.svg" width=200> git-annex development is supported in large part by: @@ -50,6 +50,8 @@ git-annex development is supported in large part by: * [ReproNim](https://repronim.org/), funded by [a NIH grant](https://projectreporter.nih.gov/project_info_details.cfm?aid=8999833) awarded to UMass Medical School Worcester, Dartmouth College, MIT, et al. +* Institute of Climate and Energy Systems (Stratosphere; ICE-4) at + [Forschungszentrum Jülich](https://www.fz-juelich.de/en/ice/ice-4). Thanks also to these folks for their support: [[!inline raw=yes pages="thanks/list"]] and anonymous supporters.
Added a comment: Resolved: using newer version
diff --git a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_/comment_1_0ca10bffc619ea3083bde714162c4f25._comment b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_/comment_1_0ca10bffc619ea3083bde714162c4f25._comment new file mode 100644 index 0000000000..98fa4f7b45 --- /dev/null +++ b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_/comment_1_0ca10bffc619ea3083bde714162c4f25._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="scinu@299f3f05b38f16a60b2c1d63240426946bec9ab7" + nickname="scinu" + avatar="http://cdn.libravatar.org/avatar/c5a190c5c0ce61a5be141609dff37fe1" + subject="Resolved: using newer version" + date="2025-11-25T12:39:58Z" + content=""" +After installing git-annex from Archlinux repositories, it works again. + +For some reason, I had installed git-annex-standalone (10.20220121-1) +With git-annex (10.20251114-2) everything works as intended. + +git-annex version: 10.20251114-geeb21b831e7c45078bd9447ec2b0532a691fe471 + +Sorry for the noise, should have done this before. + +Best, Scinu +"""]]
diff --git a/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn new file mode 100644 index 0000000000..ae708bb1b3 --- /dev/null +++ b/doc/bugs/rsync__58_____91__sender__93___change__95__dir__No_such_file_.mdwn @@ -0,0 +1,53 @@ +### Please describe the problem. + +In an rsync special remote, "git-annex get a.pdf" gives me the error: + +rsync: [sender] change_dir "/home/myuser/annex/M8/98/'SHA256E-s367497--24d4a5763a5cd718985a471b4d18981b38f9be73206c33ca885d1e7357a8a2f1.pdf" failed: No such file or directory (2) +rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1852) [Receiver=3.4.1] + +This seems due to different storage paths of the file in the local annex and in the remote annex. + +In the local annex, it is under objects/M8/98 + +In the remote annex, paths are of the form objects/1a3/df7 + +There seems to be a different repository layout, but git-annex attempts to obtain the file under the local path. + +### What steps will reproduce the problem? + +create new rsync-over-ssh special remote +copy some files there +git-annex drop a.pdf +git-annex get a.pdf + +### What version of git-annex are you using? On what operating system? 
+ +archlinux, git-annex-standalone +Version : 10.20220121-1 + +git-annex version: 10.20220121-gdf6a8476e +build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV +dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.3.0.1 ghc-8.8.4 http-client-0.6.4.1 persistent-sqlite-2.10.6.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.1.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X* +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external +operating system: linux x86_64 +supported repository versions: 8 9 10 +upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10 +local repository version: 8 + + +### Please provide any additional information below. + +[[!format sh """ +get 240.pdf (from rsyncnet...) + +rsync: [sender] change_dir "/home/myuser/annex/M8/98/'SHA256E-s367497--24d4a5763a5cd718985a471b4d18981b38f9be73206c33ca885d1e7357a8a2f1.pdf" failed: No such file or directory (2) +rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1852) [Receiver=3.4.1] +rsync: [Receiver] write error: Broken pipe (32) +rsync exited 23 +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +git-annex solves a lot of problems for both work and privately. I use it on a daily basis since a decade, and it is just great and very reliable. 
+Great software, please keep up developing and maintaining it.
Fix a WikiLink
diff --git a/doc/copies.mdwn b/doc/copies.mdwn index f79e94d009..b18215cdb9 100644 --- a/doc/copies.mdwn +++ b/doc/copies.mdwn @@ -22,7 +22,7 @@ running `git-annex mincopies N` or can be overridden on a per-file-type basis by the annex.mincopies setting in `.gitattributes` files. The --mincopies switch allows temporarily using a different value. -Note that [trusted repositories|trust]] are assumed to +Note that [[trusted repositories|trust]] are assumed to continue to contain content, so checking them is skipped. So dropping content from trusted repositories does risk numcopies and mincopies later being violated.
comment
diff --git a/doc/todo/p2phttp__58___regularly_re-check_for_annex.url_config/comment_3_667ab1f57a53590c89498423f341ae99._comment b/doc/todo/p2phttp__58___regularly_re-check_for_annex.url_config/comment_3_667ab1f57a53590c89498423f341ae99._comment new file mode 100644 index 0000000000..1573eb1d72 --- /dev/null +++ b/doc/todo/p2phttp__58___regularly_re-check_for_annex.url_config/comment_3_667ab1f57a53590c89498423f341ae99._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-11-20T18:36:31Z" + content=""" +Even if it only re-checks when git-annex is going to use the remote +(and not on every run of git-annex) that seems perhaps too often to check. + +But if it checks less often than that, once per day or whatever, there will +of course be a window where it has not yet noticed the change and uses the +cached remote.name.annexUrl and potentially fails. + +A balance might be that if it fails to connect to the remote.name.annexUrl, +it could re-check it then. +"""]]
comment
diff --git a/doc/todo/Delayed_drop_from_remote/comment_4_7cc6ca779086402fd32999441a0e5693._comment b/doc/todo/Delayed_drop_from_remote/comment_4_7cc6ca779086402fd32999441a0e5693._comment new file mode 100644 index 0000000000..75bc668fc1 --- /dev/null +++ b/doc/todo/Delayed_drop_from_remote/comment_4_7cc6ca779086402fd32999441a0e5693._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-11-20T18:16:47Z" + content=""" +Yes, the trashbin remote could be private. I think we're in agreement +that's the best way to go. + +--accessedwithin relies on atime, and looks at objects in the local +repository only, so it would not work to find objects in the trashbin +remote. + +I don't think there is anything in preferred content +expressions that would meet your need here exactly. It would probably be +possible to add an expression that matches objects that have been present +in a given repository for a given amount of time. The presence logs do have a +timestamp. + +Of course, if you used a directory special remote you could use +plain old `find`. +"""]]
comment
diff --git a/doc/todo/Ephemeral_special_remotes/comment_2_b806f390d7e4f6a1f9e87173e777a5cb._comment b/doc/todo/Ephemeral_special_remotes/comment_2_b806f390d7e4f6a1f9e87173e777a5cb._comment new file mode 100644 index 0000000000..c03b9653e1 --- /dev/null +++ b/doc/todo/Ephemeral_special_remotes/comment_2_b806f390d7e4f6a1f9e87173e777a5cb._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-11-20T14:05:10Z" + content=""" +There are also some common setup stage tasks that pose problems but could +all be fixed in one place: + +* Encryption setup generates encryption keys. Which is both slow and also + generating and then throwing away an encryption key is the wrong thing to + do. I think this could be dealt with by copying the encryption setup of the + remote that is generating the ephemeral remote into it. +* remote.name.annex-uuid is set in git config by gitConfigSpecialRemote. + Either that could be disabled for ephemerals, or the uuid and name could + also be inherited, which would make that a no-op. +"""]]
comment
diff --git a/doc/todo/Ephemeral_special_remotes/comment_1_8e2082918dff4982d1e6dbb8c2fd1f98._comment b/doc/todo/Ephemeral_special_remotes/comment_1_8e2082918dff4982d1e6dbb8c2fd1f98._comment new file mode 100644 index 0000000000..b509d69bbd --- /dev/null +++ b/doc/todo/Ephemeral_special_remotes/comment_1_8e2082918dff4982d1e6dbb8c2fd1f98._comment @@ -0,0 +1,27 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-20T13:34:28Z" + content=""" +The major difficulty in implementing this seems to be the setup stage, +which is the per-special-remote code that runs during +initremote/enableremote. That code can write to disk, or perform +expensive operations. + +A few examples: + +* S3's setup makes 1 http request to verify that the bucket exists + (or about 4 http requests when it needs to create the bucket). + It does additional work when bucket versioning is enabled. +* directory's setup modifies the git config file to set + remote.name.directory. And if that were skipped, generating the directory + special remote would fail, because it reads that git config. + +My gut feeling is that it won't be practical to make it possible to +ephemeralize every type of special remote. But it would not be too +hard to make some subset of special remotes able to be used ephemerally. + +It might be possible to maintain a cache of recently used ephemeral special +remotes across runs of git-annex, and so avoid needing to re-run the setup +stage. +"""]]
comment
diff --git a/doc/todo/Special_remote_redirect_to_URL/comment_1_00ec4c643aaff6f9a6a70a27309aab2f._comment b/doc/todo/Special_remote_redirect_to_URL/comment_1_00ec4c643aaff6f9a6a70a27309aab2f._comment new file mode 100644 index 0000000000..7fb1e5e913 --- /dev/null +++ b/doc/todo/Special_remote_redirect_to_URL/comment_1_00ec4c643aaff6f9a6a70a27309aab2f._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-20T13:23:00Z" + content=""" +This seems like a good design to me. It will need a protocol extension +to indicate when a git-annex version supports it. + +It occurred to me that when `git-annex p2phttp` is used and is proxying to a +special remote that uses this feature, it would be possible to forward the +redirect to the http client, so the server would not need to download the +object itself. + +A neat optimisation potential, although implementing it would cut across +several things in a way I'm unsure how to do cleanly. + +That did make me wonder though, if the redirect url would always be safe to +share with the client, without granting the client any abilities beyond a +one-time download. And I think that's too big an assumption to make for +this optimisation. Someone could choose to redirect to an url containing +eg, http basic auth, which would be fine when using it all locally, but not +in this proxy situation. So there would need to be an additional configuration +to enable the proxy optimisation. +"""]]
fixed in aws-0.25.1
diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn index 923209bb04..698fdeaf66 100644 --- a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn +++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes.mdwn @@ -95,3 +95,5 @@ initremote: 1 failed Thanks for all your great work, Joey! [[!tag projects/openneuro]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_1_f537c54572a418e7d126594947015a64._comment b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_1_f537c54572a418e7d126594947015a64._comment new file mode 100644 index 0000000000..b5bbf96ef4 --- /dev/null +++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_1_f537c54572a418e7d126594947015a64._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-19T16:24:33Z" + content=""" +Root caused to this bug: <https://github.com/aristidb/aws/issues/296> + +Seems likely that `git-annex import` from an importtree=yes S3 +remote on GCP is also broken since it also uses getBucket. + +git-annex uses getBucket to probe if the bucket already exists, +which lets it avoid dealing with the various ways that PUT of a bucket can +fail. GCP also has some incompatibilities in how it responds to that, +eg in the above log, it uses a custom "BucketNameUnavailable", +rather than the S3 standard "BucketAlreadyExists". 
+"""]] diff --git a/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_2_74bab9cd3c546533bd8047ce9342d4c7._comment b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_2_74bab9cd3c546533bd8047ce9342d4c7._comment new file mode 100644 index 0000000000..4bf6cf4205 --- /dev/null +++ b/doc/bugs/S3_remote_fails_for_GCP_with_multiple_prefixes/comment_2_74bab9cd3c546533bd8047ce9342d4c7._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2025-11-19T18:27:45Z" + content=""" +This is fixed in aws-0.25.1. I have made the git-annex stack build use that +version. I also added a build warning when built with an older version, +to hopefully encourage other builds to get updated. +"""]]
S3: Remote can be configured with an x-amz-tagging header.
Needs aws-0.25, which was just released.
stack.yaml: Use aws-0.25.
This risks some foot shooting if the user configures a bucket to hide
objects that have a given tag, and sets up the S3 special remote with it,
but then uses a build of git-annex that does not support sending it.
I considered making git-annex error out if it's built with too old a
version of aws. But that leaves the problem of older versions of git-annex,
which will just ignore the x-amz-tagging config. So it didn't seem worth
the bother to have a new failure mode. The user of this will just need to
be careful of their versions if they are using it in a security context.
The use of parseQueryText is kind of silly, because aws just reassembles it
back to a query string. But it made sense for the aws interface to be
key/value pairs. And it seems to make sense for the git-annex config
interface to be a query string. At least, I could not think of a better way
to handle the config.
Sponsored-by: Dartmouth College's OpenNeuro project
Needs aws-0.25, which was just released.
stack.yaml: Use aws-0.25.
This risks some foot shooting if the user configures a bucket to hide
objects that have a given tag, and sets up the S3 special remote with it,
but then uses a build of git-annex that does not support sending it.
I considered making git-annex error out if it's built with too old a
version of aws. But that leaves the problem of older versions of git-annex,
which will just ignore the x-amz-tagging config. So it didn't seem worth
the bother to have a new failure mode. The user of this will just need to
be careful of their versions if they are using it in a security context.
The use of parseQueryText is kind of silly, because aws just reassembles it
back to a query string. But it made sense for the aws interface to be
key/value pairs. And it seems to make sense for the git-annex config
interface to be a query string. At least, I could not think of a better way
to handle the config.
Sponsored-by: Dartmouth College's OpenNeuro project
diff --git a/CHANGELOG b/CHANGELOG
index f04408a114..b75996aedd 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,11 @@
+git-annex (10.20251118) UNRELEASED; urgency=medium
+
+ * S3: Remote can be configured with an x-amz-tagging header.
+ (Needs aws-0.25)
+ * stack.yaml: Use aws-0.25.
+
+ -- Joey Hess <id@joeyh.name> Tue, 18 Nov 2025 12:34:12 -0400
+
git-annex (10.20251114) upstream; urgency=medium
* p2p --pair: Fix to work with external P2P networks.
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 566c8f5889..e23a281e3a 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -1,6 +1,6 @@
{- S3 remotes
-
- - Copyright 2011-2024 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -108,6 +108,8 @@ remote = specialRemoteType $ RemoteType
(FieldDesc "for path-style requests, set to \"path\"")
, signatureVersionParser signatureField
(FieldDesc "S3 signature version")
+ , optionalStringParser taggingField
+ (FieldDesc "tagging header to add when storing on S3")
, optionalStringParser mungekeysField HiddenField
, optionalStringParser AWS.s3credsField HiddenField
]
@@ -161,6 +163,9 @@ requeststyleField = Accepted "requeststyle"
signatureField :: RemoteConfigField
signatureField = Accepted "signature"
+taggingField :: RemoteConfigField
+taggingField = Accepted "x-amz-tagging"
+
data SignatureVersion
= SignatureVersion Int
| DefaultSignatureVersion
@@ -1017,6 +1022,7 @@ data S3Info = S3Info
, bucketExportLocation :: ExportLocation -> BucketObject
, bucketImportLocation :: BucketObject -> Maybe ImportLocation
, metaHeaders :: [(T.Text, T.Text)]
+ , tagging :: [(T.Text, T.Text)]
, partSize :: Maybe Integer
, isIA :: Bool
, versioning :: Bool
@@ -1039,6 +1045,7 @@ extractS3Info c = do
, bucketExportLocation = getBucketExportLocation c
, bucketImportLocation = getBucketImportLocation c
, metaHeaders = getMetaHeaders c
+ , tagging = getTagging c
, partSize = getPartSize c
, isIA = configIA c
, versioning = fromMaybe False $
@@ -1056,6 +1063,9 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
, S3.poMetadata = metaHeaders info
, S3.poAutoMakeBucket = isIA info
, S3.poAcl = acl info
+#if MIN_VERSION_aws(0,25,0)
+ , S3.poTagging = tagging info
+#endif
}
acl :: S3Info -> Maybe S3.CannedAcl
@@ -1083,6 +1093,14 @@ getMetaHeaders = map munge
metaprefixlen = length metaPrefix
munge (k, v) = (T.pack $ drop metaprefixlen (fromProposedAccepted k), T.pack v)
+getTagging :: ParsedRemoteConfig -> [(T.Text, T.Text)]
+getTagging c = case getRemoteConfigValue taggingField c of
+ Nothing -> []
+ Just s -> map go $ parseQueryText (encodeBS s)
+ where
+ go (k, Just v) = (k, v)
+ go (k, Nothing) = (k, mempty)
+
isMetaHeader :: RemoteConfigField -> Bool
isMetaHeader h = metaPrefix `isPrefixOf` fromProposedAccepted h
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn
index f8f800558b..cdafe7d7c8 100644
--- a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn
+++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header.mdwn
@@ -11,3 +11,5 @@ An example use case is publishing a private dataset where a bucket policy is use
[[!tag projects/openneuro]]
+
+> [[done]] --[[Joey]]
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment
index 06f8922f7b..97c20cfe48 100644
--- a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment
+++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment
@@ -8,5 +8,12 @@ does not allow setting this header.
I opened an issue <https://github.com/aristidb/aws/issues/294>
-Update: This will need aws-0.25.
+Update: This will need git-annex to be built with aws-0.25. If a S3 special
+remote is configured with this header, and an older version of git-annex
+or a git-annex built with an older version of aws is used, it will just not
+send along the header when storing an object.
+
+So if your use case involves making newly uploaded objects private, you'll
+want to make sure you're always using a build of git-annex that supports
+it.
"""]]
diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn
index d36dfa1b36..c1f21183fe 100644
--- a/doc/special_remotes/S3.mdwn
+++ b/doc/special_remotes/S3.mdwn
@@ -162,10 +162,14 @@ the S3 remote.
and to "bar/" in another special remote, and both special remotes could
then use the same bucket.
-* `x-amz-meta-*` are passed through as http headers when storing keys
- in S3.
+* `x-amz-meta-*` are passed through as http headers
+ when storing keys in S3.
* `x-archive-meta-*` are passed through as http headers when storing keys
in the Internet Archive. See
[the Internet Archive S3 interface documentation](https://archive.org/help/abouts3.txt)
for example headers.
+
+* `x-amz-tagging` is passed through as a http header
+ when storing keys in S3. (Needs git-annex 10.20251118 or newer
+ built with aws-0.25. Otherwise, the header will *not* be sent.)
diff --git a/git-annex.cabal b/git-annex.cabal
index 44b89db6bc..9da04c3618 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -1,5 +1,5 @@
Name: git-annex
-Version: 10.20251114
+Version: 10.20251118
Cabal-Version: 1.12
License: AGPL-3
Maintainer: Joey Hess <id@joeyh.name>
diff --git a/stack.yaml b/stack.yaml
index f4c26e49ae..4c8e0a9e29 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -13,3 +13,5 @@ flags:
packages:
- '.'
resolver: lts-24.2
+extra-deps:
+- aws-0.25
update
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment index a4ad7aadde..06f8922f7b 100644 --- a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment +++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment @@ -7,4 +7,6 @@ Implementing this will need changes to the haskell aws library, since it does not allow setting this header. I opened an issue <https://github.com/aristidb/aws/issues/294> + +Update: This will need aws-0.25. """]]
comment
diff --git a/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment new file mode 100644 index 0000000000..a4ad7aadde --- /dev/null +++ b/doc/bugs/S3_remote_should_expose_x-amz-tagging_header/comment_1_1545ba89e48060fe41002499bba742b3._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2025-11-18T16:41:31Z" + content=""" +Implementing this will need changes to the haskell aws library, since it +does not allow setting this header. + +I opened an issue <https://github.com/aristidb/aws/issues/294> +"""]]
remove comments for old release news item
diff --git a/doc/news/version_10.20250416/comment_1_910c84a8ef1d6aed657be912dd68ffbd._comment b/doc/news/version_10.20250416/comment_1_910c84a8ef1d6aed657be912dd68ffbd._comment deleted file mode 100644 index db0be8c0b9..0000000000 --- a/doc/news/version_10.20250416/comment_1_910c84a8ef1d6aed657be912dd68ffbd._comment +++ /dev/null @@ -1,86 +0,0 @@ -[[!comment format=mdwn - username="liam" - avatar="http://cdn.libravatar.org/avatar/5cb416d010c1d3f0ca677dd7f6c822ea" - subject="Corruption? invalid object for uuid.log" - date="2025-04-20T02:32:45Z" - content=""" -Hello, - -I just installed this version (10.20250416) this afternoon. -Now I have the following issue: - -``` -$ git annex info -(recording state in git...) -error: invalid object 100644 f83a2e4115f14218f574058d3b3ccf72e9cfe677 for 'uuid.log' -fatal: git-write-tree: error building trees -git-annex: failed to read sha from git write-tree -``` - -and again with --verbose and --debug - -``` -git annex info --verbose --debug -[2025-04-20 04:27:23.998527] (Utility.Process) process [12752] read: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"show-ref\",\"git-annex\"] -[2025-04-20 04:27:24.042056] (Utility.Process) process [12752] done ExitSuccess -[2025-04-20 04:27:24.042331] (Utility.Process) process [12753] read: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"show-ref\",\"--hash\",\"refs/heads/git-annex\"] -[2025-04-20 04:27:24.046759] (Utility.Process) process [12753] done ExitSuccess -[2025-04-20 04:27:24.051021] (Utility.Process) process [12754] chat: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"hash-object\",\"-w\",\"--no-filters\",\"--stdin-paths\"] -[2025-04-20 04:27:24.051495] (Utility.Process) process [12755] feed: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"update-index\",\"-z\",\"--index-info\"] -[2025-04-20 
04:27:24.265152] (Utility.Process) process [12755] done ExitSuccess -[2025-04-20 04:27:24.265928] (Annex.Branch) read transitions.log -[2025-04-20 04:27:24.266808] (Utility.Process) process [12756] chat: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"cat-file\",\"--batch\"] -(recording state in git...) -[2025-04-20 04:27:24.278685] (Utility.Process) process [12757] read: git [\"--git-dir=.git\",\"--work-tree=.\",\"--literal-pathspecs\",\"-c\",\"annex.debug=true\",\"write-tree\"] -error: invalid object 100644 f83a2e4115f14218f574058d3b3ccf72e9cfe677 for 'uuid.log' -fatal: git-write-tree: error building trees -[2025-04-20 04:27:25.065295] (Utility.Process) process [12757] done ExitFailure 128 -git-annex: failed to read sha from git write-tree -``` - -This first showed up when I tried to do: `git annex sync myremote` -Now this error keeps showing up but only in my computer's repo. - -Note: The object id shown doesn't seem to correspond to anything on my other drives. - -I'm worried it might be something in the latest version. - -On the external SSD from which I tried to sync it shows: - -``` -trusted repositories: 0 -semitrusted repositories: 12 -untrusted repositories: 1 -local annex keys: 245972 -local annex size: 2.7 terabytes -annexed files in working tree: 404264 -size of annexed files in working tree: 3.54 terabytes -combined annex size of all repositories: 9.46 terabytes -backend usage: - SHA256E: 404264 -bloom filter size: 32 mebibytes (49.2% full) -``` - -Any idea what this could be? - -Is it safe to fix this by just cloning the repo again and then replacing the annex directory? -I'm not sure how to deal with this since I've never had this error before. 
- -version information: - -``` -$ git annex version -git-annex version: 10.20250416 -build flags: Pairing TorrentParser MagicMime Servant Benchmark Feeds Testsuite S3 WebDAV -dependency versions: aws-0.24.4 bloomfilter-2.0.1.2 crypton-1.0.4 DAV-1.3.4 feed-1.3.2.1 ghc-9.8.4 http-client-0.7.19 persistent-sqlite-2.13.3.0 torrent-10000.1.3 uuid-1.3.16 -key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL GITBUNDLE GITMANIFEST VURL X* -remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg rclone hook external compute mask -operating system: darwin aarch64 -supported repository versions: 8 9 10 -upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10 -local repository version: 10 -``` - -Thank you, -Liam -"""]] diff --git a/doc/news/version_10.20250416/comment_2_88fc9ef91f9830f67b7658232a727ac4._comment b/doc/news/version_10.20250416/comment_2_88fc9ef91f9830f67b7658232a727ac4._comment deleted file mode 100644 index 54e94ec2bf..0000000000 --- a/doc/news/version_10.20250416/comment_2_88fc9ef91f9830f67b7658232a727ac4._comment +++ /dev/null @@ -1,34 +0,0 @@ -[[!comment format=mdwn - username="liam" - avatar="http://cdn.libravatar.org/avatar/5cb416d010c1d3f0ca677dd7f6c822ea" - subject="Repairing repositories on MacOS" - date="2025-04-20T02:49:48Z" - content=""" -Hi, - -As follow on from the above, I'm trying to repair the repo using the built-in tool from here: [[https://git-annex.branchable.com/git-annex-repair/]] -However, it seems to have problems on MacOS. 
- -``` -$ git annex repair -repair Running git fsck ... -Fsck found no problems. Checking for broken branches. -fatal: ambiguous argument 'refs/.DS_Store': unknown revision or path not in the working tree. -Use '--' to separate paths from revisions, like this: -'git <command> [<revision>...] -- [<file>...]' -fatal: ambiguous argument 'refs/heads/.DS_Store': unknown revision or path not in the working tree. -Use '--' to separate paths from revisions, like this: -'git <command> [<revision>...] -- [<file>...]' -^C -``` - -Somethings gone quite wrong if those .DS_Store files are showing up there. - -I'd love to dig into the source for git-annex but I don't know Haskell... -Maybe this is a sign I should learn it? - -The `uuid.log` problem in my previous post seems to have a bit of documentation here: [[https://git-annex.branchable.com/internals/]] -Maybe it has something to do with a previously failed clone attempt? I will dig some more tomorrow. - -Liam -"""]] diff --git a/doc/news/version_10.20250416/comment_3_9a9b7aa33311822ab38c00ca7477668b._comment b/doc/news/version_10.20250416/comment_3_9a9b7aa33311822ab38c00ca7477668b._comment deleted file mode 100644 index 6c4d462cee..0000000000 --- a/doc/news/version_10.20250416/comment_3_9a9b7aa33311822ab38c00ca7477668b._comment +++ /dev/null @@ -1,20 +0,0 @@ -[[!comment format=mdwn - username="liam" - avatar="http://cdn.libravatar.org/avatar/5cb416d010c1d3f0ca677dd7f6c822ea" - subject="Fetch from good repo to fix uuid.log issue" - date="2025-04-20T12:32:31Z" - content=""" -Hi, - -So to resolve the .DS_Store issue, I just had to go into the .git repo and remove all the .DS_Store files that MacOS littered around. - -To fix the repo itself, I ended up doing a `git fetch goodremote` and this seems to have fixed the issue. -I can now run `git annex info` again without a problem. 
- -The weird thing is, it was complaining about `uuid.log`, however, when running `git show git-annex:uuid.log` I was getting the same output on both repos. -It's not clear what went wrong. Maybe corruption but `git fsck` never showed an issue. Fetching from a good repo seems to have fixed it. - -Hopefully this helps if anyone has a similar problem. - -Liam -"""]] diff --git a/doc/news/version_10.20250416/comment_4_b6ac0341afd1c15eed0af4161db9c021._comment b/doc/news/version_10.20250416/comment_4_b6ac0341afd1c15eed0af4161db9c021._comment deleted file mode 100644 index 83d87f4084..0000000000 --- a/doc/news/version_10.20250416/comment_4_b6ac0341afd1c15eed0af4161db9c021._comment +++ /dev/null @@ -1,14 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 4""" - date="2025-05-13T14:55:52Z" - content=""" -First of all, a news item for a release is not the appropriate place to -discuss something like this. Use the [[forum]] in the furture. - -This looks like a `.git/annex/index` that references a git object that for -some reason didn't get written to disk. A common way this can happen is a -disk getting unmounted or system shutdown at a point in time that causes a -recently written git object to get lost. Deleting `.git/annex/index` will -solve this problem. -"""]]
add news item for git-annex 10.20251114
diff --git a/doc/news/version_10.20250721.mdwn b/doc/news/version_10.20250721.mdwn deleted file mode 100644 index 09ca1b73f0..0000000000 --- a/doc/news/version_10.20250721.mdwn +++ /dev/null @@ -1,17 +0,0 @@ -git-annex 10.20250721 released with [[!toggle text="these changes"]] -[[!toggleable text=""" * Improved workaround for git 2.50 bug, avoding an occasional test suite - failure, as well as some situations where an unlocked file did not get - populated when adding another file to the repository with the same - content. - * Add --url option and url= preferred content expression, to match - content that is recorded as present in an url. - * p2phttp: Scan multilevel directories with --directory. - * p2phttp: Added --socket option. - * Fix bug in handling of linked worktrees on filesystems not supporting - symlinks, that caused annexed file content to be stored in the wrong - location inside the git directory, and also caused pointer files to not - get populated. - * fsck: Fix location of annexed files when run in linked worktrees - that have experienced the above bug. - * Fix symlinks generated to annexed content when in adjusted unlocked - branch in a linked worktree on a filesystem not supporting symlinks."""]] \ No newline at end of file diff --git a/doc/news/version_10.20251114.mdwn b/doc/news/version_10.20251114.mdwn new file mode 100644 index 0000000000..63255f2897 --- /dev/null +++ b/doc/news/version_10.20251114.mdwn @@ -0,0 +1,10 @@ +git-annex 10.20251114 released with [[!toggle text="these changes"]] +[[!toggleable text=""" * p2p --pair: Fix to work with external P2P networks. + * p2phttp: Significant robustness fixes for bugs that caused the + server to stall. + * p2phttp: Fix a file descriptor leak. + * p2phttp: Added the --lockedfiles option. + * dropunused: Run the annex.secure-erase-command + (or .git/hooks/secure-erase-annex) when deleting + temp and bad object files. 
+ * remotedaemon: Avoid crashing when run with --debug."""]] \ No newline at end of file