---- Hash
+--- Hash module with built-in caching.
-- @module generic.hash
--[[
]]
local hash = {}
+local e2lib = require("e2lib")
local eio = require("eio")
local err = require("err")
+local lsha1 = require("lsha1")
local strict = require("strict")
local trace = require("trace")
-local lsha1 = require("lsha1")
+
+--- The hashcache lookup dictionary.
+local hcachedict = false
+--- Path to the persistent storage file.
+local hcachestorage = false
+
+--- Internal hash cache entry dictionary.
+-- @table hce
+-- @field dev See stat.
+-- @field ino See stat.
+-- @field size See stat.
+-- @field mtime See stat.
+-- @field mtime_nsec See stat.
+-- @field ctime See stat.
+-- @field ctime_nsec See stat.
+-- @field hash SHA-1 checksum
+-- @field hit Count cache hits.
+
+--- Load the persistent hashcache file into memory.
+-- The given filename is remembered as the storage location for
+-- hcache_store(), even when loading fails. The file is executed as a Lua
+-- chunk in an empty environment and must return a table mapping paths to
+-- hash cache entries (see hce). A single malformed entry causes the whole
+-- file to be rejected.
+-- @param filename Path to hashcache file. If the file does not exist, it
+-- will be created when calling hcache_store().
+-- @return True on success, false on error. Errors only have an effect on
+-- performance, and should usually be ignored.
+-- @return Error object on failure.
+-- @see hcache_store
+function hash.hcache_load(filename)
+    local chunk, msg, ok, hctab, newdict
+
+    if hcachedict then
+        return false, err.new("hashcache already initialised")
+    end
+
+    hcachestorage = filename
+
+    chunk, msg = loadfile(filename)
+    if not chunk then
+        return false,
+            err.new("loading hashcache %q failed: %s", filename, msg)
+    end
+
+    -- Run the chunk in an empty environment so it cannot reach any globals,
+    -- and under pcall so that a runtime error raised by a damaged file is
+    -- reported as a malformed cache instead of propagating to the caller.
+    setfenv(chunk, {})
+    ok, hctab = pcall(chunk)
+    if not ok or type(hctab) ~= "table" then
+        return false, err.new("ignoring malformed hashcache %q", filename)
+    end
+
+    -- Validate into a scratch table; the module-level dictionary is only
+    -- replaced once every entry has been accepted.
+    newdict = {}
+    for path, hce in pairs(hctab) do
+        -- The type(hce) check must come first: indexing a number or
+        -- boolean entry would raise an error.
+        if type(path) ~= "string" or #path == 0
+            or type(hce) ~= "table"
+            or type(hce.hash) ~= "string" or #hce.hash ~= 40
+            or type(hce.mtime) ~= "number"
+            or type(hce.mtime_nsec) ~= "number"
+            or type(hce.ctime) ~= "number"
+            or type(hce.ctime_nsec) ~= "number"
+            or type(hce.size) ~= "number"
+            or type(hce.dev) ~= "number"
+            or type(hce.ino) ~= "number"
+            or type(hce.hit) ~= "number" then
+
+            return false,
+                err.new("malformed hashcache entry, ignoring %q", filename)
+        end
+
+        -- Copy only the known fields, dropping anything else the file
+        -- may contain.
+        newdict[path] = {
+            hash = hce.hash,
+            mtime = hce.mtime,
+            mtime_nsec = hce.mtime_nsec,
+            ctime = hce.ctime,
+            ctime_nsec = hce.ctime_nsec,
+            size = hce.size,
+            dev = hce.dev,
+            ino = hce.ino,
+            hit = hce.hit,
+        }
+    end
+
+    -- An empty cache file leaves the dictionary uninitialised, as before.
+    if next(newdict) ~= nil then
+        hcachedict = newdict
+    end
+
+    return true
+end
+
+--- Write the in-memory hashcache to its persistent storage file.
+-- The storage location is the filename previously given to hcache_load().
+-- Entries are written in order of descending hit count. Once more than
+-- 10000 entries have been written, output stops after the first entry
+-- that has never been hit.
+-- @return True on success (including when there is nothing to store),
+-- false on error. Errors should usually be ignored.
+-- @return Error object on failure.
+-- @see hcache_load
+function hash.hcache_store()
+    local ok, re
+    local entries = {}
+    local lines = { "return {\n" }
+
+    if not (hcachedict and hcachestorage) then
+        return true
+    end
+
+    for path, hce in pairs(hcachedict) do
+        entries[#entries + 1] = { path = path, hce = hce }
+    end
+
+    -- Most frequently hit entries first.
+    table.sort(entries, function(a, b)
+        return a.hce.hit > b.hce.hit
+    end)
+
+    for idx, item in ipairs(entries) do
+        local hce = item.hce
+
+        lines[#lines + 1] = string.format(
+            "[%q] = { hash=%q, mtime=%d, mtime_nsec=%d, ctime=%d, " ..
+            "ctime_nsec=%d, size=%d, dev=%d, ino=%d, hit=%d },\n",
+            item.path, hce.hash, hce.mtime, hce.mtime_nsec, hce.ctime,
+            hce.ctime_nsec, hce.size, hce.dev, hce.ino, hce.hit)
+
+        -- Bound the file size: past 10000 entries, unused ones are dropped.
+        if idx > 10000 and hce.hit == 0 then
+            break
+        end
+    end
+    lines[#lines + 1] = "}\n"
+
+    ok, re = eio.file_write(hcachestorage, table.concat(lines))
+    if not ok then
+        return false, err.new("writing hashcache file"):cat(re)
+    end
+
+    return true
+end
--- Create a hash context.
-- @return Hash context object or false on error.
return true
end
---- Hash a file at once.
+--- Look up the cached checksum of a file.
+-- A checksum is only returned when every stat field stored with the cache
+-- entry equals the file's current stat result. Each successful lookup
+-- increments the entry's hit counter.
+-- @param path Absolute path to the file.
+-- @return Checksum or false if path is not in the cache or an error
+-- occurred.
+local function hcache_lookup(path)
+    local statfields = {
+        "mtime", "mtime_nsec", "ctime", "ctime_nsec", "size", "dev", "ino",
+    }
+
+    if not hcachedict then
+        return false
+    end
+
+    -- Try not to return checksums for files which are inaccessible.
+    if not e2lib.exists(path, false) then
+        return false
+    end
+
+    local sb = e2lib.stat(path)
+    if not sb then
+        return false
+    end
+
+    local entry = hcachedict[path]
+    if not entry then
+        return false
+    end
+
+    -- Any difference in the stat fields means the cached value is stale.
+    for _, field in ipairs(statfields) do
+        if entry[field] ~= sb[field] then
+            return false
+        end
+    end
+
+    entry.hit = entry.hit + 1
+    return entry.hash
+end
+
+--- Record a file's checksum in the hashcache.
+-- The checksum is stored together with the file's current stat fields and
+-- a hit count of zero, replacing any existing entry for the same path.
+-- @param path Path to the file.
+-- @param hash SHA1 checksum string, length 40.
+-- @return True on success, false on error.
+local function hcache_add(path, hash)
+    assert(type(path) == "string" and #path > 0)
+    assert(type(hash) == "string" and #hash == 40)
+
+    -- The dictionary is created before the stat call, so it exists
+    -- afterwards even when the stat fails.
+    hcachedict = hcachedict or {}
+
+    local sb = e2lib.stat(path)
+    if not sb then
+        return false
+    end
+
+    local entry = { hash = hash, hit = 0 }
+    for _, field in ipairs({ "mtime", "mtime_nsec", "ctime", "ctime_nsec",
+        "size", "dev", "ino" }) do
+        entry[field] = sb[field]
+    end
+    hcachedict[path] = entry
+
+    return true
+end
+
+--- Hash a file at once. Unlike hash_file(), this function makes use of a
+-- persistent cache.
-- @param path Full path to the file.
-- @return Checksum string, or false on error.
-- @return Error object on failure.
+-- @see hcache_load
function hash.hash_file_once(path)
local rc, re, hc, cs
+ cs = hcache_lookup(path)
+ if cs then
+ return cs
+ end
+
hc, re = hash.hash_start()
if not hc then
return false, re
return false, re
end
- return hash.hash_finish(hc)
+ cs, re = hash.hash_finish(hc)
+ if not cs then
+ return false, re
+ end
+
+ hcache_add(path, cs)
+ return cs
end
--- Get checksum and release hash context.
return info
end
---- hashcache setup.
-local function hashcache_setup(info)
- local e = err.new("reading hash cache")
- local rc, re
- e2lib.logf(4, "loading hashcache from file: %s", info.hashcache_file)
- info.hashcache = {}
-
- local c, msg = loadfile(info.hashcache_file)
- if not c then
- e2lib.warnf("WHINT", "loading hashcache failed: %s", msg)
- return true
- end
- -- set empty environment for this chunk
- setfenv(c, {})
- local newcache = c()
-
- if type(newcache) ~= "table" then
- e2lib.warnf("WHINT", "ignoring malformed hashcache")
- return true
- end
-
- for id, hce in pairs(newcache) do
- if type(id) == "string" and id:match("([^:]+):(%S+)")
- and type(hce.hash) == "string" and string.len(hce.hash) == 40
- and type(hce.mtime) == "number"
- and type(hce.mtime_nsec) == "number"
- and type(hce.ctime) == "number"
- and type(hce.ctime_nsec) == "number"
- and type(hce.size) == "number"
- and type(hce.dev) == "number"
- and type(hce.ino) == "number" then
-
- info.hashcache[id] = {
- hash = hce.hash,
- mtime = hce.mtime,
- mtime_nsec = hce.mtime_nsec,
- ctime = hce.ctime,
- ctime_nsec = hce.ctime_nsec,
- size = hce.size,
- dev = hce.dev,
- ino = hce.ino,
- }
- else
- e2lib.warnf("WHINT", "ignoring malformed hashcache entry")
- end
- end
-
- return true
-end
-
--- check for configuration syntax compatibility and log informational
-- message including list of supported syntaxes if incompatibility is
-- detected.
e2lib.logf(4, "VERSION: %s", buildconfig.VERSION)
e2lib.logf(4, "VERSIONSTRING: %s", buildconfig.VERSIONSTRING)
+ hash.hcache_load(e2lib.join(info.root, ".e2/hashcache"))
+ -- no error check required
+
--XXX create some policy module where the following policy settings
--XXX and functions reside (server names, paths, etc.)
-- if x86_64 mode is requested.
info.chroot_call_prefix["x86_64"] = ""
- info.hashcache_file = e2lib.join(info.root, ".e2/hashcache")
- rc, re = hashcache_setup(info)
- if not rc then
- return false, e:cat(re)
- end
-
if e2option.opts["check"] then
local f = e2lib.join(info.root, e2lib.globals.e2version_file)
local v, re = e2lib.parse_e2versionfile(f)
return e2tool.dlist_recursive(info, info.project.default_results)
end
---- hash a file addressed by server name and location.
--- @param info info structure
--- @param server the server name
--- @param location file location relative to the server
--- @return string the hash value, nil on error
--- @return nil, an error string on error
-local function hash_file(info, server, location)
- local e = err.new("error hashing file")
- local cache_flags = { cache = true }
- local rc, re = info.cache:cache_file(server, location, cache_flags)
- if not rc then
- return nil, e:cat(re)
- end
- local path, re = info.cache:file_path(server, location, cache_flags)
- if not path then
- return nil, e:cat(re)
- end
- return hash.hash_file_once(path)
-end
-
--- verify that a file addressed by server name and location matches the
-- checksum given in the sha1 parameter.
-- @param info info structure
function e2tool.verify_hash(info, server, location, sha1)
local rc, re
local e = err.new("error verifying checksum")
- local is_sha1, re = hash_file(info, server, location)
+ local is_sha1, re = e2tool.fileid(info, {server=server, location=location})
if not is_sha1 then
return false, e:cat(re)
end
return info.projid
end
---- Write out hashcache file.
--- @param info Info table.
--- @return True on success, false on error.
--- @return Error object on failure.
-local function hashcache_write(info)
- local rc, re, e, out
-
- out = { "return {\n" }
- for k,hce in pairs(info.hashcache) do
- table.insert(out, string.format(
- "[%q] = { hash=%q, mtime=%d, mtime_nsec=%d, ctime=%d, " ..
- "ctime_nsec=%d, size=%d, dev=%d, ino=%d },\n",
- k, hce.hash, hce.mtime, hce.mtime_nsec,
- hce.ctime, hce.ctime_nsec, hce.size, hce.dev, hce.ino))
- end
- table.insert(out, "}\n")
-
- rc, re = eio.file_write(info.hashcache_file, table.concat(out))
- if not rc then
- e = err.new("writing hash cache file")
- return false, e:cat(re)
- end
-
- return true
-end
-
---- hashcache.
-local function hashcache(info, file)
- local e = err.new("getting fileid from hash cache failed")
- local rc, re, fileid
-
- local p, re = info.cache:file_path(file.server, file.location, {})
- if not p then
- return nil, e:cat(re)
- end
- local s, re = e2lib.stat(p)
- if not s then
- return nil, e:cat(re)
- end
-
- local id = string.format("%s:%s", file.server, file.location)
- local hce = info.hashcache[id]
- if hce
- -- We don't just care about the file contents (mtime),
- -- inode changes could make the file inaccessible, so check ctime too
- and s.mtime == hce.mtime
- and s.mtime_nsec == hce.mtime_nsec
- and s.ctime == hce.ctime
- and s.ctime_nsec == hce.ctime_nsec
- and s.size == hce.size
- and s.dev == hce.dev
- and s.ino == hce.ino then
- assert(type(hce.hash) == "string" and string.len(hce.hash) == 40)
- return hce.hash
- end
-
- local fileid
- fileid, re = hash_file(info, file.server, file.location)
- if not fileid then
- return nil, e:cat(re)
- end
-
- assert(type(fileid) == "string" and string.len(fileid) == 40)
- hce = {
- hash = fileid,
- mtime = s.mtime,
- mtime_nsec = s.mtime_nsec,
- ctime = s.ctime,
- ctime_nsec = s.ctime_nsec,
- size = s.size,
- dev = s.dev,
- ino = s.ino,
- }
- -- update hashcache and the hashcachefile
- -- TBD: mark hashcache dirty and write hashcachefile once.
- info.hashcache[id] = hce
- hashcache_write(info) -- an error here is not fatal
-
- return fileid
-end
-
--- verify that remote files match the checksum. The check is skipped when
-- check-remote is not enabled or cache is not enabled.
-- @param info
e:cat(err.new("Could not extract digest from digest table"))
end
elseif u.transport == "file" then
- hc, re = hash.hash_start()
- if not hc then
- return false, e:cat(re)
- end
- rc, re = hash.hash_file(hc, e2lib.join("/", u.path))
- if not rc then
- return false, e:cat(re)
- end
- remote_fileid, re = hash.hash_finish(hc)
+ remote_fileid, re = hash.hash_file_once(e2lib.join("/", u.path))
if not remote_fileid then
return false, e:cat(re)
end
return false, e:cat(re)
end
- hc, re = hash.hash_start()
- if not hc then
- return false, e:cat(re)
- end
- rc, re = hash.hash_file(hc, tmpfile)
- if not rc then
- return false, e:cat(re)
- end
- remote_fileid, re = hash.hash_finish(hc)
+ remote_fileid, re = hash.hash_file_once(tmpfile)
if not remote_fileid then
return false, e:cat(re)
end
-- @return fileid string: hash value, or nil
-- @return an error object on failure
function e2tool.fileid(info, file)
- local fileid
- local re
- local e = err.new("error calculating file id for file: %s:%s",
- file.server, file.location)
+ local rc, re, e, fileid, path
+ local cache_flags = { cache = true }
+
+ e = err.new("error calculating file id for file: %s:%s",
+ file.server, file.location)
+
if file.sha1 then
fileid = file.sha1
else
- fileid, re = hashcache(info, file)
+ rc, re = info.cache:cache_file(file.server, file.location, cache_flags)
+ if not rc then
+ return false, e:cat(re)
+ end
+
+ path, re = info.cache:file_path(file.server, file.location, cache_flags)
+ if not path then
+ return false, e:cat(re)
+ end
+
+ fileid, re = hash.hash_file_once(path)
if not fileid then
- return nil, e:cat(re)
+ return false, e:cat(re)
end
end
- local rc, re = verify_remote_fileid(info, file, fileid)
+
+ rc, re = verify_remote_fileid(info, file, fileid)
if not rc then
- return nil, re
+ return false, e:cat(re)
end
+
return fileid
end