---- Hash module with built-in caching.
--- @module generic.hash
+--- Hash module.
+-- @module local.hash
-- Copyright (C) 2007-2016 emlix GmbH, see file AUTHORS
--
local strict = require("strict")
local trace = require("trace")
---- The hashcache lookup dictionary.
-local hcachedict = false
---- Path to the persistent storage file.
-local hcachestorage = false
-
---- Internal hash cache entry dictionary.
--- @table hce
--- @field dev See stat.
--- @field ino See stat.
--- @field size See stat.
--- @field mtime See stat.
--- @field mtime_nsec See stat.
--- @field ctime See stat.
--- @field ctime_nsec See stat.
--- @field hash SHA-1 checksum
--- @field hit Count cache hits.
-
---- Load the persistent hashcache file into the in-memory lookup dictionary.
--- Fails if the hashcache dictionary has already been initialised. The
--- filename is remembered as storage location before loading is attempted, so
--- hcache_store() can write to it later even when loading fails.
--- The file is loaded as a Lua chunk, run with an empty environment, and must
--- return a table mapping paths to hash cache entries (see hce). If any entry
--- fails validation, the whole cache is discarded.
--- @param filename Path to hashcache file. If filename does not exist, it
--- will be created when calling hcache_store().
--- @return True on success, false on error. Errors only have an effect on
--- performance, and should usually be ignored.
--- @return Error object on failure.
--- @see hcache_store
-function hash.hcache_load(filename)
- local rc, re, hctab, chunk, msg
-
- if hcachedict then
- return false, err.new("hashcache already initialised")
- end
-
- hcachestorage = filename
-
- hctab = {}
- chunk, msg = loadfile(filename)
- if not chunk then
- return false, err.new("loading hashcache %q failed: %s", filename, msg)
- end
-
- -- set empty environment for this chunk
- setfenv(chunk, {})
- hctab = chunk()
- if type(hctab) ~= "table" then
- return false, err.new("ignoring malformed hashcache %q", filename)
- end
-
- for path,hce in pairs(hctab) do
- if type(path) == "string" and #path > 0
- and type(hce.hash) == "string" and #hce.hash == 40
- and type(hce.mtime) == "number"
- and type(hce.mtime_nsec) == "number"
- and type(hce.ctime) == "number"
- and type(hce.ctime_nsec) == "number"
- and type(hce.size) == "number"
- and type(hce.dev) == "number"
- and type(hce.ino) == "number"
- and type(hce.hit) == "number" then
-
- if not hcachedict then
- hcachedict = {}
- end
-
- hcachedict[path] = {
- hash = hce.hash,
- mtime = hce.mtime,
- mtime_nsec = hce.mtime_nsec,
- ctime = hce.ctime,
- ctime_nsec = hce.ctime_nsec,
- size = hce.size,
- dev = hce.dev,
- ino = hce.ino,
- hit = hce.hit,
- }
- else
- hcachedict = false
- return false,
- err.new("malformed hashcache entry, ignoring %q", filename)
- end
- end
-
- return true
-end
-
---- Save the hashcache to persistent storage, for later use. The hashcache file
--- location is set by calling hcache_load().
--- Does nothing and returns true when there is no hashcache dictionary or no
--- storage location. Otherwise the entries are serialised as a Lua chunk that
--- returns a table, ordered by descending hit count. Serialisation stops
--- after the first entry past position 10000 that has a hit count of zero.
--- @return True on success, false on error. Errors should usually be ignored.
--- @return Error object on failure.
--- @see hcache_load
-function hash.hcache_store()
- local rc, re, hcachevec, e, out
-
- if not hcachedict or not hcachestorage then
- return true
- end
-
- hcachevec = {}
- for path,hce in pairs(hcachedict) do
- table.insert(hcachevec, {path=path, hce=hce})
- end
-
- local function comp(t1, t2)
- if t1.hce.hit > t2.hce.hit then
- return true
- end
- return false
- end
-
- table.sort(hcachevec, comp)
-
- out = { "return {\n" }
- for i,v in ipairs(hcachevec) do
- table.insert(out,
- string.format(
- "[%q] = { hash=%q, mtime=%d, mtime_nsec=%d, ctime=%d, " ..
- "ctime_nsec=%d, size=%d, dev=%d, ino=%d, hit=%d },\n",
- v.path, v.hce.hash, v.hce.mtime, v.hce.mtime_nsec, v.hce.ctime,
- v.hce.ctime_nsec, v.hce.size, v.hce.dev, v.hce.ino, v.hce.hit))
-
- if v.hce.hit == 0 and i > 10000 then
- break
- end
- end
- table.insert(out, "}\n")
-
- rc, re = eio.file_write(hcachestorage, table.concat(out))
- if not rc then
- e = err.new("writing hashcache file")
- return false, e:cat(re)
- end
-
- return true
-end
-
--- Create a hash context. Throws error object on failure.
-- @return Hash context object.
function hash.hash_start()
hash.hash_append(hc, data .. "\n")
end
---- Look up the checksum for a file in the hashcache.
--- A cached checksum is only returned when e2lib.exists() and e2lib.stat()
--- succeed for path and the current stat fields (mtime, mtime_nsec, ctime,
--- ctime_nsec, size, dev, ino) all equal the values stored in the cache entry.
--- A successful lookup increments the hit counter of the entry.
--- @param path Absolute path to the file.
--- @return Checksum or false if path is not in the cache or an error occurred.
-local function hcache_lookup(path)
- local sb, hce
-
- if not hcachedict then
- return false
- end
-
- -- Try not to return checksums for files which are inaccessible.
- if not e2lib.exists(path, false) then
- return false
- end
-
- sb = e2lib.stat(path)
- if not sb then
- return false
- end
-
- hce = hcachedict[path]
- if not hce
- or hce.mtime ~= sb.mtime
- or hce.mtime_nsec ~= sb.mtime_nsec
- or hce.ctime ~= sb.ctime
- or hce.ctime_nsec ~= sb.ctime_nsec
- or hce.size ~= sb.size
- or hce.dev ~= sb.dev
- or hce.ino ~= sb.ino then
-
- return false
- end
-
- hce.hit = hce.hit + 1
- return hce.hash
-end
-
---- Add file and checksum to the hashcache.
--- Creates the hashcache dictionary if it does not exist yet, then stores the
--- checksum together with the current stat fields of the file and a hit count
--- of 0, replacing any previous entry for path. Both arguments are checked
--- with assert().
--- @param path Path to the file, a non-empty string.
--- @param hash SHA1 checksum string, length 40.
--- @return True on success, false if e2lib.stat() fails for path.
-local function hcache_add(path, hash)
- assert(type(path) == "string" and #path > 0)
- assert(type(hash) == "string" and #hash == 40)
-
- local sb
-
- if not hcachedict then
- hcachedict = {}
- end
-
- sb = e2lib.stat(path)
- if not sb then
- return false
- end
-
- hcachedict[path] = {
- hash = hash,
- mtime = sb.mtime,
- mtime_nsec = sb.mtime_nsec,
- ctime = sb.ctime,
- ctime_nsec = sb.ctime_nsec,
- size = sb.size,
- dev = sb.dev,
- ino = sb.ino,
- hit = 0,
- }
-
- return true
-end
-
--- Get checksum and release hash context. Throws error object on failure.
-- @param hc the hash context
-- @return SHA1 Checksum.