In my tests, it turns out that the file name of the cache file is not exactly the SHA-1 of the URL, but it is the SHA-1 of the URL prefixed by something I believe the source code in mozilla/gecko-dev calls "storageID", and a colon is inserted between the prefix and the URL.
At least on my machine, this storageID starts with `O^partitionKey=` and then continues with the URI-encoded version of `(https,<domain>)`, followed by a literal comma, where the domain was derived from the URL (skipping subdomain prefixes) and the URI-encoding was done according to RFC3986, i.e. encoding also the parentheses.
Example: https://www.mozilla.org/en-US/ would be turned into `O^partitionKey=%28https%2Cmozilla.org%29,:https://www.mozilla.org/en-US/`, translating to the file name 1B859BB06A1B06364AF4788A20B4E37A32557DD8.
If this works elsewhere, too, I believe it would be a nice feature for FirefoxCache2 to determine the cache file name automatically from the URL.
(Note: If you use multi-account containers, like I do, things seem to get a bit more complicated because the prefix includes something like `userContextId=2&` after the `O^` and before the `partitionKey=` part.)
Since my Python-fu is very, very rusty, let me describe the proposed algorithm in a node.js snippet:
/**
 * Derives the cache2 entry file name for a URL.
 *
 * The name is the upper-case hex SHA-1 of the string
 * `O^partitionKey=<encoded "(scheme,domain)">,:<url>`, where `domain` is the
 * last two dot-separated labels of everything between `://` and the first `/`.
 *
 * NOTE(review): because only the last two labels are kept, hosts under a
 * multi-label suffix (e.g. `co.uk`) and hosts with an explicit port are
 * presumably not keyed the way Firefox keys them - confirm before relying on
 * this for such URLs.
 *
 * @param {string} url - Absolute http:// or https:// URL.
 * @returns {string} 40-character upper-case hexadecimal SHA-1 digest.
 * @throws {Error} If the URL does not match the expected http(s) pattern.
 */
const getCacheFileName = url => {
  const urlParts = url.match(/^(https?):\/\/([^/]+\.)?([^./]+\.[^./]+)(\/|$)/)
  if (urlParts === null) {
    throw new Error(`Unhandled URL: ${url}`)
  }

  // Group 1 is the scheme, group 3 the two right-most host labels.
  const scheme = urlParts[1]
  const baseDomain = urlParts[3]
  const partitionKey = encodeRFC3986URIComponent(`(${scheme},${baseDomain})`)
  const storageKey = `O^partitionKey=${partitionKey},:${url}`

  return crypto
    .createHash('sha1')
    .update(storageKey)
    .digest('hex')
    .toUpperCase()
}
This relies on the custom `encodeRFC3986URIComponent()` function:
/**
 * Percent-encodes a string like `encodeURIComponent`, and additionally
 * escapes the characters `!`, `'`, `(`, `)` and `*`, which
 * `encodeURIComponent` leaves untouched.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Encoded text with upper-case hex escapes.
 */
const encodeRFC3986URIComponent = (str) => {
  // Turns a single character into its %XX escape (upper-case hex).
  const toPercentEscape = (ch) => {
    const hex = ch.charCodeAt(0).toString(16).toUpperCase()
    return `%${hex}`
  }

  const encoded = encodeURIComponent(str)
  return encoded.replace(/[!'()*]/g, (ch) => toPercentEscape(ch))
}
Here is a not completely polished node.js script that Works For Me™️:
// This script takes a file from
// $LOCALAPPDATA/Mozilla/Firefox/Profiles/*/cache2/entries/ and parses part of
// its contents, in particular the "key" (which consists of a storage ID, a
// colon, and the actual URL).
//
// Alternatively, it takes a URL and determines the file name from it.
//
// Usage: node <this-script> <path-to-cache-entry | http(s)-URL>
//
// On failure the error is printed to stderr and the exit code is set to 1.
(async () => {
  const fs = require('fs/promises')
  const crypto = require('crypto')

  // The entry's data is divided into chunks of 2^18 bytes when counting chunks.
  const CHUNK_SIZE = 1 << 18
  // The script decodes eight consecutive big-endian uint32 header fields.
  const METADATA_FIELD_COUNT = 8
  const METADATA_HEADER_SIZE = METADATA_FIELD_COUNT * 4

  const firefoxProfilesPath = `${process.env.LOCALAPPDATA}/Mozilla/Firefox/Profiles`

  // Returns the name of the most recently modified directory below
  // firefoxProfilesPath; throws if the location is unknown or holds no directory.
  const getLatestFirefoxProfile = async () => {
    if (!process.env.LOCALAPPDATA) {
      throw new Error('LOCALAPPDATA is not set; pass the path of a cache entry instead of a URL')
    }
    let latestProfile
    let latestMTime = -Infinity
    for (const dir of await fs.readdir(firefoxProfilesPath)) {
      const stats = await fs.stat(`${firefoxProfilesPath}/${dir}`)
      if (stats.isDirectory() && stats.mtimeMs > latestMTime) {
        latestProfile = dir
        latestMTime = stats.mtimeMs
      }
    }
    if (latestProfile === undefined) {
      throw new Error(`No Firefox profile directory found in ${firefoxProfilesPath}`)
    }
    return latestProfile
  }

  // Like encodeURIComponent, but also escapes ! ' ( ) * (upper-case hex).
  const encodeRFC3986URIComponent = (str) =>
    encodeURIComponent(str)
      .replace(/[!'()*]/g, c => `%${c.charCodeAt(0).toString(16).toUpperCase()}`)

  // Upper-case hex SHA-1 of `O^partitionKey=<encoded (scheme,domain)>,:<url>`,
  // where domain is the last two dot-separated labels of the host part.
  // NOTE(review): multi-label suffixes (co.uk) and explicit ports are not
  // treated specially - confirm against Firefox before relying on them.
  const getCacheFileName = url => {
    const match = url.match(/^(https?):\/\/([^/]+\.)?([^./]+\.[^./]+)(\/|$)/)
    if (!match) throw new Error(`Unhandled URL: ${url}`)
    const sha1 = crypto.createHash('sha1')
    sha1.update(`O^partitionKey=${encodeRFC3986URIComponent(`(${match[1]},${match[3]})`)},:${url}`)
    return sha1.digest('hex').toUpperCase()
  }

  // Resolves the cache entry path for a URL inside the latest profile. The
  // file name is computed first so that an unsupported URL fails before any
  // file system access happens.
  const getCacheFilePath = async url => {
    const fileName = getCacheFileName(url)
    const firefoxProfile = await getLatestFirefoxProfile()
    return `${firefoxProfilesPath}/${firefoxProfile}/cache2/entries/${fileName}`
  }

  // Reads exactly `length` bytes at `position`; throws on a short read so that
  // truncated or foreign files are reported instead of decoded as zeros.
  const readExactly = async (handle, length, position) => {
    if (length === 0) return Buffer.alloc(0)
    const { bytesRead, buffer } = await handle.read(Buffer.alloc(length), 0, length, position)
    if (bytesRead !== length) {
      throw new Error(`Truncated cache entry: wanted ${length} bytes at offset ${position}, got ${bytesRead}`)
    }
    return buffer
  }

  const arg = process.argv[2]
  if (!arg) {
    throw new Error('Missing argument: pass the path of a cache entry or an http(s) URL')
  }
  // Anything that looks like an http(s) URL is mapped to its cache entry;
  // every other argument is taken as a file path as-is. The profile directory
  // is only looked up in the URL case.
  const path = /^https?:\/\//.test(arg) ? await getCacheFilePath(arg) : arg

  const handle = await fs.open(path)
  try {
    const stat = await handle.stat()
    if (stat.size < 4) {
      throw new Error(`Not a cache entry (only ${stat.size} bytes): ${path}`)
    }

    // The last four bytes of the file hold the size of the data section.
    const realSize = (await readExactly(handle, 4, stat.size - 4)).readUInt32BE(0)
    // Math.ceil instead of `>> 18`: the shift would wrap for sums >= 2^31.
    const chunkCount = Math.ceil(realSize / CHUNK_SIZE)
    // The header starts after the data, 4 more bytes, and 2 bytes per chunk
    // (presumably checksums - confirm against Gecko's cache2 sources).
    const headerOffset = realSize + 4 + 2 * chunkCount

    const header = await readExactly(handle, METADATA_HEADER_SIZE, headerOffset)
    const fields = Array.from({ length: METADATA_FIELD_COUNT }, (_, i) => header.readUInt32BE(4 * i))
    // Field 4 (frecency) is part of the layout but not reported.
    const [version, fetchCount, lastFetch, mtime, /* frecency */, expirationTime, keySize, flags] = fields

    const key = (await readExactly(handle, keySize, headerOffset + METADATA_HEADER_SIZE)).toString('utf-8')
    console.log(`size: ${stat.size}, real size: ${realSize}, chunk count: ${chunkCount}, version: ${version}, fetchCount: ${fetchCount}, lastFetch: ${new Date(lastFetch * 1000)}/${lastFetch}, mtime: ${new Date(mtime * 1000)}, expires: ${new Date(expirationTime * 1000)}, keySize: ${keySize}, flags: ${flags}, key: '${key}'`)
  } finally {
    // Always release the descriptor, also when parsing fails.
    await handle.close()
  }
})().catch(err => {
  console.error(err)
  process.exitCode = 1
})