diff options
| -rw-r--r-- | cache.c | 427 | ||||
| -rw-r--r-- | cache.h | 38 | ||||
| -rw-r--r-- | cgit.c | 166 | ||||
| -rw-r--r-- | cgit.h | 1 | ||||
| -rw-r--r-- | cmd.c | 21 | ||||
| -rwxr-xr-x | tests/setup.sh | 2 | ||||
| -rwxr-xr-x | tests/t0020-validate-cache.sh | 67 |
7 files changed, 506 insertions, 216 deletions
| @@ -4,117 +4,408 @@ | |||
| 4 | * | 4 | * |
| 5 | * Licensed under GNU General Public License v2 | 5 | * Licensed under GNU General Public License v2 |
| 6 | * (see COPYING for full license text) | 6 | * (see COPYING for full license text) |
| 7 | * | ||
| 8 | * | ||
| 9 | * The cache is just a directory structure where each file is a cache slot, | ||
| 10 | * and each filename is based on the hash of some key (e.g. the cgit url). | ||
| 11 | * Each file contains the full key followed by the cached content for that | ||
| 12 | * key. | ||
| 13 | * | ||
| 7 | */ | 14 | */ |
| 8 | 15 | ||
| 9 | #include "cgit.h" | 16 | #include "cgit.h" |
| 10 | #include "cache.h" | 17 | #include "cache.h" |
| 11 | 18 | ||
| 12 | const int NOLOCK = -1; | 19 | #define CACHE_BUFSIZE (1024 * 4) |
| 13 | 20 | ||
| 14 | char *cache_safe_filename(const char *unsafe) | 21 | struct cache_slot { |
| 22 | const char *key; | ||
| 23 | int keylen; | ||
| 24 | int ttl; | ||
| 25 | cache_fill_fn fn; | ||
| 26 | void *cbdata; | ||
| 27 | int cache_fd; | ||
| 28 | int lock_fd; | ||
| 29 | const char *cache_name; | ||
| 30 | const char *lock_name; | ||
| 31 | int match; | ||
| 32 | struct stat cache_st; | ||
| 33 | struct stat lock_st; | ||
| 34 | int bufsize; | ||
| 35 | char buf[CACHE_BUFSIZE]; | ||
| 36 | }; | ||
| 37 | |||
| 38 | /* Open an existing cache slot and fill the cache buffer with | ||
| 39 | * (part of) the content of the cache file. Return 0 on success | ||
| 40 | * and errno otherwise. | ||
| 41 | */ | ||
| 42 | static int open_slot(struct cache_slot *slot) | ||
| 15 | { | 43 | { |
| 16 | static char buf[4][PATH_MAX]; | 44 | char *bufz; |
| 17 | static int bufidx; | 45 | int bufkeylen = -1; |
| 18 | char *s; | 46 | |
| 19 | char c; | 47 | slot->cache_fd = open(slot->cache_name, O_RDONLY); |
| 20 | 48 | if (slot->cache_fd == -1) | |
| 21 | bufidx++; | 49 | return errno; |
| 22 | bufidx &= 3; | 50 | |
| 23 | s = buf[bufidx]; | 51 | if (fstat(slot->cache_fd, &slot->cache_st)) |
| 24 | 52 | return errno; | |
| 25 | while(unsafe && (c = *unsafe++) != 0) { | 53 | |
| 26 | if (c == '/' || c == ' ' || c == '&' || c == '|' || | 54 | slot->bufsize = read(slot->cache_fd, slot->buf, sizeof(slot->buf)); |
| 27 | c == '>' || c == '<' || c == '.') | 55 | if (slot->bufsize == 0) |
| 28 | c = '_'; | 56 | return errno; |
| 29 | *s++ = c; | 57 | |
| 30 | } | 58 | bufz = memchr(slot->buf, 0, slot->bufsize); |
| 31 | *s = '\0'; | 59 | if (bufz) |
| 32 | return buf[bufidx]; | 60 | bufkeylen = bufz - slot->buf; |
| 61 | |||
| 62 | slot->match = bufkeylen == slot->keylen && | ||
| 63 | !memcmp(slot->key, slot->buf, bufkeylen + 1); | ||
| 64 | |||
| 65 | return 0; | ||
| 33 | } | 66 | } |
| 34 | 67 | ||
| 35 | int cache_exist(struct cacheitem *item) | 68 | /* Close the active cache slot */ |
| 69 | static void close_slot(struct cache_slot *slot) | ||
| 36 | { | 70 | { |
| 37 | if (stat(item->name, &item->st)) { | 71 | if (slot->cache_fd > 0) { |
| 38 | item->st.st_mtime = 0; | 72 | close(slot->cache_fd); |
| 39 | return 0; | 73 | slot->cache_fd = -1; |
| 40 | } | 74 | } |
| 41 | return 1; | ||
| 42 | } | 75 | } |
| 43 | 76 | ||
| 44 | int cache_create_dirs() | 77 | /* Print the content of the active cache slot (but skip the key). */ |
| 78 | static int print_slot(struct cache_slot *slot) | ||
| 45 | { | 79 | { |
| 46 | char *path; | 80 | ssize_t i, j = 0; |
| 81 | |||
| 82 | i = lseek(slot->cache_fd, slot->keylen + 1, SEEK_SET); | ||
| 83 | if (i != slot->keylen + 1) | ||
| 84 | return errno; | ||
| 85 | |||
| 86 | while((i=read(slot->cache_fd, slot->buf, sizeof(slot->buf))) > 0) | ||
| 87 | j = write(STDOUT_FILENO, slot->buf, i); | ||
| 47 | 88 | ||
| 48 | path = fmt("%s", ctx.cfg.cache_root); | 89 | if (j < 0) |
| 49 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 90 | return errno; |
| 91 | else | ||
| 50 | return 0; | 92 | return 0; |
| 93 | } | ||
| 51 | 94 | ||
| 52 | if (!ctx.repo) | 95 | /* Check if the slot has expired */ |
| 96 | static int is_expired(struct cache_slot *slot) | ||
| 97 | { | ||
| 98 | if (slot->ttl < 0) | ||
| 53 | return 0; | 99 | return 0; |
| 100 | else | ||
| 101 | return slot->cache_st.st_mtime + slot->ttl*60 < time(NULL); | ||
| 102 | } | ||
| 54 | 103 | ||
| 55 | path = fmt("%s/%s", ctx.cfg.cache_root, | 104 | /* Check if the slot has been modified since we opened it. |
| 56 | cache_safe_filename(ctx.repo->url)); | 105 | * NB: If stat() fails, we pretend the file is modified. |
| 106 | */ | ||
| 107 | static int is_modified(struct cache_slot *slot) | ||
| 108 | { | ||
| 109 | struct stat st; | ||
| 57 | 110 | ||
| 58 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 111 | if (stat(slot->cache_name, &st)) |
| 59 | return 0; | 112 | return 1; |
| 113 | return (st.st_ino != slot->cache_st.st_ino || | ||
| 114 | st.st_mtime != slot->cache_st.st_mtime || | ||
| 115 | st.st_size != slot->cache_st.st_size); | ||
| 116 | } | ||
| 60 | 117 | ||
| 61 | if (ctx.qry.page) { | 118 | /* Close an open lockfile */ |
| 62 | path = fmt("%s/%s/%s", ctx.cfg.cache_root, | 119 | static void close_lock(struct cache_slot *slot) |
| 63 | cache_safe_filename(ctx.repo->url), | 120 | { |
| 64 | ctx.qry.page); | 121 | if (slot->lock_fd > 0) { |
| 65 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 122 | close(slot->lock_fd); |
| 66 | return 0; | 123 | slot->lock_fd = -1; |
| 67 | } | 124 | } |
| 68 | return 1; | ||
| 69 | } | 125 | } |
| 70 | 126 | ||
| 71 | int cache_refill_overdue(const char *lockfile) | 127 | /* Create a lockfile used to store the generated content for a cache |
| 128 | * slot, and write the slot key + \0 into it. | ||
| 129 | * Returns 0 on success and errno otherwise. | ||
| 130 | */ | ||
| 131 | static int lock_slot(struct cache_slot *slot) | ||
| 72 | { | 132 | { |
| 73 | struct stat st; | 133 | slot->lock_fd = open(slot->lock_name, O_RDWR|O_CREAT|O_EXCL, |
| 134 | S_IRUSR|S_IWUSR); | ||
| 135 | if (slot->lock_fd == -1) | ||
| 136 | return errno; | ||
| 137 | write(slot->lock_fd, slot->key, slot->keylen + 1); | ||
| 138 | return 0; | ||
| 139 | } | ||
| 74 | 140 | ||
| 75 | if (stat(lockfile, &st)) | 141 | /* Release the current lockfile. If `replace_old_slot` is set the |
| 76 | return 0; | 142 | * lockfile replaces the old cache slot, otherwise the lockfile is |
| 143 | * just deleted. | ||
| 144 | */ | ||
| 145 | static int unlock_slot(struct cache_slot *slot, int replace_old_slot) | ||
| 146 | { | ||
| 147 | int err; | ||
| 148 | |||
| 149 | if (replace_old_slot) | ||
| 150 | err = rename(slot->lock_name, slot->cache_name); | ||
| 77 | else | 151 | else |
| 78 | return (time(NULL) - st.st_mtime > ctx.cfg.cache_max_create_time); | 152 | err = unlink(slot->lock_name); |
| 153 | return err; | ||
| 79 | } | 154 | } |
| 80 | 155 | ||
| 81 | int cache_lock(struct cacheitem *item) | 156 | /* Generate the content for the current cache slot by redirecting |
| 157 | * stdout to the lock-fd and invoking the callback function | ||
| 158 | */ | ||
| 159 | static int fill_slot(struct cache_slot *slot) | ||
| 82 | { | 160 | { |
| 83 | int i = 0; | 161 | int tmp; |
| 84 | char *lockfile = xstrdup(fmt("%s.lock", item->name)); | ||
| 85 | 162 | ||
| 86 | top: | 163 | /* Preserve stdout */ |
| 87 | if (++i > ctx.cfg.max_lock_attempts) | 164 | tmp = dup(STDOUT_FILENO); |
| 88 | die("cache_lock: unable to lock %s: %s", | 165 | if (tmp == -1) |
| 89 | item->name, strerror(errno)); | 166 | return errno; |
| 90 | 167 | ||
| 91 | item->fd = open(lockfile, O_WRONLY|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR); | 168 | /* Redirect stdout to lockfile */ |
| 169 | if (dup2(slot->lock_fd, STDOUT_FILENO) == -1) | ||
| 170 | return errno; | ||
| 92 | 171 | ||
| 93 | if (item->fd == NOLOCK && errno == ENOENT && cache_create_dirs()) | 172 | /* Generate cache content */ |
| 94 | goto top; | 173 | slot->fn(slot->cbdata); |
| 95 | 174 | ||
| 96 | if (item->fd == NOLOCK && errno == EEXIST && | 175 | /* Restore stdout */ |
| 97 | cache_refill_overdue(lockfile) && !unlink(lockfile)) | 176 | if (dup2(tmp, STDOUT_FILENO) == -1) |
| 98 | goto top; | 177 | return errno; |
| 99 | 178 | ||
| 100 | free(lockfile); | 179 | /* Close the temporary filedescriptor */ |
| 101 | return (item->fd > 0); | 180 | close(tmp); |
| 181 | return 0; | ||
| 102 | } | 182 | } |
| 103 | 183 | ||
| 104 | int cache_unlock(struct cacheitem *item) | 184 | /* Crude implementation of 32-bit FNV-1 hash algorithm, |
| 185 | * see http://www.isthe.com/chongo/tech/comp/fnv/ for details | ||
| 186 | * about the magic numbers. | ||
| 187 | */ | ||
| 188 | #define FNV_OFFSET 0x811c9dc5 | ||
| 189 | #define FNV_PRIME 0x01000193 | ||
| 190 | |||
| 191 | unsigned long hash_str(const char *str) | ||
| 105 | { | 192 | { |
| 106 | close(item->fd); | 193 | unsigned long h = FNV_OFFSET; |
| 107 | return (rename(fmt("%s.lock", item->name), item->name) == 0); | 194 | unsigned char *s = (unsigned char *)str; |
| 195 | |||
| 196 | if (!s) | ||
| 197 | return h; | ||
| 198 | |||
| 199 | while(*s) { | ||
| 200 | h *= FNV_PRIME; | ||
| 201 | h ^= *s++; | ||
| 202 | } | ||
| 203 | return h; | ||
| 108 | } | 204 | } |
| 109 | 205 | ||
| 110 | int cache_cancel_lock(struct cacheitem *item) | 206 | static int process_slot(struct cache_slot *slot) |
| 111 | { | 207 | { |
| 112 | return (unlink(fmt("%s.lock", item->name)) == 0); | 208 | int err; |
| 209 | |||
| 210 | err = open_slot(slot); | ||
| 211 | if (!err && slot->match) { | ||
| 212 | if (is_expired(slot)) { | ||
| 213 | if (!lock_slot(slot)) { | ||
| 214 | /* If the cachefile has been replaced between | ||
| 215 | * `open_slot` and `lock_slot`, we'll just | ||
| 216 | * serve the stale content from the original | ||
| 217 | * cachefile. This way we avoid pruning the | ||
| 218 | * newly generated slot. The same code-path | ||
| 219 | * is chosen if fill_slot() fails for some | ||
| 220 | * reason. | ||
| 221 | * | ||
| 222 | * TODO? check if the new slot contains the | ||
| 223 | * same key as the old one, since we would | ||
| 224 | * prefer to serve the newest content. | ||
| 225 | * This will require us to open yet another | ||
| 226 | * file-descriptor and read and compare the | ||
| 227 | * key from the new file, so for now we're | ||
| 228 | * lazy and just ignore the new file. | ||
| 229 | */ | ||
| 230 | if (is_modified(slot) || fill_slot(slot)) { | ||
| 231 | unlock_slot(slot, 0); | ||
| 232 | close_lock(slot); | ||
| 233 | } else { | ||
| 234 | close_slot(slot); | ||
| 235 | unlock_slot(slot, 1); | ||
| 236 | slot->cache_fd = slot->lock_fd; | ||
| 237 | } | ||
| 238 | } | ||
| 239 | } | ||
| 240 | print_slot(slot); | ||
| 241 | close_slot(slot); | ||
| 242 | return 0; | ||
| 243 | } | ||
| 244 | |||
| 245 | /* If the cache slot does not exist (or its key doesn't match the | ||
| 246 | * current key), lets try to create a new cache slot for this | ||
| 247 | * request. If this fails (for whatever reason), lets just generate | ||
| 248 | * the content without caching it and fool the caller to belive | ||
| 249 | * everything worked out (but print a warning on stdout). | ||
| 250 | */ | ||
| 251 | |||
| 252 | close_slot(slot); | ||
| 253 | if ((err = lock_slot(slot)) != 0) { | ||
| 254 | cache_log("[cgit] Unable to lock slot %s: %s (%d)\n", | ||
| 255 | slot->lock_name, strerror(err), err); | ||
| 256 | slot->fn(slot->cbdata); | ||
| 257 | return 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | if ((err = fill_slot(slot)) != 0) { | ||
| 261 | cache_log("[cgit] Unable to fill slot %s: %s (%d)\n", | ||
| 262 | slot->lock_name, strerror(err), err); | ||
| 263 | unlock_slot(slot, 0); | ||
| 264 | close_lock(slot); | ||
| 265 | slot->fn(slot->cbdata); | ||
| 266 | return 0; | ||
| 267 | } | ||
| 268 | // We've got a valid cache slot in the lock file, which | ||
| 269 | // is about to replace the old cache slot. But if we | ||
| 270 | // release the lockfile and then try to open the new cache | ||
| 271 | // slot, we might get a race condition with a concurrent | ||
| 272 | // writer for the same cache slot (with a different key). | ||
| 273 | // Lets avoid such a race by just printing the content of | ||
| 274 | // the lock file. | ||
| 275 | slot->cache_fd = slot->lock_fd; | ||
| 276 | unlock_slot(slot, 1); | ||
| 277 | err = print_slot(slot); | ||
| 278 | close_slot(slot); | ||
| 279 | return err; | ||
| 113 | } | 280 | } |
| 114 | 281 | ||
| 115 | int cache_expired(struct cacheitem *item) | 282 | /* Print cached content to stdout, generate the content if necessary. */ |
| 283 | int cache_process(int size, const char *path, const char *key, int ttl, | ||
| 284 | cache_fill_fn fn, void *cbdata) | ||
| 116 | { | 285 | { |
| 117 | if (item->ttl < 0) | 286 | unsigned long hash; |
| 287 | int len, i; | ||
| 288 | char filename[1024]; | ||
| 289 | char lockname[1024 + 5]; /* 5 = ".lock" */ | ||
| 290 | struct cache_slot slot; | ||
| 291 | |||
| 292 | /* If the cache is disabled, just generate the content */ | ||
| 293 | if (size <= 0) { | ||
| 294 | fn(cbdata); | ||
| 295 | return 0; | ||
| 296 | } | ||
| 297 | |||
| 298 | /* Verify input, calculate filenames */ | ||
| 299 | if (!path) { | ||
| 300 | cache_log("[cgit] Cache path not specified, caching is disabled\n"); | ||
| 301 | fn(cbdata); | ||
| 118 | return 0; | 302 | return 0; |
| 119 | return item->st.st_mtime + item->ttl * 60 < time(NULL); | 303 | } |
| 304 | len = strlen(path); | ||
| 305 | if (len > sizeof(filename) - 10) { /* 10 = "/01234567\0" */ | ||
| 306 | cache_log("[cgit] Cache path too long, caching is disabled: %s\n", | ||
| 307 | path); | ||
| 308 | fn(cbdata); | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | if (!key) | ||
| 312 | key = ""; | ||
| 313 | hash = hash_str(key) % size; | ||
| 314 | strcpy(filename, path); | ||
| 315 | if (filename[len - 1] != '/') | ||
| 316 | filename[len++] = '/'; | ||
| 317 | for(i = 0; i < 8; i++) { | ||
| 318 | sprintf(filename + len++, "%x", | ||
| 319 | (unsigned char)(hash & 0xf)); | ||
| 320 | hash >>= 4; | ||
| 321 | } | ||
| 322 | filename[len] = '\0'; | ||
| 323 | strcpy(lockname, filename); | ||
| 324 | strcpy(lockname + len, ".lock"); | ||
| 325 | slot.fn = fn; | ||
| 326 | slot.cbdata = cbdata; | ||
| 327 | slot.ttl = ttl; | ||
| 328 | slot.cache_name = filename; | ||
| 329 | slot.lock_name = lockname; | ||
| 330 | slot.key = key; | ||
| 331 | slot.keylen = strlen(key); | ||
| 332 | return process_slot(&slot); | ||
| 120 | } | 333 | } |
| 334 | |||
| 335 | /* Return a strftime formatted date/time | ||
| 336 | * NB: the result from this function is to shared memory | ||
| 337 | */ | ||
| 338 | char *sprintftime(const char *format, time_t time) | ||
| 339 | { | ||
| 340 | static char buf[64]; | ||
| 341 | struct tm *tm; | ||
| 342 | |||
| 343 | if (!time) | ||
| 344 | return NULL; | ||
| 345 | tm = gmtime(&time); | ||
| 346 | strftime(buf, sizeof(buf)-1, format, tm); | ||
| 347 | return buf; | ||
| 348 | } | ||
| 349 | |||
| 350 | int cache_ls(const char *path) | ||
| 351 | { | ||
| 352 | DIR *dir; | ||
| 353 | struct dirent *ent; | ||
| 354 | int err = 0; | ||
| 355 | struct cache_slot slot; | ||
| 356 | char fullname[1024]; | ||
| 357 | char *name; | ||
| 358 | |||
| 359 | if (!path) { | ||
| 360 | cache_log("[cgit] cache path not specified\n"); | ||
