diff options
-rw-r--r-- | cache.c | 359 | ||||
-rw-r--r-- | cache.h | 33 | ||||
-rw-r--r-- | cgit.c | 166 | ||||
-rw-r--r-- | cgit.h | 1 | ||||
-rwxr-xr-x | tests/setup.sh | 2 | ||||
-rwxr-xr-x | tests/t0020-validate-cache.sh | 67 |
6 files changed, 418 insertions, 210 deletions
@@ -4,117 +4,340 @@ | |||
4 | * | 4 | * |
5 | * Licensed under GNU General Public License v2 | 5 | * Licensed under GNU General Public License v2 |
6 | * (see COPYING for full license text) | 6 | * (see COPYING for full license text) |
7 | * | ||
8 | * | ||
9 | * The cache is just a directory structure where each file is a cache slot, | ||
10 | * and each filename is based on the hash of some key (e.g. the cgit url). | ||
11 | * Each file contains the full key followed by the cached content for that | ||
12 | * key. | ||
13 | * | ||
7 | */ | 14 | */ |
8 | 15 | ||
9 | #include "cgit.h" | 16 | #include "cgit.h" |
10 | #include "cache.h" | 17 | #include "cache.h" |
11 | 18 | ||
12 | const int NOLOCK = -1; | 19 | #define CACHE_BUFSIZE (1024 * 4) |
20 | |||
21 | struct cache_slot { | ||
22 | const char *key; | ||
23 | int keylen; | ||
24 | int ttl; | ||
25 | cache_fill_fn fn; | ||
26 | void *cbdata; | ||
27 | int cache_fd; | ||
28 | int lock_fd; | ||
29 | const char *cache_name; | ||
30 | const char *lock_name; | ||
31 | int match; | ||
32 | struct stat cache_st; | ||
33 | struct stat lock_st; | ||
34 | int bufsize; | ||
35 | char buf[CACHE_BUFSIZE]; | ||
36 | }; | ||
13 | 37 | ||
14 | char *cache_safe_filename(const char *unsafe) | 38 | /* Open an existing cache slot and fill the cache buffer with |
39 | * (part of) the content of the cache file. Return 0 on success | ||
40 | * and errno otherwise. | ||
41 | */ | ||
42 | static int open_slot(struct cache_slot *slot) | ||
15 | { | 43 | { |
16 | static char buf[4][PATH_MAX]; | 44 | char *bufz; |
17 | static int bufidx; | 45 | int bufkeylen = -1; |
18 | char *s; | 46 | |
19 | char c; | 47 | slot->cache_fd = open(slot->cache_name, O_RDONLY); |
20 | 48 | if (slot->cache_fd == -1) | |
21 | bufidx++; | 49 | return errno; |
22 | bufidx &= 3; | 50 | |
23 | s = buf[bufidx]; | 51 | if (fstat(slot->cache_fd, &slot->cache_st)) |
24 | 52 | return errno; | |
25 | while(unsafe && (c = *unsafe++) != 0) { | 53 | |
26 | if (c == '/' || c == ' ' || c == '&' || c == '|' || | 54 | slot->bufsize = read(slot->cache_fd, slot->buf, sizeof(slot->buf)); |
27 | c == '>' || c == '<' || c == '.') | 55 | if (slot->bufsize == 0) |
28 | c = '_'; | 56 | return errno; |
29 | *s++ = c; | 57 | |
30 | } | 58 | bufz = memchr(slot->buf, 0, slot->bufsize); |
31 | *s = '\0'; | 59 | if (bufz) |
32 | return buf[bufidx]; | 60 | bufkeylen = bufz - slot->buf; |
61 | |||
62 | slot->match = bufkeylen == slot->keylen && | ||
63 | !memcmp(slot->key, slot->buf, bufkeylen + 1); | ||
64 | |||
65 | return 0; | ||
33 | } | 66 | } |
34 | 67 | ||
35 | int cache_exist(struct cacheitem *item) | 68 | /* Close the active cache slot */ |
69 | static void close_slot(struct cache_slot *slot) | ||
36 | { | 70 | { |
37 | if (stat(item->name, &item->st)) { | 71 | if (slot->cache_fd > 0) { |
38 | item->st.st_mtime = 0; | 72 | close(slot->cache_fd); |
39 | return 0; | 73 | slot->cache_fd = -1; |
40 | } | 74 | } |
41 | return 1; | ||
42 | } | 75 | } |
43 | 76 | ||
44 | int cache_create_dirs() | 77 | /* Print the content of the active cache slot (but skip the key). */ |
78 | static int print_slot(struct cache_slot *slot) | ||
45 | { | 79 | { |
46 | char *path; | 80 | ssize_t i, j = 0; |
81 | |||
82 | i = lseek(slot->cache_fd, slot->keylen + 1, SEEK_SET); | ||
83 | if (i != slot->keylen + 1) | ||
84 | return errno; | ||
47 | 85 | ||
48 | path = fmt("%s", ctx.cfg.cache_root); | 86 | while((i=read(slot->cache_fd, slot->buf, sizeof(slot->buf))) > 0) |
49 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 87 | j = write(STDOUT_FILENO, slot->buf, i); |
88 | |||
89 | if (j < 0) | ||
90 | return errno; | ||
91 | else | ||
50 | return 0; | 92 | return 0; |
93 | } | ||
51 | 94 | ||
52 | if (!ctx.repo) | 95 | /* Check if the slot has expired */ |
96 | static int is_expired(struct cache_slot *slot) | ||
97 | { | ||
98 | if (slot->ttl < 0) | ||
53 | return 0; | 99 | return 0; |
100 | else | ||
101 | return slot->cache_st.st_mtime + slot->ttl*60 < time(NULL); | ||
102 | } | ||
54 | 103 | ||
55 | path = fmt("%s/%s", ctx.cfg.cache_root, | 104 | /* Check if the slot has been modified since we opened it. |
56 | cache_safe_filename(ctx.repo->url)); | 105 | * NB: If stat() fails, we pretend the file is modified. |
106 | */ | ||
107 | static int is_modified(struct cache_slot *slot) | ||
108 | { | ||
109 | struct stat st; | ||
57 | 110 | ||
58 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 111 | if (stat(slot->cache_name, &st)) |
59 | return 0; | 112 | return 1; |
113 | return (st.st_ino != slot->cache_st.st_ino || | ||
114 | st.st_mtime != slot->cache_st.st_mtime || | ||
115 | st.st_size != slot->cache_st.st_size); | ||
116 | } | ||
60 | 117 | ||
61 | if (ctx.qry.page) { | 118 | /* Close an open lockfile */ |
62 | path = fmt("%s/%s/%s", ctx.cfg.cache_root, | 119 | static void close_lock(struct cache_slot *slot) |
63 | cache_safe_filename(ctx.repo->url), | 120 | { |
64 | ctx.qry.page); | 121 | if (slot->lock_fd > 0) { |
65 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 122 | close(slot->lock_fd); |
66 | return 0; | 123 | slot->lock_fd = -1; |
67 | } | 124 | } |
68 | return 1; | ||
69 | } | 125 | } |
70 | 126 | ||
71 | int cache_refill_overdue(const char *lockfile) | 127 | /* Create a lockfile used to store the generated content for a cache |
128 | * slot, and write the slot key + \0 into it. | ||
129 | * Returns 0 on success and errno otherwise. | ||
130 | */ | ||
131 | static int lock_slot(struct cache_slot *slot) | ||
72 | { | 132 | { |
73 | struct stat st; | 133 | slot->lock_fd = open(slot->lock_name, O_RDWR|O_CREAT|O_EXCL, |
134 | S_IRUSR|S_IWUSR); | ||
135 | if (slot->lock_fd == -1) | ||
136 | return errno; | ||
137 | write(slot->lock_fd, slot->key, slot->keylen + 1); | ||
138 | return 0; | ||
139 | } | ||
74 | 140 | ||
75 | if (stat(lockfile, &st)) | 141 | /* Release the current lockfile. If `replace_old_slot` is set the |
76 | return 0; | 142 | * lockfile replaces the old cache slot, otherwise the lockfile is |
143 | * just deleted. | ||
144 | */ | ||
145 | static int unlock_slot(struct cache_slot *slot, int replace_old_slot) | ||
146 | { | ||
147 | int err; | ||
148 | |||
149 | if (replace_old_slot) | ||
150 | err = rename(slot->lock_name, slot->cache_name); | ||
77 | else | 151 | else |
78 | return (time(NULL) - st.st_mtime > ctx.cfg.cache_max_create_time); | 152 | err = unlink(slot->lock_name); |
153 | return err; | ||
79 | } | 154 | } |
80 | 155 | ||
81 | int cache_lock(struct cacheitem *item) | 156 | /* Generate the content for the current cache slot by redirecting |
157 | * stdout to the lock-fd and invoking the callback function | ||
158 | */ | ||
159 | static int fill_slot(struct cache_slot *slot) | ||
82 | { | 160 | { |
83 | int i = 0; | 161 | int tmp; |
84 | char *lockfile = xstrdup(fmt("%s.lock", item->name)); | ||
85 | 162 | ||
86 | top: | 163 | /* Preserve stdout */ |
87 | if (++i > ctx.cfg.max_lock_attempts) | 164 | tmp = dup(STDOUT_FILENO); |
88 | die("cache_lock: unable to lock %s: %s", | 165 | if (tmp == -1) |
89 | item->name, strerror(errno)); | 166 | return errno; |
90 | 167 | ||
91 | item->fd = open(lockfile, O_WRONLY|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR); | 168 | /* Redirect stdout to lockfile */ |
169 | if (dup2(slot->lock_fd, STDOUT_FILENO) == -1) | ||
170 | return errno; | ||
92 | 171 | ||
93 | if (item->fd == NOLOCK && errno == ENOENT && cache_create_dirs()) | 172 | /* Generate cache content */ |
94 | goto top; | 173 | slot->fn(slot->cbdata); |
95 | 174 | ||
96 | if (item->fd == NOLOCK && errno == EEXIST && | 175 | /* Restore stdout */ |
97 | cache_refill_overdue(lockfile) && !unlink(lockfile)) | 176 | if (dup2(tmp, STDOUT_FILENO) == -1) |
98 | goto top; | 177 | return errno; |
99 | 178 | ||
100 | free(lockfile); | 179 | /* Close the temporary filedescriptor */ |
101 | return (item->fd > 0); | 180 | close(tmp); |
181 | return 0; | ||
102 | } | 182 | } |
103 | 183 | ||
104 | int cache_unlock(struct cacheitem *item) | 184 | /* Crude implementation of 32-bit FNV-1 hash algorithm, |
185 | * see http://www.isthe.com/chongo/tech/comp/fnv/ for details | ||
186 | * about the magic numbers. | ||
187 | */ | ||
188 | #define FNV_OFFSET 0x811c9dc5 | ||
189 | #define FNV_PRIME 0x01000193 | ||
190 | |||
191 | unsigned long hash_str(const char *str) | ||
105 | { | 192 | { |
106 | close(item->fd); | 193 | unsigned long h = FNV_OFFSET; |
107 | return (rename(fmt("%s.lock", item->name), item->name) == 0); | 194 | unsigned char *s = (unsigned char *)str; |
195 | |||
196 | if (!s) | ||
197 | return h; | ||
198 | |||
199 | while(*s) { | ||
200 | h *= FNV_PRIME; | ||
201 | h ^= *s++; | ||
202 | } | ||
203 | return h; | ||
108 | } | 204 | } |
109 | 205 | ||
110 | int cache_cancel_lock(struct cacheitem *item) | 206 | static int process_slot(struct cache_slot *slot) |
111 | { | 207 | { |
112 | return (unlink(fmt("%s.lock", item->name)) == 0); | 208 | int err; |
209 | |||
210 | err = open_slot(slot); | ||
211 | if (!err && slot->match) { | ||
212 | if (is_expired(slot)) { | ||
213 | if (!lock_slot(slot)) { | ||
214 | /* If the cachefile has been replaced between | ||
215 | * `open_slot` and `lock_slot`, we'll just | ||
216 | * serve the stale content from the original | ||
217 | * cachefile. This way we avoid pruning the | ||
218 | * newly generated slot. The same code-path | ||
219 | * is chosen if fill_slot() fails for some | ||
220 | * reason. | ||
221 | * | ||
222 | * TODO? check if the new slot contains the | ||
223 | * same key as the old one, since we would | ||
224 | * prefer to serve the newest content. | ||
225 | * This will require us to open yet another | ||
226 | * file-descriptor and read and compare the | ||
227 | * key from the new file, so for now we're | ||
228 | * lazy and just ignore the new file. | ||
229 | */ | ||
230 | if (is_modified(slot) || fill_slot(slot)) { | ||
231 | unlock_slot(slot, 0); | ||
232 | close_lock(slot); | ||
233 | } else { | ||
234 | close_slot(slot); | ||
235 | unlock_slot(slot, 1); | ||
236 | slot->cache_fd = slot->lock_fd; | ||
237 | } | ||
238 | } | ||
239 | } | ||
240 | print_slot(slot); | ||
241 | close_slot(slot); | ||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | /* If the cache slot does not exist (or its key doesn't match the | ||
246 | * current key), lets try to create a new cache slot for this | ||
247 | * request. If this fails (for whatever reason), lets just generate | ||
248 | * the content without caching it and fool the caller to belive | ||
249 | * everything worked out (but print a warning on stdout). | ||
250 | */ | ||
251 | |||
252 | close_slot(slot); | ||
253 | if ((err = lock_slot(slot)) != 0) { | ||
254 | cache_log("[cgit] Unable to lock slot %s: %s (%d)\n", | ||
255 | slot->lock_name, strerror(err), err); | ||
256 | slot->fn(slot->cbdata); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | if ((err = fill_slot(slot)) != 0) { | ||
261 | cache_log("[cgit] Unable to fill slot %s: %s (%d)\n", | ||
262 | slot->lock_name, strerror(err), err); | ||
263 | unlock_slot(slot, 0); | ||
264 | close_lock(slot); | ||
265 | slot->fn(slot->cbdata); | ||
266 | return 0; | ||
267 | } | ||
268 | // We've got a valid cache slot in the lock file, which | ||
269 | // is about to replace the old cache slot. But if we | ||
270 | // release the lockfile and then try to open the new cache | ||
271 | // slot, we might get a race condition with a concurrent | ||
272 | // writer for the same cache slot (with a different key). | ||
273 | // Lets avoid such a race by just printing the content of | ||
274 | // the lock file. | ||
275 | slot->cache_fd = slot->lock_fd; | ||
276 | unlock_slot(slot, 1); | ||
277 | err = print_slot(slot); | ||
278 | close_slot(slot); | ||
279 | return err; | ||
113 | } | 280 | } |
114 | 281 | ||
115 | int cache_expired(struct cacheitem *item) | 282 | /* Print cached content to stdout, generate the content if necessary. */ |
283 | int cache_process(int size, const char *path, const char *key, int ttl, | ||
284 | cache_fill_fn fn, void *cbdata) | ||
116 | { | 285 | { |
117 | if (item->ttl < 0) | 286 | unsigned long hash; |
287 | int len, i; | ||
288 | char filename[1024]; | ||
289 | char lockname[1024 + 5]; /* 5 = ".lock" */ | ||
290 | struct cache_slot slot; | ||
291 | |||
292 | /* If the cache is disabled, just generate the content */ | ||
293 | if (size <= 0) { | ||
294 | fn(cbdata); | ||
118 | return 0; | 295 | return 0; |
119 | return item->st.st_mtime + item->ttl * 60 < time(NULL); | 296 | } |
297 | |||
298 | /* Verify input, calculate filenames */ | ||
299 | if (!path) { | ||
300 | cache_log("[cgit] Cache path not specified, caching is disabled\n"); | ||
301 | fn(cbdata); | ||
302 | return 0; | ||
303 | } | ||
304 | len = strlen(path); | ||
305 | if (len > sizeof(filename) - 10) { /* 10 = "/01234567\0" */ | ||
306 | cache_log("[cgit] Cache path too long, caching is disabled: %s\n", | ||
307 | path); | ||
308 | fn(cbdata); | ||
309 | return 0; | ||
310 | } | ||
311 | if (!key) | ||
312 | key = ""; | ||
313 | hash = hash_str(key) % size; | ||
314 | strcpy(filename, path); | ||
315 | if (filename[len - 1] != '/') | ||
316 | filename[len++] = '/'; | ||
317 | for(i = 0; i < 8; i++) { | ||
318 | sprintf(filename + len++, "%x", | ||
319 | (unsigned char)(hash & 0xf)); | ||
320 | hash >>= 4; | ||
321 | } | ||
322 | filename[len] = '\0'; | ||
323 | strcpy(lockname, filename); | ||
324 | strcpy(lockname + len, ".lock"); | ||
325 | slot.fn = fn; | ||
326 | slot.cbdata = cbdata; | ||
327 | slot.ttl = ttl; | ||
328 | slot.cache_name = filename; | ||
329 | slot.lock_name = lockname; | ||
330 | slot.key = key; | ||
331 | slot.keylen = strlen(key); | ||
332 | return process_slot(&slot); | ||
120 | } | 333 | } |
334 | |||
335 | /* Print a message to stdout */ | ||
336 | void cache_log(const char *format, ...) | ||
337 | { | ||
338 | va_list args; | ||
339 | va_start(args, format); | ||
340 | vfprintf(stderr, format, args); | ||
341 | va_end(args); | ||
342 | } | ||
343 | |||
@@ -6,18 +6,27 @@ | |||
6 | #ifndef CGIT_CACHE_H | 6 | #ifndef CGIT_CACHE_H |
7 | #define CGIT_CACHE_H | 7 | #define CGIT_CACHE_H |
8 | 8 | ||
9 | struct cacheitem { | 9 | typedef void (*cache_fill_fn)(void *cbdata); |
10 | char *name; | ||
11 | struct stat st; | ||
12 | int ttl; | ||
13 | int fd; | ||
14 | }; | ||
15 | 10 | ||
16 | extern char *cache_safe_filename(const char *unsafe); | 11 | |
17 | extern int cache_lock(struct cacheitem *item); | 12 | /* Print cached content to stdout, generate the content if necessary. |
18 | extern int cache_unlock(struct cacheitem *item); | 13 | * |
19 | extern int cache_cancel_lock(struct cacheitem *item); | 14 | * Parameters |
20 | extern int cache_exist(struct cacheitem *item); | 15 | * size max number of cache files |
21 | extern int cache_expired(struct cacheitem *item); | 16 | * path directory used to store cache files |
17 | * key the key used to lookup cache files | ||
18 | * ttl max cache time in seconds for this key | ||
19 | * fn content generator function for this key | ||
20 | * cbdata user-supplied data to the content generator function | ||
21 | * | ||
22 | * Return value | ||
23 | * 0 indicates success, everyting else is an error | ||
24 | */ | ||
25 | extern int cache_process(int size, const char *path, const char *key, int ttl, | ||
26 | cache_fill_fn fn, void *cbdata); | ||
27 | |||
28 | |||
29 | /* Print a message to stdout */ | ||
30 | extern void cache_log(const char *format, ...); | ||
22 | 31 | ||
23 | #endif /* CGIT_CACHE_H */ | 32 | #endif /* CGIT_CACHE_H */ |
@@ -45,6 +45,8 @@ void config_cb(const char *name, const char *value) | |||
45 | ctx.cfg.enable_log_filecount = atoi(value); | 45 | ctx.cfg.enable_log_filecount = atoi(value); |
46 | else if (!strcmp(name, "enable-log-linecount")) | 46 | else if (!strcmp(name, "enable-log-linecount")) |
47 | ctx.cfg.enable_log_linecount = atoi(value); | 47 | ctx.cfg.enable_log_linecount = atoi(value); |
48 | else if (!strcmp(name, "cache-size")) | ||
49 | ctx.cfg.cache_size = atoi(value); | ||
48 | else if (!strcmp(name, "cache-root")) | 50 | else if (!strcmp(name, "cache-root")) |
49 | ctx.cfg.cache_root = xstrdup(value); | 51 | ctx.cfg.cache_root = xstrdup(value); |
50 | else if (!strcmp(name, "cache-root-ttl")) | 52 | else if (!strcmp(name, "cache-root-ttl")) |
@@ -143,6 +145,8 @@ static void prepare_context(struct cgit_context *ctx) | |||
143 | { | 145 | { |
144 | memset(ctx, 0, sizeof(ctx)); | 146 | memset(ctx, 0, sizeof(ctx)); |
145 | ctx->cfg.agefile = "info/web/last-modified"; | 147 | ctx->cfg.agefile = "info/web/last-modified"; |
148 | ctx->cfg.nocache = 0; | ||
149 | ctx->cfg.cache_size = 0; | ||
146 | ctx->cfg.cache_dynamic_ttl = 5; | 150 | ctx->cfg.cache_dynamic_ttl = 5; |
147 | ctx->cfg.cache_max_create_time = 5; | 151 | ctx->cfg.cache_max_create_time = 5; |
148 | ctx->cfg.cache_repo_ttl = 5; | 152 | ctx->cfg.cache_repo_ttl = 5; |
@@ -163,47 +167,8 @@ static void prepare_context(struct cgit_context *ctx) | |||
163 | ctx->page.mimetype = "text/html"; | 167 | ctx->page.mimetype = "text/html"; |
164 | ctx->page.charset = PAGE_ENCODING; | 168 | ctx->page.charset = PAGE_ENCODING; |
165 | ctx->page.filename = NULL; | 169 | ctx->page.filename = NULL; |
166 | } | 170 | ctx->page.modified = time(NULL); |
167 | 171 | ctx->page.expires = ctx->page.modified; | |
168 | static int cgit_prepare_cache(struct cacheitem *item) | ||
169 | { | ||
170 | if (!ctx.repo && ctx.qry.repo) { | ||
171 | ctx.page.title = fmt("%s - %s", ctx.cfg.root_title, | ||
172 | "Bad request"); | ||
173 | cgit_print_http_headers(&ctx); | ||
174 | cgit_print_docstart(&ctx); | ||
175 | cgit_print_pageheader(&ctx); | ||
176 | cgit_print_error(fmt("Unknown repo: %s", ctx.qry.repo)); | ||
177 | cgit_print_docend(); | ||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | if (!ctx.repo) { | ||
182 | item->name = xstrdup(fmt("%s/index.%s.html", | ||
183 |