Debugging inappropriately cached content: take one.

From: Bert Driehuis <bert_driehuis@dont-contact.us>
Date: Mon, 03 Apr 2000 21:55:44 +0200

I've butchered together a prototype cache_manager interface to show
information about a cached URL (against 2.2stable5-hno.20000113). It can
be used like:

        client cache_object://localhost//objectinfo=http://www.foo.com/bar.gif

It will give output like

----------------------------------------------------------------------
Cache information for http://www.foo.com/bar.gif:
KEY 45F3421D19D57890BE4DA4489C4BF0B7
        STORE_OK NOT_IN_MEMORY SWAPOUT_DONE PING_NONE
        CACHABLE,DISPATCHED,VALIDATED
        LV:954776607 LU:954783918 LM:941721829 EX:-1
        0 locks, 0 clients, 7 refs
        Swap File 0X000016

Now : Mon, 03 Apr 2000 19:37:25 GMT
Validated at : Mon, 03 Apr 2000 15:43:27 GMT (14038)
Last used at : Mon, 03 Apr 2000 17:45:18 GMT (6727)
Last modified at: Thu, 04 Nov 1999 13:23:49 GMT (13068816)
Expires at : <none given>

In an ideal world, the rules we'd check would be:
using refresh_pattern '. 0 20% 43200'
        (note: min and max are show here in SECONDS)
The objects age is 14038 (at Mon, 03 Apr 2000 19pr 2000 19:37:25 GMT)
It was last validated at Mon, 03 Apr 2000 15:43:27 GMT
(If the user agent specified Max-age, it would be checked here)
The object is not too old: age <= max
Object has a validator, no min age check done
Refresh factor: 0.107532%
Object is fresh, 0.11% < 20.00%
----------------------------------------------------------------------

I don't know if I'm the only one, but it really helps me visualize
what's going on inside the beast. Let's face it, the docs explain it all
but it requires somewhat of a recursive mind to grasp it all.

I'm not sure it's appropriate to stick it in though. Almost the same
information can be gotten out of the cache log with debug=22,3.

On a technical level, I couldn't find a clean way to pass parameters to
the cache_object interface. The code I added works, but it is not very
clean (to put it mildly).

Oh, and finally, the refcount field in the StoreEntry had me on the
wrong foot (either that, or I miss completely what it does). Am I right
in assuming it just counts how often the object has been downloaded or
verified?

If there is interest, I'll tackle the cachemgr.cgi side and clean it up.

I'm not offended if it doesn't make it in. It was a fun exercise, but
the result is a bit ugly. :-)

Cheers,

                                -- Bert

-- 
Bert Driehuis, MIS -- bert_driehuis@nl.compuware.com -- +31-20-3116119
Every nonzero finite dimensional inner product space has an
orthonormal basis. It makes sense, when you don't think about it.

diff -rc2 ../../squid-hno/src/cache_manager.c ./cache_manager.c
*** ../../squid-hno/src/cache_manager.c Tue Jan 19 03:24:21 1999
--- ./cache_manager.c Mon Apr 3 13:00:45 2000
***************
*** 92,97 ****
  {
      action_table *a;
      for (a = ActionTable; a != NULL; a = a->next) {
! if (0 == strcmp(a->action, action))
              return a;
      }
--- 92,103 ----
  {
      action_table *a;
+ int len;
+ char *cp;
+ if ((cp = strchr(action, '=')) != NULL)
+ len = cp - action;
+ else
+ len = strlen(action);
      for (a = ActionTable; a != NULL; a = a->next) {
! if (0 == strncmp(a->action, action, len))
              return a;
      }
diff -rc2 ../../squid-hno/src/protos.h ./protos.h
*** ../../squid-hno/src/protos.h Mon Jan 10 19:56:42 2000
--- ./protos.h Mon Apr 3 21:11:06 2000
***************
*** 685,688 ****
--- 685,689 ----
  extern time_t getMaxAge(const char *url);
  extern void refreshInit(void);
+ extern void statDebugRefreshCheck(StoreEntry *, const StoreEntry *, const char *);
  
  extern void serverConnectionsClose(void);
diff -rc2 ../../squid-hno/src/refresh.c ./refresh.c
*** ../../squid-hno/src/refresh.c Wed Jan 20 00:18:01 1999
--- ./refresh.c Mon Apr 3 21:24:30 2000
***************
*** 395,396 ****
--- 395,520 ----
          1);
  }
+
+ void
+ statDebugRefreshCheck(StoreEntry *s, const StoreEntry * entry,
+ const char *uri)
+ {
+ const refresh_t *R;
+ time_t min = REFRESH_DEFAULT_MIN;
+ double pct = REFRESH_DEFAULT_PCT;
+ time_t max = REFRESH_DEFAULT_MAX;
+ #if HTTP_VIOLATIONS
+ int override_expire = 0;
+ int override_lastmod = 0;
+ int reload_into_ims = 0;
+ int ignore_reload = 0;
+ #endif
+ const char *pattern = "<none>";
+ time_t age;
+ double factor;
+ time_t check_time = squid_curtime;
+ storeAppendPrintf(s, "\nIn an ideal world, the rules we'd check would be:\n");
+ if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE)) {
+ storeAppendPrintf(s, "always: authentication required\n");
+ return;
+ }
+ if ((R = uri ? refreshLimits(uri) : refreshUncompiledPattern("."))) {
+ min = R->min;
+ pct = R->pct;
+ max = R->max;
+ pattern = R->pattern;
+ #if HTTP_VIOLATIONS
+ override_expire = R->flags.override_expire;
+ override_lastmod = R->flags.override_lastmod;
+ reload_into_ims = R->flags.reload_into_ims;
+ ignore_reload = R->flags.ignore_reload;
+ #endif
+ }
+ #if HTTP_VIOLATIONS
+ if (!reload_into_ims)
+ reload_into_ims = Config.onoff.reload_into_ims;
+ #endif
+ storeAppendPrintf(s, "using refresh_pattern '%s %d %d%% %d'\n",
+ pattern, (int) min, (int) (100.0 * pct), (int) max);
+ storeAppendPrintf(s, "\t(note: min and max are show here in SECONDS)\n");
+ age = check_time - entry->timestamp;
+ storeAppendPrintf(s, "The objects age is %d (at %s)\n", (int) age,
+ mkrfc1123(check_time));
+ storeAppendPrintf(s, "It was last validated at %s\n",
+ mkrfc1123(entry->timestamp));
+ #if HTTP_VIOLATIONS
+ if (ignore_reload)
+ storeAppendPrintf(s, "User specified reload would be IGNORED!\n");
+ if (override_expire)
+ storeAppendPrintf(s, "Web server specified expire would be IGNORED!\n");
+ if (override_lastmod)
+ storeAppendPrintf(s,
+ "Web server specified modification time would be IGNORED!\n");
+ if (reload_into_ims)
+ storeAppendPrintf(s,
+ "User specified reload would be turned into IMS!\n");
+ #endif
+ if (age < 0) {
+ storeAppendPrintf(s,
+ "Age is negative, so the object is treated as stale\n");
+ return;
+ }
+ storeAppendPrintf(s,
+ "(If the user agent specified Max-age, it would be checked here)\n");
+ #if HTTP_VIOLATIONS
+ if (override_expire) {
+ if (age <= min) {
+ storeAppendPrintf(s,
+ "(The object is fresh because override expire is on)\n");
+ return;
+ }
+ storeAppendPrintf(s, "age <= min, no override-expire check done\n");
+ }
+ #endif
+ if (entry->expires > -1) {
+ if (entry->expires <= check_time) {
+ storeAppendPrintf(s, "The object is stale, it has expired\n");
+ return;
+ } else {
+ storeAppendPrintf(s, "The object is fresh: explicit expiration\n");
+ return;
+ }
+ }
+ if (age > max) {
+ storeAppendPrintf(s, "The object is stale, age > max\n");
+ return;
+ }
+ storeAppendPrintf(s, "The object is not too old: age <= max\n");
+ #if HTTP_VIOLATIONS
+ if (override_lastmod > -1 && entry->timestamp == entry->lastmod) {
+ storeAppendPrintf(s,
+ "The object is fresh, age < min, override_lastmod\n");
+ return;
+ }
+ #endif
+ if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
+ factor = (double) age / (double) (entry->timestamp - entry->lastmod);
+ storeAppendPrintf(s, "Object has a validator, no min age check done\n");
+ storeAppendPrintf(s, "Refresh factor: %f%%\n", factor * 100.0);
+ if (factor < pct) {
+ storeAppendPrintf(s, "Object is fresh, %.2f%% < %.2f%%\n",
+ factor * 100.0, pct * 100.0);
+ return;
+ } else {
+ storeAppendPrintf(s, "Object is stale, %.2f%% >= %.2f%%\n",
+ factor * 100.0, pct * 100.0);
+ return;
+ }
+ } else if (entry->lastmod > -1 && entry->timestamp == entry->lastmod) {
+ storeAppendPrintf(s, "Object is stale: last-modified 'now'\n");
+ return;
+ } else if (entry->lastmod > -1 && entry->timestamp < entry->lastmod) {
+ storeAppendPrintf(s,
+ "We ignore a last-modified date in the future\n");
+ }
+ if (age <= min)
+ storeAppendPrintf(s, "The object is fresh, age < min\n");
+ else
+ storeAppendPrintf(s,
+ "The object is stale (none of the rules matched)\n");
+ }
diff -rc2 ../../squid-hno/src/stat.c ./stat.c
*** ../../squid-hno/src/stat.c Mon Jan 10 19:56:42 2000
--- ./stat.c Mon Apr 3 21:21:30 2000
***************
*** 64,67 ****
--- 64,68 ----
  static OBJH stat_objects_get;
  static OBJH stat_vmobjects_get;
+ static OBJH stat_objectinfo_get;
  #if DEBUG_OPENFD
  static OBJH statOpenfdObj;
***************
*** 370,373 ****
--- 371,417 ----
  }
  
+ static const char *
+ fmt_time(time_t t)
+ {
+ static char buf[80];
+ if (t == (time_t)-1)
+ return "<none given>";
+ if (t == squid_curtime)
+ return mkrfc1123(t);
+ snprintf(buf, 79, "%s (%d)", mkrfc1123(t), (squid_curtime - t));
+ return buf;
+ }
+
+ static void
+ stat_objectinfo_get(StoreEntry * s)
+ {
+ StoreEntry *f;
+ char *uri;
+ MemObject *mem = s->mem_obj;
+ if (mem == NULL || mem->log_url == NULL) {
+ storeAppendPrintf(s, "stat_objectinfo_get: can't get at my url\n");
+ return;
+ }
+ uri = strchr(mem->log_url, '=');
+ if (!uri) {
+ storeAppendPrintf(s, "stat_objectinfo_get: no uri\n");
+ return;
+ }
+ uri++;
+ storeAppendPrintf(s, "Cache information for %s:\n", uri);
+ f = storeGetPublic(uri, METHOD_GET);
+ if (f == NULL) {
+ storeAppendPrintf(s, "object not in cache\n");
+ return;
+ }
+ statStoreEntry(s, f);
+ storeAppendPrintf(s, "Now : %s\n", fmt_time(squid_curtime));
+ storeAppendPrintf(s, "Validated at : %s\n", fmt_time(f->timestamp));
+ storeAppendPrintf(s, "Last used at : %s\n", fmt_time(f->lastref));
+ storeAppendPrintf(s, "Last modified at: %s\n", fmt_time(f->lastmod));
+ storeAppendPrintf(s, "Expires at : %s\n", fmt_time(f->expires));
+ statDebugRefreshCheck(s, f, uri);
+ }
+
  static int
  statLargeVmObjectsFilter(const StoreEntry * e)
***************
*** 865,868 ****
--- 909,915 ----
          "In-Memory and In-Transit Objects",
          stat_vmobjects_get, 0, 0);
+ cachemgrRegister("objectinfo",
+ "Information about a specific URL's cache status",
+ stat_objectinfo_get, 0, 1);
      cachemgrRegister("vm_largeobjects",
          "Large In-Memory and In-Transit Objects",
Received on Mon Apr 03 2000 - 13:56:06 MDT

This archive was generated by hypermail pre-2.1.9 : Tue Dec 09 2003 - 16:12:22 MST