Extracting extended attributes from Apple Double format

When files from native Mac OS X filesystems (like HFS+) are copied to storage that doesn’t support extended attributes (xattrs) natively, those attributes are not lost; instead they are placed in special files with a “._” prefix. Inside archives these paths may also carry a “__MACOSX/” directory prefix.
These files use an ancient format called AppleDouble, which was once well documented but sadly is not supported by any of Apple’s current APIs.
Sometimes it’s necessary to work with this format – in my case, with compressed files inside zip archives. Ignoring separately stored extended attributes may have unwanted consequences, mostly related to user experience (invalid encodings, lost Finder labels etc.), and is generally bad.
Below is a code snippet which parses the content of a “._” file and extracts a list of extended attributes with their data. This data may be passed to the setxattr(..) function or interpreted in some other way. Some structure layouts and functions are taken from Apple’s copyfile.c source.

Header (AppleDouble.h):
#pragma once
#include <stdint.h>
/**
 * One extended attribute found inside an AppleDouble blob.
 * Nothing is copied: data points into the buffer that was parsed, and name points
 * either into that buffer or at a constant string (for the Finder Info attribute),
 * so the parsed buffer has to stay alive while this structure is in use.
 */
struct AppleDoubleEA
{
    // no allocations, only pointing at original memory buffer
    const void* data; // start of the attribute's value
    const char* name; // null-terminated UTF-8 string
    uint32_t    data_sz; // size of the attribute's value in bytes
   uint32_t    name_len; // length excluding zero-terminator. no zero-length names are allowed
};

/**
 * ExtractEAFromAppleDouble interprets a memory block of EAs packed into an AppleDouble file
 * (a "._" file), usually taken from an archive.
 *
 * _memory_buf  - start of the AppleDouble file content.
 * _memory_size - size of that content in bytes.
 * _ea_count    - receives the number of elements in the returned array.
 *
 * Returns NULL or an array of AppleDoubleEA (number of _ea_count) allocated with malloc.
 * NULL is returned, in particular, when any argument is NULL/zero or when the buffer
 * is not recognized as an AppleDouble file.
 * The array elements do not own anything: they point into _memory_buf, so that buffer
 * must stay valid for as long as the array is used.
 * Caller is responsible for deallocating this memory (the array itself, with free()).
 */
AppleDoubleEA *ExtractEAFromAppleDouble(const void *_memory_buf,
                                        size_t      _memory_size,
                                        size_t     *_ea_count
                                        );

Source file (AppleDouble.cpp):
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/xattr.h>
#include <libkern/OSByteOrder.h>
#include “AppleDouble.h”

/* AppleDouble header signature and the only format version this parser accepts */
#define ADH_MAGIC     0x00051607
#define ADH_VERSION   0x00020000
/*
 * 16-character file system name. It is not referenced by the code in this file;
 * presumably it is the value of the header's 16-byte filler field.
 */
#define ADH_MACOSX    "Mac OS X        "

/* AppleDouble entry type IDs (apple_double_entry_t::type) */
#define AD_DATA          1   /* Data fork */
#define AD_RESOURCE      2   /* Resource fork */
#define AD_REALNAME      3   /* File's name on home file system */
#define AD_COMMENT       4   /* Standard Mac comment */
#define AD_ICONBW        5   /* Mac black & white icon */
#define AD_ICONCOLOR     6   /* Mac color icon */
#define AD_UNUSED        7   /* Not used */
#define AD_FILEDATES     8   /* File dates; create, modify, etc */
#define AD_FINDERINFO    9   /* Mac Finder info & extended info */
#define AD_MACINFO      10   /* Mac file info, attributes, etc */
#define AD_PRODOSINFO   11   /* Pro-DOS file info, attrib., etc */
#define AD_MSDOSINFO    12   /* MS-DOS file info, attributes, etc */
#define AD_AFPNAME      13   /* Short name on AFP server */
#define AD_AFPINFO      14   /* AFP file info, attrib., etc */
#define AD_AFPDIRID     15   /* AFP directory ID */
/* Extended attributes are stored in the (extended) Finder Info entry */
#define AD_ATTRIBUTES   AD_FINDERINFO

#define ATTR_HDR_MAGIC     0x41545452   /* 'ATTR' */
#define FINDERINFOSIZE 32               /* size of the plain Finder Info, in bytes */

#pragma pack(1)
/*
 * Descriptor of one entry listed in the AppleDouble header.
 * All fields are stored big-endian in the file; swap_adhdr() converts them.
 */
typedef struct apple_double_entry
{
u_int32_t   type;     /* entry type: see list, 0 invalid */
u_int32_t   offset;   /* entry data offset from the beginning of the file. */
u_int32_t   length;   /* entry data length in bytes. */
} apple_double_entry_t;

/*
 * Descriptor of one extended attribute, as stored after attr_header_t.
 * Entries are aligned on 4 byte boundaries.
 * The record is variable-sized: name is declared with one element but really holds
 * namelen bytes; use ATTR_ENTRY_LENGTH / ATTR_NEXT to step to the following entry.
 * offset and length are stored big-endian and must be converted with SWAP32.
 */
typedef struct attr_entry
{
u_int32_t   offset;    /* file offset to data */
u_int32_t   length;    /* size of attribute data */
u_int16_t   flags;
u_int8_t    namelen;   /* length of name including NULL termination char */
u_int8_t    name[1];   /* NULL-terminated UTF-8 name (up to 128 bytes max) */
} attr_entry_t;

/*
 * Fixed-layout beginning of an AppleDouble file that has exactly two entries,
 * immediately followed by the 32 bytes of Finder Info.
 * Multi-byte fields are stored big-endian; swap_adhdr() converts magic, version,
 * numEntries and the entry descriptors.
 */
typedef struct apple_double_header
{
u_int32_t   magic;         /* == ADH_MAGIC */
u_int32_t   version;       /* format version: 2 = 0x00020000 */
u_int32_t   filler[4];
u_int16_t   numEntries;   /* number of entries which follow */
apple_double_entry_t   entries[2];  /* 'finfo' & 'rsrc' always exist */
u_int8_t    finfo[FINDERINFOSIZE];  /* Must start with Finder Info (32 bytes) */
u_int8_t    pad[2];        /* get better alignment inside attr_header */
} apple_double_header_t;

/*
 * AppleDouble header extended with the extended-attributes header.
 * The attr_entry_t records start right after this structure.
 * The fields declared here are stored big-endian; swap_attrhdr() converts them
 * (except reserved[], which it leaves alone).
 *
 * Header + entries must fit into 64K <- guess not true since 10.7 .MK.
 */
typedef struct attr_header
{
apple_double_header_t  appledouble;
u_int32_t   magic;        /* == ATTR_HDR_MAGIC */
u_int32_t   debug_tag;    /* for debugging == file id of owning file */
u_int32_t   total_size;   /* total size of attribute header + entries + data */
u_int32_t   data_start;   /* file offset to attribute data area */
u_int32_t   data_length;  /* length of attribute data area */
u_int32_t   reserved[3];
u_int16_t   flags;
u_int16_t   num_attrs;
} attr_header_t;
#pragma pack()

/* File contents are big-endian; these convert a value to host byte order. */
#define SWAP16(x) OSSwapBigToHostInt16(x)
#define SWAP32(x) OSSwapBigToHostInt32(x)
#define SWAP64(x) OSSwapBigToHostInt64(x)

/* Alignment masks (alignment - 1) */
#define ATTR_ALIGN 3L  /* Use four-byte alignment */
#define ATTR_DATA_ALIGN 1L  /* Use two-byte alignment */

/*
 * Size in bytes of an attr_entry_t record whose name occupies namelen bytes
 * (terminator included), rounded up to the next four-byte boundary.
 * The "- 1" removes the single name byte already counted by sizeof(attr_entry_t).
 */
#define ATTR_ENTRY_LENGTH(namelen)  \
        ((sizeof(attr_entry_t) - 1 + (namelen) + ATTR_ALIGN) & (~ATTR_ALIGN))

/*
 * Pointer to the record that follows entry ae.
 * Reads ae->namelen, so ae must point at a readable, validated entry.
 */
#define ATTR_NEXT(ae)  \
        ((attr_entry_t *)((u_int8_t *)(ae) + ATTR_ENTRY_LENGTH((ae)->namelen)))

/* 32 zero bytes (8 * 4); compared against the file's Finder Info to tell whether any Finder Info is present */
static const u_int32_t emptyfinfo[8] = {0};

/*
 * Endian swap Apple Double header (in place).
 *
 * Converts magic, version, numEntries and the entry descriptors between
 * big-endian file order and host order. The entry count has to be taken in
 * host order BEFORE numEntries itself is swapped: if magic already equals
 * ADH_MAGIC the header is currently in host order and numEntries is usable
 * as is, otherwise it is swapped first.
 *
 * The count comes from the file, so it is clamped to the number of entries
 * that apple_double_header_t actually holds; without that a bogus numEntries
 * would make the loop write past the end of the structure.
 */
static void
swap_adhdr(apple_double_header_t *adh)
{
   const int max_entries = (int)(sizeof(adh->entries) / sizeof(adh->entries[0]));
   int count;
   int i;

   count = (adh->magic == ADH_MAGIC) ? adh->numEntries : SWAP16(adh->numEntries);
   if (count > max_entries)
      count = max_entries;

   adh->magic      = SWAP32 (adh->magic);
   adh->version    = SWAP32 (adh->version);
   adh->numEntries = SWAP16 (adh->numEntries);

   for (i = 0; i < count; i++)
   {
      adh->entries[i].type   = SWAP32 (adh->entries[i].type);
      adh->entries[i].offset = SWAP32 (adh->entries[i].offset);
      adh->entries[i].length = SWAP32 (adh->entries[i].length);
   }
}

/*
 * Byte-swap, in place, the extended-attributes part of *ah.
 * Only the fields declared directly in attr_header_t are converted; the embedded
 * appledouble header and the reserved words are not touched here.
 */
static void
swap_attrhdr(attr_header_t *ah)
{
   /* 16-bit fields */
   ah->num_attrs   = SWAP16 (ah->num_attrs);
   ah->flags       = SWAP16 (ah->flags);

   /* 32-bit fields describing the attribute data area */
   ah->data_length = SWAP32 (ah->data_length);
   ah->data_start  = SWAP32 (ah->data_start);
   ah->total_size  = SWAP32 (ah->total_size);

   /* 32-bit identification fields */
   ah->debug_tag   = SWAP32 (ah->debug_tag);
   ah->magic       = SWAP32 (ah->magic);
}

static bool IsAppleDouble(const void *_memory_buf, size_t _memory_size)
{
    const apple_double_header_t *adhdr = (const apple_double_header_t *)_memory_buf;
    if(_memory_size < sizeof(apple_double_header_t) – 2 ||
       SWAP32(adhdr->magic) != ADH_MAGIC ||
       SWAP32(adhdr->version) != ADH_VERSION ||
       SWAP16(adhdr->numEntries) != 2 ||
       SWAP32(adhdr->entries[0].type) != AD_FINDERINFO
       )
        return false;
    
    return true;
}

AppleDoubleEA *ExtractEAFromAppleDouble(const void *_memory_buf,
                                        size_t      _memory_size,
                                        size_t     *_ea_count
                                        )
{
    if(!_memory_buf || !_memory_size || !_ea_count)
        return 0;
    
    if(!IsAppleDouble(_memory_buf, _memory_size))
        return 0;
    
    apple_double_header_t adhdr = *(const apple_double_header_t *) _memory_buf;
    swap_adhdr(&adhdr);
    
    bool has_finfo = memcmp(adhdr.finfo, emptyfinfo, sizeof(emptyfinfo)) != 0;
    
    AppleDoubleEA *eas = 0;
    int eas_last = 0;
    
    if(adhdr.entries[0].length > FINDERINFOSIZE)
    {
        attr_header_t attrhdr = *(const attr_header_t *)_memory_buf;
        swap_attrhdr(&attrhdr);
        
        if (attrhdr.magic == ATTR_HDR_MAGIC)
        {
            int count = attrhdr.num_attrs;
            eas = (AppleDoubleEA*) malloc( sizeof(AppleDoubleEA) * (has_finfo ? count + 1 : count) );
            
            const attr_entry_t *entry = (const attr_entry_t *)((char*)_memory_buf + sizeof(attr_header_t));
            for (int i = 0; i < count; i++)
            {
                if((char*)entry + sizeof(attr_entry_t) > (char*)_memory_buf + _memory_size)
                    break; // out-of-boundary guard to be safe about memory (not)corrupting
                
                u_int32_t offset = SWAP32(entry->offset);
                u_int32_t length = SWAP32(entry->length);
                u_int32_t namelen = 0;
                const char *name = (const char*)&entry->name[0];
                
                // safely calculate a name len
                for(const char *si = name; si < (char*)_memory_buf + _memory_size && (*si) != 0; ++si, ++namelen)
                    ;
                
                
                if(namelen > 0 &&
                   name + namelen < (char*)_memory_buf + _memory_size &&
                   name[namelen] == 0 &&
                   offset + length <= _memory_size)
                { // seems to be a valid EA
                    eas[eas_last].data = (char*)_memory_buf + offset;
                    eas[eas_last].data_sz = length;
                    eas[eas_last].name = name;
                    eas[eas_last].name_len = namelen;
                    ++eas_last;
                }
                entry = ATTR_NEXT(entry);
            }
        }
    }
    
    if(has_finfo)
    {
        if(!eas) // no extended attributes except FinderInfo was found
            eas = (AppleDoubleEA*) malloc( sizeof(AppleDoubleEA) );
        eas[eas_last].data = &((const apple_double_header_t *)_memory_buf)->finfo[0];
        eas[eas_last].data_sz = 32;
        eas[eas_last].name = XATTR_FINDERINFO_NAME; // “com.apple.FinderInfo”
        eas[eas_last].name_len = 20;
        ++eas_last;
    }
    
    *_ea_count = eas_last;
    
    return eas;
}

A “million files” test

Writing a file manager is definitely a special kind of fun. Despite its seeming simplicity, there are a lot of details that must be considered when implementing one. Using efficient structures, algorithms and architecture can mean a lot, even though all of that remains under the hood and is not visible to the user. Since the main purpose of a file management app is navigating between folders and showing their contents, I decided to run some tests to show how different implementations handle this (quite simple?) task. At the moment my collection of Mac OS X file managers consists of 11 different ones (all of this software can easily be found with Google; I don’t claim to have tested every file manager for Mac OS X – there are others).
OK, to be precise – this is a stress test. By stress I mean not a usual stress, but STRESS.
The job is very simple: reading and showing the contents of a directory with 1,000,000 files. Nothing more, just that. I took my old 8Gb USB stick, plugged it into my Macbook Pro running OS X Mavericks, formatted it as journaled HFS+ and turned off Spotlight on this volume. Then I ran a tiny app, which I called fs_killer 🙂
/*
 * fs_killer: creates 1,000,000 empty files named 000000.txt .. 999999.txt
 * in /Volumes/test. Stops with exit code 1 and an error message as soon as
 * a file cannot be created (e.g. the volume is not mounted or is full).
 */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    char tmp[MAXPATHLEN];
    for(int i = 0; i < 1000000; ++i) {
        snprintf(tmp, sizeof(tmp), "/Volumes/test/%6.6d.txt", i);
        int fd = open(tmp, O_WRONLY|O_CREAT, S_IRUSR|S_IWUSR|S_IRGRP);
        if(fd < 0) {
            perror(tmp);
            return 1;
        }
        close(fd);
    }
    return 0;
}

Here’s the test itself: run an app, check whether it is able to open this folder, record how much memory it consumes and check whether the app is still usable (cursor movement and scrolling). Every app was given plenty of time to load the directory listing, and this time wasn’t measured. The comparison table, in alphabetical order, is below:

Results
Application name Opened Memory Usable
DCommander yes ~1.7Gb yes
FastCommander yes ~1.1Gb yes
Nimble Commander yes 150Mb yes
Finder yes ~2Gb yes
ForkLift yes ~1.7Gb yes
Macintosh Explorer yes ~1Gb yes
Midnight Commander yes 133Mb yes
Moroshka File Manager yes 160Mb no
Mover yes ~2Gb no
muCommander no n/a n/a
ZCommander yes 850Mb yes
A few words in conclusion. Two things can be clearly seen:
1) Midnight Commander (mc) is the winner (and the only one running in a console), and muCommander did very badly – it was the only one to fail the opening test.
2) These file managers can be divided into two groups by memory consumption: below roughly 200Mb and above it. I suppose this difference is a consequence of the internal data storage structure – while the first group relies on plain C / C++ memory management, the others use the Objective-C / Cocoa infrastructure to handle directory listing data, which is considerably less efficient in this respect.
As a bottom line I can only give a link to Nimble Commander, it’s free now: http://magnumbytes.com/

File management and file system stuff

One can ask a reasonable question – why on earth do we need another file management app if we already have Finder, for example?
Funny, but there are many not-so-obvious reasons; I’ll talk about one of them below. Today’s reason is file fragmentation.
Apple’s old HFS+ is, of course, bad and ugly (it was ugly in classic Mac OS, and is still ugly in Mac OS X), but it supports some modern concepts such as extents.
Some reading can be found in Wikipedia:
http://en.wikipedia.org/wiki/HFS+
http://en.wikipedia.org/wiki/Extent_(file_systems)
http://en.wikipedia.org/wiki/File_fragmentation
In the ideal situation, a file can be described in the file allocation table (any variant of it) with a single record, such as a position and a length in clusters. That’s good for the disk (especially for spinning drives), but it is also good for all the file system logic involved in file operations. The more extents are used for a file’s allocation, the more logic overhead is later needed to operate on that file.

Now it’s time for an experiment: copy a big file (the OS X 10.9 distro) with Finder under OS X 10.8.4. My hard drive has plenty of free space, by the way.
After it’s done, just use a fancy tool, fileXray, to see what has happened at the file system layout level:

mbp:~ migun$ sudo su
Password:
sh-3.2# fileXray /Users/migun/!/OS X 10.9.dmg 
  path                 = Macintosh HD:/Users/migun/!/OS X 10.9.dmg
# Catalog File Thread Record
# Record 0 in node 48278 beginning at 512-byte sector 0x227170
  parentID             = 20404714
  nodeName             = OS X 10.9.dmg
# Catalog File Record
# Record 12 in node 62013 beginning at 512-byte sector 0xc77170
  type                 = file
  file ID              = 20405288
  flags                = 0000000010000010
                       . File has a thread record in the catalog.
                       . File has date-added stored in Finder Info.
  reserved1            = 0
  createDate           = Sun Jun 16 13:37:35 2013
  contentModDate       = Sun Jun 16 14:12:37 2013
  attributeModDate     = Tue Aug 20 10:03:55 2013
  accessDate           = Tue Aug 20 10:01:09 2013
  backupDate           = 0
  # BSD Info
  ownerID              = 501 (migun)
  groupID              = 20 (staff)
  adminFlags           = 00000000
  ownerFlags           = 00000000
  fileMode             = -rw-r–r– 
  linkCount            = 1
  textEncoding         = 0
  reserved2            = 0
  # Finder Info
  fdType               = 0
  fdCreator            = 0
  fdFlags              = 0000000000000000
  fdLocation           = (v = 0, h = 0)
  opaque               = 0
  # Extended Finder Info
  reserved1            = 0
  date_added           = Tue Aug 20 10:01:09 2013
  extended_flags       = 0000000000000000
  reserved2            = 0
  reserved3            = 0
  # Data Fork
  logicalSize          = 4965827853 bytes (5.0 GB)
  totalBlocks          = 1212361
  fork temperature     = no record in Hot File B-Tree
  clumpSize            = 0
  extents              =   startBlock   blockCount      % of file

                            0x1a71af4        0x800         0.17 %
                            0x1c58c9f       0x1000         0.34 %
                            0x1c5c079       0x2000         0.68 %
                            0x1c5fe93       0x2000         0.68 %
                            0x1c64311       0x2000         0.68 %
                            0x1c918e8       0x2000         0.68 %
                            0x1c93b05       0x4000         1.35 %
                            0x1c9d8b8       0x2000         0.68 %

                            0x1ca5787       0x2000         0.68 %
                            0x1ca8689       0x4000         1.35 %
                            0x1cac6b2       0x2000         0.68 %
                            0x1d2efd2       0x2000         0.68 %
                            0x1ecdcaf       0x2000         0.68 %
                            0x215ce83       0x2000         0.68 %
                            0x215efab       0x4000         1.35 %
                            0x216325f       0x2000         0.68 %

                            0x2165512       0x2000         0.68 %
                            0x2167809       0x2000         0.68 %
                            0x2171a13       0x2000         0.68 %
                            0x2173a29       0x2000         0.68 %
                            0x2175ca9       0x2000         0.68 %
                            0x2179a62       0x2000         0.68 %
                            0x21c3fb2       0x2000         0.68 %
                            0x21c6cc1       0x2000         0.68 %

                            0x21c8cd1       0x2000         0.68 %
                            0x21cb1d4       0x2000         0.68 %
                            0x21d8aa0       0x2000         0.68 %
                            0x21dcff3       0x2000         0.68 %
                            0x21e936d       0x2000         0.68 %
                            0x21eb36e       0x2000         0.68 %
                            0x21ed371       0x2000         0.68 %
                            0x21ef7dc       0x2000         0.68 %

                            0x21f17df       0x6000         2.03 %
                            0x21f77e2       0x6000         2.03 %
                            0x21fd90a       0x2000         0.68 %
                            0x221fa99       0xe000         4.73 %
                            0x2237d36       0xc000         4.05 %
                            0x22884a4       0x4000         1.35 %
                            0x228c4a6       0x2000         0.68 %
                            0x229ccfb       0x2000         0.68 %

                            0x23e14b7       0x2000         0.68 %
                            0x23e35f4       0xe000         4.73 %
                            0x23f2ff7       0x2000         0.68 %
                            0x23f4ff9       0x2000         0.68 %
                            0x23f7126       0x2000         0.68 %
                            0x23f9127       0x2000         0.68 %
                            0x240e765       0x8000         2.70 %
                            0x242b007       0x2000         0.68 %

                            0x242d00a       0x6000         2.03 %
                            0x245c6b2       0x8000         2.70 %
                            0x24d0b58       0x2000         0.68 %
                            0x24d2e0b       0x8000         2.70 %
                            0x24db22b       0xc000         4.05 %
                            0x24e722c      0x10000         5.41 %
                            0x24f722d       0xa000         3.38 %
                            0x250122e      0x10000         5.41 %

                            0x25175f2       0x4000         1.35 %
                            0x251b5f3       0x2000         0.68 %
                            0x251d740       0x4000         1.35 %
                            0x2521747       0x6000         2.03 %
                            0x252774e       0x2000         0.68 %
                            0x2529997       0xa000         3.38 %
                            0x2533c49       0x2000         0.68 %
                            0x2535efc       0x4000         1.35 %

                            0x2539f00       0x4000         1.35 %
                            0x253e1b2       0x2000         0.68 %
                            0x254046c       0x6000         2.03 %
                            0x2546763       0x2000         0.68 %
                            0x25734b5       0x6000         2.03 %
                            0x25794b8       0x4000         1.35 %
                            0x257d5e2       0x4000         1.35 %
                            0x25815e5       0x6000         2.03 %

                            0x25875e7        0x7c9         0.16 %

                         1212361 allocation blocks in 73 extents total.
                         16607.68 allocation blocks per extent on an average.
  # Resource Fork
  logicalSize          = 0 bytes

The file was allocated with 73 extents. Now try to do the same with Files manager:

sh-3.2# fileXray /Users/migun/!/OS X 10.9.dmg 
  path                 = Macintosh HD:/Users/migun/!/OS X 10.9.dmg
# Catalog File Thread Record
# Record 184 in node 18946 beginning at 512-byte sector 0x1b4830
  parentID             = 20404714
  nodeName             = OS X 10.9.dmg
# Catalog File Record
# Record 11 in node 62013 beginning at 512-byte sector 0xc77170
  type                 = file
  file ID              = 20408503
  flags                = 0000000010000010
                       . File has a thread record in the catalog.
                       . File has date-added stored in Finder Info.
  reserved1            = 0
  createDate           = Sun Jun 16 13:37:35 2013
  contentModDate       = Sun Jun 16 14:12:37 2013
  attributeModDate     = Tue Aug 20 10:25:18 2013
  accessDate           = Tue Aug 20 10:22:10 2013
  backupDate           = 0
  # BSD Info
  ownerID              = 501 (migun)
  groupID              = 20 (staff)
  adminFlags           = 00000000
  ownerFlags           = 00000000
  fileMode             = -rw-r–r– 
  linkCount            = 1
  textEncoding         = 0
  reserved2            = 0
  # Finder Info
  fdType               = 0
  fdCreator            = 0
  fdFlags              = 0000000000000000
  fdLocation           = (v = 0, h = 0)
  opaque               = 0
  # Extended Finder Info
  reserved1            = 0
  date_added           = Tue Aug 20 10:22:10 2013
  extended_flags       = 0000000000000000
  reserved2            = 0
  reserved3            = 0
  # Data Fork
  logicalSize          = 4965827853 bytes (5.0 GB)
  totalBlocks          = 1212361
  fork temperature     = no record in Hot File B-Tree
  clumpSize            = 0
  extents              =   startBlock   blockCount      % of file

                            0xac38808      0x19000         8.45 %
                            0xad436aa      0x19000         8.45 %
                            0x24d2e0b      0x32000        16.89 %
                            0x26e1f5d      0x19000         8.45 %
                            0x2702f63      0x32000        16.89 %
                            0x2735846      0x19000         8.45 %
                            0x2a0ebc1      0x19000         8.45 %
                            0x2a4cc68      0x46fc9        23.98 %

                         1212361 allocation blocks in 8 extents total.
                         151545.12 allocation blocks per extent on an average.
  # Resource Fork
  logicalSize          = 0 bytes

After copying with Files manager, the OS X 10.9 distro was allocated in 8 extents versus 73 when copied with Finder. And that’s today’s reason why you need a good alternative file manager.
Here’s the one: http://filesmanager.info/  🙂