/* Unpack an ITS file stored in a WEENIX file using Alan Bawden's evacuated file format. By John Wilson. 04/11/1993 JMBW Created. 08/09/1993 JMBW Convert dates, uncompress .Z files automatically. 07/14/1998 JMBW Separated from DUMP.C. */ #include #include #include #include #include #include #include FILE *in, *out; FILE *zopen(); static void flush(); /* Message: 2881200, 91 lines Posted: 4:07pm EDT, Thu May 14/92, imported: 4:05pm EDT, Thu May 14/92 Subject: ITS filesystems To: John Wilson, bruce@think.com From: alan%ai.mit.edu@life.ai.mit.edu Date: Wed, 13 May 92 21:56:02 EDT From: John_Wilson@mts.rpi.edu ... I have a question -- how are the 36-bit words packed into 8-bit bytes? ... From: Bruce Walker Date: Wed, 13 May 92 18:17:20 EDT ... What is the representation of binary files? ... ------- Storing 36-Bit Words In 8-Bit Bytes Here are the details of the encoding used to store 36-bit PDP-10 words in 8-bit byte file systems. It is easiest to explain the encoding by describing how to reconstruct the 36-bit words from the 8-bit bytes. Going the other direction is harder (you will see why). Algorithm to decode a sequence of 8-bit bytes into a sequence of 36-bit words: Each 8-bit byte between 0 and 357 (octal) is decoded into one or two 7-bit bytes (see table below). These 7-bit bytes are then assembled into 36-bit words in the usual PDP-10 byte ordering. The lowest order bit of each such word is always set to 0. Each 8-bit byte between 360 and 377 (octal) is combined with the next 4 8-bit bytes to form a complete 36-bit word (see figure below). It is an error to encounter such a byte when there is a partially assembled output word. If there is a partially assembled output word at the end of the sequence of 8-bit bytes, it is padded out with 7-bit bytes that contain the value 3 (control-C in ASCII). Here is the table for decoding bytes between 0 and 357 (all values are in octal): input byte 1st output 2nd output ---------- ---------- ---------- 0 -- 11 0 -- 11 none 12 15 12 13 -- 14 13 -- 14 none 15 12 none 16 -- 176 16 -- 176 none 177 177 7 200 -- 206 177 0 -- 6 207 177 177 210 -- 211 177 10 -- 11 212 177 15 213 -- 214 177 13 -- 14 215 177 12 216 -- 355 177 16 -- 155 356 15 none 357 177 none For bytes between 360 and 377 (octal) the 36-bit word is reconstructed as follows: byte: 000011111111222222 223333333344444444 bit: 321076543210765432 107654321076543210 Where byte 0 is the current byte (the one between 360 and 377), byte 1 is the next in sequence, and so forth. Going in the other direction, from 36-bit words to 8-bit bytes, is harder only because there are choices to be made. For example, you can encode every 36-bit word using 5 bytes where the first is between 360 and 377 -- but if you did this, files that were stored as ASCII packed in 36-bit words in the usual way wouldn't be readable. A -good- encoder will produce a sequence of 8-bit bytes that can be read as an ordinary text file under Unix whenever the input words contain only PDP-10 ASCII. (You may be puzzled by all two byte sequences starting with 177 in the table above. The explanation is that this is done to preserve not just ASCII files, but also the files written by the Lisp Machine system using the Lisp Machine character set. But you don't really need to worry about this, as long as you have the above table, you know all you need to know about it.) I have a library of C routines that know how to encode and decode files in this format. I'm not willing to make a widely public release of it, but if you really need to use it let me know. (It is best to keep the number of different programs that understand the format small in order to minimize the chances of introducing incompatibilities.) ------- */ /* first code written for each input code from 000 to 357 */ char first[0360] = { 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007, 0010, 0011, 0015, 0013, 0014, 0012, 0016, 0017, /* '\n' => CRLF, '\r' => LF */ 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, 0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147, 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157, 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167, 0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 200 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 210 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 220 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 230 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 240 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 250 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 260 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 270 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 300 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 310 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 320 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 330 */ 0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 340 */ 0177, 0177, 0177, 0177, 0177, 0177, 0015, 0177 /* 350 */ }; /* use the sign bit for NONE, maybe cc's optimizer will catch on (yeah right) */ #define NONE 0200 /* second code written for each input code from 000 to 357, or NONE if none */ char second[0360] = { NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 000 */ NONE, NONE, 0012, NONE, NONE, NONE, NONE, NONE, /* 010 -- '\n' => CRLF */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 020 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 030 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 040 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 050 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 060 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 070 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 100 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 110 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 120 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 130 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 140 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 150 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 160 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, 0007, /* 170 */ 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0177, /* 200 */ 0010, 0011, 0015, 0013, 0014, 0012, 0016, 0017, /* 210 */ 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, /* 220 */ 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, /* 230 */ 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, /* 240 */ 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, /* 250 */ 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, /* 260 */ 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, /* 270 */ 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, /* 300 */ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, /* 310 */ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, /* 320 */ 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, /* 330 */ 0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147, /* 340 */ 0150, 0151, 0152, 0153, 0154, 0155, NONE, NONE /* 350 */ }; unpack(char *file) { register int c; register char b; register int i; static unsigned long word[5]; static unsigned long incnt; in=zopen(file); /* uncompress/open file */ if(in==NULL) { perror(file); exit(1); } incnt=0L; /* used for error msgs if file invalid */ while((incnt++,c=getc(in))!=EOF) { if(c>=0360) { /* quoted binary word */ word[0]=(c&017); for(i=1;i<=4;i++) { /* 4 more bytes */ if((incnt++,word[i]=getc(in))==EOF) { fprintf(stderr, "?Unexpected EOF: %s\n",file); exit(1); } } /* assemble the 36-bit binary word */ outword((word[0]<<14L)|(word[1]<<6L)| ((word[2]>>2L)&077L), ((word[2]&003L)<<16L)| (word[3]<<8L)|word[4]); } else { word[0]=first[c], i=1; /* write first char */ if(!((b=second[c])&NONE)) word[i++]=b; /* loop until word boundary */ while(i) { /* start next sequence */ if((incnt++,c=getc(in))==EOF) { /* pad with ^C's on EOF */ while(i<5) word[i++]=003; flush(word); goto done; } /* quoted word not allowed mid-word */ if(c>=0360) { fprintf(stderr, "?Invalid input file: %s, char %lu\n", file,incnt); exit(1); } /* save the first char */ word[i++]=first[c]; if(i==5) flush(word), i=0; /* save 2nd char if any */ if(!((b=second[c])&NONE)) { word[i++]=b; if(i==5) flush(word), i=0; } } } } done: fclose(in); // unlink(file); /* delete when done - /tmp isn't big enough on */ /* CIEUNIX.RPI.EDU */ } /* flush 5 7-bit ASCII chars as a 36-bit word */ static void flush(unsigned long word[5]) { register unsigned long l, r; l=(word[0]<<11L)|(word[1]<<4L)|((word[2]>>3L)&017L); r=((word[2]&07L)<<15L)|(word[3]<<8L)|(word[4]<<1L); outword(l,r); }