/*############################################################################## FUNNNELWEB COPYRIGHT ==================== FunnelWeb is a literate-programming macro preprocessor. The FunnelWeb web is at http://www.ross.net/funnelweb/ Copyright (c) Ross N. Williams 1992. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of Version 2 of the GNU General Public License as published by the Free Software Foundation (http://www.gnu.org/). This program is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See Version 2 of the GNU General Public License for more details. You should have received a copy of Version 2 of the GNU General Public License along with this program. If not, you can obtain a copy as follows: ftp://prep.ai.mit.edu/pub/gnu/COPYING-2.0 or write to: Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Section 2a of the license requires that all changes to this file be recorded prominently in this file. Please record all changes here. Programmers: RNW Ross N. Williams (ross@ross.net) Changes: 07-May-1992 RNW Program prepared for release under GNU GPL V2. 08-May-1999 RNW Eliminated the 64K restriction for MS-DOS. ##############################################################################*/ /******************************************************************************/ /* MAPPER.C */ /******************************************************************************/ #include #include "style.h" #include "as.h" #include "machin.h" #include "mapper.h" #include "memory.h" /******************************************************************************/ /* When we allocate a block of memory for a mapped file, we have to allocate */ /* a little more than the official length of the file. Here are some reasons: */ /* */ /* - We may wish to append an EOF character later on in the program. */ /* - fgets, when requested to fetch n characters, fetches n characters and */ /* then puts an EOS on the end of them. */ /* - It is conceivable that fgets will insert a "\n" at the end of the */ /* block if it doesn't see a "\n" just before the end of file. */ /* */ /* For all these reasons, we allocate a little more memory than we need. How */ /* much more is determined by BLK_FUDGE which is set to be on the safe side. */ #define BLK_FUDGE (20) /* The following fudge is useful for avoiding the limits of types. */ #define LIM_FUDGE (10) /* On PCs, size_t is 16 bits and malloc cannot allocate contiguous chunks of */ /* memory of more than about 64K. This really stuffs up FunnelWeb's capacity */ /* map in files of length greater than 64K. I didn't think of this problem */ /* when I designed FunnelWeb because I was thinking of the VOLUME of memory */ /* available nowadays, rather than its organization on small computers. */ /* Anyway, this problem should really be fixed by allowing files to be read */ /* in in segments. Perhaps the mapper should hand over a linked list of */ /* chunks rather than a single chunk. Unfortunately, I don't have the time to */ /* do this now, so users of the PC version of FunnelWeb will have to be */ /* content with an (approx) 64K limit to input files. This isn't as bad as it */ /* sounds, as large input files can be split using the include facility. */ /* 63K is chosen instead of 64K so as to be paranoid. */ #define PCFILEMX (63L*1024L) /******************************************************************************/ /* */ /* Error Strings */ /* ------------- */ /* Routines in this package return pointers to error strings. These pointers */ /* are subsequently bandied about by other packages. Therefore, they must be */ /* pointers to constant strings. The trick of returning a pointer to a static */ /* character array containing a specific sprintf message will backfire here */ /* if there is an error opening more than one input file. Using mm_* to */ /* create space for each error string would work though. */ /* STOP PRESS: Now that the lister copies error messages, there may no longer */ /* be a problem here. */ /* */ /******************************************************************************/ /* Finding The Length of a File */ /* ---------------------------- */ /* We have to be able to find out the length of a file before reading it in */ /* because, in this version of FunnelWeb, the entire file must be read into */ /* one contiguous block of memory. */ /* */ /* As it turns out, finding the length of a file in portable C turns out to */ /* be a very hard problem. Here are some possible solutions: */ /* */ /* 1. Read the entire file in and see how long it is. */ /* 2. Use fseek to move to the end of the file and then use ftell. */ /* 3. Use the Unix 'stat' call. */ /* */ /* Of these, only the first is portable. The second alternative is */ /* non-portable because some environments do not support the SEEK_END symbol */ /* required to perform a seek to the end of file. */ /* Alternatives to needing the length are as follows: */ /* */ /* 4. Read the file/allocate memory in 64K blocks. */ /* 5. Read the file in 64K blocks and then copy them to a contiguous one. */ /* */ /* Perhaps options 4 or 5 could be implemented later. However, right now I */ /* haven't got the time to do anything except strive for portability, so */ /* option 1 it is. */ LOCAL char *file_len P_((char *,ulong *)); LOCAL char *file_len (p_name,p_length) /* Given a filename (in 'p_name'), returns the length of the binary image of */ /* the file in *p_length. Returns NULL upon success or a pointer to a string */ /* describing an error upon failure. The length is measured in bytes. */ char *p_name; ulong *p_length; { FILE *infile; STAVAR char *p_buf = NULL; /* The length of the buffer handed to fgets is non-critical. However, it */ /* mustn't be bigger than 15 bits, as we are passing it as an int to fgets. */ #define LENBUFLEN (1024L) #if LENBUFLEN > 30000L #error "mapper.c: LENBUFLEN must be less than 15 bits." #endif /* Allocate the buffer if it has not already been allocated. */ if (p_buf == NULL) p_buf=mm_perm((size_t) LENBUFLEN); /* Open for TEXT reading. Earlier, I tried this using a binary read, but */ /* I had problems with this on the VAX (I forget what the problems were) and */ /* so I have switched back to a text read which is slower, but more reliable.*/ infile=fopen(p_name,"r"); if (infile == FOPEN_F) return "Error fopen()ing input file (to determine its length)."; /* Start with a zero length. */ *p_length=0; /* Read the file as text and count the number of bytes it contains. */ while (!feof(infile)) { /* Set the buffer to the empty string so it is valid even if fgets fails. */ p_buf[0]=EOS; /* Read in a whole lot of bytes. */ fgets(p_buf,LENBUFLEN,infile); /* If there is an error, abort. */ if (ferror(infile)) { fclose(infile); return "Error fgets()ing input file (as part of determining its length)."; } /* Count the bytes that we have got. If EOF occurred above AND no bytes */ /* were read, the EOS we planted earlier saves us. */ (*p_length)+=strlen(p_buf); } if (fclose(infile) == FCLOSE_F) return "Error fclose()ing input file (as part of determining its length)."; return NULL; } /******************************************************************************/ #if (OS_DOS || OS_W32) && UNIX_EOL #error Attempt to use unix_map function on a PC! #endif #if UNIX_EOL LOCAL char *unix_map P_((char *,char **,ulong *)); LOCAL char *unix_map(p_name,pp_mem,p_length) /* If we know that the enclosing environment represents text files in UNIX */ /* form, then there is no need to process the file on the way in. All we need */ /* to do is map it directly into memory. This will be very fast. */ char *p_name; char **pp_mem; ulong *p_length; { ulong file_length; /* Number of bytes in the target input file. */ char *p_err; /* Temporary to store pointer to error message. */ FILE *infile; /* The file variable for the input file we are reading. */ ulong num_bytes; /* Number of bytes actually read in (may be different). */ char *p_bytes; /* Pointer to the memory block where all the action is. */ /* Obtain the length of the file we are about to map in. */ p_err=file_len(p_name,&file_length); if (p_err != NULL) return p_err; /* Allocate memory to hold the mapped file. */ /* Note: The memory allocation package bombs if there is no more memory. */ p_bytes=(char *) mm_temp((size_t) file_length+BLK_FUDGE); /* Open the file in BINARY mode. */ infile=fopen(p_name,"rb"); if (infile==FOPEN_F) return "Error fopen()ing the input file (binary open)."; /* Read in the file and complain if we haven't read in enough bytes. */ num_bytes=fread(p_bytes,(size_t) 1L,(size_t) file_length,infile); if (num_bytes != file_length) return "Error fread()ing the input file."; if (fclose(infile) == FCLOSE_F) return "Error fclose()ing the input file."; /* Success. We got through the IO calls. Now fill in the blanks and go home. */ *pp_mem =p_bytes; *p_length =num_bytes; return NULL; } #endif /******************************************************************************/ #if !UNIX_EOL LOCAL char *stan_map P_((char *,char **,ulong *)); LOCAL char *stan_map(p_name,pp_mem,p_length) /* Maps in a file using refined text stream IO calls. */ char *p_name; char **pp_mem; ulong *p_length; { ulong file_length; /* Number of bytes in the target input file. */ char *p_err; /* Temporary to store pointer to error message. */ FILE *infile; /* The file variable for the input file we are reading. */ ulong num_bytes; /* Number of bytes actually read in (may be different). */ char *p_bytes; /* Pointer to the memory block where all the action is. */ char *p_curr; /* Pointer to current position in memory block. */ long bytes_left; /* Number of bytes still left to read. */ /* Obtain the length of the file we are about to map in. */ p_err=file_len(p_name,&file_length); if (p_err != NULL) return p_err; /* Complain on the PC if the file is too big to fit in one 64K segment. */ /* 08-May-1999 RNW I commented out this restriction as DOS extenders have */ /* made this restriction obsolete. */ /* #if OS_DOS if (file_length+BLK_FUDGE > PCFILEMX) return "Error: Input file too big for PC FunnelWeb. Split using include files."; #endif */ /* TRACE printf("MAPPER: LENGTH OF INPUT FILE=%lu\n",file_length); */ /* Allocate memory to hold the mapped file. */ /* Note: The memory allocation package bombs if there is no more memory. */ /* Note: As Unix stream format (the format to which we are converting) uses */ /* just one byte to mark the end of file, it seems highly unlikely */ /* that the file read in will be longer than the binary image. */ p_bytes=(char *) mm_temp((size_t) file_length+BLK_FUDGE); /* If the file is of zero length, we already know its contents! */ /* This is probably not strictly necessary, but why pressure the code below? */ if (file_length==0) { *pp_mem = p_bytes; *p_length = 0; return NULL; } /* Open the file afresh in TEXT mode for portable reading. */ infile=fopen(p_name,"r"); if (infile==FOPEN_F) return "Error fopen()ing input file (for reading)."; /* Read in as much of the file as we can without actually overflowing the */ /* buffer. If the file finished before the buffer, things have probably gone */ /* OK. If the buffer finishes before the file, we have a problem. */ /* Note: bytes_left = file_length+1 because in the following loop, we might */ /* read exactly up to the EOF and then be unable to actually trigger the EOF */ /* condition without another read. So we add one on to allow this extra read */ /* to take place. I don't know if this is necessary, but I certainly can't */ /* find anything in all my C books that will tell me. Anyway, it doesn't */ /* matter because we fudged in a few extra bytes earlier. */ p_curr = p_bytes; /* p_curr runs through the memory. */ bytes_left = file_length+1; /* Bytes left in memory allocated to hold file. */ while (!feof(infile)) { int bytes_try; int bytes_read; /* Try to read as much as we possibly can, but not more than the integer */ /* limit (which can be as low as 15 bits). */ bytes_try = bytes_left >= ((ulong) (INT_MAX-LIM_FUDGE)) ? (int) INT_MAX-LIM_FUDGE : (int) bytes_left; /* Plan an EOS to cover the EOF case. */ *p_curr=EOS; /* Attempt to read bytes_try bytes. +1 is because fgets reads n-1 bytes. */ fgets(p_curr,bytes_try+1,infile); if (ferror(infile)) { fclose(infile); return "Error fgets()ing the input file."; } /* fgets doesn't return a length so we have to hobble behind with strlen. */ /* It's still probably better than using lots of fgetc calls. */ /* If we hit EOF and no bytes were read, the EOS we planted saves us. */ bytes_read = strlen(p_curr); p_curr += bytes_read; bytes_left -= bytes_read; /* If we have run out of bytes in our allocated memory zone, then the */ /* file is longer than the length reported earlier. This is a bad thing. */ as_cold(bytes_left >=0, "stan_map: Text representation was longer than binary rep."); } num_bytes=(file_length+1)-bytes_left; /* See earlier to undserstand +1. */ /* Note: We can't get fussy here and check the read in length with the */ /* official length, as the read-in length is the text representation which */ /* is allowed to be shorter than the physical (binary) length of the file. */ if (fclose(infile) == FCLOSE_F) return "Error fclose()ing input file."; /* Success. We got through the IO calls. Now fill in the blanks and go home. */ *pp_mem = p_bytes; *p_length = num_bytes; return NULL; } #endif /******************************************************************************/ EXPORT char *map_file(p_name,pp_mem,p_length) char *p_name; char **pp_mem; ulong *p_length; { /* We choose one of two mapper routines depending on where the operating */ /* system represents text files using EOL (\n) as end of line markers. */ /* The UNIX_EOL symbol is defined in the machin.h file. */ /* Note: I thought of using sprintf to test to see what "\n" translated into */ /* so I wouldn't need the UNIXEOL symbol. However, unlike printf (which it */ /* is supposed to be like), sprintf does not translate "\n" into its outside */ /* form. The only other alternative is writing a short text file and reading */ /* it back in binary - an action that would probably take longer than */ /* reading the original file to be read using fgetc()! */ #if UNIX_EOL return unix_map(p_name,pp_mem,p_length); #else return stan_map(p_name,pp_mem,p_length); #endif } /******************************************************************************/ /* End of MAPPER.C */ /******************************************************************************/