/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/file.c
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/file.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (hide annotations) (download)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch point for: Import, MAIN
File MIME type: text/plain
Initial revision

1 adcroft 1.1 /*
2     $Id: file.c,v 1.43 2002/07/09 16:14:21 whmoseley Exp $
3     **
4     ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
5     ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
6     **
7     ** This program and library is free software; you can redistribute it and/or
8     ** modify it under the terms of the GNU (Library) General Public License
9     ** as published by the Free Software Foundation; either version 2
10     ** of the License, or any later version.
11     **
12     ** This program is distributed in the hope that it will be useful,
13     ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14     ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     ** GNU (Library) General Public License for more details.
16     **
17     ** You should have received a copy of the GNU (Library) General Public License
18     ** along with this program; if not, write to the Free Software
19     ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20     **-------------------------------------------------------------
21     ** Changed getdefaults to allow metaNames in the user
22     ** configuration file
23     ** G.Hill 4/16/97 ghill@library.berkeley.edu
24     **
25     ** change sprintf to snprintf to avoid corruption, and use MAXSTRLEN from swish.h
26     ** added safestrcpy() macro to avoid corruption from strcpy overflow
27     ** SRE 11/17/99
28     **
29     ** added buffer size arg to grabStringValue - core dumping from overrun
30     ** fixed logical OR and other problems pointed out by "gcc -Wall"
31     ** SRE 2/22/00
32     **
33     ** counter modulo 128 had parens typo
34     ** SRE 2/23/00
35     **
36     ** read stopwords from file
37     ** Rainer Scherg (rasc) 2000-06-15
38     **
39     ** 2000-11-15 rasc
40     ** file_properties retrieves last mod date, filesize, and evals some swish
41     ** config flags for this file!
42     **
43     ** 2001-02-12 rasc errormsg "print" changed...
44     ** 2001-03-16 rasc truncateDoc [read_stream] (if doc too large, truncate... )
45     ** 2001-03-17 rasc fprop enhanced by "real_filename"
46     **
47     */
48    
49     #ifdef HAVE_CONFIG_H
50     #include "acconfig.h"
51     #endif
52    
53     #ifdef HAVE_STDLIB_H
54     #include <stdlib.h>
55     #endif
56     #ifdef HAVE_UNISTD_H
57     #include <unistd.h>
58     #endif
59     #include "swish.h"
60     #include "mem.h"
61     #include "string.h"
62     #include "file.h"
63     #include "error.h"
64     #include "list.h"
65     #include "hash.h"
66     #include "check.h"
67     #include "index.h"
68     #include "filter.h"
69     #include "metanames.h"
70    
71    
72     /* Cough, hack, cough - convert slash to backslash for programs that are run via the shell */
73     #ifdef _WIN32
/*
 * make_windows_path -- rewrite a path in place for the Windows shell.
 *
 * Every forward slash in the NUL-terminated string "path" is replaced by a
 * backslash.  The string length does not change and nothing is allocated.
 */
void make_windows_path( char *path )
{
    char *slash = path;

    /* Hop from one '/' to the next; resume the search just past each hit. */
    while ( (slash = strchr( slash, '/' )) != NULL )
        *slash++ = '\\';
}
82     #endif
83    
/*
 * normalize_path -- canonicalize the slashes of a path, in place.
 *
 *   1. Every backslash is flipped to a forward slash (for windows users).
 *   2. Trailing slashes are removed, but the first character is never
 *      stripped, so "/" (or "//") collapses to "/" instead of "".
 *
 * path must be a writable, NUL-terminated string.  The string can only get
 * shorter; nothing is allocated.
 */
void normalize_path(char *path)
{
    size_t  len;    /* size_t: strlen-sized values do not always fit in an int */
    char   *c;

    /* For windows users */
    for ( c = path; *c; c++ )
        if ( '\\' == *c )
            *c = '/';

    /* c now rests on the terminator, so the length needs no second scan */
    len = (size_t) ( c - path );

    while ( len > 1 && path[len - 1] == '/' )
        path[--len] = '\0';
}
103    
104    
105    
/*
 * isdirectory -- does "path" name a directory?
 *
 * Returns 1 when stat() succeeds and the file type is S_IFDIR, 0 otherwise.
 * A path that cannot be stat'ed is reported as "not a directory".
 * stat() follows symbolic links, so the link's target is what gets tested.
 */
int isdirectory(char *path)
{
    struct stat info;

    if (stat(path, &info) != 0)
        return 0;

    return (info.st_mode & S_IFMT) == S_IFDIR;
}
117    
/*
 * isfile -- does "path" name a regular file?
 *
 * Returns 1 when stat() succeeds and the file type is S_IFREG, 0 otherwise.
 * A path that cannot be stat'ed is reported as "not a regular file".
 * stat() follows symbolic links, so the link's target is what gets tested.
 */
int isfile(char *path)
{
    struct stat info;

    if (stat(path, &info) != 0)
        return 0;

    return (info.st_mode & S_IFMT) == S_IFREG;
}
129    
/*
 * islink -- does "path" name a symbolic link?
 *
 * Returns 1 when lstat() succeeds and the file type is S_IFLNK, 0 otherwise
 * (including when the path cannot be examined).  lstat() is used so that the
 * link itself is inspected rather than its target.
 *
 * When NO_SYMBOLIC_FILE_LINKS is defined the function always returns 0.
 *
 * The type bits are isolated with S_IFMT before the comparison, as in
 * isdirectory() and isfile().  The S_IF* values are not independent flag
 * bits, so masking with S_IFLNK itself only proves that the bits of S_IFLNK
 * are set, not that the type *is* S_IFLNK.
 */
int islink(char *path)
{
#ifndef NO_SYMBOLIC_FILE_LINKS
    struct stat stbuf;

    if (lstat(path, &stbuf))
        return 0;
    return ((stbuf.st_mode & S_IFMT) == S_IFLNK) ? 1 : 0;
#else
    return 0;
#endif
}
145    
/*
 * getsize -- size of the file named by "path", in bytes.
 *
 * Returns st_size as reported by stat(), converted to int, or -1 when the
 * path cannot be stat'ed.  Because the return type is int, sizes that do not
 * fit in an int cannot be represented by this interface.
 */
int getsize(char *path)
{
    struct stat info;

    if (stat(path, &info) == 0)
        return (int) info.st_size;

    return -1;
}
158    
159    
160    
161     FILE *openIndexFILEForWrite(char *filename)
162     {
163     return fopen(filename, F_WRITE_BINARY);
164     }
165    
166     FILE *openIndexFILEForRead(char *filename)
167     {
168     return fopen(filename, F_READ_BINARY);
169     }
170    
171     FILE *openIndexFILEForReadAndWrite(char *filename)
172     {
173     return fopen(filename, F_READWRITE_BINARY);
174     }
175    
/*
 * CreateEmptyFile -- create "filename" (via openIndexFILEForWrite) and
 * close it again straight away.
 *
 * If the file cannot be opened the failure is reported through progerrno().
 * NOTE(review): progerrno() presumably terminates the program -- confirm in
 * error.c.  The explicit return below only matters if it does not: it keeps
 * a NULL stream from ever being handed to fclose().
 */
void CreateEmptyFile(char *filename)
{
    FILE *fp = openIndexFILEForWrite(filename);

    if (!fp)
    {
        progerrno("Couldn't write the file \"%s\": ", filename);
        return;
    }

    fclose(fp);
}
186    
187     /*
188     * Invoke the methods of the current Indexing Data Source
189     */
190     void indexpath(SWISH * sw, char *path)
191     {
192     /* invoke routine to index a "path" */
193     (*IndexingDataSource->indexpath_fn) (sw, path);
194     }
195    
196    
197     /*
198     -- read file into a buffer
199     -- truncate file if necessary (truncateDocSize)
200     -- return: buffer
201     -- 2001-03-16 rasc truncateDoc
202     */
203    
204     /* maybe some day this could be chunked reading? */
205    
206     char *read_stream(SWISH *sw, char *name, FILE * fp, long filelen, long max_size)
207     {
208     long c,
209     offset;
210     long bufferlen;
211     unsigned char *buffer, *tmp = NULL;
212     size_t bytes_read;
213    
214    
215     if (filelen)
216     {
217    
218     /* truncate doc? */
219     if (max_size && (filelen > max_size))
220     {
221     filelen = max_size;
222     }
223    
224     buffer = (unsigned char *)Mem_ZoneAlloc(sw->Index->perDocTmpZone, filelen + 1);
225     *buffer = '\0';
226     bytes_read = fread(buffer, 1, filelen, fp);
227    
228    
229     buffer[filelen] = '\0';
230    
231     /* JFP - substitute null chars, VFC record may have null char in reclen word, try to discard them */
232     if ( strlen( (char *)buffer ) < bytes_read )
233     {
234     int i;
235     progwarn("Substituted possible embedded null character(s) in file '%s'\n", name);
236     for (i = 0; i < bytes_read; ++i)
237     if (buffer[i] == '\0') buffer[i] = '\n';
238     }
239    
240     }
241     else
242     { /* if we are reading from a popen call, filelen is 0 */
243    
244     buffer = (unsigned char *)Mem_ZoneAlloc(sw->Index->perDocTmpZone,(bufferlen = RD_BUFFER_SIZE) + 1);
245     *buffer = '\0';
246     for (offset = 0; (c = fread(buffer + offset, 1, RD_BUFFER_SIZE, fp)) == RD_BUFFER_SIZE; offset += RD_BUFFER_SIZE)
247     {
248     /* truncate? break if to much read */
249     if (max_size && (bufferlen > max_size))
250     {
251     break;
252     }
253     tmp = (unsigned char *)Mem_ZoneAlloc(sw->Index->perDocTmpZone, bufferlen + RD_BUFFER_SIZE + 1);
254     memcpy(tmp,buffer,bufferlen+1);
255     buffer = tmp;
256     bufferlen += RD_BUFFER_SIZE;
257     }
258     filelen = offset + c;
259    
260     if (max_size && (filelen > max_size))
261     {
262     filelen = max_size;
263     }
264     buffer[filelen] = '\0';
265     }
266     return (char *) buffer;
267     }
268    
269     /* Sept 25, 2001 - moseley
270     * Flush the file -- for use with -S prog, when either Truncate is in use, or
271     * the parser aborted for some reason (e.g. !isoktitle).
272     */
273    
274     void flush_stream( FileProp *fprop )
275     {
276     char tmpbuf[4096];
277     int read;
278    
279     while ( fprop->bytes_read < fprop->fsize )
280     {
281     if ( ( fprop->fsize - fprop->bytes_read ) > 4096 )
282     {
283     if ( !(read = fread(tmpbuf, 1, 4096, fprop->fp)))
284     break;
285    
286     fprop->bytes_read += read;
287     }
288     else
289     {
290     read = fread(tmpbuf, 1, fprop->fsize - fprop->bytes_read, fprop->fp);
291     break;
292     }
293     }
294     }
295    
296    
297     /* Mar 27, 2001 - moseley
298     * Separate out the creation of the file properties
299     *
300     */
301    
302     FileProp *init_file_properties(SWISH * sw)
303     {
304     FileProp *fprop;
305    
306     fprop = (FileProp *) emalloc(sizeof(FileProp));
307     /* emalloc checks fail and aborts... */
308    
309     memset( fprop, 0, sizeof(FileProp) );
310    
311     return fprop;
312     }
313    
314    
315     /* Mar 27, 2001 - moseley
316     * Separate out the adjusting of file properties by config settings
317     * 2001-04-09 rasc changed filters
318     */
319    
320     void init_file_prop_settings(SWISH * sw, FileProp * fprop)
321     {
322    
323     /* Basename of document path => document filename */
324     fprop->real_filename = str_basename(fprop->real_path);
325    
326    
327     /* -- get Doc Type as is in IndexContents or Defaultcontents
328     -- doctypes by jruiz
329     */
330    
331     /* Might already be set by a header in extpro.c */
332     if ( !fprop->doctype )
333     {
334     /* Get the type by file extension -- or return NODOCTYPE */
335     fprop->doctype = getdoctype(fprop->real_path, sw->indexcontents);
336    
337     /* If was not set by getdoctype() then assign it the default parser */
338     /* This could still be NODOCTYPE, or it might be something set by DefaultContents */
339    
340     if (fprop->doctype == NODOCTYPE)
341     fprop->doctype = sw->DefaultDocType;
342     }
343    
344    
345     /* -- index just the filename (or doc title tags)?
346     -- this param was "wrongly" named indextitleonly */
347    
348     fprop->index_no_content = (sw->nocontentslist != NULL) && isoksuffix(fprop->real_path, sw->nocontentslist);
349    
350     /* -- Any filter for this file type?
351     -- NULL = No Filter, (char *) path to filter prog.
352     */
353    
354     fprop->hasfilter = hasfilter(sw, fprop->real_path);
355    
356     fprop->stordesc = hasdescription(fprop->doctype, sw->storedescription);
357    
358     }
359    
360    
361    
362     /*
363     -- file_properties
364     -- Get/eval information about a file and return it.
365     -- Some flags are calculated from swish configs for this "real_path"
366     -- Structure has to be freed using free_file_properties
367     -- 2000-11-15 rasc
368     -- return: (FileProp *)
369     -- A failed stat returns an empty (default) structure
370    
371     -- 2000-12
372     -- Added StoreDescription
373     */
374    
375     FileProp *file_properties(char *real_path, char *work_file, SWISH * sw)
376     {
377     FileProp *fprop;
378     struct stat stbuf;
379    
380     /* create an initilized fprop structure */
381    
382     fprop = init_file_properties(sw);
383    
384    
385     /* Dup these, since the real_path may be reallocated by FileRules */
386     fprop->real_path = estrdup( real_path );
387     fprop->work_path = estrdup( work_file ? work_file : real_path );
388     fprop->orig_path = estrdup( real_path );
389    
390    
391     /* Stat the file */
392     /* This is really the wrong place for this, as it's really only useful for fs.c method */
393     /* for http.c it means the last mod date is the temp file date */
394     /* Probably this entire function isn't needed - moseley */
395    
396     if (!stat(fprop->work_path, &stbuf))
397     {
398     fprop->fsize = (long) stbuf.st_size;
399     fprop->mtime = stbuf.st_mtime;
400     }
401    
402    
403     /* Now set various fprop settings based mostly on file name */
404    
405     init_file_prop_settings(sw, fprop);
406    
407    
408    
409     #ifdef DEBUG
410     fprintf(stderr, "file_properties: path=%s, (workpath=%s), fsize=%ld, last_mod=%ld Doctype: %d Filter: %p\n",
411     fprop->real_path, fprop->work_path, (long) fprop->fsize, (long) fprop->mtime, fprop->doctype, fprop->filterprog);
412     #endif
413    
414     return fprop;
415     }
416    
417    
418     /* -- Free FileProp structure
419     -- unless no alloc for strings simple free structure
420     */
421    
422     void free_file_properties(FileProp * fprop)
423     {
424     efree( fprop->real_path );
425     efree( fprop->work_path );
426     efree( fprop->orig_path );
427     efree(fprop);
428     }
429    
430    
431     static char *temp_file_template = "XXXXXX";
432     /***********************************************************************
433     * Create a temporary file
434     *
435     * Call With:
436     * *SWISH = to get at the TmpDir config setting which I don't like
437     * *prefix = chars to prepend to the file name
438     * **file_name_buffer = where to store address of file name
439     * unlink = if true, will unlink file
440     * if not unlinked, then caller must free the name
441     * Return:
442     * *FILE
443     * modified file_name_buffer
444     *
445     * Will create temp files in the directory specified by environment vars
446     * TMPDIR and TMP, and by the config.h setting of TMPDIR in that order.
447     *
448     * Note:
449     * It's expected that swish is not run suid, so
450     * (getuid()==geteuid()) && (getgid()==getegid())
451     * if not checked. I'm not sure if that would choke on other platforms.
452     *
453     *
454     * Source:
455     * http://www.linuxdoc.org/HOWTO/Secure-Programs-HOWTO/avoid-race.html
456     *
457     * Questions:
458     * Can non-unix OS unlink the file and continue to hold the fd?
459     *
460     ***********************************************************************/
461    
462     FILE *create_tempfile(SWISH *sw, const char *f_mode, char *prefix, char **file_name_buffer, int remove_file_name )
463     {
464     int temp_fd;
465     mode_t old_mode;
466     FILE *temp_file;
467     char *file_name;
468     int file_name_len;
469     struct MOD_Index *idx = sw->Index;
470     char *tmpdir = NULL;
471     file_name_len = (prefix ? strlen(prefix) : 0) + strlen( temp_file_template ) + strlen( TEMP_FILE_PREFIX );
472    
473    
474    
475     /* Perl is nice sometimes */
476     if ( !( tmpdir = getenv("TMPDIR")) )
477     if ( !(tmpdir = getenv("TMP")) )
478     if( !(tmpdir = getenv("TEMP")) )
479     tmpdir = idx->tmpdir;
480    
481     if ( tmpdir && !*tmpdir )
482     tmpdir = NULL; // just in case it's the empty string
483    
484     if ( tmpdir )
485     file_name_len += strlen( tmpdir ) + 1; // for path separator
486    
487    
488    
489     file_name = emalloc( file_name_len + 1 );
490    
491     *file_name = '\0';
492    
493     if ( tmpdir )
494     {
495     strcat( file_name, tmpdir );
496     normalize_path( file_name );
497     strcat( file_name, "/" );
498     }
499    
500     strcat( file_name, TEMP_FILE_PREFIX );
501    
502     if ( prefix )
503     strcat( file_name, prefix );
504    
505     strcat( file_name, temp_file_template );
506    
507     old_mode = umask(077); /* Create file with restrictive permissions */
508    
509     temp_fd = mkstemp( file_name );
510    
511     (void) umask(old_mode);
512    
513     if (temp_fd == -1)
514     progerrno("Couldn't open temporary file '%s': ", file_name );
515    
516     if (!(temp_file = fdopen(temp_fd, f_mode)))
517     progerrno("Couldn't create temporary file '%s' file descriptor: ", file_name);
518    
519     if ( remove_file_name )
520     {
521     if ( remove( file_name ) == -1 )
522     progerrno("Couldn't unlink temporary file '%s' :", file_name);
523    
524     efree( file_name );
525     }
526     else
527     *file_name_buffer = file_name;
528    
529    
530     return temp_file;
531     }
532    
533    

  ViewVC Help
Powered by ViewVC 1.1.22