/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/file.c
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/file.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 /*
2 $Id: file.c,v 1.43 2002/07/09 16:14:21 whmoseley Exp $
3 **
4 ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
5 ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
6 **
7 ** This program and library is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU (Library) General Public License
9 ** as published by the Free Software Foundation; either version 2
10 ** of the License, or any later version.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ** GNU (Library) General Public License for more details.
16 **
17 ** You should have received a copy of the GNU (Library) General Public License
18 ** along with this program; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 **-------------------------------------------------------------
21 ** Changed getdefaults to allow metaNames in the user
22 ** configuration file
23 ** G.Hill 4/16/97 ghill@library.berkeley.edu
24 **
25 ** change sprintf to snprintf to avoid corruption, and use MAXSTRLEN from swish.h
26 ** added safestrcpy() macro to avoid corruption from strcpy overflow
27 ** SRE 11/17/99
28 **
29 ** added buffer size arg to grabStringValue - core dumping from overrun
30 ** fixed logical OR and other problems pointed out by "gcc -Wall"
31 ** SRE 2/22/00
32 **
33 ** counter modulo 128 had parens typo
34 ** SRE 2/23/00
35 **
36 ** read stopwords from file
37 ** Rainer Scherg (rasc) 2000-06-15
38 **
39 ** 2000-11-15 rasc
40 ** file_properties retrieves last mod date, filesize, and evals some swish
41 ** config flags for this file!
42 **
43 ** 2001-02-12 rasc errormsg "print" changed...
44 ** 2001-03-16 rasc truncateDoc [read_stream] (if doc too large, truncate... )
45 ** 2001-03-17 rasc fprop enhanced by "real_filename"
46 **
47 */
48
49 #ifdef HAVE_CONFIG_H
50 #include "acconfig.h"
51 #endif
52
53 #ifdef HAVE_STDLIB_H
54 #include <stdlib.h>
55 #endif
56 #ifdef HAVE_UNISTD_H
57 #include <unistd.h>
58 #endif
59 #include "swish.h"
60 #include "mem.h"
61 #include "string.h"
62 #include "file.h"
63 #include "error.h"
64 #include "list.h"
65 #include "hash.h"
66 #include "check.h"
67 #include "index.h"
68 #include "filter.h"
69 #include "metanames.h"
70
71
72 /* Cough, hack, cough - convert slash to backslash for programs that are run via the shell */
73 #ifdef _WIN32
/*
 * make_windows_path -- rewrite a path in place so that every forward
 * slash becomes a backslash.  The string length never changes.
 */
void make_windows_path( char *path )
{
    int pos = 0;

    while ( path[pos] != '\0' )
    {
        if ( path[pos] == '/' )
            path[pos] = '\\';
        pos++;
    }
}
82 #endif
83
/*
 * normalize_path -- canonicalise a path in place:
 *   1. every backslash is turned into a forward slash (for Windows users);
 *   2. trailing slashes are stripped, but the string is never shortened
 *      below one character, so a lone "/" is kept as is.
 */
void normalize_path(char *path)
{
    char *tail;

    /* One pass flips the separators and leaves `tail` on the terminator */
    for ( tail = path; *tail != '\0'; tail++ )
    {
        if ( *tail == '\\' )
            *tail = '/';
    }

    /* Walk back over trailing slashes, always keeping the first character */
    while ( (tail - path) > 1 && tail[-1] == '/' )
    {
        tail--;
        *tail = '\0';
    }
}
103
104
105
/*
 * isdirectory -- report whether `path` names a directory.
 *
 * Returns 1 when stat() succeeds and the file type is S_IFDIR,
 * 0 otherwise (including when stat() fails).
 */
int isdirectory(char *path)
{
    struct stat info;
    int         result = 0;

    if ( stat(path, &info) == 0 && (info.st_mode & S_IFMT) == S_IFDIR )
        result = 1;

    return result;
}
117
/*
 * isfile -- report whether `path` names a regular file.
 *
 * Returns 1 when stat() succeeds and the file type is S_IFREG,
 * 0 otherwise (including when stat() fails).
 */
int isfile(char *path)
{
    struct stat info;
    int         result = 0;

    if ( stat(path, &info) == 0 && (info.st_mode & S_IFMT) == S_IFREG )
        result = 1;

    return result;
}
129
/*
 * islink -- report whether `path` itself is a symbolic link.
 *
 * Uses lstat() so the link is examined rather than its target.
 * Returns 1 for a symbolic link, 0 otherwise (including when lstat()
 * fails).  When NO_SYMBOLIC_FILE_LINKS is defined the answer is always 0.
 */
int islink(char *path)
{
#ifndef NO_SYMBOLIC_FILE_LINKS
    struct stat stbuf;

    if (lstat(path, &stbuf))
        return 0;

    /* S_IFLNK is a multi-bit type code, not a flag: the file-type field
     * must be isolated with S_IFMT before comparing, exactly as
     * isdirectory() and isfile() do.  Masking with S_IFLNK itself would
     * also accept any type whose code is a bit-superset of S_IFLNK. */
    return ((stbuf.st_mode & S_IFMT) == S_IFLNK) ? 1 : 0;
#else
    return 0;
#endif
}
145
/*
 * getsize -- size of the file at `path`, in bytes.
 *
 * Returns -1 when stat() fails.  The st_size value is converted to the
 * int return type.
 */
int getsize(char *path)
{
    struct stat info;

    return ( stat(path, &info) != 0 ) ? -1 : (int) info.st_size;
}
158
159
160
161 FILE *openIndexFILEForWrite(char *filename)
162 {
163 return fopen(filename, F_WRITE_BINARY);
164 }
165
166 FILE *openIndexFILEForRead(char *filename)
167 {
168 return fopen(filename, F_READ_BINARY);
169 }
170
171 FILE *openIndexFILEForReadAndWrite(char *filename)
172 {
173 return fopen(filename, F_READWRITE_BINARY);
174 }
175
/*
 * CreateEmptyFile -- open `filename` through openIndexFILEForWrite() and
 * close it again straight away.  A failed open is reported through
 * progerrno().
 */
void CreateEmptyFile(char *filename)
{
    FILE *handle = openIndexFILEForWrite(filename);

    if ( handle == NULL )
        progerrno("Couldn't write the file \"%s\": ", filename);

    fclose(handle);
}
186
187 /*
188 * Invoke the methods of the current Indexing Data Source
189 */
190 void indexpath(SWISH * sw, char *path)
191 {
192 /* invoke routine to index a "path" */
193 (*IndexingDataSource->indexpath_fn) (sw, path);
194 }
195
196
197 /*
198 -- read file into a buffer
199 -- truncate file if necessary (truncateDocSize)
200 -- return: buffer
201 -- 2001-03-16 rasc truncateDoc
202 */
203
204 /* maybe some day this could be chunked reading? */
205
206 char *read_stream(SWISH *sw, char *name, FILE * fp, long filelen, long max_size)
207 {
208 long c,
209 offset;
210 long bufferlen;
211 unsigned char *buffer, *tmp = NULL;
212 size_t bytes_read;
213
214
215 if (filelen)
216 {
217
218 /* truncate doc? */
219 if (max_size && (filelen > max_size))
220 {
221 filelen = max_size;
222 }
223
224 buffer = (unsigned char *)Mem_ZoneAlloc(sw->Index->perDocTmpZone, filelen + 1);
225 *buffer = '\0';
226 bytes_read = fread(buffer, 1, filelen, fp);
227
228
229 buffer[filelen] = '\0';
230
231 /* JFP - substitute null chars, VFC record may have null char in reclen word, try to discard them */
232 if ( strlen( (char *)buffer ) < bytes_read )
233 {
234 int i;
235 progwarn("Substituted possible embedded null character(s) in file '%s'\n", name);
236 for (i = 0; i < bytes_read; ++i)
237 if (buffer[i] == '\0') buffer[i] = '\n';
238 }
239
240 }
241 else
242 { /* if we are reading from a popen call, filelen is 0 */
243
244 buffer = (unsigned char *)Mem_ZoneAlloc(sw->Index->perDocTmpZone,(bufferlen = RD_BUFFER_SIZE) + 1);
245 *buffer = '\0';
246 for (offset = 0; (c = fread(buffer + offset, 1, RD_BUFFER_SIZE, fp)) == RD_BUFFER_SIZE; offset += RD_BUFFER_SIZE)
247 {
248 /* truncate? break if to much read */
249 if (max_size && (bufferlen > max_size))
250 {
251 break;
252 }
253 tmp = (unsigned char *)Mem_ZoneAlloc(sw->Index->perDocTmpZone, bufferlen + RD_BUFFER_SIZE + 1);
254 memcpy(tmp,buffer,bufferlen+1);
255 buffer = tmp;
256 bufferlen += RD_BUFFER_SIZE;
257 }
258 filelen = offset + c;
259
260 if (max_size && (filelen > max_size))
261 {
262 filelen = max_size;
263 }
264 buffer[filelen] = '\0';
265 }
266 return (char *) buffer;
267 }
268
269 /* Sept 25, 2001 - moseley
270 * Flush the file -- for use with -S prog, when either Truncate is in use, or
271 * the parser aborted for some reason (e.g. !isoktitle).
272 */
273
274 void flush_stream( FileProp *fprop )
275 {
276 char tmpbuf[4096];
277 int read;
278
279 while ( fprop->bytes_read < fprop->fsize )
280 {
281 if ( ( fprop->fsize - fprop->bytes_read ) > 4096 )
282 {
283 if ( !(read = fread(tmpbuf, 1, 4096, fprop->fp)))
284 break;
285
286 fprop->bytes_read += read;
287 }
288 else
289 {
290 read = fread(tmpbuf, 1, fprop->fsize - fprop->bytes_read, fprop->fp);
291 break;
292 }
293 }
294 }
295
296
297 /* Mar 27, 2001 - moseley
298 * Separate out the creation of the file properties
299 *
300 */
301
302 FileProp *init_file_properties(SWISH * sw)
303 {
304 FileProp *fprop;
305
306 fprop = (FileProp *) emalloc(sizeof(FileProp));
307 /* emalloc checks fail and aborts... */
308
309 memset( fprop, 0, sizeof(FileProp) );
310
311 return fprop;
312 }
313
314
315 /* Mar 27, 2001 - moseley
316 * Separate out the adjusting of file properties by config settings
317 * 2001-04-09 rasc changed filters
318 */
319
320 void init_file_prop_settings(SWISH * sw, FileProp * fprop)
321 {
322
323 /* Basename of document path => document filename */
324 fprop->real_filename = str_basename(fprop->real_path);
325
326
327 /* -- get Doc Type as is in IndexContents or Defaultcontents
328 -- doctypes by jruiz
329 */
330
331 /* Might already be set by a header in extpro.c */
332 if ( !fprop->doctype )
333 {
334 /* Get the type by file extension -- or return NODOCTYPE */
335 fprop->doctype = getdoctype(fprop->real_path, sw->indexcontents);
336
337 /* If was not set by getdoctype() then assign it the default parser */
338 /* This could still be NODOCTYPE, or it might be something set by DefaultContents */
339
340 if (fprop->doctype == NODOCTYPE)
341 fprop->doctype = sw->DefaultDocType;
342 }
343
344
345 /* -- index just the filename (or doc title tags)?
346 -- this param was "wrongly" named indextitleonly */
347
348 fprop->index_no_content = (sw->nocontentslist != NULL) && isoksuffix(fprop->real_path, sw->nocontentslist);
349
350 /* -- Any filter for this file type?
351 -- NULL = No Filter, (char *) path to filter prog.
352 */
353
354 fprop->hasfilter = hasfilter(sw, fprop->real_path);
355
356 fprop->stordesc = hasdescription(fprop->doctype, sw->storedescription);
357
358 }
359
360
361
362 /*
363 -- file_properties
364 -- Get/eval information about a file and return it.
365 -- Some flags are calculated from swish configs for this "real_path"
366 -- Structure has to be freed using free_file_properties
367 -- 2000-11-15 rasc
368 -- return: (FileProp *)
369 -- A failed stat returns an empty (default) structure
370
371 -- 2000-12
372 -- Added StoreDescription
373 */
374
375 FileProp *file_properties(char *real_path, char *work_file, SWISH * sw)
376 {
377 FileProp *fprop;
378 struct stat stbuf;
379
380 /* create an initilized fprop structure */
381
382 fprop = init_file_properties(sw);
383
384
385 /* Dup these, since the real_path may be reallocated by FileRules */
386 fprop->real_path = estrdup( real_path );
387 fprop->work_path = estrdup( work_file ? work_file : real_path );
388 fprop->orig_path = estrdup( real_path );
389
390
391 /* Stat the file */
392 /* This is really the wrong place for this, as it's really only useful for fs.c method */
393 /* for http.c it means the last mod date is the temp file date */
394 /* Probably this entire function isn't needed - moseley */
395
396 if (!stat(fprop->work_path, &stbuf))
397 {
398 fprop->fsize = (long) stbuf.st_size;
399 fprop->mtime = stbuf.st_mtime;
400 }
401
402
403 /* Now set various fprop settings based mostly on file name */
404
405 init_file_prop_settings(sw, fprop);
406
407
408
409 #ifdef DEBUG
410 fprintf(stderr, "file_properties: path=%s, (workpath=%s), fsize=%ld, last_mod=%ld Doctype: %d Filter: %p\n",
411 fprop->real_path, fprop->work_path, (long) fprop->fsize, (long) fprop->mtime, fprop->doctype, fprop->filterprog);
412 #endif
413
414 return fprop;
415 }
416
417
418 /* -- Free FileProp structure
419 -- unless no alloc for strings simple free structure
420 */
421
422 void free_file_properties(FileProp * fprop)
423 {
424 efree( fprop->real_path );
425 efree( fprop->work_path );
426 efree( fprop->orig_path );
427 efree(fprop);
428 }
429
430
431 static char *temp_file_template = "XXXXXX";
432 /***********************************************************************
433 * Create a temporary file
434 *
435 * Call With:
436 * *SWISH = to get at the TmpDir config setting which I don't like
437 * *prefix = chars to prepend to the file name
438 * **file_name_buffer = where to store address of file name
439 * unlink = if true, will unlink file
440 * if not unlinked, then caller must free the name
441 * Return:
442 * *FILE
443 * modified file_name_buffer
444 *
445 * Will create temp files in the directory specified by environment vars
446 * TMPDIR and TMP, and by the config.h setting of TMPDIR in that order.
447 *
448 * Note:
449 * It's expected that swish is not run suid, so
450 * (getuid()==geteuid()) && (getgid()==getegid())
451 * if not checked. I'm not sure if that would choke on other platforms.
452 *
453 *
454 * Source:
455 * http://www.linuxdoc.org/HOWTO/Secure-Programs-HOWTO/avoid-race.html
456 *
457 * Questions:
458 * Can non-unix OS unlink the file and continue to hold the fd?
459 *
460 ***********************************************************************/
461
462 FILE *create_tempfile(SWISH *sw, const char *f_mode, char *prefix, char **file_name_buffer, int remove_file_name )
463 {
464 int temp_fd;
465 mode_t old_mode;
466 FILE *temp_file;
467 char *file_name;
468 int file_name_len;
469 struct MOD_Index *idx = sw->Index;
470 char *tmpdir = NULL;
471 file_name_len = (prefix ? strlen(prefix) : 0) + strlen( temp_file_template ) + strlen( TEMP_FILE_PREFIX );
472
473
474
475 /* Perl is nice sometimes */
476 if ( !( tmpdir = getenv("TMPDIR")) )
477 if ( !(tmpdir = getenv("TMP")) )
478 if( !(tmpdir = getenv("TEMP")) )
479 tmpdir = idx->tmpdir;
480
481 if ( tmpdir && !*tmpdir )
482 tmpdir = NULL; // just in case it's the empty string
483
484 if ( tmpdir )
485 file_name_len += strlen( tmpdir ) + 1; // for path separator
486
487
488
489 file_name = emalloc( file_name_len + 1 );
490
491 *file_name = '\0';
492
493 if ( tmpdir )
494 {
495 strcat( file_name, tmpdir );
496 normalize_path( file_name );
497 strcat( file_name, "/" );
498 }
499
500 strcat( file_name, TEMP_FILE_PREFIX );
501
502 if ( prefix )
503 strcat( file_name, prefix );
504
505 strcat( file_name, temp_file_template );
506
507 old_mode = umask(077); /* Create file with restrictive permissions */
508
509 temp_fd = mkstemp( file_name );
510
511 (void) umask(old_mode);
512
513 if (temp_fd == -1)
514 progerrno("Couldn't open temporary file '%s': ", file_name );
515
516 if (!(temp_file = fdopen(temp_fd, f_mode)))
517 progerrno("Couldn't create temporary file '%s' file descriptor: ", file_name);
518
519 if ( remove_file_name )
520 {
521 if ( remove( file_name ) == -1 )
522 progerrno("Couldn't unlink temporary file '%s' :", file_name);
523
524 efree( file_name );
525 }
526 else
527 *file_name_buffer = file_name;
528
529
530 return temp_file;
531 }
532
533

  ViewVC Help
Powered by ViewVC 1.1.22