1 |
adcroft |
1.1 |
/* |
2 |
|
|
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company |
3 |
|
|
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94 |
4 |
|
|
** |
5 |
|
|
** This program and library is free software; you can redistribute it and/or |
6 |
|
|
** modify it under the terms of the GNU (Library) General Public License |
7 |
|
|
** as published by the Free Software Foundation; either version 2 |
8 |
|
|
** of the License, or any later version. |
9 |
|
|
** |
10 |
|
|
** This program is distributed in the hope that it will be useful, |
11 |
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 |
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 |
|
|
** GNU (Library) General Public License for more details. |
14 |
|
|
** |
15 |
|
|
** You should have received a copy of the GNU (Library) General Public License |
16 |
|
|
** along with this program; if not, write to the Free Software |
17 |
|
|
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
18 |
|
|
**-------------------------------------------------------------------- |
19 |
|
|
** |
20 |
|
|
** Mar 27, 2001 - created moseley |
21 |
|
|
** |
22 |
|
|
*/ |
23 |
|
|
|
24 |
|
|
#ifndef _WIN32 |
25 |
|
|
#include <unistd.h> |
26 |
|
|
#endif |
27 |
|
|
|
28 |
|
|
#include "swish.h" |
29 |
|
|
#include "mem.h" |
30 |
|
|
#include "string.h" |
31 |
|
|
#include "index.h" |
32 |
|
|
#include "file.h" |
33 |
|
|
#include "error.h" |
34 |
|
|
#include "parse_conffile.h" |
35 |
|
|
|
36 |
|
|
/*
 * Per-SWISH-handle state of the external program ("prog") module.
 */
struct MOD_Prog
{
    /* head of the linked list of extra command-line parameters that are
     * appended to the external program's command when it is spawned */
    struct swline *progparameterslist;
};
41 |
|
|
|
42 |
|
|
|
43 |
|
|
/* |
44 |
|
|
-- init structures for this module |
45 |
|
|
*/ |
46 |
|
|
|
47 |
|
|
/*
 * initModule_Prog -- allocate this module's state and hang it off sw->Prog.
 *
 * The parameter list starts out empty (NULL).
 */
void initModule_Prog (SWISH *sw)
{
    struct MOD_Prog *mod = (struct MOD_Prog *) emalloc(sizeof *mod);

    /* no program parameters have been configured yet */
    mod->progparameterslist = NULL;

    sw->Prog = mod;
}
59 |
|
|
|
60 |
|
|
/*
 * freeModule_Prog -- release this module's state and clear sw->Prog.
 *
 * Does nothing when sw->Prog is already NULL, so it is safe to call on a
 * handle whose module was never initialised or was already freed.
 */
void freeModule_Prog (SWISH *sw)
{
    struct MOD_Prog *self = sw->Prog;
    struct swline   *node;

    if ( !self )
        return;

    /* The parameters are a linked list: release every node, not only the
     * head, otherwise all nodes after the first are leaked. */
    node = self->progparameterslist;
    while ( node )
    {
        struct swline *next = node->next;   /* read before the node is freed */

        /* NOTE(review): assumes the text reachable through node->line is
         * stored inside the node's own allocation.  If struct swline keeps
         * it in a separately allocated buffer, that buffer must be released
         * here as well -- confirm against the struct swline definition. */
        efree( node );
        node = next;
    }
    self->progparameterslist = NULL;

    efree ( self );
    sw->Prog = NULL;
}
73 |
|
|
|
74 |
|
|
/*
 * configModule_Prog -- handle the configuration directives of this module.
 *
 * sl holds one parsed configuration line; sl->word[0] is the directive name
 * and sl->n the number of words on the line.
 *
 * The only directive recognised is "SwishProgParameters" (compared without
 * regard to case).  Its values are handed to grabCmdOptions(), which stores
 * them in the module's parameter list; a directive without any value is
 * reported through progerr().
 *
 * Returns 1 when the line was a directive of this module, 0 otherwise.
 */
int configModule_Prog (SWISH *sw, StringList *sl)
{
    struct MOD_Prog *mod = sw->Prog;
    char   *directive = sl->word[0];

    if (strcasecmp(directive, "SwishProgParameters") != 0)
        return 0;               /* not a module directive */

    if (sl->n <= 1)
        progerr("%s: requires at least one value", directive);
    else
        grabCmdOptions(sl, 1, &mod->progparameterslist);

    return 1;
}
97 |
|
|
|
98 |
|
|
|
99 |
|
|
|
100 |
|
|
/*
 * open_external_program -- spawn the external program and return a stream
 * from which its output can be read.
 *
 * prog is the path of the program to run.  The special name "stdin" spawns
 * nothing and returns the process's own stdin instead; the caller must not
 * pclose() that stream.
 *
 * The command line is the (normalised) program path followed by every entry
 * of the module's parameter list, separated by single spaces.  Before the
 * program is spawned the path is checked: it must exist, must not be a
 * directory and (except on Windows) must be readable and executable.  Any
 * failure is reported through progerr()/progerrno().
 *
 * Returns the stream obtained from popen().
 */
static FILE *open_external_program(SWISH * sw, char *prog)
{
    char   *cmd;
    FILE   *fp;
    size_t  total_len;
    struct stat stbuf;
    struct swline *param;

    if ( ! strcmp( prog, "stdin") )
        return stdin;


    /* length of the program name plus every parameter and the space
     * that separates it from what precedes it */
    total_len = strlen(prog);

    for (param = sw->Prog->progparameterslist; param; param = param->next)
        total_len += strlen(param->line) + 1;

    cmd = emalloc(total_len + 20);
    strcpy(cmd, prog);

    normalize_path( cmd );  /* for stat calls */


    /* this should probably be in file.c so filters.c can check, too */
    /* note this won't catch errors in a shebang line, of course */

    if (stat(cmd, &stbuf))
        progerrno("External program '%s': ", cmd);

    /* The file type is a multi-bit field, so it has to be compared as a
     * whole.  Testing the S_IFDIR bit alone also matches other file types
     * whose code happens to contain that bit (on Linux, for example, block
     * devices and sockets). */
    if ( (stbuf.st_mode & S_IFMT) == S_IFDIR )
        progerr("External program '%s' is a directory.", cmd);

#ifndef _WIN32

    if ( access( cmd, R_OK|X_OK ) )
        progerrno("Cannot execute '%s': ", cmd);

#endif

#ifdef _WIN32

    make_windows_path( cmd );

#endif


    /* append the configured parameters to the command line */
    for (param = sw->Prog->progparameterslist; param; param = param->next)
    {
        strcat(cmd, " ");
        strcat(cmd, param->line);
    }


    fp = popen(cmd, F_READ_TEXT);

    if (!fp)
        progerrno("Failed to spawn external program '%s': ", cmd);

    efree(cmd);
    return fp;
}
170 |
|
|
|
171 |
|
|
/* To make filters work with prog, need to write the file out to a temp file */ |
172 |
|
|
/* It will be faster to do the filtering from within the "prog" program */ |
173 |
|
|
/* This may not be safe if running as a threaded app, and I'm not clear on how portable this is */ |
174 |
|
|
/* This also uses read_stream to read in the file -- so the entire file is read into memory instead of chunked to the temp file */ |
175 |
|
|
|
176 |
|
|
/*
 * save_to_temp_file -- copy a document delivered by the external program
 * into a temporary file so that it can be processed through its path.
 *
 * fprop->fsize bytes are read from fprop->fp with read_stream() (the whole
 * document is held in memory at once) and written to a new temporary file.
 * On return fprop->work_path names that file and fprop->fp is NULL, which
 * hides the fact that the document came from an external program.  The
 * caller is responsible for removing the temporary file.
 *
 * A short write, or a failure while closing (and thereby flushing) the
 * temporary file, is reported through progerrno().
 */
static void save_to_temp_file(SWISH *sw, FileProp *fprop)
{
    FILE   *out;
    char   *rd_buffer = NULL;   /* complete file read into buffer */
    size_t  bytes;


    /* slurp entire file into memory -- yuck */
    rd_buffer = read_stream(sw, fprop->real_path, fprop->fp, fprop->fsize, 0);


    /* Save content to a temporary file; create_tempfile() stores the new
     * name in work_path, so the old value is released first */
    efree( fprop->work_path );
    out = create_tempfile(sw, F_WRITE_TEXT, "fltr", &fprop->work_path, 0 );

    bytes = fwrite( rd_buffer, 1, fprop->fsize, out );

    if ( bytes != (size_t)fprop->fsize )
        progerrno("Failed to write temporary filter file '%s': ", fprop->work_path);


    /* hide the fact that it's an external program */
    fprop->fp = (FILE *) NULL;


    /* rd_buffer is intentionally not freed here: the efree() call that used
     * to be at this point was disabled (marked "JMRUIZ").
     * NOTE(review): presumably read_stream() keeps ownership of the buffer
     * -- confirm before re-enabling the free. */

    /* fwrite() may only have filled the stdio buffer; a write error can
     * surface when fclose() flushes, so its result has to be checked too */
    if ( fclose( out ) )
        progerrno("Failed to write temporary filter file '%s': ", fprop->work_path);

}
205 |
|
|
|
206 |
|
|
|
207 |
|
|
|
208 |
|
|
/*
 * extprog_indexpath -- index every document produced by an external program.
 *
 * prog is the program to run (or "stdin", see open_external_program()).
 * Its output is a sequence of documents, each consisting of header lines,
 * one blank line, and then the document body:
 *
 *     Path-Name:       name of the document                 (required)
 *     Content-Length:  number of body bytes, must be > 0    (required)
 *     Last-Mtime:      modification time as a decimal number
 *     No-Contents:     force index_no_content for this document
 *     Document-Type:   a name understood by strtoDocType()
 *
 * Header names are matched without regard to case.  Unknown headers produce
 * a warning; malformed or missing required headers are reported through
 * progerr().  All header values apply to one document only and are reset
 * once that document has been indexed.
 *
 * For each document a FileProp is built and passed to do_index_file().  If
 * a filter applies to the document, the body is first saved to a temporary
 * file, which is removed again after indexing.
 *
 * sw->truncateDocSize is forced to 0 while the program's output is being
 * processed and restored afterwards.
 */
static void extprog_indexpath(SWISH * sw, char *prog)
{
    FileProp *fprop;
    FILE   *fp;
    char   *ln;
    char   *real_path = NULL;
    long    fsize = 0;
    time_t  mtime = 0;
    int     index_no_content = 0;
    long    truncate_doc_size;
    int     docType = 0;

    fp = open_external_program(sw, prog);

    ln = emalloc(MAXSTRLEN + 1);

    truncate_doc_size = sw->truncateDocSize;
    sw->truncateDocSize = 0;    /* can't truncate -- prog should make sure doc is not too large */
    // $$$ This is no longer true with libxml push parser

    // $$$ next time, break out the header parsing in its own function, please

    /* loop on headers */
    while (fgets(ln, MAXSTRLEN, fp) != NULL)
    {
        char   *end;
        char   *line;
        int     has_filter = 0;

        line = str_skip_ws(ln); /* skip leading white space */
        end = strrchr(line, '\n');  /* position of the line terminator, if any */

        /* trim the newline and any white space in front of it */
        if (end)
        {
            /* <ctype.h> functions need a value in unsigned char range;
             * a plain char may be negative for bytes above 0x7f */
            while ( end > line && isspace( (unsigned char)*(end-1) ) )
                end--;

            *end = '\0';
        }

        if (strlen(line) == 0)  /* blank line indicates body */
        {
            /* a non-positive length cannot describe a body to read */
            if (fsize <= 0 || !real_path)
                progerr("External program failed to return required headers Path-Name: & Content-Length:");


            /* Create the FileProp entry to describe this "file" */

            /* This is not great -- really should make creating a fprop more generic */
            /* this was done because file.c assumed that the "file" was on disk */
            /* which has changed over time due to filters, http, and prog */

            fprop = init_file_properties(sw);
            fprop->real_path = real_path;   /* fprop takes over this allocation */
            fprop->work_path = estrdup( real_path );
            fprop->orig_path = estrdup( real_path );

            /* Set the doc type from the header */
            if ( docType )
                fprop->doctype = docType;


            /* set real_path, doctype, index_no_content, filter, stordesc */
            init_file_prop_settings(sw, fprop);

            fprop->fp = fp;         /* stream to read from */
            fprop->fsize = fsize;   /* how much to read */
            fprop->mtime = mtime;

            /* header can force index_no_content */
            if (index_no_content)
                fprop->index_no_content++;


            /* the quick hack to make filters work is for FilterOpen
             * to see that fprop->fp is set, read it into a buffer
             * write it to a temporary file, then call the filter
             * program as normally is done.  But much smarter to
             * simply filter in the prog, after all.  Faster, too.
             */

            if (fprop->hasfilter)
            {
                save_to_temp_file( sw , fprop );
                has_filter++;   /* save locally, in case it gets reset somewhere else */
            }

            if (sw->verbose >= 3)
                printf("%s", real_path);
            else if (sw->verbose >= 2)
                printf("Processing %s...\n", real_path);


            do_index_file(sw, fprop);

            if ( has_filter && remove( fprop->work_path ) )
                progwarnno("Error removing temporary file '%s': ", fprop->work_path);

            free_file_properties(fprop);
            // efree(real_path); free_file_properties will free the paths

            /* start the next document with a clean set of header values */
            real_path = NULL;
            mtime = 0;
            fsize = 0;
            index_no_content = 0;
            docType = 0;    /* otherwise one Document-Type header would apply to all later documents */

        }


        else    /* we are reading headers */
        {
            if (strncasecmp(line, "Content-Length", 14) == 0)
            {
                char *x = strchr(line, ':');
                if (!x)
                    progerr("Failed to parse Content-Length header '%s'", line);
                fsize = strtol(++x, NULL, 10);
                continue;
            }

            if (strncasecmp(line, "Last-Mtime", 10) == 0)
            {
                char *x = strchr(line, ':');
                if (!x)
                    progerr("Failed to parse Last-Mtime header '%s'", line);
                mtime = strtol(++x, NULL, 10);
                continue;
            }

            if (strncasecmp(line, "No-Contents:", 12) == 0)
            {
                index_no_content++;
                continue;
            }


            if (strncasecmp(line, "Path-Name", 9) == 0)
            {
                char *x = strchr(line, ':');
                if (!x)
                    progerr("Failed to parse Path-Name header '%s'", line);

                x = str_skip_ws(++x);
                if (!*x)
                    progerr("Failed to find path name in Path-Name header '%s'", line);

                /* a repeated header replaces the earlier value; release
                 * the old copy instead of leaking it */
                if (real_path)
                    efree(real_path);

                real_path = emalloc(strlen(x) + 1);
                strcpy(real_path, x);
                continue;
            }

            if (strncasecmp(line, "Document-Type", 13) == 0)
            {
                char *x = strchr(line, ':');
                if (!x)
                    progerr("Failed to parse Document-Type '%s'", line);

                x = str_skip_ws(++x);
                if (!*x)
                    progerr("Failed to documnet type in Document-Type header '%s'", line);

                if ( !(docType = strtoDocType( x )) )
                    progerr("documnet type '%s' not a valid Swish-e document type in Document-Type header '%s'", x, line);

                continue;
            }

            progwarn("Unknown header line: '%s' from program %s", line, prog);

        }
    }

    /* the output ended inside a header block: that path never became a
     * document, so nothing else owns the string */
    if (real_path)
        efree(real_path);

    efree(ln);

    /* restore the setting */
    sw->truncateDocSize = truncate_doc_size;

    /* open_external_program() returns stdin itself for the program name
     * "stdin"; that stream did not come from popen() and must not be
     * passed to pclose() */
    if ( fp != stdin && pclose(fp) == -1 )
        progwarnno("Failed to properly close external program: ");

}
394 |
|
|
|
395 |
|
|
|
396 |
|
|
|
397 |
|
|
|
398 |
|
|
|
399 |
|
|
/* Don't use old method of config checking */ |
400 |
|
|
/*
 * extprog_parseconfline -- legacy per-data-source configuration hook.
 *
 * This data source does not use the old method of config checking, so no
 * line is ever claimed here: both arguments are ignored and 0 is returned.
 */
static int extprog_parseconfline(SWISH * sw, StringList *l)
{
    (void) sw;
    (void) l;

    return 0;
}
404 |
|
|
|
405 |
|
|
|
406 |
|
|
|
407 |
|
|
struct _indexing_data_source_def ExternalProgramDataSource = { |
408 |
|
|
"External-Program", |
409 |
|
|
"prog", |
410 |
|
|
extprog_indexpath, |
411 |
|
|
extprog_parseconfline |
412 |
|
|
}; |