1 |
/* |
2 |
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company |
3 |
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94 |
4 |
** |
5 |
** This program and library is free software; you can redistribute it and/or |
6 |
** modify it under the terms of the GNU (Library) General Public License |
7 |
** as published by the Free Software Foundation; either version 2 |
8 |
** of the License, or any later version. |
9 |
** |
10 |
** This program is distributed in the hope that it will be useful, |
11 |
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 |
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 |
** GNU (Library) General Public License for more details. |
14 |
** |
15 |
** You should have received a copy of the GNU (Library) General Public License |
16 |
** along with this program; if not, write to the Free Software |
17 |
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
18 |
**-------------------------------------------------------------------- |
19 |
** |
20 |
** Mar 27, 2001 - created moseley |
21 |
** |
22 |
*/ |
23 |
|
24 |
#ifndef _WIN32 |
25 |
#include <unistd.h> |
26 |
#endif |
27 |
|
28 |
#include "swish.h" |
29 |
#include "mem.h" |
30 |
#include "string.h" |
31 |
#include "index.h" |
32 |
#include "file.h" |
33 |
#include "error.h" |
34 |
#include "parse_conffile.h" |
35 |
|
36 |
/*
 * Private state of the external-program ("prog") module, one instance per
 * SWISH handle (stored in sw->Prog).
 */
struct MOD_Prog {
    /* Values collected from the SwishProgParameters directive; each entry's
     * ->line is appended to the external program's command line. */
    struct swline *progparameterslist;
};
41 |
|
42 |
|
43 |
/* |
44 |
-- init structures for this module |
45 |
*/ |
46 |
|
47 |
/*
 * initModule_Prog -- allocate this module's private data, start it with an
 * empty parameter list, and attach it to the SWISH handle as sw->Prog.
 */
void initModule_Prog(SWISH *sw)
{
    struct MOD_Prog *module = (struct MOD_Prog *) emalloc(sizeof *module);

    /* no SwishProgParameters values have been collected yet */
    module->progparameterslist = NULL;

    sw->Prog = module;
}
59 |
|
60 |
/*
 * freeModule_Prog -- release this module's private data and clear sw->Prog.
 *
 * progparameterslist is a singly linked list (open_external_program walks it
 * through ->next), so every node is released rather than only the head.
 * Calling this when sw->Prog is already NULL is a no-op.
 */
void freeModule_Prog(SWISH *sw)
{
    struct MOD_Prog *self = sw->Prog;
    struct swline *node;
    struct swline *next;

    if (!self)
        return;

    for (node = self->progparameterslist; node; node = next)
    {
        /* fetch the successor before the node goes away */
        next = node->next;

        /* NOTE(review): if swline->line is a separately allocated string
         * (rather than storage inside the node) it must be freed here as
         * well -- confirm against the swline definition. */
        efree(node);
    }
    self->progparameterslist = NULL;

    efree(self);
    sw->Prog = NULL;
}
73 |
|
74 |
/*
 * configModule_Prog -- handle the configuration directives owned by this
 * module.
 *
 * sl  the tokenised configuration line; sl->word[0] is the directive name.
 *
 * Returns 1 when the directive is "SwishProgParameters" (compared without
 * regard to case), 0 for any other directive.
 */
int configModule_Prog(SWISH *sw, StringList *sl)
{
    struct MOD_Prog *module = sw->Prog;
    char *directive = sl->word[0];

    if (strcasecmp(directive, "SwishProgParameters") != 0)
        return 0;               /* not a module directive */

    if (sl->n <= 1)
        progerr("%s: requires at least one value", directive);
    else
        grabCmdOptions(sl, 1, &module->progparameterslist);

    return 1;
}
97 |
|
98 |
|
99 |
|
100 |
/*
 * open_external_program -- start the external program and return a stream
 * connected to its output.
 *
 * prog  path of the program to run, or the literal string "stdin" to read
 *       the document stream from standard input without spawning anything.
 *
 * Returns stdin for "stdin", otherwise the stream returned by popen().
 * Failures (stat error, path is a directory, not readable/executable,
 * popen failure) are reported through progerr()/progerrno().
 *
 * The command line is the normalised program path followed by every
 * SwishProgParameters value, each preceded by a single space.
 */
static FILE *open_external_program(SWISH * sw, char *prog)
{
    struct swline *param;
    struct stat stbuf;
    size_t  total_len;
    char   *cmd;
    FILE   *fp;

    if (strcmp(prog, "stdin") == 0)
        return stdin;

    /* room for the program path plus " <value>" for every parameter */
    total_len = strlen(prog);
    for (param = sw->Prog->progparameterslist; param; param = param->next)
        total_len += strlen(param->line) + 1;

    /* the 20 bytes of slack are kept from the original; presumably headroom
     * for the path rewriting below -- not verified */
    cmd = emalloc(total_len + 20);
    strcpy(cmd, prog);

    normalize_path(cmd);        /* for stat calls */

    /* this should probably be in file.c so filters.c can check, too */
    /* note this won't catch errors in a shebang line, of course */

    if (stat(cmd, &stbuf))
        progerrno("External program '%s': ", cmd);

    /* Compare the whole file-type field.  Testing the S_IFDIR bit alone also
     * matches other file types whose code contains that bit (block devices
     * on common Unix systems). */
    if ((stbuf.st_mode & S_IFMT) == S_IFDIR)
        progerr("External program '%s' is a directory.", cmd);

#ifndef _WIN32

    if (access(cmd, R_OK | X_OK))
        progerrno("Cannot execute '%s': ", cmd);

#endif

#ifdef _WIN32

    make_windows_path(cmd);

#endif

    /* NOTE: popen() hands this string to the command interpreter and the
     * parameters are appended unquoted, so they are subject to shell
     * interpretation. */
    for (param = sw->Prog->progparameterslist; param; param = param->next)
    {
        strcat(cmd, " ");
        strcat(cmd, param->line);
    }

    fp = popen(cmd, F_READ_TEXT);

    if (!fp)
        progerrno("Failed to spawn external program '%s': ", cmd);

    efree(cmd);
    return fp;
}
170 |
|
171 |
/* To make filters work with prog, need to write the file out to a temp file */ |
172 |
/* It will be faster to do the filtering from within the "prog" program */ |
173 |
/* This may not be safe if running as a threaded app, and I'm not clear on how portable this is */ |
174 |
/* This also uses read_stream to read in the file -- so the entire file is read into memory instead of chunked to the temp file */ |
175 |
|
176 |
/*
 * save_to_temp_file -- copy the document currently available on fprop->fp
 * into a temporary file so that a filter program can be run on it.
 *
 * On return fprop->work_path names the temporary file (the previous
 * work_path is freed) and fprop->fp is NULL.  The whole document
 * (fprop->fsize bytes) is read into memory first.  A short write, or a
 * failure to flush/close the temporary file, is reported via progerrno().
 */
static void save_to_temp_file(SWISH *sw, FileProp *fprop)
{
    FILE   *out;
    char   *rd_buffer;          /* complete document read into memory */
    size_t  bytes;

    /* slurp the entire document into memory */
    rd_buffer = read_stream(sw, fprop->real_path, fprop->fp, fprop->fsize, 0);

    /* replace work_path with the name of a fresh temporary file */
    efree(fprop->work_path);
    out = create_tempfile(sw, F_WRITE_TEXT, "fltr", &fprop->work_path, 0);

    bytes = fwrite(rd_buffer, 1, fprop->fsize, out);

    if (bytes != (size_t) fprop->fsize)
        progerrno("Failed to write temporary filter file '%s': ", fprop->work_path);

    /* hide the fact that it's an external program */
    fprop->fp = (FILE *) NULL;

    /* NOTE(review): rd_buffer is intentionally not freed here -- the
     * original had efree(rd_buffer) commented out (***JMRUIZ).  Who owns
     * the buffer returned by read_stream() is not visible from this
     * function; confirm before re-enabling. */

    /* fwrite() may only have filled the stdio buffer; the data reaches the
     * file when the stream is flushed by fclose(), so check that too */
    if (fclose(out) != 0)
        progerrno("Failed to write temporary filter file '%s': ", fprop->work_path);
}
205 |
|
206 |
|
207 |
|
208 |
/*
 * extprog_indexpath -- index every document delivered by one external
 * program (or by standard input when prog is "stdin").
 *
 * The stream is a sequence of documents.  Each document starts with header
 * lines, terminated by a blank line; the stream and the Content-Length value
 * are then handed to do_index_file() to consume the document body.
 *
 *     Path-Name:       required, the document's path
 *     Content-Length:  required, non-zero size of the body
 *     Last-Mtime:      optional, modification time as a number
 *     No-Contents:     optional, forces index_no_content for this document
 *     Document-Type:   optional, parsed with strtoDocType()
 *
 * Header names are matched case-insensitively by prefix.  Unknown header
 * lines produce a warning.  All per-document header state is cleared after
 * each document so nothing carries over to the next one.
 */
static void extprog_indexpath(SWISH * sw, char *prog)
{
    FileProp *fprop;
    FILE   *fp;
    char   *ln;
    char   *real_path = NULL;
    long    fsize = 0;
    time_t  mtime = 0;
    int     index_no_content = 0;
    int     docType = 0;
    long    truncate_doc_size;

    fp = open_external_program(sw, prog);

    ln = emalloc(MAXSTRLEN + 1);

    truncate_doc_size = sw->truncateDocSize;
    sw->truncateDocSize = 0;    /* can't truncate -- prog should make sure doc is not too large */
    /* $$$ This is no longer true with libxml push parser */

    /* $$$ next time, break out the header parsing in its own function, please */

    /* loop on headers */
    while (fgets(ln, MAXSTRLEN, fp) != NULL)
    {
        char   *end;
        char   *line;
        int     has_filter = 0;

        line = str_skip_ws(ln);         /* skip leading white space */
        end = strrchr(line, '\n');

        /* drop the newline together with any white space in front of it */
        if (end)
        {
            /* <ctype.h> functions need an unsigned char value */
            while (end > line && isspace((unsigned char) *(end - 1)))
                end--;

            *end = '\0';
        }

        if (*line == '\0')              /* blank line indicates body */
        {
            if (!fsize || !real_path)
                progerr("External program failed to return required headers Path-Name: & Content-Length:");

            /* Create the FileProp entry to describe this "file" */

            /* This is not great -- really should make creating a fprop more generic */
            /* this was done because file.c assumed that the "file" was on disk */
            /* which has changed over time due to filters, http, and prog */

            fprop = init_file_properties(sw);
            fprop->real_path = real_path;
            fprop->work_path = estrdup(real_path);
            fprop->orig_path = estrdup(real_path);

            /* Set the doc type from the header */
            if (docType)
                fprop->doctype = docType;

            /* set real_path, doctype, index_no_content, filter, stordesc */
            init_file_prop_settings(sw, fprop);

            fprop->fp = fp;             /* stream to read from */
            fprop->fsize = fsize;       /* how much to read */
            fprop->mtime = mtime;

            /* header can force index_no_content */
            if (index_no_content)
                fprop->index_no_content++;

            /* The quick hack to make filters work: read the document from
             * the stream into a buffer, write it to a temporary file, then
             * call the filter program as is normally done.  It is much
             * smarter (and faster) to filter inside the prog itself.
             */
            if (fprop->hasfilter)
            {
                save_to_temp_file(sw, fprop);
                has_filter++;           /* save locally, in case it gets reset somewhere else */
            }

            if (sw->verbose >= 3)
                printf("%s", real_path);
            else if (sw->verbose >= 2)
                printf("Processing %s...\n", real_path);

            do_index_file(sw, fprop);

            if (has_filter && remove(fprop->work_path))
                progwarnno("Error removing temporary file '%s': ", fprop->work_path);

            /* free_file_properties will free the paths, real_path included */
            free_file_properties(fprop);

            /* forget every header value so the next document starts clean */
            real_path = NULL;
            mtime = 0;
            fsize = 0;
            index_no_content = 0;
            docType = 0;
        }
        else                            /* we are reading headers */
        {
            if (strncasecmp(line, "Content-Length", 14) == 0)
            {
                char   *x = strchr(line, ':');

                if (!x)
                    progerr("Failed to parse Content-Length header '%s'", line);
                fsize = strtol(++x, NULL, 10);

                /* a negative size would later be used as a byte count */
                if (fsize < 0)
                    progerr("Failed to parse Content-Length header '%s'", line);
                continue;
            }

            if (strncasecmp(line, "Last-Mtime", 10) == 0)
            {
                char   *x = strchr(line, ':');

                if (!x)
                    progerr("Failed to parse Last-Mtime header '%s'", line);
                mtime = strtol(++x, NULL, 10);
                continue;
            }

            if (strncasecmp(line, "No-Contents:", 12) == 0)
            {
                index_no_content++;
                continue;
            }

            if (strncasecmp(line, "Path-Name", 9) == 0)
            {
                char   *x = strchr(line, ':');

                if (!x)
                    progerr("Failed to parse Path-Name header '%s'", line);

                x = str_skip_ws(++x);
                if (!*x)
                    progerr("Failed to find path name in Path-Name header '%s'", line);

                /* a repeated header replaces (and releases) the earlier value */
                if (real_path)
                    efree(real_path);

                real_path = emalloc(strlen(x) + 1);
                strcpy(real_path, x);
                continue;
            }

            if (strncasecmp(line, "Document-Type", 13) == 0)
            {
                char   *x = strchr(line, ':');

                if (!x)
                    progerr("Failed to parse Document-Type '%s'", line);

                x = str_skip_ws(++x);
                if (!*x)
                    progerr("Failed to find document type in Document-Type header '%s'", line);

                if (!(docType = strtoDocType(x)))
                    progerr("document type '%s' not a valid Swish-e document type in Document-Type header '%s'", x, line);

                continue;
            }

            progwarn("Unknown header line: '%s' from program %s", line, prog);
        }
    }

    /* headers that were never followed by a blank line: nothing was indexed
     * for them, so the pending path is still owned here */
    if (real_path)
        efree(real_path);

    efree(ln);

    /* restore the setting */
    sw->truncateDocSize = truncate_doc_size;

    /* open_external_program() returns stdin itself for "stdin"; pclose() is
     * only valid on a stream that popen() created */
    if (fp != stdin && pclose(fp) == -1)
        progwarnno("Failed to properly close external program: ");
}
394 |
|
395 |
|
396 |
|
397 |
|
398 |
|
399 |
/* Don't use old method of config checking */ |
400 |
/*
 * extprog_parseconfline -- configuration-line hook stored in the
 * ExternalProgramDataSource table.  It ignores both arguments and always
 * returns 0; this module's directive is handled by configModule_Prog().
 */
static int extprog_parseconfline(SWISH * sw, StringList *l)
{
    (void) sw;
    (void) l;

    return 0;
}
404 |
|
405 |
|
406 |
|
407 |
struct _indexing_data_source_def ExternalProgramDataSource = { |
408 |
"External-Program", |
409 |
"prog", |
410 |
extprog_indexpath, |
411 |
extprog_parseconfline |
412 |
}; |