/* $OpenBSD: filter.c,v 1.10 2024/11/09 18:03:44 op Exp $ */

/* filter - postprocessing of flex output through filters */

/* This file is part of flex. */

/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */

/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */

/* Neither the name of the University nor the names of its contributors */
/* may be used to endorse or promote products derived from this software */
/* without specific prior written permission. */

/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE. */

#include "flexdef.h"

/*
 * m4 preamble written at the top of both output streams by
 * filter_tee_header(); it makes a non-GNU m4 print an error and exit(2).
 */
static const char *check_4_gnu_m4 =
"m4_dnl ifdef(`__gnu__', ,"
"`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)"
" m4exit(2)')\n";


/** global chain. */
struct filter *output_chain = NULL;

/* Allocate and initialize an external filter.
 * @param chain the current chain or NULL for new chain
 * @param cmd the command to execute.
 * @param ... a NULL terminated list of (const char*) arguments to command,
 *            not including argv[0].
 * @return newest filter in chain
 *
 * The argument pointers are stored as given (not copied), so they must
 * outlive the filter.
 */
struct filter *
filter_create_ext(struct filter * chain, const char *cmd, ...)
{
	struct filter *f;
	int max_args;
	const char *s;
	va_list ap;

	/* allocate and initialize new filter */
	f = calloc(1, sizeof(*f));
	if (!f)
		flexerror(_("calloc failed (f) in filter_create_ext"));
	f->filter_func = NULL;
	f->extra = NULL;
	f->next = NULL;
	f->argc = 0;

	if (chain != NULL) {
		/* append f to end of chain */
		while (chain->next)
			chain = chain->next;
		chain->next = f;
	}

	/*
	 * allocate argv, and populate it with the argument list.
	 * One extra slot is always reserved for the terminating NULL.
	 */
	max_args = 8;
	f->argv = malloc(sizeof(char *) * (max_args + 1));
	if (!f->argv)
		flexerror(_("malloc failed (f->argv) in filter_create_ext"));
	f->argv[f->argc++] = cmd;

	va_start(ap, cmd);
	while ((s = va_arg(ap, const char *)) != NULL) {
		if (f->argc >= max_args) {
			void *grown;

			/*
			 * Grow in steps of 8.  Go through a temporary so a
			 * failed realloc is reported instead of being
			 * dereferenced through f->argv.
			 */
			max_args += 8;
			grown = realloc(f->argv,
			    sizeof(char *) * (max_args + 1));
			if (!grown)
				flexerror(_("realloc failed (f->argv) in filter_create_ext"));
			f->argv = grown;
		}
		f->argv[f->argc++] = s;
	}
	f->argv[f->argc] = NULL;

	va_end(ap);
	return f;
}

/* Allocate and initialize an internal filter.
 * @param chain the current chain or NULL for new chain
 * @param filter_func The function that will perform the filtering.
 *        filter_func should return 0 if successful, and -1
 *        if an error occurs -- or it can simply exit().
 * @param extra optional user-defined data to pass to the filter.
 * @return newest filter in chain
 */
struct filter *
filter_create_int(struct filter * chain,
    int (*filter_func) (struct filter *),
    void *extra)
{
	struct filter *f;

	/* allocate and initialize new filter */
	f = calloc(1, sizeof(*f));
	if (!f)
		flexerror(_("calloc failed in filter_create_int"));
	f->next = NULL;
	f->argc = 0;
	f->argv = NULL;

	f->filter_func = filter_func;
	f->extra = extra;

	if (chain != NULL) {
		/* append f to end of chain */
		while (chain->next)
			chain = chain->next;
		chain->next = f;
	}

	return f;
}

/** Fork and exec entire filter chain.
 *  @param chain The head of the chain.
 *  @return true on success.
 *
 *  For every link a pipe is created and a child is forked: the child reads
 *  the pipe on its stdin and either runs the link's filter_func or execs
 *  its argv; the parent redirects its own stdout into the pipe.
 */
bool
filter_apply_chain(struct filter * chain)
{
	int pid, pipes[2];

	/*
	 * Tricky recursion, since we want to begin the chain at the END.
	 * Why? Because we need all the forked processes to be children of
	 * the main flex process.
	 */
	if (chain)
		filter_apply_chain(chain->next);
	else
		return true;

	/*
	 * Now we are the right-most unprocessed link in the chain.
	 */

	/* Flush first so buffered output is not duplicated into the child. */
	fflush(stdout);
	fflush(stderr);

	if (pipe(pipes) == -1)
		flexerror(_("pipe failed"));

	if ((pid = fork()) == -1)
		flexerror(_("fork failed"));

	if (pid == 0) {
		/* child */

		/*
		 * We need stdin (the FILE* stdin) to connect to this new
		 * pipe. There is no portable way to set stdin to a new file
		 * descriptor, as stdin is not an lvalue on some systems
		 * (BSD). So we dup the new pipe onto the stdin descriptor
		 * and use a no-op fseek to sync the stream. This is a Hail
		 * Mary situation. It seems to work.
		 */
		close(pipes[1]);
		clearerr(stdin);
		if (dup2(pipes[0], fileno(stdin)) == -1)
			flexfatal(_("dup2(pipes[0],0)"));
		close(pipes[0]);
		fseek(stdin, 0, SEEK_CUR);

		/* run as a filter, either internally or by exec */
		if (chain->filter_func) {
			if (chain->filter_func(chain) == -1)
				flexfatal(_("filter_func failed"));
			exit(0);
		} else {
			execvp(chain->argv[0],
			    (char **const) (chain->argv));
			/* only reached when execvp() itself failed */
			lerrsf_fatal(_("exec of %s failed"),
			    chain->argv[0]);
		}

		exit(1);
	}
	/* Parent */
	close(pipes[0]);
	if (dup2(pipes[1], fileno(stdout)) == -1)
		flexfatal(_("dup2(pipes[1],1)"));
	close(pipes[1]);
	fseek(stdout, 0, SEEK_CUR);

	return true;
}

/** Truncate the chain to max_len number of filters.
 * @param chain the current chain.
 * @param max_len the maximum length of the chain.
 * @return the resulting length of the chain.
 *
 * A NULL chain yields 0.  A non-NULL chain always keeps at least its first
 * filter, so the result is never below 1 in that case.  Links cut off the
 * end are only unlinked here, not freed.
 */
int
filter_truncate(struct filter * chain, int max_len)
{
	int len = 1;

	if (!chain)
		return 0;

	while (chain->next && len < max_len) {
		chain = chain->next;
		++len;
	}

	chain->next = NULL;
	return len;
}

/** Splits the chain in order to write to a header file.
 *  Similar in spirit to the 'tee' program.
 *  The header file name is in extra.
 *  @return 0 (zero) on success, and -1 on failure.
 *
 *  As written, the function finishes with exit(0) after waiting for its
 *  children, so the trailing return statement is never reached.
 */
int
filter_tee_header(struct filter * chain)
{
	/*
	 * This function reads from stdin and writes to both the C file and
	 * the header file at the same time.
	 */

	const int readsz = 512;
	char *buf;
	int to_cfd = -1;
	FILE *to_c = NULL, *to_h = NULL;
	bool write_header;

	/* The header branch is produced only when a file name was supplied. */
	write_header = (chain->extra != NULL);

	/*
	 * Store a copy of the stdout pipe, which is already piped to C file
	 * through the running chain. Then create a new pipe to the H file as
	 * stdout, and fork the rest of the chain again.
	 */

	if ((to_cfd = dup(1)) == -1)
		flexfatal(_("dup(1) failed"));
	to_c = fdopen(to_cfd, "w");
	if (to_c == NULL)
		flexfatal(_("fdopen(to_cfd) failed"));

	if (write_header) {
		if (freopen((char *) chain->extra, "w", stdout) == NULL)
			flexfatal(_("freopen(headerfilename) failed"));

		filter_apply_chain(chain->next);
		to_h = stdout;
	}
	/*
	 * Now to_c is a pipe to the C branch, and to_h is a pipe to the H
	 * branch.
	 */

	if (write_header) {
		fputs(check_4_gnu_m4, to_h);
		fputs("m4_changecom`'m4_dnl\n", to_h);
		fputs("m4_changequote`'m4_dnl\n", to_h);
		fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
		fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
		fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
		    to_h);
		fprintf(to_h, "#ifndef %sHEADER_H\n", prefix);
		fprintf(to_h, "#define %sHEADER_H 1\n", prefix);
		fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix);
		fprintf(to_h,
		    "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
		    headerfilename ? headerfilename : "");
	}
	fputs(check_4_gnu_m4, to_c);
	fputs("m4_changecom`'m4_dnl\n", to_c);
	fputs("m4_changequote`'m4_dnl\n", to_c);
	fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
	fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
	fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
	    outfilename ? outfilename : "");

	buf = malloc(readsz);
	if (!buf)
		flexerror(_("malloc failed in filter_tee_header"));

	/* Duplicate every chunk read from stdin onto both branches. */
	while (fgets(buf, readsz, stdin)) {
		fputs(buf, to_c);
		if (write_header)
			fputs(buf, to_h);
	}
	free(buf);

	if (write_header) {
		fprintf(to_h, "\n");

		/*
		 * write a fake line number. It will get fixed by the linedir
		 * filter.
		 */
		fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");

		fprintf(to_h, "#undef %sIN_HEADER\n", prefix);
		fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix);
		fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);

		fflush(to_h);
		if (ferror(to_h))
			lerrsf(_("error writing output file %s"),
			    (char *) chain->extra);

		else if (fclose(to_h))
			lerrsf(_("error closing output file %s"),
			    (char *) chain->extra);
	}
	fflush(to_c);
	if (ferror(to_c))
		lerrsf(_("error writing output file %s"),
		    outfilename ? outfilename : "");

	else if (fclose(to_c))
		lerrsf(_("error closing output file %s"),
		    outfilename ? outfilename : "");

	/* Reap every child before leaving. */
	while (wait(0) > 0);

	exit(0);
	return 0;
}

/** Adjust the line numbers in the #line directives of the generated scanner.
 * After the m4 expansion, the line numbers are incorrect since the m4 macros
 * can add or remove lines.  This only adjusts line numbers for generated code,
 * not user code. This also happens to be a good place to squeeze multiple
 * blank lines into a single blank line.
 *
 * @param chain the filter being run; when NULL nothing is done.
 * @return 0
 */
int
filter_fix_linedirs(struct filter * chain)
{
	char *buf;
	const int readsz = 512;
	int lineno = 1;
	bool in_gen = true;	/* in generated code */
	bool last_was_blank = false;

	if (!chain)
		return 0;

	buf = malloc(readsz);
	if (!buf)
		flexerror(_("malloc failed in filter_fix_linedirs"));

	while (fgets(buf, readsz, stdin)) {

		regmatch_t m[10];

		/* Check for #line directive. */
		if (buf[0] == '#'
		    && regexec(&regex_linedir, buf, 3, m, 0) == 0) {

			char *fname;

			/*
			 * extract the line number and filename (the parsed
			 * number itself is discarded)
			 */
			regmatch_strtol(&m[1], buf, NULL, 0);
			fname = regmatch_dup(&m[2], buf);

			if (strcmp(fname,
				outfilename ? outfilename : "") == 0 ||
			    strcmp(fname,
				headerfilename ? headerfilename : "") == 0) {

				char *s1, *s2;
				char filename[MAXLINE];

				s1 = fname;
				s2 = filename;

				while (*s1) {
					/*
					 * Backslash and double quote each get
					 * a leading backslash, so they take
					 * two bytes of output.
					 */
					size_t need =
					    (*s1 == '\\' || *s1 == '\"') ? 2 : 1;

					/*
					 * Stop while the whole (possibly
					 * escaped) character and the final
					 * NUL still fit in filename.
					 */
					if ((size_t)(s2 - filename) + need >
					    sizeof(filename) - 1)
						break;

					if (need == 2)
						*s2++ = '\\';
					/* Copy the character as usual */
					*s2++ = *s1++;
				}

				*s2 = '\0';

				/*
				 * Adjust the line directives.
				 * NOTE(review): buf holds only readsz bytes,
				 * so a very long file name is truncated here
				 * -- confirm whether that can happen.
				 */
				in_gen = true;
				snprintf(buf, readsz, "#line %d \"%s\"\n",
				    lineno + 1, filename);
			} else {
				/*
				 * it's a #line directive for code we didn't
				 * write
				 */
				in_gen = false;
			}

			free(fname);
			last_was_blank = false;
		}
		/* squeeze blank lines from generated code */
		else if (in_gen &&
		    regexec(&regex_blank_line, buf, 0, NULL, 0) == 0) {
			if (last_was_blank)
				continue;
			else
				last_was_blank = true;
		} else {
			/* it's a line of normal, non-empty code. */
			last_was_blank = false;
		}

		fputs(buf, stdout);
		lineno++;
	}
	free(buf);

	fflush(stdout);
	if (ferror(stdout))
		lerrsf(_("error writing output file %s"),
		    outfilename ? outfilename : "");

	else if (fclose(stdout))
		lerrsf(_("error closing output file %s"),
		    outfilename ? outfilename : "");

	return 0;
}