/* xgettext sh backend. Copyright (C) 2003, 2005-2009 Free Software Foundation, Inc. Written by Bruno Haible , 2003. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include "config.h" #endif /* Specification. */ #include "x-sh.h" #include #include #include #include #include #include #include "message.h" #include "xgettext.h" #include "error.h" #include "xalloc.h" #include "hash.h" #include "gettext.h" #define _(s) gettext(s) #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) /* The sh syntax is defined in POSIX:2001, see http://www.opengroup.org/onlinepubs/007904975/utilities/xcu_chap02.html Summary of sh syntax: - Input is broken into words, which are then subject to - tilde expansion ~... - command substitution `...` - variable substitution $var - arithmetic substitution $((...)) - field splitting at whitespace (IFS) - wildcard pattern expansion *? - quote removal - Strings are enclosed in "..."; command substitution, variable substitution and arithmetic substitution are performed here as well. - '...' is a string without substitutions. - The list of resulting words is split into commands by semicolon and newline. - '#' at the beginning of a word introduces a comment until end of line. The parser is implemented in bash-2.05b/parse.y. */ /* ====================== Keyword set customization. ====================== */ /* If true extract all strings. 
*/ static bool extract_all = false; static hash_table keywords; static bool default_keywords = true; void x_sh_extract_all () { extract_all = true; } void x_sh_keyword (const char *name) { if (name == NULL) default_keywords = false; else { const char *end; struct callshape shape; const char *colon; if (keywords.table == NULL) hash_init (&keywords, 100); split_keywordspec (name, &end, &shape); /* The characters between name and end should form a valid C identifier. A colon means an invalid parse in split_keywordspec(). */ colon = strchr (name, ':'); if (colon == NULL || colon >= end) insert_keyword_callshape (&keywords, name, end - name, &shape); } } /* Finish initializing the keywords hash table. Called after argument processing, before each file is processed. */ static void init_keywords () { if (default_keywords) { /* When adding new keywords here, also update the documentation in xgettext.texi! */ x_sh_keyword ("gettext"); x_sh_keyword ("ngettext:1,2"); x_sh_keyword ("eval_gettext"); x_sh_keyword ("eval_ngettext:1,2"); default_keywords = false; } } void init_flag_table_sh () { xgettext_record_flag ("gettext:1:pass-sh-format"); xgettext_record_flag ("ngettext:1:pass-sh-format"); xgettext_record_flag ("ngettext:2:pass-sh-format"); xgettext_record_flag ("eval_gettext:1:sh-format"); xgettext_record_flag ("eval_ngettext:1:sh-format"); xgettext_record_flag ("eval_ngettext:2:sh-format"); } /* ======================== Reading of characters. ======================== */ /* Real filename, used in error messages about the input file. */ static const char *real_file_name; /* Logical filename and line number, used to label the extracted messages. */ static char *logical_file_name; static int line_number; /* The input file stream. */ static FILE *fp; /* Fetch the next character from the input file. 
*/ static int do_getc () { int c = getc (fp); if (c == EOF) { if (ferror (fp)) error (EXIT_FAILURE, errno, _("\ error while reading \"%s\""), real_file_name); } else if (c == '\n') line_number++; return c; } /* Put back the last fetched character, not EOF. */ static void do_ungetc (int c) { if (c == '\n') line_number--; ungetc (c, fp); } /* Remove backslash followed by newline from the input stream. */ static int phase1_pushback[1]; static int phase1_pushback_length; static int phase1_getc () { int c; if (phase1_pushback_length) { c = phase1_pushback[--phase1_pushback_length]; if (c == '\n') ++line_number; return c; } for (;;) { c = do_getc (); if (c != '\\') return c; c = do_getc (); if (c != '\n') { if (c != EOF) do_ungetc (c); return '\\'; } } } /* Supports only one pushback character. */ static void phase1_ungetc (int c) { switch (c) { case EOF: break; case '\n': --line_number; /* FALLTHROUGH */ default: if (phase1_pushback_length == SIZEOF (phase1_pushback)) abort (); phase1_pushback[phase1_pushback_length++] = c; break; } } /* ========================== Reading of tokens. ========================== */ /* A token consists of a sequence of characters. */ struct token { int allocated; /* number of allocated 'token_char's */ int charcount; /* number of used 'token_char's */ char *chars; /* the token's constituents */ }; /* Initialize a 'struct token'. */ static inline void init_token (struct token *tp) { tp->allocated = 10; tp->chars = XNMALLOC (tp->allocated, char); tp->charcount = 0; } /* Free the memory pointed to by a 'struct token'. */ static inline void free_token (struct token *tp) { free (tp->chars); } /* Ensure there is enough room in the token for one more character. */ static inline void grow_token (struct token *tp) { if (tp->charcount == tp->allocated) { tp->allocated *= 2; tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); } } /* Convert a struct token * to a char*. 
*/ static char * string_of_token (const struct token *tp) { char *str; int n; n = tp->charcount; str = XNMALLOC (n + 1, char); memcpy (str, tp->chars, n); str[n] = '\0'; return str; } /* ========================= Accumulating messages ========================= */ static message_list_ty *mlp; /* ========================= Accumulating comments ========================= */ static char *buffer; static size_t bufmax; static size_t buflen; static inline void comment_start () { buflen = 0; } static inline void comment_add (int c) { if (buflen >= bufmax) { bufmax = 2 * bufmax + 10; buffer = xrealloc (buffer, bufmax); } buffer[buflen++] = c; } static inline void comment_line_end () { while (buflen >= 1 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) --buflen; if (buflen >= bufmax) { bufmax = 2 * bufmax + 10; buffer = xrealloc (buffer, bufmax); } buffer[buflen] = '\0'; savable_comment_add (buffer); } /* These are for tracking whether comments count as immediately before keyword. */ static int last_comment_line; static int last_non_comment_line; /* ========================= Debackslashification ========================== */ /* This state tracks the effect of backquotes, double-quotes and single-quotes on the parsing of backslashes. We make a single pass through the input file, keeping the state up to date. This is much faster than accumulating strings and processing them with explicit debackslashification, like the shell does it. */ /* The number of nested `...` or "`...`" constructs. Assumed to be <= 32. */ static unsigned int nested_backquotes; /* A bit mask indicating which of the currently open `...` or "`...`" constructs is with double-quotes: "`...`". A bit value of 1 stands for "`...`", a bit value of 0 stands for `...`. Bit position 0 designates the outermost backquotes nesting, bit position 1 the second-outermost backquotes nesting, ... bit position (nested_backquotes-1) the innermost backquotes nesting. 
*/ static unsigned int open_doublequotes_mask; /* A bit indicating whether a double-quote is currently open inside the innermost backquotes nesting. */ static bool open_doublequote; /* A bit indicating whether a single-quote is currently open inside the innermost backquotes nesting. */ static bool open_singlequote; /* The expected terminator of the currently open single-quote. Usually '\'', but can be '"' for i18n-quotes. */ static char open_singlequote_terminator; /* Functions to update the state. */ static inline void saw_opening_backquote () { if (open_singlequote) abort (); if (open_doublequote) open_doublequotes_mask |= (unsigned int) 1 << nested_backquotes; nested_backquotes++; open_doublequote = false; } static inline void saw_closing_backquote () { nested_backquotes--; open_doublequote = (open_doublequotes_mask >> nested_backquotes) & 1; open_doublequotes_mask &= ((unsigned int) 1 << nested_backquotes) - 1; open_singlequote = false; /* just for safety */ } static inline void saw_opening_doublequote () { if (open_singlequote || open_doublequote) abort (); open_doublequote = true; } static inline void saw_closing_doublequote () { if (open_singlequote || !open_doublequote) abort (); open_doublequote = false; } static inline void saw_opening_singlequote () { if (open_doublequote || open_singlequote) abort (); open_singlequote = true; open_singlequote_terminator = '\''; } static inline void saw_closing_singlequote () { if (open_doublequote || !open_singlequote) abort (); open_singlequote = false; } /* ========================== Reading of commands ========================== */ /* We are only interested in constant strings. Other words need not to be represented precisely. 
*/ enum word_type { t_string, /* constant string */ t_other, /* other string */ t_separator, /* command separator: semicolon or newline */ t_redirect, /* redirection: one of < > >| << <<- >> <> <& >& */ t_backquote, /* closing '`' pseudo word */ t_paren, /* closing ')' pseudo word */ t_eof /* EOF marker */ }; struct word { enum word_type type; struct token *token; /* for t_string */ int line_number_at_start; /* for t_string */ }; /* Free the memory pointed to by a 'struct word'. */ static inline void free_word (struct word *wp) { if (wp->type == t_string) { free_token (wp->token); free (wp->token); } } /* Convert a t_string token to a char*. */ static char * string_of_word (const struct word *wp) { char *str; int n; if (!(wp->type == t_string)) abort (); n = wp->token->charcount; str = XNMALLOC (n + 1, char); memcpy (str, wp->token->chars, n); str[n] = '\0'; return str; } /* Whitespace recognition. */ static inline bool is_whitespace (int c) { return (c == ' ' || c == '\t' || c == '\n'); } /* Operator character recognition. */ static inline bool is_operator_start (int c) { return (c == '|' || c == '&' || c == ';' || c == '<' || c == '>' || c == '(' || c == ')'); } /* Denotation of a quoted character. The distinction between quoted and unquoted character is important only for the special, whitespace and operator characters; it is irrelevant for alphanumeric characters, '\\' and many others. */ #define QUOTED(c) (UCHAR_MAX + 1 + (c)) /* Values in the 'unsigned char' range are implicitly unquoted. Among these, the following are important: '"' opening or closing double quote '\'' opening or closing single quote '$' the unknown result of a dollar expansion '`' does not occur - replaced with OPENING_BACKQUOTE or CLOSING_BACKQUOTE */ #define OPENING_BACKQUOTE (2 * (UCHAR_MAX + 1) + '`') #define CLOSING_BACKQUOTE (3 * (UCHAR_MAX + 1) + '`') /* 2 characters of pushback are supported. 
2 characters of pushback occur only when the first is an 'x'; in all other cases only one character of pushback is needed. */
static int phase2_pushback[2];
static int phase2_pushback_length;

/* Return the next character, with backslashes removed.
   The result is QUOTED(c) for some unsigned char c, if the next character
   is escaped sufficiently often to make it a regular constituent character,
   or simply an 'unsigned char' if it has its special meaning (of special,
   whitespace or operator character), or OPENING_BACKQUOTE, CLOSING_BACKQUOTE,
   EOF.
   A pushed back character is returned as is, without being classified again.
   It's the caller's responsibility to update the state.  */
static int
phase2_getc ()
{
  int c;

  /* Serve pushed back characters first, most recently pushed first.  */
  if (phase2_pushback_length)
    {
      c = phase2_pushback[--phase2_pushback_length];
      if (c == '\n')
        ++line_number;
      return c;
    }

  c = phase1_getc ();
  if (c == EOF)
    return c;
  /* A single-quote is special unless it occurs inside "..." or inside a
     single-quoted region that is terminated by a different character.  */
  if (c == '\'')
    return ((open_doublequote
             || (open_singlequote && open_singlequote_terminator != c))
            ? QUOTED (c)
            : c);
  if (open_singlequote)
    {
      /* Inside a single-quoted region only its terminator is special.  */
      if (c == open_singlequote_terminator)
        return c;
    }
  else
    {
      if (c == '"' || c == '$')
        return c;
      /* A bare backquote closes the innermost open backquote nesting if
         there is one, otherwise it opens a new one.  */
      if (c == '`')
        return (nested_backquotes > 0 ? CLOSING_BACKQUOTE : OPENING_BACKQUOTE);
    }

  if (c == '\\')
    {
      /* Number of debackslashification passes that are active at the
         current point.  */
      unsigned int debackslahify =
        nested_backquotes + (open_singlequote ? 0 : 1);
      /* Normal number of backslashes that yield a single backslash in the
         final output.  */
      unsigned int expected_count =
        (unsigned int) 1 << debackslahify;
      /* Number of backslashes found.  */
      unsigned int count;

      /* Count consecutive backslashes, reading at most expected_count of
         them.  On early exit, c holds the first non-backslash character.  */
      for (count = 1; count < expected_count; count++)
        {
          c = phase1_getc ();
          if (c != '\\')
            break;
        }
      if (count == expected_count)
        return '\\';

      /* The count of backslashes is > 0 and < expected_count, therefore the
         result depends on c, the first character after the backslashes.
         Note: The formulas below don't necessarily have a logic; they were
         empirically determined such that
         1. the xgettext-30 test succeeds,
         2. the behaviour for count == 0 would correspond to the one without
            any backslash.
       */
      if (c == '\'')
        {
          if (!open_singlequote && count > (expected_count >> 1))
            {
              phase1_ungetc (c);
              return '\\';
            }
          else
            return ((open_doublequote
                     || (open_singlequote && open_singlequote_terminator != c))
                    ? QUOTED (c)
                    : c);
        }
      else if (c == '"')
        {
          /* Each debackslashification pass converts \\ to \ and \" to ";
             passes corresponding to `...` drop a lone " whereas passes
             corresponding to "`...`" leave it alone.  Therefore, the
             minimum number of backslashes needed to get one double-quote
             in the end is  open_doublequotes_mask + 1.  */
          if (open_singlequote)
            {
              if (count > open_doublequotes_mask)
                {
                  phase1_ungetc (c);
                  return '\\';
                }
              else
                return (open_singlequote_terminator != c ? QUOTED (c) : c);
            }
          else
            {
              if (count > open_doublequotes_mask)
                return QUOTED (c);
              else
                /* Some of the count values <= open_doublequotes_mask are
                   actually invalid here, but we assume a syntactically
                   correct input file anyway.  */
                return c;
            }
        }
      else if (c == '`')
        {
          /* FIXME: This code looks fishy.  */
          if (count == expected_count - 1)
            return c;
          else
            /* Some of the count values < expected_count - 1 are
               actually invalid here, but we assume a syntactically
               correct input file anyway.  */
            if (nested_backquotes > 0 && !open_singlequote
                && count >= (expected_count >> 2))
              return OPENING_BACKQUOTE;
            else
              return CLOSING_BACKQUOTE;
        }
      else if (c == '$')
        {
          if (open_singlequote)
            return QUOTED (c);
          if (count >= (expected_count >> 1))
            return QUOTED (c);
          else
            return c;
        }
      else
        {
          /* When not followed by a quoting character or backslash or dollar,
             a backslash survives a debackslashification pass unmodified.
             Therefore each debackslashification pass performs a
               count := (count + 1) >> 1
             operation.  Therefore the minimum number of backslashes needed
             to get one backslash in the end is  (expected_count >> 1) + 1.  */
          if (open_doublequote || open_singlequote)
            {
              if (count > 0)
                {
                  phase1_ungetc (c);
                  return '\\';
                }
              else
                return QUOTED (c);
            }
          else
            {
              if (count > (expected_count >> 1))
                {
                  phase1_ungetc (c);
                  return '\\';
                }
              else if (count > 0)
                return QUOTED (c);
              else
                return c;
            }
        }
    }

  /* Any other character counts as quoted while a quote is open.  */
  return (open_singlequote || open_doublequote ? QUOTED (c) : c);
}

/* Push back the character C so that the next phase2_getc call returns it.
   EOF is ignored.  Supports 2 characters of pushback; aborts when more
   are pushed.  */
static void
phase2_ungetc (int c)
{
  switch (c)
    {
    case EOF:
      break;

    case '\n':
      /* Undo the line count; phase2_getc increments it again on re-read.  */
      --line_number;
      /* FALLTHROUGH */

    default:
      if (phase2_pushback_length == SIZEOF (phase2_pushback))
        abort ();
      phase2_pushback[phase2_pushback_length++] = c;
      break;
    }
}


/* Context lookup table.  */
static flag_context_list_table_ty *flag_context_list_table;


/* Forward declaration of local functions.  */
static enum word_type read_command_list (int looking_for,
                                         flag_context_ty outer_context);



/* Read the next word.
   'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
   or '\0'.
   The result is stored in *WP.  For a t_string word, WP->token is a freshly
   allocated token and WP->line_number_at_start is the line on which the word
   began; for every other type the token has been freed or was never
   allocated.  CONTEXT is handed on to nested command lists and to the
   message recorded for the $"..." syntax.  */
static void
read_word (struct word *wp, int looking_for, flag_context_ty context)
{
  int c;
  bool all_unquoted_digits;

  /* Skip leading whitespace and comments.  A newline ends the search and is
     reported as a command separator.  */
  do
    {
      c = phase2_getc ();
      if (c == '#')
        {
          /* Skip a comment up to end of line.  */
          last_comment_line = line_number;
          comment_start ();
          for (;;)
            {
              c = phase1_getc ();
              if (c == EOF || c == '\n')
                break;
              /* We skip all leading white space, but not EOLs.  */
              if (!(buflen == 0 && (c == ' ' || c == '\t')))
                comment_add (c);
            }
          comment_line_end ();
        }
      if (c == '\n')
        {
          /* Comments assumed to be grouped with a message must immediately
             precede it, with no non-whitespace token on a line between
             both.  */
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          wp->type = t_separator;
          return;
        }
    }
  while (is_whitespace (c));

  if (c == EOF)
    {
      wp->type = t_eof;
      return;
    }

  if (c == '<' || c == '>')
    {
      /* Recognize the redirection operators < > >| << <<- >> <> <& >&
         But <( and >) are handled below, not here.  */
      int c2 = phase2_getc ();
      if (c2 != '(')
        {
          if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
            {
              /* Two-character operator; << may be followed by '-'.  */
              if (c == '<' && c2 == '<')
                {
                  int c3 = phase2_getc ();
                  if (c3 != '-')
                    phase2_ungetc (c3);
                }
            }
          else
            phase2_ungetc (c2);
          wp->type = t_redirect;
          return;
        }
      else
        phase2_ungetc (c2);
    }

  /* The terminator the caller is waiting for yields a pseudo word.  */
  if (looking_for == CLOSING_BACKQUOTE && c == CLOSING_BACKQUOTE)
    {
      saw_closing_backquote ();
      wp->type = t_backquote;
      last_non_comment_line = line_number;
      return;
    }

  if (looking_for == ')' && c == ')')
    {
      wp->type = t_paren;
      last_non_comment_line = line_number;
      return;
    }

  /* Any other operator character is a word of its own; only ';' separates
     commands.  */
  if (is_operator_start (c))
    {
      wp->type = (c == ';' ? t_separator : t_other);
      return;
    }

  /* From here on a real word is accumulated.  It starts out as a constant
     string and is demoted to t_other as soon as a non-constant part is
     seen.  */
  wp->type = t_string;
  wp->token = XMALLOC (struct token);
  init_token (wp->token);
  wp->line_number_at_start = line_number;
  all_unquoted_digits = true;

  for (;; c = phase2_getc ())
    {
      if (c == EOF)
        break;

      if (all_unquoted_digits && (c == '<' || c == '>'))
        {
          /* Recognize the redirection operators < > >| << <<- >> <> <& >&
             prefixed with a nonempty sequence of unquoted digits.  */
          int c2 = phase2_getc ();
          if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
            {
              if (c == '<' && c2 == '<')
                {
                  int c3 = phase2_getc ();
                  if (c3 != '-')
                    phase2_ungetc (c3);
                }
            }
          else
            phase2_ungetc (c2);

          /* The digits collected so far are discarded.  */
          wp->type = t_redirect;
          free_token (wp->token);
          free (wp->token);

          last_non_comment_line = line_number;

          return;
        }

      all_unquoted_digits = all_unquoted_digits && (c >= '0' && c <= '9');

      if (c == '$')
        {
          int c2;

          /* An unquoted dollar indicates we are not inside '...'.  */
          if (open_singlequote)
            abort ();
          /* After reading a dollar, we know that there is no pushed back
             character from an earlier lookahead.  */
          if (phase2_pushback_length > 0)
            abort ();
          /* Therefore we can use phase1 without interfering with phase2.
             We need to recognize $( outside and inside double-quotes.
             It would be incorrect to do
                c2 = phase2_getc ();
                if (c2 == '(' || c2 == QUOTED ('('))
             because that would also trigger for $\(.  */
          c2 = phase1_getc ();
          if (c2 == '(')
            {
              bool saved_open_doublequote;
              int c3;

              phase1_ungetc (c2);

              /* The entire inner command or arithmetic expression is read
                 ignoring possible surrounding double-quotes.  */
              saved_open_doublequote = open_doublequote;
              open_doublequote = false;

              c2 = phase2_getc ();
              if (c2 != '(')
                abort ();

              c3 = phase2_getc ();
              if (c3 == '(')
                {
                  /* Arithmetic expression (Bash syntax).  Skip until the
                     matching closing parenthesis.  */
                  /* Both opening parentheses have been consumed already.  */
                  unsigned int depth = 2;

                  do
                    {
                      c = phase2_getc ();
                      if (c == '(')
                        depth++;
                      else if (c == ')')
                        if (--depth == 0)
                          break;
                    }
                  while (c != EOF);
                }
              else
                {
                  /* Command substitution (Bash syntax).  */
                  phase2_ungetc (c3);
                  read_command_list (')', context);
                }

              open_doublequote = saved_open_doublequote;
            }
          else
            {
              phase1_ungetc (c2);
              c2 = phase2_getc ();

              if (c2 == '\'' && !open_singlequote)
                {
                  /* Bash builtin for string with ANSI-C escape sequences.  */
                  saw_opening_singlequote ();
                  for (;;)
                    {
                      c = phase2_getc ();
                      if (c == EOF)
                        break;
                      if (c == '\'')
                        {
                          saw_closing_singlequote ();
                          break;
                        }
                      if (c == '\\')
                        {
                          c = phase2_getc ();
                          switch (c)
                            {
                            default:
                              /* Unknown escape: the backslash is kept and
                                 the following character is re-read.  */
                              phase2_ungetc (c);
                              c = '\\';
                              break;

                            case '\\':
                              break;
                            case '\'':
                              /* Don't call saw_closing_singlequote ()
                                 here.  */
                              break;

                            case 'a':
                              c = '\a';
                              break;
                            case 'b':
                              c = '\b';
                              break;
                            case 'e':
                              c = 0x1b; /* ESC */
                              break;
                            case 'f':
                              c = '\f';
                              break;
                            case 'n':
                              c = '\n';
                              break;
                            case 'r':
                              c = '\r';
                              break;
                            case 't':
                              c = '\t';
                              break;
                            case 'v':
                              c = '\v';
                              break;

                            case 'x':
                              /* \x followed by one or two hexadecimal
                                 digits.  */
                              c = phase2_getc ();
                              if ((c >= '0' && c <= '9')
                                  || (c >= 'A' && c <= 'F')
                                  || (c >= 'a' && c <= 'f'))
                                {
                                  int n;

                                  if (c >= '0' && c <= '9')
                                    n = c - '0';
                                  else if (c >= 'A' && c <= 'F')
                                    n = 10 + c - 'A';
                                  else if (c >= 'a' && c <= 'f')
                                    n = 10 + c - 'a';
                                  else
                                    abort ();

                                  c = phase2_getc ();
                                  if ((c >= '0' && c <= '9')
                                      || (c >= 'A' && c <= 'F')
                                      || (c >= 'a' && c <= 'f'))
                                    {
                                      if (c >= '0' && c <= '9')
                                        n = n * 16 + c - '0';
                                      else if (c >= 'A' && c <= 'F')
                                        n = n * 16 + 10 + c - 'A';
                                      else if (c >= 'a' && c <= 'f')
                                        n = n * 16 + 10 + c - 'a';
                                      else
                                        abort ();
                                    }
                                  else
                                    phase2_ungetc (c);

                                  c = n;
                                }
                              else
                                {
                                  /* No hexadecimal digit follows: keep the
                                     backslash and re-read the 'x' and the
                                     character after it.  This is the case
                                     that needs 2 characters of pushback.  */
                                  phase2_ungetc (c);
                                  phase2_ungetc ('x');
                                  c = '\\';
                                }
                              break;

                            case '0': case '1': case '2': case '3':
                            case '4': case '5': case '6': case '7':
                              /* Backslash followed by one to three octal
                                 digits.  */
                              {
                                int n = c - '0';

                                c = phase2_getc ();
                                if (c >= '0' && c <= '7')
                                  {
                                    n = n * 8 + c - '0';

                                    c = phase2_getc ();
                                    if (c >= '0' && c <= '7')
                                      n = n * 8 + c - '0';
                                    else
                                      phase2_ungetc (c);
                                  }
                                else
                                  phase2_ungetc (c);

                                c = n;
                              }
                              break;
                            }
                        }
                      if (wp->type == t_string)
                        {
                          grow_token (wp->token);
                          wp->token->chars[wp->token->charcount++] =
                            (unsigned char) c;
                        }
                    }
                  /* The result is a literal string.  Don't change
                     wp->type.  */
                  continue;
                }
              else if (c2 == '"' && !open_doublequote)
                {
                  /* Bash builtin for internationalized string.  */
                  lex_pos_ty pos;
                  struct token string;

                  /* The region is tracked as a single-quoted one whose
                     terminator is a double-quote.  */
                  saw_opening_singlequote ();
                  open_singlequote_terminator = '"';
                  pos.file_name = logical_file_name;
                  pos.line_number = line_number;
                  init_token (&string);
                  for (;;)
                    {
                      c = phase2_getc ();
                      if (c == EOF)
                        break;
                      if (c == '"')
                        {
                          saw_closing_singlequote ();
                          break;
                        }
                      grow_token (&string);
                      string.chars[string.charcount++] = (unsigned char) c;
                    }
                  /* The contents are recorded as a message of their own.  */
                  remember_a_message (mlp, NULL, string_of_token (&string),
                                      context, &pos, NULL, savable_comment);
                  free_token (&string);

                  error_with_progname = false;
                  error (0, 0, _("%s:%lu: warning: the syntax $\"...\" is deprecated due to security reasons; use eval_gettext instead"),
                         pos.file_name, (unsigned long) pos.line_number);
                  error_with_progname = true;

                  /* The result at runtime is not constant.  Therefore we
                     change wp->type.  */
                }
              else
                phase2_ungetc (c2);
            }
          wp->type = t_other;
          continue;
        }

      if (c == '\'')
        {
          if (!open_singlequote)
            {
              /* Handle an opening single quote.  */
              saw_opening_singlequote ();
            }
          else
            {
              /* Handle a closing single quote.  */
              saw_closing_singlequote ();
            }
          continue;
        }

      if (c == '"')
        {
          if (open_singlequote && open_singlequote_terminator == '"')
            {
              /* Handle a closing i18n quote.  */
              saw_closing_singlequote ();
            }
          else if (!open_doublequote)
            {
              /* Handle an opening double quote.  */
              saw_opening_doublequote ();
            }
          else
            {
              /* Handle a closing double quote.  */
              saw_closing_doublequote ();
            }
          continue;
        }

      if (c == OPENING_BACKQUOTE)
        {
          /* Handle an opening backquote.  */
          saw_opening_backquote ();

          read_command_list (CLOSING_BACKQUOTE, context);

          wp->type = t_other;
          continue;
        }
      /* A closing backquote ends the word; it is pushed back below so that
         the next call sees it.  */
      if (c == CLOSING_BACKQUOTE)
        break;

      if (c == '<' || c == '>')
        {
          int c2;

          /* An unquoted c indicates we are not inside '...' nor "...".  */
          if (open_singlequote || open_doublequote)
            abort ();

          c2 = phase2_getc ();
          if (c2 == '(')
            {
              /* Process substitution (Bash syntax).  */
              read_command_list (')', context);

              wp->type = t_other;
              continue;
            }
          else
            phase2_ungetc (c2);
        }

      /* Outside quotes, whitespace or an operator character ends the
         word.  */
      if (!open_singlequote && !open_doublequote
          && (is_whitespace (c) || is_operator_start (c)))
        break;

      /* Characters are collected only while the word is still constant.  */
      if (wp->type == t_string)
        {
          grow_token (wp->token);
          wp->token->chars[wp->token->charcount++] = (unsigned char) c;
        }
    }

  /* Leave the character that ended the word for the next call.  */
  phase2_ungetc (c);

  if (wp->type != t_string)
    {
      free_token (wp->token);
      free (wp->token);
    }
  last_non_comment_line = line_number;
}


/* Read the next command.
   'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
   or '\0'.
   Returns the type of the word that terminated the command.  */
static enum word_type
read_command (int looking_for, flag_context_ty outer_context)
{
  /* Read the words that make up the command.
     Here we completely ignore field splitting at whitespace and wildcard
     expansions; i.e. we assume that the source is written in such a way that
     every word in the program determines exactly one word in the resulting
     command.
     But we do not require that the 'gettext'/'ngettext' command is the
     first in the command; this is because 1. we want to allow for prefixes
     like "$verbose" that may expand to nothing, and 2. it's a big effort
     to know where a command starts in a $(for ...) or $(case ...) compound
     command.  */
  int arg = 0;                  /* Current argument number.  */
  bool arg_of_redirect = false; /* True right after a redirection operator.  */
  /* Only assigned in the function position below, which is always reached
     before arg becomes nonzero.  */
  flag_context_list_iterator_ty context_iter;
  const struct callshapes *shapes = NULL;
  struct arglist_parser *argparser = NULL;

  for (;;)
    {
      struct word inner;
      flag_context_ty inner_context;

      if (arg == 0)
        inner_context = null_context;
      else
        inner_context =
          inherited_context (outer_context,
                             flag_context_list_iterator_advance (
                               &context_iter));

      read_word (&inner, looking_for, inner_context);

      /* Recognize end of command.  */
      if (inner.type == t_separator
          || inner.type == t_backquote || inner.type == t_paren
          || inner.type == t_eof)
        {
          if (argparser != NULL)
            arglist_parser_done (argparser, arg);
          return inner.type;
        }

      if (extract_all)
        {
          if (inner.type == t_string)
            {
              lex_pos_ty pos;

              pos.file_name = logical_file_name;
              pos.line_number = inner.line_number_at_start;
              remember_a_message (mlp, NULL, string_of_word (&inner),
                                  inner_context, &pos, NULL, savable_comment);
            }
        }

      if (arg_of_redirect)
        {
          /* Ignore arguments of redirection operators.  */
          arg_of_redirect = false;
        }
      else if (inner.type == t_redirect)
        {
          /* Ignore this word and the following one.  */
          arg_of_redirect = true;
        }
      else
        {
          if (argparser == NULL)
            {
              /* This is the function position.  */
              arg = 0;
              if (inner.type == t_string)
                {
                  char *function_name = string_of_word (&inner);
                  void *keyword_value;

                  /* shapes is left unchanged when the word is not a
                     registered keyword.  */
                  if (hash_find_entry (&keywords,
                                       function_name, strlen (function_name),
                                       &keyword_value)
                      == 0)
                    shapes = (const struct callshapes *) keyword_value;

                  argparser = arglist_parser_alloc (mlp, shapes);

                  context_iter =
                    flag_context_list_iterator (
                      flag_context_list_table_lookup (
                        flag_context_list_table,
                        function_name, strlen (function_name)));

                  free (function_name);
                }
              else
                context_iter = null_context_list_iterator;
            }
          else
            {
              /* These are the argument positions.  */
              if (inner.type == t_string)
                arglist_parser_remember (argparser, arg,
                                         string_of_word (&inner),
                                         inner_context,
                                         logical_file_name,
                                         inner.line_number_at_start,
                                         savable_comment);

              if (arglist_parser_decidedp (argparser, arg))
                {
                  /* Stop looking for arguments of the last function_name.  */
                  /* FIXME: What about context_iter?  */
                  arglist_parser_done (argparser, arg);
                  shapes = NULL;
                  argparser = NULL;
                }
            }

          arg++;
        }

      free_word (&inner);
    }
}


/* Read a list of commands.
   'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
   or '\0'.
   Returns the type of the word that terminated the command list.  */
static enum word_type
read_command_list (int looking_for, flag_context_ty outer_context)
{
  for (;;)
    {
      enum word_type terminator;

      /* Keep reading commands as long as they end in a separator.  */
      terminator = read_command (looking_for, outer_context);
      if (terminator != t_separator)
        return terminator;
    }
}


/* Scan the sh source read from F and add the extracted messages to the
   message list of the first domain in MDLP.
   REAL_FILENAME is used in error messages about the input file,
   LOGICAL_FILENAME labels the extracted messages, and FLAG_TABLE becomes the
   context lookup table.
   The parser state is reset before scanning; the file-related globals are
   cleared afterwards.  */
void
extract_sh (FILE *f,
            const char *real_filename, const char *logical_filename,
            flag_context_list_table_ty *flag_table,
            msgdomain_list_ty *mdlp)
{
  mlp = mdlp->item[0]->messages;

  fp = f;
  real_file_name = real_filename;
  /* NOTE(review): this copy is not freed below; presumably the recorded
     message positions keep pointing to it -- confirm.  */
  logical_file_name = xstrdup (logical_filename);
  line_number = 1;

  last_comment_line = -1;
  last_non_comment_line = -1;

  nested_backquotes = 0;
  open_doublequotes_mask = 0;
  open_doublequote = false;
  open_singlequote = false;

  flag_context_list_table = flag_table;

  init_keywords ();

  /* Eat tokens until eof is seen.  */
  read_command_list ('\0', null_context);

  fp = NULL;
  real_file_name = NULL;
  logical_file_name = NULL;
  line_number = 0;
}