summaryrefslogtreecommitdiff
path: root/rfc822.c
blob: 546f481062e075adf93d21fe764cf634dea05fad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/**
 * \file rfc822.c
 * Code for slicing and dicing RFC822 mail headers.
 * 
 *  How to parse RFC822 headers in C. This is not a fully conformant
 *  implementation of RFC822 or RFC2822, but it has been in production use in a
 *  widely-deployed MTA (fetchmail) since 1996 without complaints.  Really
 *  perverse combinations of quoting and commenting could break it.
 * 
 * \author Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
 * is part of fetchmail and the Unix Cookbook, and is released under the MIT
 * license. 
 */


#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>


/**
 * True when \a p points at a newline that is NOT followed by a space or a
 * tab, i.e. the header is not continued on a folded line.
 */
#define HEADER_END(p)	((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))


/* States of the next_address() parser. */
#define START_HDR	0	/**< before header colon */
#define SKIP_JUNK	1	/**< skip whitespace, \n, and junk */
#define BARE_ADDRESS	2	/**< collecting address without delimiters */
#define INSIDE_DQUOTE	3	/**< inside double quotes */
#define INSIDE_PARENS	4	/**< inside parentheses */
#define INSIDE_BRACKETS	5	/**< inside bracketed address */
#define ENDIT_ALL	6	/**< after last address */

/**
 * Parse addresses in succession out of a specified RFC822 header.
 *
 * The scan position, parser state and result buffer are function-local
 * statics, so only one header can be walked at a time and the function is
 * not reentrant.
 *
 * \param hdr header to be parsed, NULL to continue previous \p hdr.
 *
 * \return pointer to a static buffer holding the next address, or NULL when
 *         no further address is found.  The buffer is overwritten by the
 *         next call.  Addresses longer than BUFSIZ-1 characters are
 *         truncated.  Calling with NULL before any header has been supplied
 *         returns NULL.
 */
char *next_address(const char *hdr)
{
    static char address[BUFSIZ];
    static int tp;
    static const char *hp;
    static int state, oldstate;
    int parendepth = 0;

/* Index of the next free slot in address[]; it sticks at the last slot once
 * the buffer is full, so over-long input is truncated rather than overflowing. */
#define NEXTTP()	((tp < (int)sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
        tp = 0;
    }

    /* continuation requested although no header was ever supplied */
    if (hp == NULL)
        return(NULL);

    for (; *hp; hp++)
    {
        if (state == ENDIT_ALL)		/* after last address */
            return(NULL);
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;

            /* strip trailing whitespace, never stepping before address[0] */
            while (tp > 0 && isspace((unsigned char)address[tp - 1]))
                --tp;

            if (tp > 0)
            {
                address[tp] = '\0';
                tp = 0;
                return(address);
            }
            return(NULL);
        }
        else if (*hp == '\\')		/* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;	/* take the escape */
                /* take the following char, unless the input ends here;
                 * stepping over the terminator would run off the string */
                if (hp[1] != '\0')
                    address[NEXTTP()] = *++hp;
            }
            else if (hp[1] != '\0' && hp[1] != '\n')
            {
                /* an escaped char inside a comment is literal text, so it
                 * must not open or close a parenthesis level; a newline is
                 * left alone so the end of the header is still detected */
                ++hp;
            }
        }
        else switch (state)
        {
        case START_HDR:		/* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:		/* looking for address start */
            if (*hp == '"')	/* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')	/* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')	/* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace((unsigned char)*hp))
            {
                /* first character of a bare address: it is none of the
                 * delimiters tested above, so it is simply collected */
                address[NEXTTP()] = *hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:	/* collecting address without delimiters */
            if (*hp == ',')	/* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    return(address);
                }
            }
            else if (*hp == '(')	/* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')	/* beginning of real address */
            {
                /* what was collected so far was a display name; drop it */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')	/* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace((unsigned char)*hp))	/* take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:	/* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')	/* closing quote: resume the interrupted state */
                state = oldstate;
            break;

        case INSIDE_PARENS:	/* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:	/* possible <>-enclosed address */
            if (*hp == '>')	/* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;		/* resume after the '>' on the next call */
                tp = 0;
                return(address);
            }
            else if (*hp == '<')	/* nested <> */
                tp = 0;
            else if (*hp == '"')	/* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else			/* just copy address */
                address[NEXTTP()] = *hp;
            break;

        default:
            break;
        }
    }

    return(NULL);
}