/* checks.c */
/*  This file is part of "reprepro"
 *  Copyright (C) 2003,2004,2005,2006,2007 Bernhard R. Link
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02111-1301  USA
 */

#include <config.h>

#include <assert.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/types.h>
#include "error.h"
#include "ignore.h"
#include "strlist.h"
#include "mprintf.h"
#include "names.h"
#include "checks.h"

typedef unsigned char uchar;

/* check if the character starting where <character> points
 * at is an overlong one
 *
 * Returns true if the first two bytes at <character> form the start of
 * an overlong UTF-8 encoding, false otherwise. Only those two bytes are
 * looked at; the rest of the sequence is not validated.
 * The second byte is only read once the first one was recognized as a
 * lead byte (and thus is not '\0'), so for a NUL-terminated string this
 * never reads behind the terminator. */
static inline bool overlongUTF8(const char *character) {
	/* This checks for overlong utf-8 characters.
	 * (as they might mask '.' '\0' or '/' chars).
	 * we assume no filesystem/ar/gpg code will parse
	 * invalid utf8, as we would only be able to rule
	 * this out if we knew it is utf8 we are coping
	 * with. (Well, you should not use --ignore=validchars
	 * anyway). */
	uchar c = *character;
	uchar nextc;

	/* Cheap pre-filter: the two top bits must be set and bit 0x02
	 * must be clear. All lead bytes tested below (0xC0, 0xC1, 0xE0,
	 * 0xF0, 0xF8 and 0xFC) match this pattern. */
	if ((c & (uchar)0xC2 /*11000010*/) != (uchar)0xC0 /*11000000*/)
		return false;

	nextc = *(character+1);
	/* without a continuation byte (10xxxxxx) following, this is not
	 * a multi-byte sequence at all */
	if ((nextc & (uchar)0xC0 /*11000000*/) != (uchar)0x80 /*10000000*/)
		return false;

	/* 0xC0 and 0xC1: two byte form of something fitting in 7 bits */
	if ((c & (uchar)0x3E /* 00111110 */) == (uchar)0)
		return true;
	/* For the longer forms the lead byte carries no payload bits,
	 * so it is overlong if the top payload bits of the second byte
	 * are all zero, too: */
	if (c == (uchar)0xE0 /*11100000*/ &&
	    (nextc & (uchar)0x20 /*00100000*/) == (uchar)0)
		return true;
	if (c == (uchar)0xF0 /*11110000*/ &&
	    (nextc & (uchar)0x30 /*00110000*/) == (uchar)0)
		return true;
	if (c == (uchar)0xF8 /*11111000*/ &&
	    (nextc & (uchar)0x38 /*00111000*/) == (uchar)0)
		return true;
	if (c == (uchar)0xFC /*11111100*/ &&
	    (nextc & (uchar)0x3C /*00111100*/) == (uchar)0)
		return true;
	return false;
}

/* If the byte <s> points at is a control character (less than ' '),
 * print an error mentioning <descr> and the whole string <str> to stderr
 * and make the *calling* function return RET_ERROR.
 * Wrapped in do { } while (0) so that it is a single statement and can be
 * used safely in an unbraced if/else. */
#define REJECTLOWCHARS(s, str, descr) \
	do { \
		if ((unsigned char)*(s) < (unsigned char)' ') { \
			fprintf(stderr, \
				"Character 0x%02hhx not allowed within %s '%s'!\n", \
				(unsigned char)*(s), (descr), (str)); \
			return RET_ERROR; \
		} \
	} while (0)

/* If condition <c> holds, print an error showing the character <s>
 * points at, the description <descr> and the whole string <str> to stderr
 * and make the *calling* function return RET_ERROR.
 * The message uses the <str> argument (and not whatever variable named
 * "string" the caller happens to have), and the body is wrapped in
 * do { } while (0) so that it is a single statement. */
#define REJECTCHARIF(c, s, str, descr) \
	do { \
		if (c) { \
			fprintf(stderr, \
				"Character '%c' not allowed within %s '%s'!\n", \
				*(s), (descr), (str)); \
			return RET_ERROR; \
		} \
	} while (0)


/* check if this is something that can be used as directory safely */
retvalue propersourcename(const char *string) {
	const char *s;
92
	bool firstcharacter = true;
93

94
	if (string[0] == '\0') {
95 96
		/* This is not really ignoreable, as this will lead
		 * to paths not normalized, so all checks go wrong */
97
		fprintf(stderr, "Source name is not allowed to be empty!\n");
98 99
		return RET_ERROR;
	}
100
	if (string[0] == '.') {
101 102
		/* A dot is not only hard to see, it would cause the directory
		 * to become /./.bla, which is quite dangerous. */
103 104
		fprintf(stderr,
"Source names are not allowed to start with a dot!\n");
105 106 107
		return RET_ERROR;
	}
	s = string;
108 109 110
	while (*s != '\0') {
		if ((*s > 'z' || *s < 'a') &&
		    (*s > '9' || *s < '0') &&
111
		    (firstcharacter ||
112 113 114 115
		     (*s != '+' && *s != '-' && *s != '.'))) {
			REJECTLOWCHARS(s, string, "sourcename");
			REJECTCHARIF (*s == '/', s, string, "sourcename");
			if (overlongUTF8(s)) {
Bernhard Link's avatar
Bernhard Link committed
116 117 118
				fprintf(stderr,
"This could contain an overlong UTF8 sequence, rejecting source name '%s'!\n",
					string);
119 120
				return RET_ERROR;
			}
121 122
			if (!IGNORING_(forbiddenchar,
"Character 0x%02hhx not allowed in sourcename: '%s'!\n", *s, string)) {
123 124
				return RET_ERROR;
			}
125 126 127
			if (ISSET(*s, 0x80)) {
				if (!IGNORING_(8bit,
"8bit character in source name: '%s'!\n", string)) {
128 129 130 131 132
					return RET_ERROR;
				}
			}
		}
		s++;
133
		firstcharacter = false;
134 135 136 137 138 139 140 141
	}
	return RET_OK;
}

/* check if this is something that can be used as directory safely */
retvalue properfilename(const char *string) {
	const char *s;

142 143
	if (string[0] == '\0') {
		fprintf(stderr, "Error: empty filename!\n");
144 145
		return RET_ERROR;
	}
146 147
	if ((string[0] == '.' && string[1] == '\0') ||
		(string[0] == '.' && string[1] == '.' && string[2] == '\0')) {
Bernhard Link's avatar
Bernhard Link committed
148
		fprintf(stderr, "File name not allowed: '%s'!\n", string);
149 150
		return RET_ERROR;
	}
151 152 153 154 155
	for (s = string ; *s != '\0'  ; s++) {
		REJECTLOWCHARS(s, string, "filename");
		REJECTCHARIF (*s == '/' , s, string, "filename");
		if (ISSET(*s, 0x80)) {
			if (overlongUTF8(s)) {
Bernhard Link's avatar
Bernhard Link committed
156 157 158
				fprintf(stderr,
"This could contain an overlong UTF8 sequence, rejecting file name '%s'!\n",
						string);
159 160
				return RET_ERROR;
			}
161 162
			if (!IGNORING_(8bit,
"8bit character in file name: '%s'!\n",	string)) {
163 164 165 166 167 168 169
				return RET_ERROR;
			}
		}
	}
	return RET_OK;
}

/* Format an error message printf-style and return it.
 *
 * The result of vmprintf is kept in a static pointer and released
 * with free() on the next call, so the returned string is only valid
 * until formaterror is called again and must not be freed by the caller.
 * If vmprintf returns NULL, the constant string "Out of memory" is
 * returned instead. */
static const char *formaterror(const char *format, ...) {
	va_list ap;
	static char *data = NULL;

	/* release the message of the previous call (free(NULL) is a no-op) */
	free(data);
	va_start(ap, format);
	data = vmprintf(format, ap);
	va_end(ap);
	if (data == NULL)
		return "Out of memory";
	return data;
}

184
/* check if this is something that can be used as directory *and* identifier safely */
185
const char *checkfordirectoryandidentifier(const char *string) {
186 187
	const char *s;

188
	assert (string != NULL && string[0] != '\0');
189

190
	if ((string[0] == '.' && (string[1] == '\0'||string[1]=='/')))
191
		return "'.' is not allowed as directory part";
192 193
	if ((string[0] == '.' && string[1] == '.'
				&& (string[2] == '\0'||string[2] =='/')))
194
		return "'..' is not allowed as directory part";
195 196
	for (s = string; *s != '\0'; s++) {
		if (*s == '|')
197
			return "'|' is not allowed";
198
		if ((uchar)*s < (uchar)' ')
199
			return formaterror("Character 0x%02hhx not allowed", *s);
200
		if (*s == '/' && s[1] == '.' && (s[2] == '\0' || s[2] == '/'))
201
			return "'.' is not allowed as directory part";
202 203
		if (*s == '/' && s[1] == '.' && s[2] == '.'
				&& (s[3] == '\0' || s[3] =='/'))
204
			return "'..' is not allowed as directory part";
205
		if (*s == '/' && s[1] == '/')
206
			return "\"//\" is not allowed";
207 208 209 210 211 212 213
		if (ISSET(*s, 0x80)) {
			if (overlongUTF8(s))
				return
"Contains overlong UTF-8 sequence if treated as UTF-8";
			if (!IGNORABLE(8bit))
				return
"Contains 8bit character (use --ignore=8bit to ignore)";
214 215
		}
	}
216
	return NULL;
217 218
}

219 220 221
/* check if this can be used as part of identifier (and as part of a filename) */
const char *checkforidentifierpart(const char *string) {
	const char *s;
222

223
	assert (string != NULL && string[0] != '\0');
224

225 226
	for (s = string; *s != '\0' ; s++) {
		if (*s == '|')
227
			return "'|' is not allowed";
228
		if (*s == '/')
229
			return "'/' is not allowed";
230
		if ((uchar)*s < (uchar)' ')
231
			return formaterror("Character 0x%02hhx not allowed", *s);
232 233 234 235 236 237 238
		if (ISSET(*s, 0x80)) {
			if (overlongUTF8(s))
				return
"Contains overlong UTF-8 sequence if treated as UTF-8";
			if (!IGNORABLE(8bit))
				return
"Contains 8bit character (use --ignore=8bit to ignore)";
239 240 241
		}
	}
	return NULL;
242 243 244 245 246
}

retvalue properfilenamepart(const char *string) {
	const char *s;

247 248 249 250 251
	for (s = string ; *s != '\0' ; s++) {
		REJECTLOWCHARS(s, string, "filenamepart");
		REJECTCHARIF (*s == '/' , s, string, "filenamepart");
		if (ISSET(*s, 0x80)) {
			if (overlongUTF8(s)) {
Bernhard Link's avatar
Bernhard Link committed
252 253 254
				fprintf(stderr,
"This could contain an overlong UTF8 sequence, rejecting part of file name '%s'!\n",
					string);
255 256
				return RET_ERROR;
			}
257 258
			if (!IGNORING_(8bit,
"8bit character in part of file name: '%s'!\n",
Bernhard Link's avatar
Bernhard Link committed
259
					string))
260 261 262 263 264 265 266 267
				return RET_ERROR;
		}
	}
	return RET_OK;
}

retvalue properversion(const char *string) {
	const char *s = string;
268 269 270
	bool hadepoch = false;
	bool first = true;
	bool yetonlydigits = true;
271

272 273
	if (string[0] == '\0' && !IGNORING(emptyfilenamepart,
"A version string is empty!\n")) {
274 275
		return RET_ERROR;
	}
276 277
	if ((*s < '0' || *s > '9') &&
	    ((*s >= 'a' && *s <= 'z') || (*s >='A' && *s <= 'Z'))) {
278 279 280
		/* As there are official packages violating the rule
		 * of policy 5.6.11 to start with a digit, disabling
		 * this test, and only omitting a warning. */
281 282 283 284
		if (verbose >= 0)
			fprintf(stderr,
"Warning: Package version '%s' does not start with a digit, violating 'should'-directive in policy 5.6.11\n",
				string);
285
	}
286
	for (; *s != '\0' ; s++, first=false) {
287
		if ((*s <= '9' && *s >= '0')) {
288 289
			continue;
		}
290
		if (!first && yetonlydigits && *s == ':') {
291
			hadepoch = true;
292 293
			continue;
		}
294
		yetonlydigits = false;
295
		if ((*s >= 'A' && *s <= 'Z') ||
296
		           (*s >= 'a' && *s <= 'z')) {
297
			yetonlydigits = false;
298 299
			continue;
		}
300 301 302 303 304 305
		if (first || (*s != '+'  && *s != '-'
					&& *s != '.'  && *s != '~'
					&& (!hadepoch || *s != ':'))) {
			REJECTLOWCHARS(s, string, "version");
			REJECTCHARIF (*s == '/' , s, string, "version");
			if (overlongUTF8(s)) {
Bernhard Link's avatar
Bernhard Link committed
306 307 308
				fprintf(stderr,
"This could contain an overlong UTF8 sequence, rejecting version '%s'!\n",
						string);
309 310
				return RET_ERROR;
			}
311 312
			if (!IGNORING_(forbiddenchar,
"Character '%c' not allowed in version: '%s'!\n", *s, string))
313
				return RET_ERROR;
314 315 316
			if (ISSET(*s, 0x80)) {
				if (!IGNORING_(8bit,
"8bit character in version: '%s'!\n", string))
317 318 319 320 321 322 323 324 325 326
					return RET_ERROR;
			}
		}
	}
	return RET_OK;
}

retvalue properfilenames(const struct strlist *names) {
	int i;

327
	for (i = 0 ; i < names->count ; i ++) {
328
		retvalue r = properfilename(names->values[i]);
329 330
		assert (r != RET_NOTHING);
		if (RET_WAS_ERROR(r))
331 332 333 334 335 336 337
			return r;
	}
	return RET_OK;
}

retvalue properpackagename(const char *string) {
	const char *s;
338
	bool firstcharacter = true;
339 340 341 342

	/* To be able to avoid multiple warnings,
	 * this should always be a subset of propersourcename */

343
	if (string[0] == '\0') {
344 345
		/* This is not really ignoreable, as this is a primary
		 * key for our database */
346
		fprintf(stderr, "Package name is not allowed to be empty!\n");
347 348 349
		return RET_ERROR;
	}
	s = string;
350
	while (*s != '\0') {
351 352 353
		/* DAK also allowed upper case letters last I looked, policy
		 * does not, so they are not allowed without --ignore=forbiddenchar */
		// perhaps some extra ignore-rule for upper case?
354 355 356 357 358 359 360
		if ((*s > 'z' || *s < 'a') &&
		    (*s > '9' || *s < '0') &&
		    (firstcharacter
		     || (*s != '+' && *s != '-' && *s != '.'))) {
			REJECTLOWCHARS(s, string, "package name");
			REJECTCHARIF (*s == '/' , s, string, "package name");
			if (overlongUTF8(s)) {
Bernhard Link's avatar
Bernhard Link committed
361 362 363
				fprintf(stderr,
"This could contain an overlong UTF8 sequence, rejecting package name '%s'!\n",
						string);
364 365
				return RET_ERROR;
			}
366 367
			if (!IGNORING(forbiddenchar,
"Character 0x%02hhx not allowed in package name: '%s'!\n", *s, string)) {
368 369
				return RET_ERROR;
			}
370 371 372
			if (ISSET(*s, 0x80)) {
				if (!IGNORING_(8bit,
"8bit character in package name: '%s'!\n", string)) {
373 374 375 376 377
					return RET_ERROR;
				}
			}
		}
		s++;
378
		firstcharacter = false;
379 380 381 382
	}
	return RET_OK;
}