kallsyms.c 18.7 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7
/* Generate assembler source containing symbol information
 *
 * Copyright 2002       by Kai Germaschewski
 *
 * This software may be used and distributed according to the terms
 * of the GNU General Public License, incorporated herein by reference.
 *
8
 * Usage: kallsyms [--all-symbols] [--absolute-percpu]
9
 *                         [--lto-clang] in.map > out.S
Linus Torvalds's avatar
Linus Torvalds committed
10 11 12 13 14 15 16 17 18 19 20 21
 *
 *      Table compression uses all the unused char codes on the symbols and
 *  maps these to the most used substrings (tokens). For instance, it might
 *  map char code 0xF7 to represent "write_" and then in every symbol where
 *  "write_" appears it can be replaced by 0xF7, saving 5 bytes.
 *      The used codes themselves are also placed in the table so that the
 *  decompresion can work without "special cases".
 *      Applied to kernel symbols, this usually produces a compression ratio
 *  of about 50%.
 *
 */

22
#include <errno.h>
23
#include <getopt.h>
24
#include <stdbool.h>
Linus Torvalds's avatar
Linus Torvalds committed
25 26 27 28
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
29
#include <limits.h>
Linus Torvalds's avatar
Linus Torvalds committed
30

31 32
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))

33
#define KSYM_NAME_LEN		512
Linus Torvalds's avatar
Linus Torvalds committed
34 35 36

struct sym_entry {
	unsigned long long addr;
37
	unsigned int len;
38
	unsigned int seq;
39
	bool percpu_absolute;
40
	unsigned char sym[];
Linus Torvalds's avatar
Linus Torvalds committed
41 42
};

43 44
struct addr_range {
	const char *start_sym, *end_sym;
45 46 47 48
	unsigned long long start, end;
};

static unsigned long long _text;
49
static unsigned long long relative_base;
50
static struct addr_range text_ranges[] = {
51 52 53 54 55 56
	{ "_stext",     "_etext"     },
	{ "_sinittext", "_einittext" },
};
#define text_range_text     (&text_ranges[0])
#define text_range_inittext (&text_ranges[1])

57 58 59 60
static struct addr_range percpu_range = {
	"__per_cpu_start", "__per_cpu_end", -1ULL, 0
};

61
static struct sym_entry **table;
62
static unsigned int table_size, table_cnt;
63 64
static int all_symbols;
static int absolute_percpu;
65
static int lto_clang;
Linus Torvalds's avatar
Linus Torvalds committed
66

67
static int token_profit[0x10000];
Linus Torvalds's avatar
Linus Torvalds committed
68 69

/* the table that holds the result of the compression */
70 71
static unsigned char best_table[256][2];
static unsigned char best_table_len[256];
Linus Torvalds's avatar
Linus Torvalds committed
72 73


74
static void usage(void)
Linus Torvalds's avatar
Linus Torvalds committed
75
{
76
	fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
77
			"[--lto-clang] in.map > out.S\n");
Linus Torvalds's avatar
Linus Torvalds committed
78 79 80
	exit(1);
}

81 82 83 84 85
static char *sym_name(const struct sym_entry *s)
{
	return (char *)s->sym + 1;
}

86 87
static bool is_ignored_symbol(const char *name, char type)
{
88
	if (type == 'u' || type == 'n')
89 90 91 92 93 94 95 96 97 98 99
		return true;

	if (toupper(type) == 'A') {
		/* Keep these useful absolute symbols */
		if (strcmp(name, "__kernel_syscall_via_break") &&
		    strcmp(name, "__kernel_syscall_via_epc") &&
		    strcmp(name, "__kernel_sigtramp") &&
		    strcmp(name, "__gp"))
			return true;
	}

100 101 102
	return false;
}

103 104
static void check_symbol_range(const char *sym, unsigned long long addr,
			       struct addr_range *ranges, int entries)
105 106
{
	size_t i;
107
	struct addr_range *ar;
108

109 110
	for (i = 0; i < entries; ++i) {
		ar = &ranges[i];
111

112 113
		if (strcmp(sym, ar->start_sym) == 0) {
			ar->start = addr;
114
			return;
115 116
		} else if (strcmp(sym, ar->end_sym) == 0) {
			ar->end = addr;
117
			return;
118 119 120 121
		}
	}
}

122
static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
Linus Torvalds's avatar
Linus Torvalds committed
123
{
124
	char *name, type, *p;
125
	unsigned long long addr;
126 127
	size_t len;
	ssize_t readlen;
128
	struct sym_entry *sym;
Linus Torvalds's avatar
Linus Torvalds committed
129

130
	errno = 0;
131 132 133 134 135 136
	readlen = getline(buf, buf_len, in);
	if (readlen < 0) {
		if (errno) {
			perror("read_symbol");
			exit(EXIT_FAILURE);
		}
137
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
138
	}
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153

	if ((*buf)[readlen - 1] == '\n')
		(*buf)[readlen - 1] = 0;

	addr = strtoull(*buf, &p, 16);

	if (*buf == p || *p++ != ' ' || !isascii((type = *p++)) || *p++ != ' ') {
		fprintf(stderr, "line format error\n");
		exit(EXIT_FAILURE);
	}

	name = p;
	len = strlen(name);

	if (len >= KSYM_NAME_LEN) {
154
		fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n"
155
				"Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
156
			name, len, KSYM_NAME_LEN);
157
		return NULL;
158
	}
Linus Torvalds's avatar
Linus Torvalds committed
159

160
	if (strcmp(name, "_text") == 0)
161
		_text = addr;
162

163 164 165 166
	/* Ignore most absolute/undefined (?) symbols. */
	if (is_ignored_symbol(name, type))
		return NULL;

167 168
	check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
	check_symbol_range(name, addr, &percpu_range, 1);
Linus Torvalds's avatar
Linus Torvalds committed
169 170 171

	/* include the type field in the symbol name, so that it gets
	 * compressed together */
172
	len++;
173

174
	sym = malloc(sizeof(*sym) + len + 1);
175
	if (!sym) {
176 177 178 179
		fprintf(stderr, "kallsyms failure: "
			"unable to allocate required amount of memory\n");
		exit(EXIT_FAILURE);
	}
180 181 182
	sym->addr = addr;
	sym->len = len;
	sym->sym[0] = type;
183
	strcpy(sym_name(sym), name);
184
	sym->percpu_absolute = false;
185

186
	return sym;
Linus Torvalds's avatar
Linus Torvalds committed
187 188
}

189 190
static int symbol_in_range(const struct sym_entry *s,
			   const struct addr_range *ranges, int entries)
191 192
{
	size_t i;
193
	const struct addr_range *ar;
194

195 196
	for (i = 0; i < entries; ++i) {
		ar = &ranges[i];
197

198
		if (s->addr >= ar->start && s->addr <= ar->end)
199
			return 1;
200 201
	}

202
	return 0;
203 204
}

205 206 207 208 209
static bool string_starts_with(const char *s, const char *prefix)
{
	return strncmp(s, prefix, strlen(prefix)) == 0;
}

210
static int symbol_valid(const struct sym_entry *s)
Linus Torvalds's avatar
Linus Torvalds committed
211
{
212
	const char *name = sym_name(s);
213

Linus Torvalds's avatar
Linus Torvalds committed
214 215 216
	/* if --all-symbols is not specified, then symbols outside the text
	 * and inittext sections are discarded */
	if (!all_symbols) {
217 218 219 220 221 222 223 224
		/*
		 * Symbols starting with __start and __stop are used to denote
		 * section boundaries, and should always be included:
		 */
		if (string_starts_with(name, "__start_") ||
		    string_starts_with(name, "__stop_"))
			return 1;

225 226
		if (symbol_in_range(s, text_ranges,
				    ARRAY_SIZE(text_ranges)) == 0)
Linus Torvalds's avatar
Linus Torvalds committed
227 228
			return 0;
		/* Corner case.  Discard any symbols with the same value as
229 230 231 232
		 * _etext _einittext; they can move between pass 1 and 2 when
		 * the kallsyms data are added.  If these symbols move then
		 * they may get dropped in pass 2, which breaks the kallsyms
		 * rules.
Linus Torvalds's avatar
Linus Torvalds committed
233
		 */
234
		if ((s->addr == text_range_text->end &&
235
		     strcmp(name, text_range_text->end_sym)) ||
236
		    (s->addr == text_range_inittext->end &&
237
		     strcmp(name, text_range_inittext->end_sym)))
Linus Torvalds's avatar
Linus Torvalds committed
238 239 240 241 242 243
			return 0;
	}

	return 1;
}

244 245 246 247 248 249 250
/* remove all the invalid symbols from the table */
static void shrink_table(void)
{
	unsigned int i, pos;

	pos = 0;
	for (i = 0; i < table_cnt; i++) {
251
		if (symbol_valid(table[i])) {
252 253 254 255
			if (pos != i)
				table[pos] = table[i];
			pos++;
		} else {
256
			free(table[i]);
257 258 259 260 261
		}
	}
	table_cnt = pos;
}

262
static void read_map(const char *in)
Linus Torvalds's avatar
Linus Torvalds committed
263
{
264
	FILE *fp;
265
	struct sym_entry *sym;
266 267
	char *buf = NULL;
	size_t buflen = 0;
268

269 270 271 272 273 274 275
	fp = fopen(in, "r");
	if (!fp) {
		perror(in);
		exit(1);
	}

	while (!feof(fp)) {
276
		sym = read_symbol(fp, &buf, &buflen);
277 278 279
		if (!sym)
			continue;

280
		sym->seq = table_cnt;
281

282 283 284
		if (table_cnt >= table_size) {
			table_size += 10000;
			table = realloc(table, sizeof(*table) * table_size);
Linus Torvalds's avatar
Linus Torvalds committed
285 286
			if (!table) {
				fprintf(stderr, "out of memory\n");
287
				fclose(fp);
Linus Torvalds's avatar
Linus Torvalds committed
288 289 290
				exit (1);
			}
		}
291 292

		table[table_cnt++] = sym;
Linus Torvalds's avatar
Linus Torvalds committed
293
	}
294

295
	free(buf);
296
	fclose(fp);
Linus Torvalds's avatar
Linus Torvalds committed
297 298
}

299
static void output_label(const char *label)
Linus Torvalds's avatar
Linus Torvalds committed
300
{
301
	printf(".globl %s\n", label);
Linus Torvalds's avatar
Linus Torvalds committed
302
	printf("\tALGN\n");
303
	printf("%s:\n", label);
Linus Torvalds's avatar
Linus Torvalds committed
304 305
}

306 307 308 309 310 311 312 313 314
/* Provide proper symbols relocatability by their '_text' relativeness. */
static void output_address(unsigned long long addr)
{
	if (_text <= addr)
		printf("\tPTR\t_text + %#llx\n", addr - _text);
	else
		printf("\tPTR\t_text - %#llx\n", _text - addr);
}

Linus Torvalds's avatar
Linus Torvalds committed
315 316
/* uncompress a compressed symbol. When this function is called, the best table
 * might still be compressed itself, so the function needs to be recursive */
317
static int expand_symbol(const unsigned char *data, int len, char *result)
Linus Torvalds's avatar
Linus Torvalds committed
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
{
	int c, rlen, total=0;

	while (len) {
		c = *data;
		/* if the table holds a single char that is the same as the one
		 * we are looking for, then end the search */
		if (best_table[c][0]==c && best_table_len[c]==1) {
			*result++ = c;
			total++;
		} else {
			/* if not, recurse and expand */
			rlen = expand_symbol(best_table[c], best_table_len[c], result);
			total += rlen;
			result += rlen;
		}
		data++;
		len--;
	}
	*result=0;

	return total;
}

342
static bool symbol_absolute(const struct sym_entry *s)
343
{
344
	return s->percpu_absolute;
345 346
}

347 348 349 350 351 352 353 354 355 356 357
static void cleanup_symbol_name(char *s)
{
	char *p;

	/*
	 * ASCII[.]   = 2e
	 * ASCII[0-9] = 30,39
	 * ASCII[A-Z] = 41,5a
	 * ASCII[_]   = 5f
	 * ASCII[a-z] = 61,7a
	 *
358 359
	 * As above, replacing the first '.' in ".llvm." with '\0' does not
	 * affect the main sorting, but it helps us with subsorting.
360
	 */
361
	p = strstr(s, ".llvm.");
362 363 364 365
	if (p)
		*p = '\0';
}

366 367 368 369 370 371
static int compare_names(const void *a, const void *b)
{
	int ret;
	const struct sym_entry *sa = *(const struct sym_entry **)a;
	const struct sym_entry *sb = *(const struct sym_entry **)b;

372
	ret = strcmp(sym_name(sa), sym_name(sb));
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
	if (!ret) {
		if (sa->addr > sb->addr)
			return 1;
		else if (sa->addr < sb->addr)
			return -1;

		/* keep old order */
		return (int)(sa->seq - sb->seq);
	}

	return ret;
}

static void sort_symbols_by_name(void)
{
	qsort(table, table_cnt, sizeof(table[0]), compare_names);
}

391
static void write_src(void)
Linus Torvalds's avatar
Linus Torvalds committed
392
{
393
	unsigned int i, k, off;
Linus Torvalds's avatar
Linus Torvalds committed
394
	unsigned int best_idx[256];
395
	unsigned int *markers, markers_cnt;
396
	char buf[KSYM_NAME_LEN];
Linus Torvalds's avatar
Linus Torvalds committed
397

398
	printf("#include <asm/bitsperlong.h>\n");
Linus Torvalds's avatar
Linus Torvalds committed
399 400
	printf("#if BITS_PER_LONG == 64\n");
	printf("#define PTR .quad\n");
401
	printf("#define ALGN .balign 8\n");
Linus Torvalds's avatar
Linus Torvalds committed
402 403
	printf("#else\n");
	printf("#define PTR .long\n");
404
	printf("#define ALGN .balign 4\n");
Linus Torvalds's avatar
Linus Torvalds committed
405 406
	printf("#endif\n");

407
	printf("\t.section .rodata, \"a\"\n");
Linus Torvalds's avatar
Linus Torvalds committed
408 409

	output_label("kallsyms_num_syms");
410
	printf("\t.long\t%u\n", table_cnt);
Linus Torvalds's avatar
Linus Torvalds committed
411 412 413 414
	printf("\n");

	/* table of offset markers, that give the offset in the compressed stream
	 * every 256 symbols */
415 416
	markers_cnt = (table_cnt + 255) / 256;
	markers = malloc(sizeof(*markers) * markers_cnt);
417 418 419 420 421
	if (!markers) {
		fprintf(stderr, "kallsyms failure: "
			"unable to allocate required memory\n");
		exit(EXIT_FAILURE);
	}
Linus Torvalds's avatar
Linus Torvalds committed
422 423 424

	output_label("kallsyms_names");
	off = 0;
425 426 427
	for (i = 0; i < table_cnt; i++) {
		if ((i & 0xFF) == 0)
			markers[i >> 8] = off;
428
		table[i]->seq = i;
Linus Torvalds's avatar
Linus Torvalds committed
429

430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
		/* There cannot be any symbol of length zero. */
		if (table[i]->len == 0) {
			fprintf(stderr, "kallsyms failure: "
				"unexpected zero symbol length\n");
			exit(EXIT_FAILURE);
		}

		/* Only lengths that fit in up-to-two-byte ULEB128 are supported. */
		if (table[i]->len > 0x3FFF) {
			fprintf(stderr, "kallsyms failure: "
				"unexpected huge symbol length\n");
			exit(EXIT_FAILURE);
		}

		/* Encode length with ULEB128. */
		if (table[i]->len <= 0x7F) {
			/* Most symbols use a single byte for the length. */
			printf("\t.byte 0x%02x", table[i]->len);
			off += table[i]->len + 1;
		} else {
			/* "Big" symbols use two bytes. */
			printf("\t.byte 0x%02x, 0x%02x",
				(table[i]->len & 0x7F) | 0x80,
				(table[i]->len >> 7) & 0x7F);
			off += table[i]->len + 2;
		}
456 457
		for (k = 0; k < table[i]->len; k++)
			printf(", 0x%02x", table[i]->sym[k]);
Linus Torvalds's avatar
Linus Torvalds committed
458

459 460 461 462
		/*
		 * Now that we wrote out the compressed symbol name, restore the
		 * original name and print it in the comment.
		 */
463 464
		expand_symbol(table[i]->sym, table[i]->len, buf);
		strcpy((char *)table[i]->sym, buf);
465
		printf("\t/* %s */\n", table[i]->sym);
466
	}
467
	printf("\n");
468

Linus Torvalds's avatar
Linus Torvalds committed
469
	output_label("kallsyms_markers");
470
	for (i = 0; i < markers_cnt; i++)
471
		printf("\t.long\t%u\n", markers[i]);
Linus Torvalds's avatar
Linus Torvalds committed
472 473 474 475 476 477 478 479
	printf("\n");

	free(markers);

	output_label("kallsyms_token_table");
	off = 0;
	for (i = 0; i < 256; i++) {
		best_idx[i] = off;
480
		expand_symbol(best_table[i], best_table_len[i], buf);
Linus Torvalds's avatar
Linus Torvalds committed
481 482 483 484 485 486 487 488 489
		printf("\t.asciz\t\"%s\"\n", buf);
		off += strlen(buf) + 1;
	}
	printf("\n");

	output_label("kallsyms_token_index");
	for (i = 0; i < 256; i++)
		printf("\t.short\t%d\n", best_idx[i]);
	printf("\n");
490

491
	output_label("kallsyms_offsets");
492 493

	for (i = 0; i < table_cnt; i++) {
494 495 496 497 498 499
		/*
		 * Use the offset relative to the lowest value
		 * encountered of all relative symbols, and emit
		 * non-relocatable fixed offsets that will be fixed
		 * up at runtime.
		 */
500

501 502 503 504 505 506 507 508 509
		long long offset;
		int overflow;

		if (!absolute_percpu) {
			offset = table[i]->addr - relative_base;
			overflow = (offset < 0 || offset > UINT_MAX);
		} else if (symbol_absolute(table[i])) {
			offset = table[i]->addr;
			overflow = (offset < 0 || offset > INT_MAX);
510
		} else {
511 512 513 514 515 516 517 518 519
			offset = relative_base - table[i]->addr - 1;
			overflow = (offset < INT_MIN || offset >= 0);
		}
		if (overflow) {
			fprintf(stderr, "kallsyms failure: "
				"%s symbol value %#llx out of range in relative mode\n",
				symbol_absolute(table[i]) ? "absolute" : "relative",
				table[i]->addr);
			exit(EXIT_FAILURE);
520
		}
521
		printf("\t.long\t%#x\t/* %s */\n", (int)offset, table[i]->sym);
522 523 524
	}
	printf("\n");

525 526 527
	output_label("kallsyms_relative_base");
	output_address(relative_base);
	printf("\n");
528

529 530 531 532
	if (lto_clang)
		for (i = 0; i < table_cnt; i++)
			cleanup_symbol_name((char *)table[i]->sym);

533 534 535
	sort_symbols_by_name();
	output_label("kallsyms_seqs_of_names");
	for (i = 0; i < table_cnt; i++)
536
		printf("\t.byte 0x%02x, 0x%02x, 0x%02x\t/* %s */\n",
537 538
			(unsigned char)(table[i]->seq >> 16),
			(unsigned char)(table[i]->seq >> 8),
539 540
			(unsigned char)(table[i]->seq >> 0),
		       table[i]->sym);
541
	printf("\n");
Linus Torvalds's avatar
Linus Torvalds committed
542 543 544 545 546 547
}


/* table lookup compression functions */

/* count all the possible tokens in a symbol */
548
static void learn_symbol(const unsigned char *symbol, int len)
Linus Torvalds's avatar
Linus Torvalds committed
549 550 551 552
{
	int i;

	for (i = 0; i < len - 1; i++)
553
		token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
Linus Torvalds's avatar
Linus Torvalds committed
554 555 556
}

/* decrease the count for all the possible tokens in a symbol */
557
static void forget_symbol(const unsigned char *symbol, int len)
Linus Torvalds's avatar
Linus Torvalds committed
558 559 560 561
{
	int i;

	for (i = 0; i < len - 1; i++)
562
		token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
Linus Torvalds's avatar
Linus Torvalds committed
563 564
}

565
/* do the initial token count */
566
static void build_initial_token_table(void)
Linus Torvalds's avatar
Linus Torvalds committed
567
{
568
	unsigned int i;
Linus Torvalds's avatar
Linus Torvalds committed
569

570
	for (i = 0; i < table_cnt; i++)
571
		learn_symbol(table[i]->sym, table[i]->len);
Linus Torvalds's avatar
Linus Torvalds committed
572 573
}

574
static unsigned char *find_token(unsigned char *str, int len,
575
				 const unsigned char *token)
576 577 578 579 580 581 582 583 584 585
{
	int i;

	for (i = 0; i < len - 1; i++) {
		if (str[i] == token[0] && str[i+1] == token[1])
			return &str[i];
	}
	return NULL;
}

Linus Torvalds's avatar
Linus Torvalds committed
586 587
/* replace a given token in all the valid symbols. Use the sampled symbols
 * to update the counts */
588
static void compress_symbols(const unsigned char *str, int idx)
Linus Torvalds's avatar
Linus Torvalds committed
589
{
590 591
	unsigned int i, len, size;
	unsigned char *p1, *p2;
Linus Torvalds's avatar
Linus Torvalds committed
592

593
	for (i = 0; i < table_cnt; i++) {
Linus Torvalds's avatar
Linus Torvalds committed
594

595 596
		len = table[i]->len;
		p1 = table[i]->sym;
597 598

		/* find the token on the symbol */
599
		p2 = find_token(p1, len, str);
600 601 602
		if (!p2) continue;

		/* decrease the counts for this symbol's tokens */
603
		forget_symbol(table[i]->sym, len);
604 605

		size = len;
Linus Torvalds's avatar
Linus Torvalds committed
606 607

		do {
608 609 610 611 612 613 614 615 616
			*p2 = idx;
			p2++;
			size -= (p2 - p1);
			memmove(p2, p2 + 1, size);
			p1 = p2;
			len--;

			if (size < 2) break;

Linus Torvalds's avatar
Linus Torvalds committed
617
			/* find the token on the symbol */
618
			p2 = find_token(p1, size, str);
Linus Torvalds's avatar
Linus Torvalds committed
619

620
		} while (p2);
Linus Torvalds's avatar
Linus Torvalds committed
621

622
		table[i]->len = len;
Linus Torvalds's avatar
Linus Torvalds committed
623

624
		/* increase the counts for this symbol's new tokens */
625
		learn_symbol(table[i]->sym, len);
Linus Torvalds's avatar
Linus Torvalds committed
626 627 628 629
	}
}

/* search the token with the maximum profit */
630
static int find_best_token(void)
Linus Torvalds's avatar
Linus Torvalds committed
631
{
632
	int i, best, bestprofit;
Linus Torvalds's avatar
Linus Torvalds committed
633 634

	bestprofit=-10000;
635
	best = 0;
Linus Torvalds's avatar
Linus Torvalds committed
636

637 638 639 640
	for (i = 0; i < 0x10000; i++) {
		if (token_profit[i] > bestprofit) {
			best = i;
			bestprofit = token_profit[i];
Linus Torvalds's avatar
Linus Torvalds committed
641 642 643 644 645 646 647 648
		}
	}
	return best;
}

/* this is the core of the algorithm: calculate the "best" table */
static void optimize_result(void)
{
649
	int i, best;
Linus Torvalds's avatar
Linus Torvalds committed
650 651 652 653 654 655 656 657 658

	/* using the '\0' symbol last allows compress_symbols to use standard
	 * fast string functions */
	for (i = 255; i >= 0; i--) {

		/* if this table slot is empty (it is not used by an actual
		 * original char code */
		if (!best_table_len[i]) {

659
			/* find the token with the best profit value */
Linus Torvalds's avatar
Linus Torvalds committed
660
			best = find_best_token();
661 662
			if (token_profit[best] == 0)
				break;
Linus Torvalds's avatar
Linus Torvalds committed
663 664

			/* place it in the "best" table */
665 666 667
			best_table_len[i] = 2;
			best_table[i][0] = best & 0xFF;
			best_table[i][1] = (best >> 8) & 0xFF;
Linus Torvalds's avatar
Linus Torvalds committed
668 669

			/* replace this token in all the valid symbols */
670
			compress_symbols(best_table[i], i);
Linus Torvalds's avatar
Linus Torvalds committed
671 672 673 674 675 676 677
		}
	}
}

/* start by placing the symbols that are actually used on the table */
static void insert_real_symbols_in_table(void)
{
678
	unsigned int i, j, c;
Linus Torvalds's avatar
Linus Torvalds committed
679

680
	for (i = 0; i < table_cnt; i++) {
681 682
		for (j = 0; j < table[i]->len; j++) {
			c = table[i]->sym[j];
683 684
			best_table[c][0]=c;
			best_table_len[c]=1;
Linus Torvalds's avatar
Linus Torvalds committed
685 686 687 688 689 690
		}
	}
}

static void optimize_token_table(void)
{
691
	build_initial_token_table();
Linus Torvalds's avatar
Linus Torvalds committed
692 693 694 695 696 697

	insert_real_symbols_in_table();

	optimize_result();
}

698 699 700
/* guess for "linker script provide" symbol */
static int may_be_linker_script_provide_symbol(const struct sym_entry *se)
{
701
	const char *symbol = sym_name(se);
702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
	int len = se->len - 1;

	if (len < 8)
		return 0;

	if (symbol[0] != '_' || symbol[1] != '_')
		return 0;

	/* __start_XXXXX */
	if (!memcmp(symbol + 2, "start_", 6))
		return 1;

	/* __stop_XXXXX */
	if (!memcmp(symbol + 2, "stop_", 5))
		return 1;

	/* __end_XXXXX */
	if (!memcmp(symbol + 2, "end_", 4))
		return 1;

	/* __XXXXX_start */
	if (!memcmp(symbol + len - 6, "_start", 6))
		return 1;

	/* __XXXXX_end */
	if (!memcmp(symbol + len - 4, "_end", 4))
		return 1;

	return 0;
}

733 734
static int compare_symbols(const void *a, const void *b)
{
735 736
	const struct sym_entry *sa = *(const struct sym_entry **)a;
	const struct sym_entry *sb = *(const struct sym_entry **)b;
737 738 739 740 741 742 743 744 745 746 747 748 749 750
	int wa, wb;

	/* sort by address first */
	if (sa->addr > sb->addr)
		return 1;
	if (sa->addr < sb->addr)
		return -1;

	/* sort by "weakness" type */
	wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W');
	wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W');
	if (wa != wb)
		return wa - wb;

751 752 753 754 755 756 757
	/* sort by "linker script provide" type */
	wa = may_be_linker_script_provide_symbol(sa);
	wb = may_be_linker_script_provide_symbol(sb);
	if (wa != wb)
		return wa - wb;

	/* sort by the number of prefix underscores */
758 759
	wa = strspn(sym_name(sa), "_");
	wb = strspn(sym_name(sb), "_");
760 761 762
	if (wa != wb)
		return wa - wb;

763
	/* sort by initial order, so that other symbols are left undisturbed */
764
	return sa->seq - sb->seq;
765 766 767 768
}

static void sort_symbols(void)
{
769
	qsort(table, table_cnt, sizeof(table[0]), compare_symbols);
770
}
Linus Torvalds's avatar
Linus Torvalds committed
771

772 773 774 775 776
static void make_percpus_absolute(void)
{
	unsigned int i;

	for (i = 0; i < table_cnt; i++)
777
		if (symbol_in_range(table[i], &percpu_range, 1)) {
778 779 780 781 782
			/*
			 * Keep the 'A' override for percpu symbols to
			 * ensure consistent behavior compared to older
			 * versions of this tool.
			 */
783
			table[i]->sym[0] = 'A';
784
			table[i]->percpu_absolute = true;
785
		}
786 787
}

788 789 790 791 792 793
/* find the minimum non-absolute symbol address */
static void record_relative_base(void)
{
	unsigned int i;

	for (i = 0; i < table_cnt; i++)
794
		if (!symbol_absolute(table[i])) {
795 796 797 798
			/*
			 * The table is sorted by address.
			 * Take the first non-absolute symbol value.
			 */
799
			relative_base = table[i]->addr;
800 801
			return;
		}
802 803
}

804
int main(int argc, char **argv)
Linus Torvalds's avatar
Linus Torvalds committed
805
{
806
	while (1) {
807
		static const struct option long_options[] = {
808 809
			{"all-symbols",     no_argument, &all_symbols,     1},
			{"absolute-percpu", no_argument, &absolute_percpu, 1},
810
			{"lto-clang",       no_argument, &lto_clang,       1},
811 812 813 814 815 816 817 818 819 820 821 822
			{},
		};

		int c = getopt_long(argc, argv, "", long_options, NULL);

		if (c == -1)
			break;
		if (c != 0)
			usage();
	}

	if (optind >= argc)
Linus Torvalds's avatar
Linus Torvalds committed
823 824
		usage();

825
	read_map(argv[optind]);
826
	shrink_table();
827 828
	if (absolute_percpu)
		make_percpus_absolute();
829
	sort_symbols();
830
	record_relative_base();
831
	optimize_token_table();
Linus Torvalds's avatar
Linus Torvalds committed
832 833 834 835
	write_src();

	return 0;
}