small stylistic fixes and adjustments, fix bug in Makefile, and change the timing code to skip the first (slow) iteration
karpathy committed Jul 27, 2023
1 parent 0e1b0d4 commit 25b50ee
Showing 2 changed files with 14 additions and 18 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -42,7 +42,7 @@ rungnu:
 	$(CC) -Ofast -std=gnu11 -o run run.c -lm

 .PHONY: runompgnu
-rungnu:
+runompgnu:
 	$(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run

 .PHONY: clean
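A note on the bug fixed above: the OpenMP GNU rule had been pasted in under the duplicate target name rungnu, so GNU make warned about an overriding recipe for rungnu and make runompgnu failed for lack of a matching rule. With the rename, the OpenMP build works as expected, e.g. make runompgnu followed by something like OMP_NUM_THREADS=4 ./run model.bin (the thread count and checkpoint name here are illustrative).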
30 changes: 13 additions & 17 deletions run.c
@@ -193,6 +193,7 @@ void softmax(float* x, int size) {

 void matmul(float* xout, float* x, float* w, int n, int d) {
     // W (d,n) @ x (n,) -> xout (d,)
+    // by far the most amount of time is spent inside this little function
     int i;
     #pragma omp parallel for private(i)
     for (i = 0; i < d; i++) {
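The loop body is collapsed by the diff viewer. For context, the whole routine at this point in history reads roughly as follows; the inner loop is a reconstruction, not part of this diff:

void matmul(float* xout, float* x, float* w, int n, int d) {
    // W (d,n) @ x (n,) -> xout (d,)
    // by far the most amount of time is spent inside this little function
    int i;
    #pragma omp parallel for private(i)
    for (i = 0; i < d; i++) {
        float val = 0.0f;
        for (int j = 0; j < n; j++) {
            val += w[i * n + j] * x[j]; // dot product of row i of W with x
        }
        xout[i] = val;
    }
}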
@@ -205,7 +206,7 @@ void matmul(float* xout, float* x, float* w, int n, int d) {
 }

 void transformer(int token, int pos, Config* p, RunState* s, TransformerWeights* w) {
-
+    // a few convenience variables
     float *x = s->x;
     int dim = p->dim;
@@ -222,7 +223,7 @@ void transformer(int token, int pos, Config* p, RunState* s, TransformerWeights*

     // forward all the layers
     for(int l = 0; l < p->n_layers; l++) {
-        
+
         // attention rmsnorm
         rmsnorm(s->xb, x, w->rms_att_weight + l*dim, dim);

@@ -316,7 +317,7 @@ void transformer(int token, int pos, Config* p, RunState* s, TransformerWeights*
         for (int i = 0; i < hidden_dim; i++) {
             s->hb[i] = s->hb[i] * (1.0f / (1.0f + expf(-s->hb[i])));
         }
-        
+
         // elementwise multiply with w3(x)
         for (int i = 0; i < hidden_dim; i++) {
             s->hb[i] = s->hb[i] * s->hb2[i];
@@ -398,15 +399,12 @@ int main(int argc, char *argv[]) {
     // read in the model.bin file
     Config config;
     TransformerWeights weights;
-    int fd = 0;
-    float* data = NULL;
-    long file_size;
+    int fd = 0; // file descriptor for memory mapping
+    float* data = NULL; // memory mapped data pointer
+    long file_size; // size of the checkpoint file in bytes
     {
         FILE *file = fopen(checkpoint, "rb");
-        if (!file) {
-            printf("Unable to open the checkpoint file %s!\n", checkpoint);
-            return 1;
-        }
+        if (!file) { printf("Couldn't open file %s\n", checkpoint); return 1; }
         // read in the config header
         if(fread(&config, sizeof(Config), 1, file) != 1) { return 1; }
         // negative vocab size is hacky way of signaling unshared weights. bit yikes.
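These three variables feed the memory-mapping code just below, which the viewer elides. Presumably it looks something like this (a sketch; needs <fcntl.h> and <sys/mman.h>, and the exact error handling may differ):

fseek(file, 0, SEEK_END); // move to the end of the checkpoint
file_size = ftell(file);  // size of the checkpoint file in bytes
fclose(file);
fd = open(checkpoint, O_RDONLY);
if (fd == -1) { printf("open failed!\n"); return 1; }
data = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (data == MAP_FAILED) { printf("mmap failed!\n"); return 1; }

Mapping the weights rather than reading them is also what makes the first generated token slow; the timing change at the bottom of this diff works around exactly that.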
@@ -431,11 +429,7 @@ int main(int argc, char *argv[]) {
     char** vocab = (char**)malloc(config.vocab_size * sizeof(char*));
     {
         FILE *file = fopen("tokenizer.bin", "rb");
-        if (!file) {
-            printf("Unable to open the tokenizer file tokenizer.bin! Run "
-                   "python tokenizer.py to convert tokenizer.model -> tokenizer.bin\n");
-            return 1;
-        }
+        if (!file) { printf("Couldn't load tokenizer.bin\n"); return 1; }
         int len;
         for (int i = 0; i < config.vocab_size; i++) {
             if(fread(&len, sizeof(int), 1, file) != 1) { return 1; }
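The rest of the loop body is collapsed; it presumably allocates each vocabulary entry and reads its bytes, roughly:

vocab[i] = (char*)malloc(len + 1);
if (fread(vocab[i], len, 1, file) != 1) { return 1; }
vocab[i][len] = '\0'; // NUL-terminate the token string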
@@ -451,7 +445,7 @@ int main(int argc, char *argv[]) {
     malloc_run_state(&state, &config);

     // the current position we are in
-    long start = time_in_ms();
+    long start = 0; // used to time our code, only initialized after first iteration
     int next;
     int token = 1; // 1 = BOS token in Llama-2 sentencepiece
     int pos = 0;
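The time_in_ms helper is not part of this diff. A plausible implementation, assuming clock_gettime is used (a sketch, not necessarily the repository's exact code):

#include <time.h>

long time_in_ms() {
    // wall-clock time in milliseconds, coarse enough for tok/s reporting
    struct timespec time;
    clock_gettime(CLOCK_REALTIME, &time);
    return time.tv_sec * 1000 + time.tv_nsec / 1000000;
}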
@@ -479,11 +473,13 @@ int main(int argc, char *argv[]) {
         // advance forward
         token = next;
         pos++;
+        // init our timer here because the first iteration is slow due to memmap
+        if (start == 0) { start = time_in_ms(); }
     }

     // report achieved tok/s
     long end = time_in_ms();
-    printf("\nachieved tok/s: %f\n", steps / (double)(end-start)*1000);
+    printf("\nachieved tok/s: %f\n", (steps-1) / (double)(end-start)*1000);

     // memory and file handles cleanup
     free_run_state(&state);
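The two timing changes work together: start is captured only once the first loop iteration has finished, because that iteration is dominated by page faults while the mmap'd weights are pulled into memory, and the numerator drops to steps-1 so it counts only the tokens generated inside the shortened window. Worked example: with steps = 256 and end-start = 5000 ms, the program reports 255 / 5000 * 1000 = 51 tok/s, instead of averaging the slow first token into the rate.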
