%\documentclass[handout]{beamer} % set [handout] as an option to remove \pause breaks
\documentclass{beamer}
\usetheme{McMaster}
\beamertemplatenavigationsymbolsempty
\usepackage{tikz}
\usepackage[export]{adjustbox} % for left/right justifying images
\title{AI workshop}
\date{Oct 23rd, 2024}
\begin{document}
\begin{frame}
\maketitle
\end{frame}
\begin{frame}
Why care about local?
\end{frame}
\begin{frame}
\begin{itemize}
\item Privacy
\pause
\item Environmental impact
\pause
\item Cost
\end{itemize}
\end{frame}
\begin{frame}[plain]
\makebox[\linewidth]{\includegraphics[width=\paperwidth,height=\paperheight]{images/attention}}
\end{frame}
\begin{frame}[plain]
\makebox[\linewidth]{\includegraphics[width=\paperwidth,height=\paperheight]{images/hodge-podge}}
\end{frame}
\begin{frame}{Important concepts for transformer-based models}
\begin{itemize}
\item Context Window and Tokens
% Has gone up enormously in the past year. Now GPT-4 has a max window of 128k, some open models have this or more.
\pause
\item Temperature
\pause
\item Parameters
% GPT-1 117m, GPT-2 1.5b, GPT-3 175b, GPT-4 170t?
\pause
\item Training
\pause
\item The Random Seed
\pause
\item The Prompt, aka ``Programming for English Majors''
\end{itemize}
\end{frame}
\begin{frame}[plain]
\makebox[\linewidth]{\includegraphics[width=\paperwidth,height=\paperheight]{images/hottest-language}}
\end{frame}
\begin{frame}
\begin{itemize}
\item The context window is the ``memory'' of an LLM
\pause
\item Tokens -- roughly, words or word fragments -- fill up that ``memory''
\pause
\item And the model's \textit{response} consumes tokens too.
\end{itemize}
\end{frame}
% demo https://platform.openai.com/tokenizer
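% A minimal sketch of counting tokens in Python, assuming the tiktoken
% library is installed (pip install tiktoken); cl100k_base is the
% encoding used by GPT-4-era models.
\begin{frame}[fragile]{Counting tokens: a sketch}
\begin{verbatim}
import tiktoken

# Load the byte-pair encoding used by GPT-4-era models
enc = tiktoken.get_encoding("cl100k_base")

text = "Hamilton is a city in Ontario, Canada."
tokens = enc.encode(text)

# Every token in the prompt -- and in the response --
# counts against the context window
print(len(tokens), tokens[:5])
\end{verbatim}
\end{frame}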
\begin{frame}[plain]
\makebox[\linewidth]{\includegraphics[width=\paperwidth,height=\paperheight]{images/leonard}}
\end{frame}
\begin{frame}{Temperature}
\begin{itemize}
\item Loosely, the ``entropy'' of a model's responses -- how random they are
\pause
\item Low temperature hews toward predictable, repetitive output
\pause
\item High temperatures make models get\ldots goofy.
\end{itemize}
\end{frame}
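% A minimal sketch of what temperature does mechanically, using only
% numpy; the toy vocabulary and logit values here are made up for
% illustration.
\begin{frame}[fragile]{Temperature: a sketch}
\begin{verbatim}
import numpy as np

logits = np.array([2.0, 1.0, 0.1])  # toy next-token scores

def sample_probs(logits, temperature):
    # Dividing by temperature sharpens (<1) or
    # flattens (>1) the distribution before softmax
    scaled = logits / temperature
    e = np.exp(scaled - scaled.max())
    return e / e.sum()

print(sample_probs(logits, 0.2))  # top token dominates
print(sample_probs(logits, 2.0))  # much flatter: goofy
\end{verbatim}
\end{frame}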
\begin{frame}{Training}
\begin{itemize}
\item Usually done on large text corpora
\pause
\item The Pile (825 GiB), GitHub, ShareGPT, etc.
\pause
\item (cough) Books3, among others
\pause
\item Plus refinement techniques like RLHF (Reinforcement Learning from Human Feedback)
\pause
\item The larger the model, the more resources it takes to train or re-train.
\end{itemize}
\end{frame}
\begin{frame}{Parameters}
\begin{itemize}
\item Roughly corresponds to how ``complex'' or ``smart'' a model is.
\pause
\item (\ldots very roughly)
\pause
\item But it \textit{definitely} correlates with the resources needed to run the model.
\pause
\item Which is why, say, GPT-4 requires this\ldots
\end{itemize}
\end{frame}
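% A rough back-of-envelope slide: memory needed just to hold the
% weights is roughly parameter count times bytes per parameter (actual
% usage is higher once activations and the KV cache are included).
\begin{frame}{Back of the envelope}
\begin{itemize}
\item Memory for weights $\approx$ parameters $\times$ bytes per parameter
\pause
\item A 7B model at 16-bit floats: $7\times10^9 \times 2$ bytes $\approx 14$ GB
\pause
\item The same model quantized to 4 bits: $\approx 3.5$ GB
\end{itemize}
\end{frame}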
\begin{frame}[plain]
\makebox[\linewidth]{\includegraphics[width=\paperwidth,height=\paperheight]{images/azure-data-centre}}
\end{frame}
\begin{frame}
And you can run a 7B model on this\ldots
\end{frame}
\begin{frame}[plain]
\makebox[\linewidth]{\includegraphics[width=\paperwidth,height=\paperheight]{images/pfeebe}}
\end{frame}
\begin{frame}
And models keep getting smaller: good (or at least amusing) results can be had from models with 1B or even 0.5B parameters!
\end{frame}
\begin{frame}{What is \textit{quantization}?}
\begin{itemize}
\item Consider TIFF files vs.\ JPEGs, or FLAC vs.\ MP3
\pause
\item It's a way to \textit{drastically} reduce compute needs at the expense of some fidelity
\pause
\item Without quantization, you pretty much need big GPUs -- 16\,GB NVIDIA cards seem to be the entry level
\pause
\item Quantized models \textit{can} use GPUs, and indeed run better if they do, but they don't \textit{need} them
\end{itemize}
\end{frame}
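% A minimal sketch of the idea behind quantization, using naive
% symmetric int8 rounding in numpy; real schemes (4-bit GGUF quants,
% etc.) are more sophisticated, but the trade-off is the same.
\begin{frame}[fragile]{Quantization: a sketch}
\begin{verbatim}
import numpy as np

weights = np.random.randn(5).astype(np.float32)

# Map float32 weights onto 8-bit integers (256 levels)
scale = np.abs(weights).max() / 127
q = np.round(weights / scale).astype(np.int8)

# Dequantize: close to the original, but not exact --
# that lost precision is the fidelity we trade away
restored = q.astype(np.float32) * scale
print(weights)
print(restored)
\end{verbatim}
\end{frame}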
\end{document}