Update supported_models.md (#150)

* Update supported_models.md
intel · Mar 4, 2024 · 603a661
1 parent 7c2199f
commit 603a661
Showing 1 changed file with 59 additions and 5 deletions.
diff --git a/docs/supported_models.md b/docs/supported_models.md
@@ -7,17 +7,19 @@ Neural Speed supports the following models:
 <thead>
   <tr>
     <th rowspan="2">Model Name</th>
-    <th colspan="3">INT8</th>
-    <th colspan="3">INT4</th>
+    <th colspan="4">INT8</th>
+    <th colspan="4">INT4</th>
     <th rowspan="2">Transformer Version</th>
   </tr>
   <tr>
     <th>RTN</th>
     <th>GPTQ</th>
     <th>AWQ</th>
+    <th>AutoRound</th>
     <th>RTN</th>
     <th>GPTQ</th>
     <th>AWQ</th>
+    <th>AutoRound</th>
   </tr>
 </thead>
 <tbody>
@@ -31,6 +33,8 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
     <td>Latest</td>
   </tr>
   <tr>
@@ -42,6 +46,8 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
     <td>Latest</td>
   </tr>
     <td><a href="https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf" target="_blank" rel="noopener noreferrer">CodeLlama-7b</a></td>
@@ -51,6 +57,8 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
     <td>Latest</td>
   </tr>
   </tr>
@@ -61,36 +69,44 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
     <td>Latest</td>
   </tr>
   <tr>
     <td><a href="https://huggingface.co/EleutherAI/gpt-j-6b" target="_blank" rel="noopener noreferrer">GPT-J-6B</a></td>
     <td>✅</td>
-    <td>✅</td>
+    <td> </td>
+    <td> </td>
     <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
   <tr>
     <td><a href="https://huggingface.co/EleutherAI/gpt-neox-20b" target="_blank" rel="noopener noreferrer">GPT-NeoX-20B</a></td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
   <tr>
     <td><a href="https://huggingface.co/databricks/dolly-v2-3b" target="_blank" rel="noopener noreferrer">Dolly-v2-3B</a></td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>4.28.1 or newer</td>
   </tr>
   <tr>
@@ -99,9 +115,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
   <tr>
@@ -110,19 +128,23 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
   <tr>
     <td><a href="https://huggingface.co/bigscience/bloomz-7b1" target="_blank" rel="noopener noreferrer">BLOOM-7B</a></td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
   <tr>
@@ -132,9 +154,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
     <tr>
@@ -146,6 +170,8 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
     <td>Latest</td>
   </tr>
   <tr>
@@ -154,9 +180,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>4.33.1</td>
   </tr>
   <tr>
@@ -165,9 +193,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>4.33.1</td>
   </tr>
   <tr>
@@ -176,9 +206,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>4.36.0 or newer</td>
   </tr>
   <tr>
@@ -187,9 +219,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
   <tr>
@@ -199,9 +233,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
     <tr>
@@ -213,9 +249,11 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
     <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
 </tbody>
@@ -227,15 +265,19 @@ Neural Speed supports the following models:
 <thead>
   <tr>
     <th rowspan="2">Model Name</th>
-    <th colspan="2">INT8</th>
-    <th colspan="2">INT4</th>
+    <th colspan="4">INT8</th>
+    <th colspan="4">INT4</th>
     <th rowspan="2">Transformer Version</th>
   </tr>
   <tr>
     <th>RTN</th>
     <th>GPTQ</th>
+    <th>AWQ</th>
+    <th>AutoRound</th>
     <th>RTN</th>
     <th>GPTQ</th>
+    <th>AWQ</th>
+    <th>AutoRound</th>
   </tr>
 </thead>
 <tbody>
@@ -246,6 +288,10 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
     <td>Latest</td>
   </tr>
     <tr>
@@ -254,6 +300,10 @@ Neural Speed supports the following models:
     <td>✅</td>
     <td>✅</td>
     <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
+    <td>✅</td>
     <td>Latest</td>
   </tr>
   <tr>
@@ -262,8 +312,12 @@ Neural Speed supports the following models:
     <a href="https://huggingface.co/bigcode/starcoder" target="_blank" rel="noopener noreferrer">StarCoder-15.5B</a></td>
     <td>✅</td>
     <td> </td>
+    <td> </td>
+    <td> </td>
     <td>✅</td>
     <td> </td>
+    <td> </td>
+    <td> </td>
     <td>Latest</td>
   </tr>
 </tbody>