<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>LLM Compression | Junhyun Kim</title><link>https://junhyun.me/tag/llm-compression/</link><atom:link href="https://junhyun.me/tag/llm-compression/index.xml" rel="self" type="application/rss+xml"/><description>LLM Compression</description><generator>Hugo Blox Builder (https://hugoblox.com)</generator><language>en-us</language><lastBuildDate>Sun, 01 Dec 2024 00:00:00 +0000</lastBuildDate><image><url>https://junhyun.me/media/icon_hu97bcb6f0f3ad9a2f164a1f560b52571a_13652_512x512_fill_lanczos_center_3.png</url><title>LLM Compression</title><link>https://junhyun.me/tag/llm-compression/</link></image><item><title>LLM Compression: Enhancing AWQ</title><link>https://junhyun.me/project/external-project/</link><pubDate>Sun, 01 Dec 2024 00:00:00 +0000</pubDate><guid>https://junhyun.me/project/external-project/</guid><description>&lt;p>Graduation project focused on improving AWQ (Activation-aware Weight Quantization) with extra scaling.&lt;/p>
&lt;ul>
&lt;li>Obtained lower perplexity for INT3-quantized OPT and Llama 2 models.&lt;/li>
&lt;/ul></description></item></channel></rss>