<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!--
  RSS 2.0 feed for the "Efficient Inference" tag listing on markboss.me.
  NOTE(review): the generator element names HugoBlox Kit, so this file is
  presumably build output; confirm before hand-editing, since a rebuild
  would likely overwrite manual changes.
-->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata: human-readable title, the HTML page this feed mirrors, and the feed's own URL. -->
    <title>Efficient Inference | Mark Boss</title>
    <link>https://markboss.me/tag/efficient-inference/</link>
    <atom:link href="https://markboss.me/tag/efficient-inference/index.xml"
               rel="self"
               type="application/rss+xml"/>
    <description>Efficient Inference</description>
    <generator>HugoBlox Kit (https://hugoblox.com)</generator>
    <language>en-us</language>
    <lastBuildDate>Thu, 21 May 2026 00:00:00 +0000</lastBuildDate>

    <!-- Channel image; its link points at the same page as the channel link. -->
    <image>
      <url>https://markboss.me/media/icon_hu_305902ed81759b07.png</url>
      <title>Efficient Inference</title>
      <link>https://markboss.me/tag/efficient-inference/</link>
    </image>

    <!-- Feed entries. The guid repeats the entry's link; the description is intentionally left empty here. -->
    <item>
      <title>OCTOPUS: Optimized KV Cache for Transformers via Octahedral Parametrization Under Optimal Squared Error Quantization</title>
      <link>https://markboss.me/publications/2026-octopus/</link>
      <pubDate>Thu, 21 May 2026 00:00:00 +0000</pubDate>
      <guid>https://markboss.me/publications/2026-octopus/</guid>
      <description/>
    </item>
  </channel>
</rss>