-
-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c48a61f
commit f680298
Showing
1 changed file
with
47 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
<!DOCTYPE html><html><head><title>documentation: Read and write Parquet from and to Protobuf</title><meta charset="utf-8" /><meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" /><meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="author" content="Marcin Jakubowski" /><meta name="description" content="Read and write Parquet files using Scala" /><meta name="og:image" content="/parquet4s/img/poster.png" /><meta name="image" property="og:image" content="/parquet4s/img/poster.png" /><meta name="og:title" content="documentation: Read and write Parquet from and to Protobuf" /><meta name="title" property="og:title" content="documentation: Read and write Parquet from and to Protobuf" /><meta name="og:site_name" content="documentation" /><meta name="og:url" content="" /><meta name="og:type" content="website" /><meta name="og:description" content="Read and write Parquet files using Scala" /><link rel="icon" type="image/png" href="/parquet4s/img/favicon.png" /><meta name="twitter:title" content="documentation: Read and write Parquet from and to Protobuf" /><meta name="twitter:image" content="/parquet4s/img/poster.png" /><meta name="twitter:description" content="Read and write Parquet files using Scala" /><meta name="twitter:card" content="summary_large_image" /><link rel="icon" type="image/png" sizes="16x16" href="/parquet4s/img/favicon-16x16.png" /><link rel="icon" type="image/png" sizes="32x32" href="/parquet4s/img/favicon-32x32.png" /><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css" /><link rel="stylesheet" href="/parquet4s/highlight/styles/vs.css" /><link rel="stylesheet" href="/parquet4s/css/light-style.css" /></head><body class="docs"><div id="wrapper"><div id="sidebar-wrapper"><div id="sidebar-brand"><a href="/parquet4s/" class="brand"><div class="brand-wrapper"></div><span>documentation</span></a><button id="main-toggle" class="sidebar-toggle"><span 
class="close"></span></button></div><div class="sidebar-nav"> <div class="sidebar-nav-item "><a href="/parquet4s/docs" title="Introduction" class="">Introduction</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/quick_start" title="Quick Start" class="">Quick Start</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/akka" title="Integration with Akka Streams" class="">Integration with Akka Streams</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/pekko" title="Integration with Pekko Streams" class="">Integration with Pekko Streams</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/fs2" title="Integration with FS2" class="">Integration with FS2</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/storage_types" title="Supported storage types" class="">Supported storage types</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/records_and_schema" title="Records, types and schema" class="">Records, types and schema</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/projection" title="Projection" class="">Projection</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/filtering" title="Filtering" class="">Filtering</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/partitioning" title="Partitioning" class="">Partitioning</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/statistics" title="Statistics" class="">Statistics</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/examples" title="Examples" class="">Examples</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/migration" title="Migration from 1.x" class="">Migration from 1.x</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/etl" title="(Experimental) ETL" class="">(Experimental) ETL</a></div> <div class="sidebar-nav-item active "><a href="/parquet4s/docs/protobuf" title="(Experimental) Protobuf" class="active">(Experimental) 
Protobuf</a></div> <div class="sidebar-nav-item "><a href="/parquet4s/docs/sponsors" title="Distinguished Sponsors" class="">Distinguished Sponsors</a></div></div></div><div id="page-content-wrapper"><div class="nav"><div class="container-fluid"><div class="row"><div class="col-lg-12"><div class="action-menu pull-left clearfix"><a href="#menu-toggle" id="menu-toggle"><i class="fa fa-bars" aria-hidden="true"></i></a></div><ul class="pull-right"><li class="search-nav"><div id="search-dropdown"><label><i class="fa fa-search"></i>Search</label><input id="search-bar" type="text" placeholder="Enter keywords here..." onclick="displayToggleSearch(event)" /><ul id="search-dropdown-content" class="dropdown dropdown-content"></ul></div></li><li id="gh-eyes-item" class="hidden-xs to-uppercase"><a href="https://github.com/mjakubowski84/parquet4s" target="_blank" rel="noopener noreferrer"><i class="fa fa-eye"></i><span>Watchers<span id="eyes" class="label label-default">--</span></span></a></li><li id="gh-stars-item" class="hidden-xs to-uppercase"><a href="https://github.com/mjakubowski84/parquet4s" target="_blank" rel="noopener noreferrer"><i class="fa fa-star-o"></i><span>Stars<span id="stars" class="label label-default">--</span></span></a></li></ul></div></div></div></div><div id="content" data-github-owner="mjakubowski84" data-github-repo="parquet4s"><div class="content-wrapper"><section><h1 id="read-and-write-parquet-from-and-to-protobuf">Read and write Parquet from and to Protobuf</h1> | ||
|
||
<p>Using the original Java Parquet library, you can read and write Parquet to and from Protobuf. Parquet4s has <code class="language-plaintext highlighter-rouge">custom</code> functions in its API, which could be leveraged for that. However, Protobuf Parquet can only be used with Java models, not to mention other issues that make it hard to use, especially in Scala. You would prefer to use <a href="https://scalapb.github.io/">ScalaPB</a> in Scala projects, right? Thanks to Parquet4S, you can! Import the ScalaPB extension into any Parquet4S project, whether it is Akka / Pekko, FS2 or plain Scala:</p> | ||
|
||
<div class="language-scala highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="s">"com.github.mjakubowski84"</span> <span class="o">%%</span> <span class="s">"parquet4s-scalapb"</span> <span class="o">%</span> <span class="s">"2.18.0"</span> | ||
</code></pre></div></div> | ||
|
||
<p>Follow the ScalaPB <a href="https://scalapb.github.io/docs/installation">documentation</a> to generate your Scala model from <code class="language-plaintext highlighter-rouge">.proto</code> files.</p> | ||
|
||
<p>Then, import Parquet4S type classes tailored for Protobuf. The rest of the code stays the same as in regular Parquet4S - no matter if that is Akka / Pekko, FS2 or core!</p> | ||
|
||
<div class="language-scala highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">import</span> <span class="nn">com.github.mjakubowski84.parquet4s.ScalaPBImplicits._</span> | ||
<span class="k">import</span> <span class="nn">com.github.mjakubowski84.parquet4s.</span><span class="o">{</span><span class="nc">ParquetReader</span><span class="o">,</span> <span class="nc">ParquetWriter</span><span class="o">,</span> <span class="nc">Path</span><span class="o">}</span> | ||
|
||
<span class="k">import</span> <span class="nn">scala.util.Using</span> | ||
|
||
<span class="k">case</span> <span class="k">class</span> <span class="nc">GeneratedProtobufData</span><span class="o">()</span> | ||
|
||
<span class="k">val</span> <span class="nv">data</span><span class="k">:</span> <span class="kt">Iterable</span><span class="o">[</span><span class="kt">GeneratedProtobufData</span><span class="o">]</span> <span class="k">=</span> <span class="o">???</span> <span class="c1">// your data</span> | ||
<span class="k">val</span> <span class="nv">path</span><span class="k">:</span> <span class="kt">Path</span> <span class="o">=</span> <span class="o">???</span> <span class="c1">// path to write to / to read from</span> | ||
|
||
<span class="c1">// write</span> | ||
<span class="nv">ParquetWriter</span><span class="o">.</span><span class="py">of</span><span class="o">[</span><span class="kt">GeneratedProtobufData</span><span class="o">].</span><span class="py">writeAndClose</span><span class="o">(</span><span class="nv">path</span><span class="o">.</span><span class="py">append</span><span class="o">(</span><span class="s">"data.parquet"</span><span class="o">),</span> <span class="n">data</span><span class="o">)</span> | ||
|
||
<span class="c1">// read</span> | ||
<span class="nv">Using</span><span class="o">.</span><span class="py">resource</span><span class="o">(</span><span class="nv">ParquetReader</span><span class="o">.</span><span class="py">as</span><span class="o">[</span><span class="kt">GeneratedProtobufData</span><span class="o">].</span><span class="py">read</span><span class="o">(</span><span class="n">path</span><span class="o">))(</span><span class="nv">_</span><span class="o">.</span><span class="py">foreach</span><span class="o">(</span><span class="n">println</span><span class="o">))</span> | ||
</code></pre></div></div> | ||
|
||
<p>Please follow the <a href="https://github.com/mjakubowski84/parquet4s/tree/master/examples/src/main/scala/com/github/mjakubowski84/parquet4s/scalapb">examples</a> to learn more.</p> | ||
</section></div></div></div></div><script src="/parquet4s/highlight/highlight.pack.js"></script><script src="/parquet4s/lunr/lunr.js"></script><script> | ||
// The generated markup carries the `language-*` class on the wrapping <div>,
// while hljs expects to find the language on the inner <code> tag. Mirror the
// language name from each such <div> onto every `pre code` element inside it.
const langPrefix = 'language-';
for (const wrapper of document.querySelectorAll(`div[class^='${langPrefix}']`)) {
  for (const className of wrapper.classList) {
    // Only classes of the form `language-<name>` carry a language.
    if (!className.startsWith(langPrefix)) {
      continue;
    }
    const language = className.slice(langPrefix.length);
    for (const codeElement of wrapper.querySelectorAll('pre code')) {
      codeElement.classList.add(language);
    }
  }
}

// Pass the language list to hljs, then ask it to highlight once the page loads.
hljs.configure({languages:['scala','java','bash']});
hljs.initHighlightingOnLoad();
</script><!-- The inline script below logs a hex-escaped banner to the browser console: the text "Website built with:", an ASCII-art logo, and the URL https://47deg.github.io/sbt-microsites. --><script>console.info('\x57\x65\x62\x73\x69\x74\x65\x20\x62\x75\x69\x6c\x74\x20\x77\x69\x74\x68\x3a\x0a\x20\x20\x20\x20\x20\x20\x20\x20\x20\x5f\x5f\x20\x20\x20\x20\x5f\x5f\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x5f\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x5f\x20\x5f\x5f\x0a\x20\x20\x20\x5f\x5f\x5f\x5f\x5f\x2f\x20\x2f\x5f\x20\x20\x2f\x20\x2f\x5f\x20\x20\x20\x20\x20\x20\x5f\x5f\x5f\x5f\x20\x5f\x5f\x5f\x20\x20\x28\x5f\x29\x5f\x5f\x5f\x5f\x5f\x5f\x5f\x5f\x5f\x5f\x5f\x5f\x5f\x20\x20\x5f\x5f\x5f\x5f\x5f\x28\x5f\x29\x20\x2f\x5f\x5f\x5f\x5f\x20\x20\x5f\x5f\x5f\x5f\x5f\x0a\x20\x20\x2f\x20\x5f\x5f\x5f\x2f\x20\x5f\x5f\x20\x5c\x2f\x20\x5f\x5f\x2f\x5f\x5f\x5f\x5f\x5f\x2f\x20\x5f\x5f\x20\x60\x5f\x5f\x20\x5c\x2f\x20\x2f\x20\x5f\x5f\x5f\x2f\x20\x5f\x5f\x5f\x2f\x20\x5f\x5f\x20\x5c\x2f\x20\x5f\x5f\x5f\x2f\x20\x2f\x20\x5f\x5f\x2f\x20\x5f\x20\x5c\x2f\x20\x5f\x5f\x5f\x2f\x0a\x20\x28\x5f\x5f\x20\x20\x29\x20\x2f\x5f\x2f\x20\x2f\x20\x2f\x5f\x2f\x5f\x5f\x5f\x5f\x5f\x2f\x20\x2f\x20\x2f\x20\x2f\x20\x2f\x20\x2f\x20\x2f\x20\x2f\x5f\x5f\x2f\x20\x2f\x20\x20\x2f\x20\x2f\x5f\x2f\x20\x28\x5f\x5f\x20\x20\x29\x20\x2f\x20\x2f\x5f\x2f\x20\x20\x5f\x5f\x28\x5f\x5f\x20\x20\x29\x0a\x2f\x5f\x5f\x5f\x5f\x2f\x5f\x2e\x5f\x5f\x5f\x2f\x5c\x5f\x5f\x2f\x20\x20\x20\x20\x20\x2f\x5f\x2f\x20\x2f\x5f\x2f\x20\x2f\x5f\x2f\x5f\x2f\x5c\x5f\x5f\x5f\x2f\x5f\x2f\x20\x20\x20\x5c\x5f\x5f\x5f\x5f\x2f\x5f\x5f\x5f\x5f\x2f\x5f\x2f\x5c\x5f\x5f\x2f\x5c\x5f\x5f\x5f\x2f\x5f\x5f\x5f\x5f\x2f\x0a\x0a\x68\x74\x74\x70\x73\x3a\x2f\x2f\x34\x37\x64\x65\x67\x2e\x67\x69\x74\x68\x75\x62\x2e\x69\x6f\x2f\x73\x62\x74\x2d\x6d\x69\x63\x72\x6f\x73\x69\x74\x65\x73')</script><script src="/parquet4s/js/search.js"></script><script src="/parquet4s/js/docs.js"></script></body></html> |