Skip to content

Commit

Permalink
Merge pull request #3 from noriyotcp/extractor-class
Browse files Browse the repository at this point in the history
Add Extractor class
  • Loading branch information
noriyotcp authored Oct 2, 2020
2 parents fd5ec91 + b4db0bb commit ebe29e2
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 67 deletions.
30 changes: 30 additions & 0 deletions lib/headown.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
require 'headown/version'
require 'open-uri'
require 'commonmarker'

module Headown
class NotMarkdownError < StandardError
Expand All @@ -7,4 +9,32 @@ def initialize(file_path:)
super(msg)
end
end

# Extracts the headings of a Markdown document and exposes them as
# ATX-style strings such as "## Title".
#
# The document is read from +file_path+ at construction time; the result is
# available through #headers.
class Extractor
  # Node types whose literal content is part of a heading's visible text.
  TEXT_NODE_TYPES = %i[text code].freeze

  # @return [Array<String>] one "<hashes> <text>" string per heading, in
  #   document order
  attr_reader :headers

  # @param file_path [String] path or URL of the document; its extension
  #   must be ".md"
  # @raise [Headown::NotMarkdownError] when the extension is not ".md"
  def initialize(file_path)
    raise Headown::NotMarkdownError.new(file_path: file_path) if File.extname(file_path) != '.md'

    # NOTE(review): open-uri is understood to hand non-URL strings to
    # Kernel#open, which treats a leading "|" as a command to run. Confirm
    # whether file_path can ever come from an untrusted source.
    file_data = URI.open(file_path, &:read)
    extract_headers(file_data)
  end

  private

  # Parses +file_data+ and stores the formatted headings in @headers.
  def extract_headers(file_data)
    @headers = header_nodes(file_data).map { |node| build_header(node) }
  end

  # Returns every :header node found while walking the parsed document.
  def header_nodes(data)
    doc = CommonMarker.render_doc(data)
    [].tap { |nodes| doc.walk { |node| nodes << node if node.type == :header } }
  end

  # Formats a heading node as "<one # per level> <text>".
  #
  # The text is assembled from every text/code node below the heading, so
  # headings with inline markup ("# foo *bar* `baz`") keep their full text
  # instead of only the last top-level text fragment.
  def build_header(node)
    "#{'#' * node.header_level} #{inline_text(node)}"
  end

  # Concatenates the literal content of all text-bearing nodes at or below
  # +node+, in document order. Nodes without text-bearing descendants
  # contribute an empty string.
  def inline_text(node)
    return node.string_content if TEXT_NODE_TYPES.include?(node.type)

    parts = []
    node.each { |child| parts << inline_text(child) }
    parts.join
  end
end
end
29 changes: 2 additions & 27 deletions lib/headown/cli.rb
Original file line number Diff line number Diff line change
@@ -1,42 +1,17 @@
require 'thor'
require 'commonmarker'
require 'open-uri'

module Headown
# Thor-based command-line interface exposing the `extract` command.
#
# NOTE(review): this listing looks like a rendered diff with removed and
# added lines interleaved (no +/- markers). The private helpers below mirror
# the ones in Headown::Extractor; confirm against the real file which lines
# still exist.
class CLI < Thor
desc 'extract <path>', 'extract headers from file path'

# Prints the headers found in the Markdown document at +file_path+.
#
# A Headown::NotMarkdownError is rescued here and reported on stdout as the
# error class followed by its message, rather than propagated.
#
# NOTE(review): as written, the headers are printed twice — once through
# puts_headers and once through Headown::Extractor. Presumably only the
# Extractor lines remain after the refactor; verify.
def extract(file_path)
raise Headown::NotMarkdownError.new(file_path: file_path) if File.extname(file_path) != '.md'

file_data = URI.open(file_path, &:read)
puts_headers(file_data)
extractor = Headown::Extractor.new(file_path)
puts extractor.headers
rescue Headown::NotMarkdownError => e
puts <<~MSG
#{e.class}:
#{e.message}
MSG
end

private

# Returns every :header node found while walking the parsed document.
def header_nodes(data)
doc = CommonMarker.render_doc(data)
[].tap { |nodes| doc.walk { |node| nodes << node if node.type == :header } }
end

# Formats a header node as "<one # per level> <text>".
# Only direct :text children are inspected and each one overwrites the
# previous, so the last direct text child wins.
def build_header(node)
text = ''
node.each { |subnode| text = subnode.string_content if subnode.type == :text }
'#' * node.header_level + " #{text}"
end

# Returns the formatted header strings for +file_data+, in walk order.
def extract_headers(file_data)
header_nodes(file_data).map { |node| build_header(node) }
end

# Writes the formatted headers of +file_data+ to stdout.
def puts_headers(file_data)
puts extract_headers(file_data)
end
end
end
40 changes: 0 additions & 40 deletions spec/headown/cli_spec.rb
Original file line number Diff line number Diff line change
@@ -1,46 +1,6 @@
RSpec.describe Headown::CLI do
let(:headown) { described_class.new }

describe '#extract_headers' do
  # One heading per level, "# h1" through "###### h6", as found in sample.md.
  all_levels = (1..6).map { |level| "#{'#' * level} h#{level}" }

  # Reads the document at +location+ and passes its contents to the private
  # #extract_headers of the CLI instance under test.
  def headers_from(location)
    contents = URI.open(location) { |io| io.read }
    headown.send(:extract_headers, contents)
  end

  context 'when it finds a markdown file' do
    it 'extracts headers' do
      expect(headers_from('spec/headown/sample.md')).to eq all_levels
    end
  end

  context 'when it finds a markdown file online' do
    it 'extracts headers' do
      remote = 'https://raw.githubusercontent.com/noriyotcp/headown/main/spec/headown/sample.md'
      expect(headers_from(remote)).to eq all_levels
    end
  end

  # cf. https://spec.commonmark.org/0.29/#atx-headings
  describe 'interrupting paragraphs' do
    context 'when ATX headings are not separated from surrounding content by blank lines' do
      it 'extracts headers' do
        expect(headers_from('spec/headown/sample2.md')).to eq ['# baz']
      end
    end
  end
end

describe '#extract' do
context 'when it can NOT find a markdown file' do
it 'can NOT extract headers' do
Expand Down
24 changes: 24 additions & 0 deletions spec/headown_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,28 @@
# Guards against the gem being loaded without its VERSION constant set.
it 'has a version number' do
expect(Headown::VERSION).not_to be nil
end

describe 'Headown::Extractor' do
  # Each context supplies file_path; the subject is the resulting header list.
  subject { Headown::Extractor.new(file_path).headers }

  describe '#headers' do
    # One heading per level, "# h1" through "###### h6", as found in sample.md.
    six_levels = (1..6).map { |level| "#{'#' * level} h#{level}" }

    context 'when it finds a markdown file' do
      let(:file_path) { 'spec/headown/sample.md' }

      it { is_expected.to eq six_levels }
    end

    context 'when it finds a markdown file online' do
      let(:file_path) { 'https://raw.githubusercontent.com/noriyotcp/headown/main/spec/headown/sample.md' }

      it { is_expected.to eq six_levels }
    end

    # cf. https://spec.commonmark.org/0.29/#atx-headings
    describe 'interrupting paragraphs' do
      context 'when ATX headings are not separated from surrounding content by blank lines' do
        let(:file_path) { 'spec/headown/sample2.md' }

        it { is_expected.to eq ['# baz'] }
      end
    end
  end
end
end

0 comments on commit ebe29e2

Please sign in to comment.