From 616b1b40ab16dd5f5c33bb60189ef531f651180c Mon Sep 17 00:00:00 2001 From: noriyotcp Date: Sat, 3 Oct 2020 02:47:33 +0900 Subject: [PATCH 1/2] Add Headown::Extractor which has logic to extract headers --- lib/headown.rb | 28 ++++++++++++++++++++++++++++ lib/headown/cli.rb | 27 ++------------------------- spec/headown/cli_spec.rb | 40 ---------------------------------------- spec/headown_spec.rb | 24 ++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 65 deletions(-) diff --git a/lib/headown.rb b/lib/headown.rb index eda66e0..604a8d4 100644 --- a/lib/headown.rb +++ b/lib/headown.rb @@ -1,4 +1,6 @@ require 'headown/version' +require 'open-uri' +require 'commonmarker' module Headown class NotMarkdownError < StandardError @@ -7,4 +9,30 @@ def initialize(file_path:) super(msg) end end + + class Extractor + attr_reader :headers + + def initialize(file_path) + file_data = URI.open(file_path, &:read) + extract_headers(file_data) + end + + private + + def extract_headers(file_data) + @headers = header_nodes(file_data).map { |node| build_header(node) } + end + + def header_nodes(data) + doc = CommonMarker.render_doc(data) + [].tap { |nodes| doc.walk { |node| nodes << node if node.type == :header } } + end + + def build_header(node) + text = '' + node.each { |subnode| text = subnode.string_content if subnode.type == :text } + '#' * node.header_level + " #{text}" + end + end end diff --git a/lib/headown/cli.rb b/lib/headown/cli.rb index e1b359c..442ee97 100644 --- a/lib/headown/cli.rb +++ b/lib/headown/cli.rb @@ -1,6 +1,4 @@ require 'thor' -require 'commonmarker' -require 'open-uri' module Headown class CLI < Thor @@ -9,34 +7,13 @@ class CLI < Thor def extract(file_path) raise Headown::NotMarkdownError.new(file_path: file_path) if File.extname(file_path) != '.md' - file_data = URI.open(file_path, &:read) - puts_headers(file_data) + extractor = Headown::Extractor.new(file_path) + puts extractor.headers rescue Headown::NotMarkdownError => e puts <<~MSG #{e.class}: #{e.message} MSG end - - private - - def header_nodes(data) - doc = CommonMarker.render_doc(data) - [].tap { |nodes| doc.walk { |node| nodes << node if node.type == :header } } - end - - def build_header(node) - text = '' - node.each { |subnode| text = subnode.string_content if subnode.type == :text } - '#' * node.header_level + " #{text}" - end - - def extract_headers(file_data) - header_nodes(file_data).map { |node| build_header(node) } - end - - def puts_headers(file_data) - puts extract_headers(file_data) - end end end diff --git a/spec/headown/cli_spec.rb b/spec/headown/cli_spec.rb index af28e4d..fb12e20 100644 --- a/spec/headown/cli_spec.rb +++ b/spec/headown/cli_spec.rb @@ -1,46 +1,6 @@ RSpec.describe Headown::CLI do let(:headown) { described_class.new } - describe '#extract_headers' do - context 'when it finds a markdown file' do - it 'extracts headers' do - file_data = URI.open('spec/headown/sample.md', &:read) - expect(headown.send(:extract_headers, file_data)).to eq [ - '# h1', - '## h2', - '### h3', - '#### h4', - '##### h5', - '###### h6' - ] - end - end - - context 'when it finds a markdown file online' do - it 'extracts headers' do - file_data = URI.open('https://raw.githubusercontent.com/noriyotcp/headown/main/spec/headown/sample.md', &:read) - expect(headown.send(:extract_headers, file_data)).to eq [ - '# h1', - '## h2', - '### h3', - '#### h4', - '##### h5', - '###### h6' - ] - end - end - - # cf. https://spec.commonmark.org/0.29/#atx-headings - describe 'interrupting paragraphs' do - context 'when ATX headings are not separated from surrounding content by blank lines' do - it 'extracts headers' do - file_data = URI.open('spec/headown/sample2.md', &:read) - expect(headown.send(:extract_headers, file_data)).to eq ['# baz'] - end - end - end - end - describe '#extract' do context 'when it can NOT find a markdown file' do it 'can NOT extract headers' do diff --git a/spec/headown_spec.rb b/spec/headown_spec.rb index 8453440..def3173 100644 --- a/spec/headown_spec.rb +++ b/spec/headown_spec.rb @@ -2,4 +2,28 @@ it 'has a version number' do expect(Headown::VERSION).not_to be nil end + + describe 'Headown::Extractor' do + subject { Headown::Extractor.new(file_path).headers } + + describe '#headers' do + context 'when it finds a markdown file' do + let(:file_path) { 'spec/headown/sample.md' } + it { is_expected.to eq ['# h1', '## h2', '### h3', '#### h4', '##### h5', '###### h6'] } + end + + context 'when it finds a markdown file online' do + let(:file_path) { 'https://raw.githubusercontent.com/noriyotcp/headown/main/spec/headown/sample.md' } + it { is_expected.to eq ['# h1', '## h2', '### h3', '#### h4', '##### h5', '###### h6'] } + end + + # cf. https://spec.commonmark.org/0.29/#atx-headings + describe 'interrupting paragraphs' do + context 'when ATX headings are not separated from surrounding content by blank lines' do + let(:file_path) { 'spec/headown/sample2.md' } + it { is_expected.to eq ['# baz'] } + end + end + end + end end From b4db0bbc2f27a355ec962fc21420bc009a08d527 Mon Sep 17 00:00:00 2001 From: noriyotcp Date: Sat, 3 Oct 2020 02:53:57 +0900 Subject: [PATCH 2/2] Raise Headown::NotMarkdownError in constructor in Headown::Extractor --- lib/headown.rb | 2 ++ lib/headown/cli.rb | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/headown.rb b/lib/headown.rb index 604a8d4..31f6a96 100644 --- a/lib/headown.rb +++ b/lib/headown.rb @@ -14,6 +14,8 @@ class Extractor attr_reader :headers def initialize(file_path) + raise Headown::NotMarkdownError.new(file_path: file_path) if File.extname(file_path) != '.md' + file_data = URI.open(file_path, &:read) extract_headers(file_data) end diff --git a/lib/headown/cli.rb b/lib/headown/cli.rb index 442ee97..ea07084 100644 --- a/lib/headown/cli.rb +++ b/lib/headown/cli.rb @@ -5,8 +5,6 @@ class CLI < Thor desc 'extract ', 'extract headers from file path' def extract(file_path) - raise Headown::NotMarkdownError.new(file_path: file_path) if File.extname(file_path) != '.md' - extractor = Headown::Extractor.new(file_path) puts extractor.headers rescue Headown::NotMarkdownError => e