diff options
Diffstat (limited to 'new/lib')
-rw-r--r-- | new/lib/crys.rb | 2 | ||||
-rw-r--r-- | new/lib/crys/batch_builder.rb | 19 | ||||
-rw-r--r-- | new/lib/crys/builder.rb | 23 | ||||
-rw-r--r-- | new/lib/crys/pages_db_manager.rb | 39 | ||||
-rw-r--r-- | new/lib/crys/processors/html_processor.rb | 4 | ||||
-rw-r--r-- | new/lib/crys/processors/processed_page.rb | 4 | ||||
-rw-r--r-- | new/lib/crys/processors/rss_processor.rb | 4 | ||||
-rw-r--r-- | new/lib/crys/processors/yaml_batch_processor.rb | 45 |
8 files changed, 107 insertions, 33 deletions
diff --git a/new/lib/crys.rb b/new/lib/crys.rb index f1aac2d8..dd7a7df3 100644 --- a/new/lib/crys.rb +++ b/new/lib/crys.rb @@ -4,6 +4,8 @@ require 'bundler/setup' require_relative 'crys/processors/html_processor' require_relative 'crys/processors/rss_processor' +require_relative 'crys/processors/yaml_batch_processor' + require_relative 'crys/processors/processed_page' require_relative 'crys/server' diff --git a/new/lib/crys/batch_builder.rb b/new/lib/crys/batch_builder.rb index 43dbec62..d8f1d7b6 100644 --- a/new/lib/crys/batch_builder.rb +++ b/new/lib/crys/batch_builder.rb @@ -2,30 +2,29 @@ module Crys class BatchBuilder - def initialize(pages_db:, output_dir:, image_dir:, builder_class:) - @pages_db = pages_db + def initialize(pages_db_manager:, output_dir:, image_dir:, builder_class:) + @pages_db_manager = pages_db_manager @output_dir = output_dir @image_dir = image_dir @builder_class = builder_class + end def build - db.pages.each do |page| + pages_db_manager.pages.each do |page| builder_class.new( - file_path: page.file_path, + file_path: page.source, output_dir: output_dir, - image_dir: image_dir + image_dir: image_dir, + pages_db_manager: pages_db_manager ).build - puts "processed: #{page.filename}" + puts "processed: #{page.filename}" end end private - attr_reader :pages_db, :output_dir, :image_dir, :builder_class + attr_reader :pages_db_manager, :output_dir, :image_dir, :builder_class - def db - pages_db.new - end end end diff --git a/new/lib/crys/builder.rb b/new/lib/crys/builder.rb index ee4fb8ce..123935a6 100644 --- a/new/lib/crys/builder.rb +++ b/new/lib/crys/builder.rb @@ -2,10 +2,11 @@ module Crys class Builder - def initialize(file_path:, output_dir:, image_dir:) + def initialize(file_path:, output_dir:, image_dir:, pages_db_manager:) @file_path = file_path @output_dir = output_dir @image_dir = image_dir + @pages_db_manager = pages_db_manager end def build @@ -13,17 +14,12 @@ module Crys process_content process_assets - - upsert_pages_db end private - attr_reader :file_path, :output_dir, :processor, :db, :image_dir + attr_reader :file_path, :output_dir, :processor, :db, :image_dir, :pages_db_manager - def pages_db_manager - Crys::PagesDbManager.new - end def process_assets processor.assets.each do |asset| @@ -34,12 +30,17 @@ module Crys def process_content processor.parsed_pages.each do |page| + path = output_dir + "/" + File.dirname(page.filename) + FileUtils.mkdir_p(path) + content = page.content filename = page.filename output_path = "#{output_dir}/#{filename}" - + pages_db_manager.add_page(page) File.write(output_path, content) end + + pages_db_manager.save end def processor_class @@ -48,6 +49,8 @@ module Crys html_processor when /xml.rb$/ rss_processor + when /yaml$/ + yaml_batch_processor else raise StandardError, "No processor for #{file_path}" end @@ -61,8 +64,8 @@ module Crys Crys::RssProcessor end - def upsert_pages_db - pages_db_manager.add_page(processor) + def yaml_batch_processor + Crys::YamlBatchProcessor end end end diff --git a/new/lib/crys/pages_db_manager.rb b/new/lib/crys/pages_db_manager.rb index 029b0352..d8da7e70 100644 --- a/new/lib/crys/pages_db_manager.rb +++ b/new/lib/crys/pages_db_manager.rb @@ -3,16 +3,33 @@ module Crys class PagesDbManager PAGES_KEY = :pages - PAGE = Struct.new(:filename, :file_path, :last_update, :in_rss, keyword_init: true) + PAGE = Struct.new( + :filename, + :source, - def db_path; end + :last_updated_at, + :created_at, - def add_page(processor) - @processor = processor - hash = Digest::MD5.hexdigest(processor.file_path) + :in_rss, + :in_all, + + keyword_init: true) + + + def initialize(db_path:) + @db_path = db_path + end + + + def add_page(page) + @page = page + hash = Digest::MD5.hexdigest(page.source) known_page = known_page(hash) upsert_db(hash, known_page) + end + + def save save_db end @@ -24,7 +41,7 @@ module Crys private - attr_reader :hash, :processor + attr_reader :hash, :page, :db_path def db @db ||= YAML.load_file( @@ -34,11 +51,13 @@ module Crys end def upsert_db(hash, known_page) - record = PAGE.new(known_page || { filename: processor.filename, - file_path: processor.file_path, - in_rss: true }) + record = PAGE.new(known_page || { filename: page.filename, + source: page.source, + created_at: Time.now, + in_rss: true, + in_all: true}) - record.last_update = Time.now + record.last_updated_at = page.last_updated_at @db[PAGES_KEY][hash] = record.to_h end diff --git a/new/lib/crys/processors/html_processor.rb b/new/lib/crys/processors/html_processor.rb index 5dd1387f..eccc748b 100644 --- a/new/lib/crys/processors/html_processor.rb +++ b/new/lib/crys/processors/html_processor.rb @@ -18,7 +18,9 @@ module Crys [ ProcessedPage.new( filename: filename, - content: html_content + content: html_content, + source: file_path, + last_updated_at: Time.now ) ] end diff --git a/new/lib/crys/processors/processed_page.rb b/new/lib/crys/processors/processed_page.rb index 5bab3566..8627c10e 100644 --- a/new/lib/crys/processors/processed_page.rb +++ b/new/lib/crys/processors/processed_page.rb @@ -1,6 +1,8 @@ module Crys ProcessedPage = Struct.new( :filename, - :content + :content, + :source, + :last_updated_at ) end diff --git a/new/lib/crys/processors/rss_processor.rb b/new/lib/crys/processors/rss_processor.rb index 4806c594..8254bf4a 100644 --- a/new/lib/crys/processors/rss_processor.rb +++ b/new/lib/crys/processors/rss_processor.rb @@ -17,7 +17,9 @@ module Crys [ ProcessedPage.new( filename: rss.filename, - content: rss.content + content: rss.content, + source: file_path, + last_updated_at: Time.now ) ] end diff --git a/new/lib/crys/processors/yaml_batch_processor.rb b/new/lib/crys/processors/yaml_batch_processor.rb new file mode 100644 index 00000000..ef0257b8 --- /dev/null +++ b/new/lib/crys/processors/yaml_batch_processor.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require "yaml" + +require_relative 'mixins/common_functions' +require_relative 'mixins/common_parts' + +module Crys + class YamlBatchProcessor + def initialize(file_path:, image_dir:) + @file_path = file_path + rss_file = File.read(file_path) + + @assets = [] + @image_dir = image_dir # just for api consistency + end + + def parsed_pages + pages.map do |page| + ProcessedPage.new( + filename: page[:filename], + content: "all your base are belong to us", + source: file_path + "#" + page[:uid].to_s , + last_updated_at: page[:last_updated_at] + ) + end + end + + def filename + rss.filename + end + + attr_reader :assets, :file_path + + private + + def pages + @pages ||= YAML.load_file(file_path, + permitted_classes: [Time, Symbol] + ) + end + + attr_reader :rss + end +end |