Unverified Commit 86748148 authored by Eugen Rochko's avatar Eugen Rochko Committed by GitHub
Browse files

Change tootctl to use inline parallelization instead of Sidekiq (#11776)

- Remove --background option
- Add --concurrency(=5) option
- Add progress bars
parent 9045f5e3
......@@ -77,6 +77,7 @@ gem 'rails-settings-cached', '~> 0.6'
gem 'redis', '~> 4.1', require: ['redis', 'redis/connection/hiredis']
gem 'mario-redis-lock', '~> 1.2', require: 'redis_lock'
gem 'rqrcode', '~> 0.10'
gem 'ruby-progressbar', '~> 1.10'
gem 'sanitize', '~> 5.1'
gem 'sidekiq', '~> 5.2'
gem 'sidekiq-scheduler', '~> 3.0'
......
......@@ -769,6 +769,7 @@ DEPENDENCIES
rspec-sidekiq (~> 3.0)
rubocop (~> 0.74)
rubocop-rails (~> 2.3)
ruby-progressbar (~> 1.10)
sanitize (~> 5.1)
sidekiq (~> 5.2)
sidekiq-bulk (~> 0.2.0)
......
......@@ -129,6 +129,7 @@ class MediaAttachment < ApplicationRecord
scope :unattached, -> { where(status_id: nil, scheduled_status_id: nil) }
scope :local, -> { where(remote_url: '') }
scope :remote, -> { where.not(remote_url: '') }
scope :cached, -> { remote.where.not(file_file_name: nil) }
default_scope { order(id: :asc) }
......
......@@ -43,6 +43,8 @@ class PreviewCard < ApplicationRecord
validates_attachment_size :image, less_than: LIMIT
remotable_attachment :image, LIMIT
scope :cached, -> { where.not(image_file_name: [nil, '']) }
before_save :extract_dimensions, if: :link?
def save_with_optional_image!
......
# frozen_string_literal: true
class Maintenance::DestroyMediaWorker
include Sidekiq::Worker
sidekiq_options queue: 'pull'
def perform(media_attachment_id)
media = media_attachment_id.is_a?(MediaAttachment) ? media_attachment_id : MediaAttachment.find(media_attachment_id)
media.destroy
rescue ActiveRecord::RecordNotFound
true
end
end
# frozen_string_literal: true
class Maintenance::RedownloadAccountMediaWorker
include Sidekiq::Worker
sidekiq_options queue: 'pull', retry: false
def perform(account_id)
account = account_id.is_a?(Account) ? account_id : Account.find(account_id)
account.reset_avatar!
account.reset_header!
account.save
rescue ActiveRecord::RecordNotFound
true
end
end
# frozen_string_literal: true
class Maintenance::UncacheMediaWorker
include Sidekiq::Worker
sidekiq_options queue: 'pull'
def perform(media_attachment_id)
media = media_attachment_id.is_a?(MediaAttachment) ? media_attachment_id : MediaAttachment.find(media_attachment_id)
return if media.file.blank?
media.file.destroy
media.save
rescue ActiveRecord::RecordNotFound
true
end
end
# frozen_string_literal: true
class Maintenance::UncachePreviewWorker
include Sidekiq::Worker
sidekiq_options queue: 'pull'
def perform(preview_card_id)
preview_card = PreviewCard.find(preview_card_id)
return if preview_card.image.blank?
preview_card.image.destroy
preview_card.save
rescue ActiveRecord::RecordNotFound
true
end
end
......@@ -7,6 +7,8 @@ require_relative 'cli_helper'
module Mastodon
class AccountsCLI < Thor
include CLIHelper
def self.exit_on_failure?
true
end
......@@ -26,18 +28,20 @@ module Mastodon
if options[:all]
processed = 0
delay = 0
scope = Account.local.without_suspended
progress = create_progress_bar(scope.count)
Account.local.without_suspended.find_in_batches do |accounts|
scope.find_in_batches do |accounts|
accounts.each do |account|
rotate_keys_for_account(account, delay)
progress.increment
processed += 1
say('.', :green, false)
end
delay += 5.minutes
end
say
progress.finish
say("OK, rotated keys for #{processed} accounts", :green)
elsif username.present?
rotate_keys_for_account(Account.find_local(username))
......@@ -206,6 +210,8 @@ module Mastodon
say('OK', :green)
end
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :verbose, type: :boolean, aliases: [:v]
option :dry_run, type: :boolean
desc 'cull', 'Remove remote accounts that no longer exist'
long_desc <<-LONG_DESC
......@@ -215,63 +221,45 @@ module Mastodon
Accounts that have had confirmed activity within the last week
are excluded from the checks.
Domains that are unreachable are not checked.
With the --dry-run option, no deletes will actually be carried
out.
LONG_DESC
def cull
skip_threshold = 7.days.ago
culled = 0
dry_run_culled = []
skip_domains = Set.new
dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
skip_domains = Concurrent::Set.new
Account.remote.where(protocol: :activitypub).partitioned.find_each do |account|
next if account.updated_at >= skip_threshold || (account.last_webfingered_at.present? && account.last_webfingered_at >= skip_threshold)
processed, culled = parallelize_with_progress(Account.remote.where(protocol: :activitypub).partitioned) do |account|
next if account.updated_at >= skip_threshold || (account.last_webfingered_at.present? && account.last_webfingered_at >= skip_threshold) || skip_domains.include?(account.domain)
code = 0
unless skip_domains.include?(account.domain)
begin
code = Request.new(:head, account.uri).perform(&:code)
rescue HTTP::ConnectionError
skip_domains << account.domain
rescue StandardError
next
end
begin
code = Request.new(:head, account.uri).perform(&:code)
rescue HTTP::ConnectionError
skip_domains << account.domain
end
if [404, 410].include?(code)
if options[:dry_run]
dry_run_culled << account.acct
else
SuspendAccountService.new.call(account, destroy: true)
end
culled += 1
say('+', :green, false)
SuspendAccountService.new.call(account, destroy: true) unless options[:dry_run]
1
else
account.touch # Touch account even during dry run to avoid getting the account into the window again
say('.', nil, false)
# Touch account even during dry run to avoid getting the account into the window again
account.touch
end
end
say
say("Removed #{culled} accounts. #{skip_domains.size} servers skipped#{dry_run}", skip_domains.empty? ? :green : :yellow)
say("Visited #{processed} accounts, removed #{culled}#{dry_run}", :green)
unless skip_domains.empty?
say('The following servers were not available during the check:', :yellow)
say('The following domains were not available during the check:', :yellow)
skip_domains.each { |domain| say(' ' + domain) }
end
unless dry_run_culled.empty?
say('The following accounts would have been deleted:', :green)
dry_run_culled.each { |account| say(' ' + account) }
end
end
option :all, type: :boolean
option :domain
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :verbose, type: :boolean, aliases: [:v]
option :dry_run, type: :boolean
desc 'refresh [USERNAME]', 'Fetch remote user data and files'
long_desc <<-LONG_DESC
Fetch remote user data and files for one or multiple accounts.
......@@ -280,21 +268,23 @@ module Mastodon
Through the --domain option, this can be narrowed down to a
specific domain only. Otherwise, a single remote account must
be specified with USERNAME.
All processing is done in the background through Sidekiq.
LONG_DESC
def refresh(username = nil)
dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
if options[:domain] || options[:all]
queued = 0
scope = Account.remote
scope = scope.where(domain: options[:domain]) if options[:domain]
scope.select(:id).reorder(nil).find_in_batches do |accounts|
Maintenance::RedownloadAccountMediaWorker.push_bulk(accounts.map(&:id))
queued += accounts.size
processed, = parallelize_with_progress(scope) do |account|
next if options[:dry_run]
account.reset_avatar!
account.reset_header!
account.save
end
say("Scheduled refreshment of #{queued} accounts", :green, true)
say("Refreshed #{processed} accounts#{dry_run}", :green, true)
elsif username.present?
username, domain = username.split('@')
account = Account.find_remote(username, domain)
......@@ -304,76 +294,53 @@ module Mastodon
exit(1)
end
Maintenance::RedownloadAccountMediaWorker.perform_async(account.id)
say('OK', :green)
unless options[:dry_run]
account.reset_avatar!
account.reset_header!
account.save
end
say("OK#{dry_run}", :green)
else
say('No account(s) given', :red)
exit(1)
end
end
desc 'follow ACCT', 'Make all local accounts follow account specified by ACCT'
long_desc <<-LONG_DESC
Make all local accounts follow another local account specified by ACCT.
ACCT should be the username only.
LONG_DESC
def follow(acct)
if acct.include? '@'
say('Target account name should not contain a target instance, since it has to be a local account.', :red)
exit(1)
end
target_account = ResolveAccountService.new.call(acct)
processed = 0
failed = 0
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :verbose, type: :boolean, aliases: [:v]
desc 'follow USERNAME', 'Make all local accounts follow account specified by USERNAME'
def follow(username)
target_account = Account.find_local(username)
if target_account.nil?
say("Target account (#{acct}) could not be resolved", :red)
say('No such account', :red)
exit(1)
end
Account.local.without_suspended.find_each do |account|
begin
FollowService.new.call(account, target_account)
processed += 1
say('.', :green, false)
rescue StandardError
failed += 1
say('.', :red, false)
end
processed, = parallelize_with_progress(Account.local.without_suspended) do |account|
FollowService.new.call(account, target_account)
end
say("OK, followed target from #{processed} accounts, skipped #{failed}", :green)
say("OK, followed target from #{processed} accounts", :green)
end
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :verbose, type: :boolean, aliases: [:v]
desc 'unfollow ACCT', 'Make all local accounts unfollow account specified by ACCT'
long_desc <<-LONG_DESC
Make all local accounts unfollow an account specified by ACCT. ACCT can be
a simple username, in case of a local user. It can also be in the format
username@domain, in case of a remote user.
LONG_DESC
def unfollow(acct)
target_account = Account.find_remote(*acct.split('@'))
processed = 0
failed = 0
if target_account.nil?
say("Target account (#{acct}) was not found", :red)
say('No such account', :red)
exit(1)
end
target_account.followers.local.find_each do |account|
begin
UnfollowService.new.call(account, target_account)
processed += 1
say('.', :green, false)
rescue StandardError
failed += 1
say('.', :red, false)
end
parallelize_with_progress(target_account.followers.local) do |account|
UnfollowService.new.call(account, target_account)
end
say("OK, unfollowed target from #{processed} accounts, skipped #{failed}", :green)
say("OK, unfollowed target from #{processed} accounts", :green)
end
option :follows, type: :boolean, default: false
......@@ -396,51 +363,50 @@ module Mastodon
account = Account.find_local(username)
if account.nil?
say('No user with such username', :red)
say('No such account', :red)
exit(1)
end
if options[:follows]
processed = 0
failed = 0
total = 0
total += Account.where(id: ::Follow.where(account: account).select(:target_account_id)).count if options[:follows]
total += Account.where(id: ::Follow.where(target_account: account).select(:account_id)).count if options[:followers]
progress = create_progress_bar(total)
processed = 0
say("Unfollowing #{account.username}'s followees, this might take a while...")
if options[:follows]
scope = Account.where(id: ::Follow.where(account: account).select(:target_account_id))
Account.where(id: ::Follow.where(account: account).select(:target_account_id)).find_each do |target_account|
scope.find_each do |target_account|
begin
UnfollowService.new.call(account, target_account)
rescue => e
progress.log pastel.red("Error processing #{target_account.id}: #{e}")
ensure
progress.increment
processed += 1
say('.', :green, false)
rescue StandardError
failed += 1
say('.', :red, false)
end
end
BootstrapTimelineWorker.perform_async(account.id)
say("OK, unfollowed #{processed} followees, skipped #{failed}", :green)
end
if options[:followers]
processed = 0
failed = 0
say("Removing #{account.username}'s followers, this might take a while...")
scope = Account.where(id: ::Follow.where(target_account: account).select(:account_id))
Account.where(id: ::Follow.where(target_account: account).select(:account_id)).find_each do |target_account|
scope.find_each do |target_account|
begin
UnfollowService.new.call(target_account, account)
rescue => e
progress.log pastel.red("Error processing #{target_account.id}: #{e}")
ensure
progress.increment
processed += 1
say('.', :green, false)
rescue StandardError
failed += 1
say('.', :red, false)
end
end
say("OK, removed #{processed} followers, skipped #{failed}", :green)
end
progress.finish
say("Processed #{processed} relationships", :green, true)
end
option :number, type: :numeric, aliases: [:n]
......
......@@ -6,6 +6,8 @@ require_relative 'cli_helper'
module Mastodon
class CacheCLI < Thor
include CLIHelper
def self.exit_on_failure?
true
end
......@@ -16,6 +18,8 @@ module Mastodon
say('OK', :green)
end
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :verbose, type: :boolean, aliases: [:v]
desc 'recount TYPE', 'Update hard-cached counters'
long_desc <<~LONG_DESC
Update hard-cached counters of TYPE by counting referenced
......@@ -25,32 +29,24 @@ module Mastodon
size of the database.
LONG_DESC
def recount(type)
processed = 0
case type
when 'accounts'
Account.local.includes(:account_stat).find_each do |account|
processed, = parallelize_with_progress(Account.local.includes(:account_stat)) do |account|
account_stat = account.account_stat
account_stat.following_count = account.active_relationships.count
account_stat.followers_count = account.passive_relationships.count
account_stat.statuses_count = account.statuses.where.not(visibility: :direct).count
account_stat.save if account_stat.changed?
processed += 1
say('.', :green, false)
end
when 'statuses'
Status.includes(:status_stat).find_each do |status|
processed, = parallelize_with_progress(Status.includes(:status_stat)) do |status|
status_stat = status.status_stat
status_stat.replies_count = status.replies.where.not(visibility: :direct).count
status_stat.reblogs_count = status.reblogs.count
status_stat.favourites_count = status.favourites.count
status_stat.save if status_stat.changed?
processed += 1
say('.', :green, false)
end
else
say("Unknown type: #{type}", :red)
......
......@@ -7,3 +7,52 @@ ActiveRecord::Base.logger = dev_null
ActiveJob::Base.logger = dev_null
HttpLog.configuration.logger = dev_null
Paperclip.options[:log] = false
module Mastodon
module CLIHelper
def create_progress_bar(total = nil)
ProgressBar.create(total: total, format: '%c/%u |%b%i| %e')
end
def parallelize_with_progress(scope)
ActiveRecord::Base.configurations[Rails.env]['pool'] = options[:concurrency]
progress = create_progress_bar(scope.count)
pool = Concurrent::FixedThreadPool.new(options[:concurrency])
total = Concurrent::AtomicFixnum.new(0)
aggregate = Concurrent::AtomicFixnum.new(0)
scope.reorder(nil).find_in_batches do |items|
futures = []
items.each do |item|
futures << Concurrent::Future.execute(executor: pool) do
ActiveRecord::Base.connection_pool.with_connection do
begin
progress.log("Processing #{item.id}") if options[:verbose]
result = yield(item)
aggregate.increment(result) if result.is_a?(Integer)
rescue => e
progress.log pastel.red("Error processing #{item.id}: #{e}")
ensure
progress.increment
end
end
end
end
total.increment(items.size)
futures.map(&:value)
end
progress.finish
[total.value, aggregate.value]
end
def pastel
@pastel ||= Pastel.new
end
end
end
......@@ -7,10 +7,14 @@ require_relative 'cli_helper'
module Mastodon
class DomainsCLI < Thor
include CLIHelper
def self.exit_on_failure?
true
end
option :concurrency, type: :numeric, default: 5, aliases: [:c]
option :verbose, type: :boolean, aliases: [:v]
option :dry_run, type: :boolean
option :whitelist_mode, type: :boolean
desc 'purge [DOMAIN]', 'Remove accounts from a DOMAIN without a trace'
......@@ -24,7 +28,6 @@ module Mastodon
are removed from the database.
LONG_DESC
def purge(domain = nil)
removed = 0
dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
scope = begin
......@@ -38,25 +41,22 @@ module Mastodon
end
end
scope.find_each do |account|
processed, = parallelize_with_progress(scope) do |account|
SuspendAccountService.new.call(account, destroy: true) unless options[:dry_run]
removed += 1
say('.', :green, false)
end
DomainBlock.where(domain: domain).destroy_all unless options[:dry_run]
say
say("Removed #{removed} accounts#{dry_run}", :green)
say("Removed #{processed} accounts#{dry_run}", :green)
custom_emojis = CustomEmoji.where(domain: domain)
custom_emojis_count = custom_emojis.count
custom_emojis.destroy_all unless options[:dry_run]
say("Removed #{custom_emojis_count} custom emojis", :green)
end
option :concurrency, type: :numeric, default: 50, aliases: [:c]
option :silent, type: :boolean, default: false, aliases: [:s]
option :format, type: :string, default: 'summary', aliases: [:f]
option :exclude_suspended, type: :boolean, default: false, aliases: [:x]
desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
......@@ -69,8 +69,6 @@ module Mastodon
The --concurrency (-c) option controls the number of threads performing HTTP
requests at the same time. More threads means the crawl may complete faster.
The --silent (-s) option controls progress output.
The --format (-f) option controls how the data is displayed at the end. By
default (`summary`), a summary of the statistics is returned. The other options
are `domains`, which returns a newline-delimited list of all discovered peers,
......@@ -87,6 +85,7 @@ module Mastodon
start_at = Time.now.to_f
seed = start ? [start] : Account.remote.domains
blocked_domains = Regexp.new('\\.?' + DomainBlock.where(severity: 1).pluck(:domain).join('|') + '$')
progress = create_progress_bar
pool = Concurrent::ThreadPoolExecutor.new(min_threads: 0, max_threads: options[:concurrency], idletime: 10, auto_terminate: true, max_queue: 0)