jspider.properties
上传用户:qing5858
上传日期:2015-10-27
资源大小:6056k
文件大小:10k
# -----------------------------------------------------------------------------
# JSpider Main Configuration File
# -----------------------------------------------------------------------------
#
# $Id: jspider.properties,v 1.23 2003/04/03 16:10:26 vanrogu Exp $
#
# This is the main JSpider configuration file. Most system configuration is
# done through this file.
# Configuration info that is to be defined on a per-site or per-plugin basis
# can be found in the specific configuration files.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Proxy Configuration
# -----------------------------------------------------------------------------
# This section controls the proxy server/firewall configuration. If you want
# JSpider to be able to access websites on the internet, and you have to go
# through a proxy server, you can configure it here.
# These are general proxy settings; if some hosts are accessible without using
# a proxy server, you can configure not to use a proxy server for these hosts
# in the host-specific configuration file.
#
# PROPERTIES :
# jspider.proxy.use
# determines whether a proxy server should be used (true/false)
# the other jspider.proxy.* settings are only applicable if
# jspider.proxy.use is set to true
#
# jspider.proxy.host
# DNS name or IP address of the proxy server to use
#
# jspider.proxy.port
# port the proxy server is listening on
#
# jspider.proxy.authenticate
# whether authentication should be used (true/false)
#
# jspider.proxy.user
# user account on the proxy server to be used
# only applicable if jspider.proxy.authenticate=true
#
# jspider.proxy.password
# password of the account on the proxy server
# only applicable if jspider.proxy.authenticate=true
#
# -----------------------------------------------------------------------------
# The values below are @name@ placeholders rather than literal settings;
# presumably they are substituted when this file is built or installed -
# TODO confirm against the build script.
# NOTE(review): any real value placed in jspider.proxy.password is stored in
# this file as plain text.
jspider.proxy.use=@jspider.proxy.use@
jspider.proxy.host=@jspider.proxy.host@
jspider.proxy.port=@jspider.proxy.port@
jspider.proxy.authenticate=@jspider.proxy.authenticate@
jspider.proxy.user=@jspider.proxy.user@
jspider.proxy.password=@jspider.proxy.password@
# -----------------------------------------------------------------------------
# Storage Configuration
# -----------------------------------------------------------------------------
#
# Configuration of the storage subsystem of JSpider, that will keep track of
# data, resources found, relationships between resources and statuses of
# resources.
# By default, an in-memory storage implementation is used.
#
# PROPERTIES :
#
# jspider.storage.provider
# name of the Java class that will provide the Storage implementation to be
# used by jspider.
# by passing a class that implements the StorageProvider interface, you
# can easily plug in your own storage implementation.
#
# -----------------------------------------------------------------------------
# Active provider: the in-memory implementation.
jspider.storage.provider=net.javacoding.jspider.core.storage.memory.InMemoryStorageProvider
# Alternative provider, disabled. To select it, uncomment the next line and
# comment out the active provider line.
#jspider.storage.provider=net.javacoding.jspider.core.storage.jdbc.JdbcStorageProvider
# JDBC driver class, connection URL and credentials. Presumably these are only
# read when the JDBC provider is selected - TODO confirm. User and password are
# left empty here.
jspider.storage.config.driver=com.mysql.jdbc.Driver
jspider.storage.config.url=jdbc:mysql://localhost/jspider
jspider.storage.config.user=
jspider.storage.config.password=
# -----------------------------------------------------------------------------
# Task Scheduler Configuration
# -----------------------------------------------------------------------------
#
# Configuration for the task scheduler.
# Every job that needs to be carried out by JSpider is implemented as a Job,
# that is entered into the JobScheduler. The worker thread pools get their
# jobs from this Task Scheduler.
#
# PROPERTIES :
#
# jspider.taskscheduler.provider
# class name of the class that will provide the TaskScheduler interface
# implementation to be used by JSpider.
# This way, you can easily plug in your own TaskScheduler implementation.
#
# jspider.taskscheduler.monitoring.enabled
# whether monitoring events should be generated regarding the status of the
# task scheduler. This way, you can monitor the number and type of jobs
# that are waiting to be executed, or already executed, and find bottlenecks
# in the system or simply estimate the finish time.
#
# jspider.taskscheduler.monitoring.interval
# the number of milliseconds between two monitoring events with the status
# of the task scheduler.
#
# -----------------------------------------------------------------------------
# Scheduler implementation class.
jspider.taskscheduler.provider=net.javacoding.jspider.core.task.impl.DefaultSchedulerProvider
# Scheduler monitoring is switched on, with one status event every 1000 ms.
jspider.taskscheduler.monitoring.enabled=true
jspider.taskscheduler.monitoring.interval=1000
# -----------------------------------------------------------------------------
# Threading Configuration
# -----------------------------------------------------------------------------
#
# Determines the number of threads in the different worker thread pools used
# by JSpider. These settings will affect the system's performance and
# scalability.
# Normally, the number of spider threads (threads that go out and fetch data
# from the web servers) needs to be much higher than the number of parser
# threads (threads that inspect the data and find new URLs in it).
#
# PROPERTIES:
#
# jspider.threads.spiders.count
# the number of threads assigned to fetching data over the internet.
#
# jspider.threads.spiders.monitoring.enabled
# whether monitoring events should be generated about the status of this
# thread pool. If enabled, monitoring events will be dispatched by the
# system to allow you to monitor the load on the threadpool. (true/false)
#
# jspider.threads.spiders.monitoring.interval
# the number of milliseconds between two snapshots of the status of the
# threadpool with the spider threads. At each interval, a monitoring
# event will be triggered notifying you of the status of this threadpool.
# only used if jspider.threads.spiders.monitoring.enabled=true
#
# jspider.threads.thinkers.count
# the number of threads assigned to inspecting fetched data and searching
# for new URLs and sites in it.
# A very low number (1 or 2) should really be enough in most circumstances
#
# jspider.threads.thinkers.monitoring.enabled
# whether monitoring events should be generated about the status of this
# thread pool. If enabled, monitoring events will be dispatched by the
# system to allow you to monitor the load on the threadpool. (true/false)
#
# jspider.threads.thinkers.monitoring.interval
# the number of milliseconds between two snapshots of the status of the
# threadpool with the parser threads. At each interval, a monitoring
# event will be triggered notifying you of the status of this threadpool.
# only used if jspider.threads.thinkers.monitoring.enabled=true
#
# -----------------------------------------------------------------------------
# Spider (fetching) pool: 5 threads, monitoring on, snapshot every 1000 ms.
jspider.threads.spiders.count=5
jspider.threads.spiders.monitoring.enabled=true
jspider.threads.spiders.monitoring.interval=1000
# Thinker (parsing) pool: 1 thread, monitoring on, snapshot every 1000 ms.
jspider.threads.thinkers.count=1
jspider.threads.thinkers.monitoring.enabled=true
jspider.threads.thinkers.monitoring.interval=1000
# -----------------------------------------------------------------------------
# User Agent Configuration
# -----------------------------------------------------------------------------
#
# This configuration controls the User Agent that is used by JSpider.
# If you access hosts on the internet, please use the default one, as this
# allows webmasters to configure the allowed parts of the site via the site's
# robots.txt file.
# If you change this setting, you can test sites that generate different output
# for different user agents with JSpider.
#
# PROPERTIES :
#
# jspider.userAgent
# the User-Agent that JSpider will send along with each HTTP request.
#
# -----------------------------------------------------------------------------
#jspider.userAgent=JSpider (http://j-spider.sourceforge.net)
# -----------------------------------------------------------------------------
# Logging Configuration
# -----------------------------------------------------------------------------
#
# Configuration of the logging subsystem of JSpider. Some very basic
# information (mostly startup info, configuration decisions, errors, warnings,
# etc.) is not dispatched via the event subsystem, but logged via the logging
# interface.
# By default, a log implementation that writes all output to the console from
# which JSpider is started is provided.
#
# PROPERTIES:
#
# jspider.log.provider
# class that implements the LogProvider interface that will provide the Log
# implementation to be used by JSpider.
# This way, you can easily plug in other ways of logging in the system
#
# -----------------------------------------------------------------------------
# Log provider class in use.
# NOTE(review): judging by its name, this provider delegates to Commons
# Logging, so the output destination would depend on that library's own
# configuration - TODO confirm.
jspider.log.provider=net.javacoding.jspider.core.logging.impl.CommonsLoggingLogProvider
# -----------------------------------------------------------------------------
# Rules Configuration
# -----------------------------------------------------------------------------
#
# Tells the system what rules to apply upon encountered URLs.
# The rules in this file are applied upon ALL urls, but you can configure the
# rules on a per-site level in the site's configuration file.
#
# PROPERTIES:
#
# jspider.rules.spider.count
# The number of rules that will be applied on all URLs before being taken
# into account for spidering (fetching)
#
# jspider.rules.spider.<number>.class
# the name of the class that provides the rule implementation by which the
# urls should be handled
#
# jspider.rules.parser.count
# The number of rules that will be applied on all URLs before being taken
# into account for parsing
#
# jspider.rules.parser.<number>.class
# the name of the class that provides the rule implementation by which the
# urls should be handled
#
# -----------------------------------------------------------------------------
# Rule entries are numbered starting at 1 here; presumably they must run from
# 1 up to the matching *.count value - TODO confirm.
# One spider rule; its class name suggests it admits only HTTP URLs - confirm.
jspider.rules.spider.count=1
jspider.rules.spider.1.class=net.javacoding.jspider.mod.rule.OnlyHttpProtocolRule
# One parser rule; its class name suggests it admits only text/html content -
# confirm.
jspider.rules.parser.count=1
jspider.rules.parser.1.class=net.javacoding.jspider.mod.rule.TextHtmlMimeTypeOnlyRule