# Form schema for the FSCrawler app.
#
# `groups` declares the sections of the form; every top-level entry in
# `questions` names one of them through its `group` key. Each question carries
# a `schema` giving its type, default and validation. Nested `attrs` (dict) and
# `items` (list) describe child fields.
#
# NOTE(review): the indentation of this file was reconstructed from a flattened
# copy. Entries without a `group` key (`additional_envs`, `host_network`,
# `additional_storage`) were placed inside the preceding dict question's
# `attrs` — confirm against the consumer's expected value layout.

groups:
  - name: FSCrawler Configuration
    description: Configure FSCrawler
  - name: Network Configuration
    description: Configure Network for FSCrawler
  - name: Storage Configuration
    description: Configure Storage for FSCrawler
  - name: Labels Configuration
    description: Configure Labels for FSCrawler
  - name: Resources Configuration
    description: Configure Resources for FSCrawler

questions:
  # Timezone, a required string defaulting to Etc/UTC.
  - variable: TZ
    group: FSCrawler Configuration
    label: Timezone
    schema:
      type: string
      default: Etc/UTC
      required: true
      $ref:
        - definitions/timezone

  # FSCrawler settings: image variant, job name, loop count, restart flag and
  # extra environment variables.
  - variable: fscrawler
    label: ""
    group: FSCrawler Configuration
    schema:
      type: dict
      attrs:
        - variable: image_selector
          label: Image
          description: |
            The image to use for FSCrawler.</br>
            Images with OCR support are a lot larger than images without OCR support.</br>
            Approximate image sizes:</br>
            - With OCR Support: 1.2GB</br>
            - Without OCR Support: 0.5GB
          schema:
            type: string
            default: "image"
            required: true
            enum:
              - value: "image"
                description: With OCR Support - Elasticsearch 7 and 8
              - value: "no_ocr_image"
                description: Without OCR Support - Elasticsearch 7 and 8
        - variable: job_name
          label: Job Name
          description: |
            The name of the FSCrawler job to run. </br>
            A _settings.yaml file in the directory named after the job name will have to be manually created.
          schema:
            type: string
            default: ""
            required: true
        - variable: loop
          label: Loop
          description: |
            The number of times to run the job.</br>
            https://fscrawler.readthedocs.io/en/latest/admin/cli-options.html#loop </br>
            -1 means run forever. </br>
            0 means never run. </br>
          schema:
            type: int
            default: -1
            required: true
            # -1 is the lowest accepted value (documented above as "run forever").
            min: -1
        - variable: restart
          label: Restart
          description: |
            Restart the job from the beginning.</br>
            https://fscrawler.readthedocs.io/en/latest/admin/cli-options.html#restart
          schema:
            type: boolean
            default: false
        - variable: additional_envs
          label: Additional Environment Variables
          schema:
            type: list
            default: []
            items:
              - variable: env
                label: Environment Variable
                schema:
                  type: dict
                  attrs:
                    - variable: name
                      label: Name
                      schema:
                        type: string
                        required: true
                    - variable: value
                      label: Value
                      schema:
                        type: string

  # Network settings: REST port binding and the host-network toggle.
  - variable: network
    label: ""
    group: Network Configuration
    schema:
      type: dict
      attrs:
        - variable: rest_port
          label: REST Port
          description: |
            Enable Rest API Service for FSCrawler by setting bind mode to something other than None.</br>
            https://fscrawler.readthedocs.io/en/latest/admin/fs/rest.html</br>
            Additional configuration is needed in the job file. Check the Notes card
            after installation for more information.
          schema:
            type: dict
            attrs:
              - variable: bind_mode
                label: Port Bind Mode
                description: |
                  The port bind mode.</br>
                  - Publish: The port will be published on the host for external access.</br>
                  - Expose: The port will be exposed for inter-container communication.</br>
                  - None: The port will not be exposed or published.</br>
                  Note: If the Dockerfile defines an EXPOSE directive,
                  the port will still be exposed for inter-container communication regardless of this setting.
                schema:
                  type: string
                  # The empty string is the "None" choice listed below.
                  default: ""
                  enum:
                    - value: "published"
                      description: Publish port on the host for external access
                    - value: "exposed"
                      description: Expose port for inter-container communication
                    - value: ""
                      description: None
              - variable: port_number
                label: Port Number
                schema:
                  type: int
                  default: 30084
                  min: 1
                  max: 65535
                  required: true
              - variable: host_ips
                label: Host IPs
                description: IPs on the host to bind this port
                schema:
                  type: list
                  # Only shown when the port is published on the host.
                  show_if: [["bind_mode", "=", "published"]]
                  default: []
                  items:
                    - variable: host_ip
                      label: Host IP
                      schema:
                        type: string
                        required: true
                        $ref:
                          - definitions/node_bind_ip
        - variable: host_network
          label: Host Network
          description: |
            Bind to the host network. It's recommended to keep this disabled.
          schema:
            type: boolean
            default: false

  # Storage settings: the jobs storage plus any number of additional mounts.
  - variable: storage
    label: ""
    group: Storage Configuration
    schema:
      type: dict
      attrs:
        - variable: jobs
          label: FSCrawler Jobs Storage
          description: The path to store FSCrawler Jobs.
          schema:
            type: dict
            attrs:
              - variable: type
                label: Type
                description: |
                  ixVolume: Is dataset created automatically by the system.</br>
                  Host Path: Is a path that already exists on the system.
                schema:
                  type: string
                  required: true
                  default: "ix_volume"
                  enum:
                    - value: "host_path"
                      description: Host Path (Path that already exists on the system)
                    - value: "ix_volume"
                      description: ixVolume (Dataset created automatically by the system)
              - variable: ix_volume_config
                label: ixVolume Configuration
                description: The configuration for the ixVolume dataset.
                schema:
                  type: dict
                  show_if: [["type", "=", "ix_volume"]]
                  $ref:
                    - "normalize/ix_volume"
                  attrs:
                    - variable: acl_enable
                      label: Enable ACL
                      description: Enable ACL for the storage.
                      schema:
                        type: boolean
                        default: false
                    - variable: dataset_name
                      label: Dataset Name
                      description: The name of the dataset to use for storage.
                      schema:
                        type: string
                        required: true
                        # Hidden field with a fixed default of "jobs".
                        hidden: true
                        default: "jobs"
                    - variable: acl_entries
                      label: ACL Configuration
                      schema:
                        type: dict
                        show_if: [["acl_enable", "=", true]]
                        attrs: []
              - variable: host_path_config
                label: Host Path Configuration
                schema:
                  type: dict
                  show_if: [["type", "=", "host_path"]]
                  attrs:
                    - variable: acl_enable
                      label: Enable ACL
                      description: Enable ACL for the storage.
                      schema:
                        type: boolean
                        default: false
                    # `acl` and `path` have opposite show_if conditions on
                    # `acl_enable`, so only one of them is shown at a time.
                    - variable: acl
                      label: ACL Configuration
                      schema:
                        type: dict
                        show_if: [["acl_enable", "=", true]]
                        attrs: []
                        $ref:
                          - "normalize/acl"
                    - variable: path
                      label: Host Path
                      description: The host path to use for storage.
                      schema:
                        type: hostpath
                        show_if: [["acl_enable", "=", false]]
                        required: true
        - variable: additional_storage
          label: Additional Storage
          schema:
            type: list
            default: []
            items:
              - variable: storageEntry
                label: Storage Entry
                schema:
                  type: dict
                  attrs:
                    - variable: type
                      label: Type
                      description: |
                        ixVolume: Is dataset created automatically by the system.</br>
                        Host Path: Is a path that already exists on the system.</br>
                        SMB Share: Is a SMB share that is mounted to as a volume.
                      schema:
                        type: string
                        required: true
                        default: "ix_volume"
                        enum:
                          - value: "host_path"
                            description: Host Path (Path that already exists on the system)
                          - value: "ix_volume"
                            description: ixVolume (Dataset created automatically by the system)
                          - value: "cifs"
                            description: SMB/CIFS Share (Mounts a volume to a SMB share)
                    - variable: read_only
                      label: Read Only
                      description: Mount the volume as read only.
                      schema:
                        type: boolean
                        default: false
                    - variable: mount_path
                      label: Mount Path
                      description: The path inside the container to mount the storage.
                      schema:
                        type: path
                        required: true
                    - variable: host_path_config
                      label: Host Path Configuration
                      schema:
                        type: dict
                        show_if: [["type", "=", "host_path"]]
                        attrs:
                          - variable: acl_enable
                            label: Enable ACL
                            description: Enable ACL for the storage.
                            schema:
                              type: boolean
                              default: false
                          - variable: acl
                            label: ACL Configuration
                            schema:
                              type: dict
                              show_if: [["acl_enable", "=", true]]
                              attrs: []
                              $ref:
                                - "normalize/acl"
                          - variable: path
                            label: Host Path
                            description: The host path to use for storage.
                            schema:
                              type: hostpath
                              show_if: [["acl_enable", "=", false]]
                              required: true
                    - variable: ix_volume_config
                      label: ixVolume Configuration
                      description: The configuration for the ixVolume dataset.
                      schema:
                        type: dict
                        show_if: [["type", "=", "ix_volume"]]
                        $ref:
                          - "normalize/ix_volume"
                        attrs:
                          - variable: acl_enable
                            label: Enable ACL
                            description: Enable ACL for the storage.
                            schema:
                              type: boolean
                              default: false
                          - variable: dataset_name
                            label: Dataset Name
                            description: The name of the dataset to use for storage.
                            schema:
                              type: string
                              required: true
                              default: "storage_entry"
                          - variable: acl_entries
                            label: ACL Configuration
                            schema:
                              type: dict
                              show_if: [["acl_enable", "=", true]]
                              attrs: []
                    - variable: cifs_config
                      label: SMB Configuration
                      description: The configuration for the SMB dataset.
                      schema:
                        type: dict
                        show_if: [["type", "=", "cifs"]]
                        attrs:
                          - variable: server
                            label: Server
                            description: The server to mount the SMB share.
                            schema:
                              type: string
                              required: true
                          - variable: path
                            label: Path
                            description: The path to mount the SMB share.
                            schema:
                              type: string
                              required: true
                          - variable: username
                            label: Username
                            description: The username to use for the SMB share.
                            schema:
                              type: string
                              required: true
                          - variable: password
                            label: Password
                            description: The password to use for the SMB share.
                            schema:
                              type: string
                              required: true
                              private: true
                          - variable: domain
                            label: Domain
                            description: The domain to use for the SMB share.
                            schema:
                              type: string

  # Key/value labels, each with the list of containers it applies to.
  - variable: labels
    label: ""
    group: Labels Configuration
    schema:
      type: list
      default: []
      items:
        - variable: label
          label: Label
          schema:
            type: dict
            attrs:
              - variable: key
                label: Key
                schema:
                  type: string
                  required: true
              - variable: value
                label: Value
                schema:
                  type: string
                  required: true
              - variable: containers
                label: Containers
                description: Containers where the label should be applied
                schema:
                  type: list
                  items:
                    - variable: container
                      label: Container
                      schema:
                        type: string
                        required: true
                        enum:
                          - value: fscrawler
                            description: fscrawler

  # Resource limits: CPU count and memory in MB.
  - variable: resources
    label: ""
    group: Resources Configuration
    schema:
      type: dict
      attrs:
        - variable: limits
          label: Limits
          schema:
            type: dict
            attrs:
              - variable: cpus
                label: CPUs
                description: CPUs limit for FSCrawler.
                schema:
                  type: int
                  default: 2
                  required: true
              - variable: memory
                label: Memory (in MB)
                description: Memory limit for FSCrawler.
                schema:
                  type: int
                  default: 4096
                  required: true