arelle.WebCache#

See COPYRIGHT.md for copyright information.

For SEC EDGAR data access, see https://www.sec.gov/os/accessing-edgar-data (e.g., User-Agent: Sample Company Name AdminContact@<sample company domain>.com).

Module Contents#

Classes#

Functions#

Data#

API#

arelle.WebCache.addServerWebCache#

None

arelle.WebCache.DIRECTORY_INDEX_FILE#

‘!~DirectoryIndex~!’

arelle.WebCache.INF#

‘float(…)’

arelle.WebCache.RETRIEVAL_RETRY_COUNT#

5

arelle.WebCache.HTTP_USER_AGENT#

‘format(…)’

arelle.WebCache.proxyDirFmt(httpProxyTuple)#
arelle.WebCache.proxyTuple(url)#
arelle.WebCache.lastModifiedTime(headers)#
class arelle.WebCache.WebCache(cntlr: arelle.Cntlr.Cntlr, httpProxyTuple: tuple[bool, str, str, str, str] | None)#

Initialization

default_timeout#

None

property timeout#
property recheck#
property logDownloads#
saveUrlCheckTimes() None#
property noCertificateCheck#
property httpUserAgent#
property httpsRedirect#
resetProxies(httpProxyTuple)#
normalizeFilepath(filepath: str, url: str, cacheDir: str = None) str#

Perform any necessary transformations to filepath.

Parameters:
  • filepath – Filepath to normalize.

  • url – Original URL (for http/https redirect).

  • cacheDir – Cache root directory.

Returns:

Normalized filepath.

normalizeUrl(url: Optional[str], base: Optional[str] = None) Any#
encodeForFilename(pathpart)#
urlToCacheFilepath(url: str, cacheDir: str | None = None) str#

Converts url into the corresponding cache filepath in cacheDir.

Parameters:
  • url – URL to convert.

  • cacheDir – Cache root directory.

Returns:

Cache filepath.

cacheFilepathToUrl(cacheFilepath: str, cacheDir: str | None = None) str#
getfilename(url: str | None, base: str | None = None, reload: bool = False, checkModifiedTime: bool = False, normalize: bool = False, filenameOnly: bool = False) str | None#
_checkIfNewerOnWeb(url: str, filepath: str) bool#
Parameters:
  • url – URL to retrieve web timestamp from

  • filepath – Filepath to retrieve local timestamp from

Returns:

Whether the web resource at url is newer than the local file at filepath.

static _getTimeString(timeValue: time.time) str#
Parameters:

timeValue

Returns:

UTC-formatted string representation of timeValue

static _quotedUrl(url: str) str#
Parameters:

url

Returns:

url with scheme-specific-part quoted except for parameter separators

_downloadFile(url: str, filepath: str, retrievingDueToRecheckInterval: bool = False, retryCount: int = 5) bool#

Downloads the file at url to a temporary location before copying it to filepath.

Parameters:
  • url – Web resource to download.

  • filepath – End destination for downloaded file.

  • retrievingDueToRecheckInterval – Determines how errors are handled when download is part of a cache recheck.

  • retryCount – Number of times to retry download.

Returns:

Whether filepath should now be used.

internetRecheckFailedRecovery(url: str, err: str | Exception, timeNowStr: str) None#
reportProgress(blockCount, blockSize, totalSize)#
clear()#
getheaders(url)#
geturl(url)#
retrieve(url, filename=None, filestream=None, reporthook=None, data=None)#