session.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. """PipSession and supporting code, containing all pip-specific
  2. network request configuration and behavior.
  3. """
  4. # The following comment should be removed at some point in the future.
  5. # mypy: disallow-untyped-defs=False
  6. import email.utils
  7. import json
  8. import logging
  9. import mimetypes
  10. import os
  11. import platform
  12. import sys
  13. import warnings
  14. from pip._vendor import requests, six, urllib3
  15. from pip._vendor.cachecontrol import CacheControlAdapter
  16. from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
  17. from pip._vendor.requests.models import Response
  18. from pip._vendor.requests.structures import CaseInsensitiveDict
  19. from pip._vendor.six.moves.urllib import parse as urllib_parse
  20. from pip._vendor.urllib3.exceptions import InsecureRequestWarning
  21. from pip import __version__
  22. from pip._internal.network.auth import MultiDomainBasicAuth
  23. from pip._internal.network.cache import SafeFileCache
  24. # Import ssl from compat so the initial import occurs in only one place.
  25. from pip._internal.utils.compat import HAS_TLS, ipaddress, ssl
  26. from pip._internal.utils.filesystem import check_path_owner
  27. from pip._internal.utils.glibc import libc_ver
  28. from pip._internal.utils.misc import (
  29. build_url_from_netloc,
  30. get_installed_version,
  31. parse_netloc,
  32. )
  33. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  34. from pip._internal.utils.urls import url_to_path
  35. if MYPY_CHECK_RUNNING:
  36. from typing import (
  37. Iterator, List, Optional, Tuple, Union,
  38. )
  39. from pip._internal.models.link import Link
  40. SecureOrigin = Tuple[str, str, Optional[Union[int, str]]]
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Ignore warning raised when using --trusted-host.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
# Origins that are considered secure without any user configuration.
# Each entry is a (protocol, hostname, port) tuple. "*" acts as a wildcard,
# and the hostname slot may hold an IP network in CIDR notation.
SECURE_ORIGINS = [
    # protocol, hostname, port
    # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
    ("https", "*", "*"),
    ("*", "localhost", "*"),
    ("*", "127.0.0.0/8", "*"),
    ("*", "::1/128", "*"),
    ("file", "*", None),
    # ssh is always secure.
    ("ssh", "*", "*"),
]  # type: List[SecureOrigin]
# These are environment variables present when running under various
# CI systems.  For each variable, some CI systems that use the variable
# are indicated.  The collection was chosen so that for each of a number
# of popular systems, at least one of the environment variables is used.
# This list is used to provide some indication of and lower bound for
# CI traffic to PyPI.  Thus, it is okay if the list is not comprehensive.
# For more background, see: https://github.com/pypa/pip/issues/5499
CI_ENVIRONMENT_VARIABLES = (
    # Azure Pipelines
    'BUILD_BUILDID',
    # Jenkins
    'BUILD_ID',
    # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
    'CI',
    # Explicit environment variable.
    'PIP_IS_CI',
)
  72. def looks_like_ci():
  73. # type: () -> bool
  74. """
  75. Return whether it looks like pip is running under CI.
  76. """
  77. # We don't use the method of checking for a tty (e.g. using isatty())
  78. # because some CI systems mimic a tty (e.g. Travis CI). Thus that
  79. # method doesn't provide definitive information in either direction.
  80. return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
  81. def user_agent():
  82. """
  83. Return a string representing the user agent.
  84. """
  85. data = {
  86. "installer": {"name": "pip", "version": __version__},
  87. "python": platform.python_version(),
  88. "implementation": {
  89. "name": platform.python_implementation(),
  90. },
  91. }
  92. if data["implementation"]["name"] == 'CPython':
  93. data["implementation"]["version"] = platform.python_version()
  94. elif data["implementation"]["name"] == 'PyPy':
  95. if sys.pypy_version_info.releaselevel == 'final':
  96. pypy_version_info = sys.pypy_version_info[:3]
  97. else:
  98. pypy_version_info = sys.pypy_version_info
  99. data["implementation"]["version"] = ".".join(
  100. [str(x) for x in pypy_version_info]
  101. )
  102. elif data["implementation"]["name"] == 'Jython':
  103. # Complete Guess
  104. data["implementation"]["version"] = platform.python_version()
  105. elif data["implementation"]["name"] == 'IronPython':
  106. # Complete Guess
  107. data["implementation"]["version"] = platform.python_version()
  108. if sys.platform.startswith("linux"):
  109. from pip._vendor import distro
  110. distro_infos = dict(filter(
  111. lambda x: x[1],
  112. zip(["name", "version", "id"], distro.linux_distribution()),
  113. ))
  114. libc = dict(filter(
  115. lambda x: x[1],
  116. zip(["lib", "version"], libc_ver()),
  117. ))
  118. if libc:
  119. distro_infos["libc"] = libc
  120. if distro_infos:
  121. data["distro"] = distro_infos
  122. if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
  123. data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
  124. if platform.system():
  125. data.setdefault("system", {})["name"] = platform.system()
  126. if platform.release():
  127. data.setdefault("system", {})["release"] = platform.release()
  128. if platform.machine():
  129. data["cpu"] = platform.machine()
  130. if HAS_TLS:
  131. data["openssl_version"] = ssl.OPENSSL_VERSION
  132. setuptools_version = get_installed_version("setuptools")
  133. if setuptools_version is not None:
  134. data["setuptools_version"] = setuptools_version
  135. # Use None rather than False so as not to give the impression that
  136. # pip knows it is not being run under CI. Rather, it is a null or
  137. # inconclusive result. Also, we include some value rather than no
  138. # value to make it easier to know that the check has been run.
  139. data["ci"] = True if looks_like_ci() else None
  140. user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
  141. if user_data is not None:
  142. data["user_data"] = user_data
  143. return "{data[installer][name]}/{data[installer][version]} {json}".format(
  144. data=data,
  145. json=json.dumps(data, separators=(",", ":"), sort_keys=True),
  146. )
  147. class LocalFSAdapter(BaseAdapter):
  148. def send(self, request, stream=None, timeout=None, verify=None, cert=None,
  149. proxies=None):
  150. pathname = url_to_path(request.url)
  151. resp = Response()
  152. resp.status_code = 200
  153. resp.url = request.url
  154. try:
  155. stats = os.stat(pathname)
  156. except OSError as exc:
  157. resp.status_code = 404
  158. resp.raw = exc
  159. else:
  160. modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
  161. content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
  162. resp.headers = CaseInsensitiveDict({
  163. "Content-Type": content_type,
  164. "Content-Length": stats.st_size,
  165. "Last-Modified": modified,
  166. })
  167. resp.raw = open(pathname, "rb")
  168. resp.close = resp.raw.close
  169. return resp
  170. def close(self):
  171. pass
  172. class InsecureHTTPAdapter(HTTPAdapter):
  173. def cert_verify(self, conn, url, verify, cert):
  174. conn.cert_reqs = 'CERT_NONE'
  175. conn.ca_certs = None
  176. class PipSession(requests.Session):
  177. timeout = None # type: Optional[int]
  178. def __init__(self, *args, **kwargs):
  179. """
  180. :param trusted_hosts: Domains not to emit warnings for when not using
  181. HTTPS.
  182. """
  183. retries = kwargs.pop("retries", 0)
  184. cache = kwargs.pop("cache", None)
  185. trusted_hosts = kwargs.pop("trusted_hosts", []) # type: List[str]
  186. index_urls = kwargs.pop("index_urls", None)
  187. super(PipSession, self).__init__(*args, **kwargs)
  188. # Namespace the attribute with "pip_" just in case to prevent
  189. # possible conflicts with the base class.
  190. self.pip_trusted_origins = [] # type: List[Tuple[str, Optional[int]]]
  191. # Attach our User Agent to the request
  192. self.headers["User-Agent"] = user_agent()
  193. # Attach our Authentication handler to the session
  194. self.auth = MultiDomainBasicAuth(index_urls=index_urls)
  195. # Create our urllib3.Retry instance which will allow us to customize
  196. # how we handle retries.
  197. retries = urllib3.Retry(
  198. # Set the total number of retries that a particular request can
  199. # have.
  200. total=retries,
  201. # A 503 error from PyPI typically means that the Fastly -> Origin
  202. # connection got interrupted in some way. A 503 error in general
  203. # is typically considered a transient error so we'll go ahead and
  204. # retry it.
  205. # A 500 may indicate transient error in Amazon S3
  206. # A 520 or 527 - may indicate transient error in CloudFlare
  207. status_forcelist=[500, 503, 520, 527],
  208. # Add a small amount of back off between failed requests in
  209. # order to prevent hammering the service.
  210. backoff_factor=0.25,
  211. )
  212. # Check to ensure that the directory containing our cache directory
  213. # is owned by the user current executing pip. If it does not exist
  214. # we will check the parent directory until we find one that does exist.
  215. if cache and not check_path_owner(cache):
  216. logger.warning(
  217. "The directory '%s' or its parent directory is not owned by "
  218. "the current user and the cache has been disabled. Please "
  219. "check the permissions and owner of that directory. If "
  220. "executing pip with sudo, you may want sudo's -H flag.",
  221. cache,
  222. )
  223. cache = None
  224. # We want to _only_ cache responses on securely fetched origins. We do
  225. # this because we can't validate the response of an insecurely fetched
  226. # origin, and we don't want someone to be able to poison the cache and
  227. # require manual eviction from the cache to fix it.
  228. if cache:
  229. secure_adapter = CacheControlAdapter(
  230. cache=SafeFileCache(cache),
  231. max_retries=retries,
  232. )
  233. else:
  234. secure_adapter = HTTPAdapter(max_retries=retries)
  235. # Our Insecure HTTPAdapter disables HTTPS validation. It does not
  236. # support caching (see above) so we'll use it for all http:// URLs as
  237. # well as any https:// host that we've marked as ignoring TLS errors
  238. # for.
  239. insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
  240. # Save this for later use in add_insecure_host().
  241. self._insecure_adapter = insecure_adapter
  242. self.mount("https://", secure_adapter)
  243. self.mount("http://", insecure_adapter)
  244. # Enable file:// urls
  245. self.mount("file://", LocalFSAdapter())
  246. for host in trusted_hosts:
  247. self.add_trusted_host(host, suppress_logging=True)
  248. def add_trusted_host(self, host, source=None, suppress_logging=False):
  249. # type: (str, Optional[str], bool) -> None
  250. """
  251. :param host: It is okay to provide a host that has previously been
  252. added.
  253. :param source: An optional source string, for logging where the host
  254. string came from.
  255. """
  256. if not suppress_logging:
  257. msg = 'adding trusted host: {!r}'.format(host)
  258. if source is not None:
  259. msg += ' (from {})'.format(source)
  260. logger.info(msg)
  261. host_port = parse_netloc(host)
  262. if host_port not in self.pip_trusted_origins:
  263. self.pip_trusted_origins.append(host_port)
  264. self.mount(build_url_from_netloc(host) + '/', self._insecure_adapter)
  265. if not host_port[1]:
  266. # Mount wildcard ports for the same host.
  267. self.mount(
  268. build_url_from_netloc(host) + ':',
  269. self._insecure_adapter
  270. )
  271. def iter_secure_origins(self):
  272. # type: () -> Iterator[SecureOrigin]
  273. for secure_origin in SECURE_ORIGINS:
  274. yield secure_origin
  275. for host, port in self.pip_trusted_origins:
  276. yield ('*', host, '*' if port is None else port)
  277. def is_secure_origin(self, location):
  278. # type: (Link) -> bool
  279. # Determine if this url used a secure transport mechanism
  280. parsed = urllib_parse.urlparse(str(location))
  281. origin_protocol, origin_host, origin_port = (
  282. parsed.scheme, parsed.hostname, parsed.port,
  283. )
  284. # The protocol to use to see if the protocol matches.
  285. # Don't count the repository type as part of the protocol: in
  286. # cases such as "git+ssh", only use "ssh". (I.e., Only verify against
  287. # the last scheme.)
  288. origin_protocol = origin_protocol.rsplit('+', 1)[-1]
  289. # Determine if our origin is a secure origin by looking through our
  290. # hardcoded list of secure origins, as well as any additional ones
  291. # configured on this PackageFinder instance.
  292. for secure_origin in self.iter_secure_origins():
  293. secure_protocol, secure_host, secure_port = secure_origin
  294. if origin_protocol != secure_protocol and secure_protocol != "*":
  295. continue
  296. try:
  297. # We need to do this decode dance to ensure that we have a
  298. # unicode object, even on Python 2.x.
  299. addr = ipaddress.ip_address(
  300. origin_host
  301. if (
  302. isinstance(origin_host, six.text_type) or
  303. origin_host is None
  304. )
  305. else origin_host.decode("utf8")
  306. )
  307. network = ipaddress.ip_network(
  308. secure_host
  309. if isinstance(secure_host, six.text_type)
  310. # setting secure_host to proper Union[bytes, str]
  311. # creates problems in other places
  312. else secure_host.decode("utf8") # type: ignore
  313. )
  314. except ValueError:
  315. # We don't have both a valid address or a valid network, so
  316. # we'll check this origin against hostnames.
  317. if (
  318. origin_host and
  319. origin_host.lower() != secure_host.lower() and
  320. secure_host != "*"
  321. ):
  322. continue
  323. else:
  324. # We have a valid address and network, so see if the address
  325. # is contained within the network.
  326. if addr not in network:
  327. continue
  328. # Check to see if the port matches.
  329. if (
  330. origin_port != secure_port and
  331. secure_port != "*" and
  332. secure_port is not None
  333. ):
  334. continue
  335. # If we've gotten here, then this origin matches the current
  336. # secure origin and we should return True
  337. return True
  338. # If we've gotten to this point, then the origin isn't secure and we
  339. # will not accept it as a valid location to search. We will however
  340. # log a warning that we are ignoring it.
  341. logger.warning(
  342. "The repository located at %s is not a trusted or secure host and "
  343. "is being ignored. If this repository is available via HTTPS we "
  344. "recommend you use HTTPS instead, otherwise you may silence "
  345. "this warning and allow it anyway with '--trusted-host %s'.",
  346. origin_host,
  347. origin_host,
  348. )
  349. return False
  350. def request(self, method, url, *args, **kwargs):
  351. # Allow setting a default timeout on a session
  352. kwargs.setdefault("timeout", self.timeout)
  353. # Dispatch the actual request
  354. return super(PipSession, self).request(method, url, *args, **kwargs)