Package proxy :: Module apacheHandler
[hide private]
[frames | no frames]

Source Code for Module proxy.apacheHandler

  1  # Main entry point for apacheServer.py for the Spacewalk Proxy 
  2  # and/or SSL Redirect Server. 
  3  # 
  4  # Copyright (c) 2008--2015 Red Hat, Inc. 
  5  # 
  6  # This software is licensed to you under the GNU General Public License, 
  7  # version 2 (GPLv2). There is NO WARRANTY for this software, express or 
  8  # implied, including the implied warranties of MERCHANTABILITY or FITNESS 
  9  # FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2 
 10  # along with this software; if not, see 
 11  # http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. 
 12  # 
 13  # Red Hat trademarks are not licensed under GPLv2. No permission is 
 14  # granted to use or replicate Red Hat trademarks that are incorporated 
 15  # in this software or its documentation. 
 16  # 
 17  # ----------------------------------------------------------------------------- 
 18   
 19  # language imports 
 20  import os 
 21  import base64 
 22  import xmlrpclib 
 23  import re 
 24   
 25  # common imports 
 26  from spacewalk.common.rhnConfig import CFG 
 27  from spacewalk.common.rhnLog import log_debug, log_error 
 28  from spacewalk.common.rhnApache import rhnApache 
 29  from spacewalk.common.rhnTB import Traceback 
 30  from spacewalk.common.rhnException import rhnFault, rhnException 
 31  from spacewalk.common import rhnFlags, apache 
 32  from spacewalk.common.rhnLib import setHeaderValue 
 33  from spacewalk.common import byterange 
 34   
 35  from rhn import rpclib, connections 
 36  from rhn.UserDictCase import UserDictCase 
 37  from rhnConstants import HEADER_ACTUAL_URI, HEADER_EFFECTIVE_URI, \ 
 38      HEADER_CHECKSUM, SCHEME_HTTP, SCHEME_HTTPS, URI_PREFIX_KS, \ 
 39      URI_PREFIX_KS_CHECKSUM, COMPONENT_BROKER, COMPONENT_REDIRECT 
 40   
 41  # local imports 
 42  from proxy.rhnProxyAuth import get_proxy_auth 
def getComponentType(req):
    """
        Are we a 'proxy.broker' or a 'proxy.redirect'.

        Checks to see if the last visited Spacewalk Proxy was itself. If so, we
        are a 'proxy.redirect'. If not, then we must be a 'proxy.broker'.

        req is the incoming request; only req.headers_in is consulted.
        Returns COMPONENT_REDIRECT or COMPONENT_BROKER.
    """

    # NOTE: X-RHN-Proxy-Auth described in broker/rhnProxyAuth.py
    if not req.headers_in.has_key('X-RHN-Proxy-Auth'):
        # Request comes from a client, Must be the broker
        return COMPONENT_BROKER

    # pull server id out of "t:o:k:e:n:hostname1,t:o:k:e:n:hostname2,..."
    # The last comma-separated entry belongs to the most recently visited
    # proxy; its first colon-separated field is compared to our own id.
    proxy_auth = req.headers_in['X-RHN-Proxy-Auth']
    last_auth = proxy_auth.split(',')[-1]
    last_visited = last_auth.split(':')[0]
    proxy_server_id = get_proxy_auth().getProxyServerId()

    # is it the same box?
    try:
        log_debug(4, "last_visited", last_visited, "; proxy server id",
                  proxy_server_id)
    except Exception:  # pylint: disable=W0703
        # In case we are called prior to the log files being initialized.
        # Logging is best-effort here, but SystemExit/KeyboardInterrupt
        # must still propagate, hence no bare "except:".
        pass

    if last_visited == proxy_server_id:
        # XXX this assumes redirect runs on the same box as the broker
        return COMPONENT_REDIRECT

    return COMPONENT_BROKER
77
class apacheHandler(rhnApache):

    """ Main apache entry point for the proxy. """
    # Overrides the class attribute that rhnApache presumably uses to pick
    # the message catalog -- TODO confirm against rhnApache.
    _lang_catalog = "proxy"

    def __init__(self):
        """ Initialise the base handler; no component is selected and no
            transport input has been parsed yet.
        """
        rhnApache.__init__(self)
        # Set via set_component(); compared against COMPONENT_BROKER /
        # COMPONENT_REDIRECT by the request handlers.
        self._component = None
        # Becomes an rpclib.transports.Input in headerParserHandler().
        self.input = None
88
89 - def set_component(self, component):
90 self._component = component
91 92 @staticmethod
93 - def _setSessionToken(headers):
94 # extended to always return a token, even if an empty one 95 ret = rhnApache._setSessionToken(headers) 96 if ret: 97 log_debug(4, "Returning", ret) 98 return ret 99 100 # Session token did not verify, we have an empty auth token 101 token = UserDictCase() 102 rhnFlags.set("AUTH_SESSION_TOKEN", token) 103 return token
104
105 - def headerParserHandler(self, req):
106 """ Name-munging if request came from anaconda in response to a 107 kickstart. """ 108 ret = rhnApache.headerParserHandler(self, req) 109 if ret != apache.OK: 110 return ret 111 112 self.input = rpclib.transports.Input(req.headers_in) 113 114 # Before we allow the main handler code to commence, we'll first check 115 # to see if this request came from anaconda in response to a kickstart. 116 # If so, we'll need to do some special name-munging before we continue. 117 118 ret = self._transformKickstartRequest(req) 119 return ret
120
121 - def _transformKickstartRequest(self, req):
122 """ If necessary, this routine will transform a "tinified" anaconda- 123 generated kickstart request into a normalized form capable of being 124 cached effectively by squid. 125 126 This is done by first making a HEAD request 127 to the satellite for the purpose of updating the kickstart progress and 128 retrieving an MD5 sum for the requested file. We then replace the 129 tinyURL part of the URI with the retrieved MD5 sum. This effectively 130 removes session-specific information while allowing us to still cache 131 based on the uniqueness of the file. 132 """ 133 # Kickstart requests only come in the form of a GET, so short-circuit 134 # if that is not the case. 135 136 if (req.method != "GET"): 137 return apache.OK 138 139 log_debug(6, "URI", req.uri) 140 log_debug(6, "COMPONENT", self._component) 141 142 # If we're a broker, we know that this is a kickstart request from 143 # anaconda by checking if the URI begins with /ty/*, otherwise just 144 # return. If we're an SSL redirect, we check that the URI begins with 145 # /ty-cksm/*, otherwise return. 146 147 if self._component == COMPONENT_BROKER: 148 if req.uri.startswith(URI_PREFIX_KS): 149 log_debug(3, "Found a kickstart URI: %s" % req.uri) 150 return self._transformKsRequestForBroker(req) 151 elif self._component == COMPONENT_REDIRECT: 152 if req.uri.startswith(URI_PREFIX_KS_CHECKSUM): 153 log_debug(3, "Found a kickstart checksum URI: %s" % req.uri) 154 return self._transformKsRequestForRedirect(req) 155 156 return apache.OK
157
158 - def _transformKsRequestForBroker(self, req):
159 160 # Get the checksum for the requested resource from the satellite. 161 162 (status, checksum) = self._querySatelliteForChecksum(req) 163 if status != apache.OK or not checksum: 164 return status 165 166 # If we got this far, we have the checksum. Create a new URI based on 167 # the checksum. 168 169 newURI = self._generateCacheableKickstartURI(req.uri, checksum) 170 if not newURI: 171 # Couldn't create a cacheable URI, log an error and revert to 172 # BZ 158236 behavior. 173 174 log_error('Could not create cacheable ks URI from "%s"' % req.uri) 175 return apache.OK 176 177 # Now we must embed the old URI into a header in the original request 178 # so that the SSL Redirect has it available if the resource has not 179 # been cached yet. We will also embed a header that holds the new URI, 180 # so that the content handler can use it later. 181 182 log_debug(3, "Generated new kickstart URI: %s" % newURI) 183 req.headers_in[HEADER_ACTUAL_URI] = req.uri 184 req.headers_in[HEADER_EFFECTIVE_URI] = newURI 185 186 return apache.OK
187 188 @staticmethod
190 191 # If we don't get the actual URI in the headers, we'll decline the 192 # request. 193 194 if not req.headers_in or not req.headers_in.has_key(HEADER_ACTUAL_URI): 195 log_error("Kickstart request header did not include '%s'" 196 % HEADER_ACTUAL_URI) 197 return apache.DECLINED 198 199 # The original URI is embedded in the headers under X-RHN-ActualURI. 200 # Remove it, and place it in the X-RHN-EffectiveURI header. 201 202 req.headers_in[HEADER_EFFECTIVE_URI] = req.headers_in[HEADER_ACTUAL_URI] 203 log_debug(3, "Reverting to old URI: %s" % req.headers_in[HEADER_ACTUAL_URI]) 204 205 return apache.OK
206
207 - def _querySatelliteForChecksum(self, req):
208 """ Sends a HEAD request to the satellite for the purpose of obtaining 209 the checksum for the requested resource. A (status, checksum) 210 tuple is returned. If status is not apache.OK, checksum will be 211 None. If status is OK, and a checksum is not returned, the old 212 BZ 158236 behavior will be used. 213 """ 214 scheme = SCHEME_HTTP 215 if req.server.port == 443: 216 scheme = SCHEME_HTTPS 217 log_debug(6, "Using scheme: %s" % scheme) 218 219 # Initiate a HEAD request to the satellite to retrieve the MD5 sum. 220 # Actually, we make the request through our own proxy first, so 221 # that we don't accidentally bypass necessary authentication 222 # routines. Since it's a HEAD request, the proxy will forward it 223 # directly to the satellite like it would a POST request. 224 225 host = "127.0.0.1" 226 port = req.connection.local_addr[1] 227 228 connection = self._createConnection(host, port, scheme) 229 if not connection: 230 # Couldn't form the connection. Log an error and revert to the 231 # old BZ 158236 behavior. In order to be as robust as possible, 232 # we won't fail here. 233 234 log_error('HEAD req - Could not create connection to %s://%s:%s' 235 % (scheme, host, str(port))) 236 return (apache.OK, None) 237 238 # We obtained the connection successfully. Construct the URL that 239 # we'll connect to. 240 241 pingURL = "%s://%s:%s%s" % (scheme, host, str(port), req.uri) 242 log_debug(6, "Ping URI: %s" % pingURL) 243 244 hdrs = UserDictCase() 245 for k in req.headers_in.keys(): 246 if k.lower() != 'range': # we want checksum of whole file 247 hdrs[k] = re.sub(r'\n(?![ \t])|\r(?![ \t\n])', '', str(req.headers_in[k])) 248 249 log_debug(9, "Using existing headers_in", hdrs) 250 connection.request("HEAD", pingURL, None, hdrs) 251 log_debug(6, "Connection made, awaiting response.") 252 253 # Get the response. 
254 255 response = connection.getresponse() 256 log_debug(6, "Received response status: %s" % response.status) 257 connection.close() 258 259 if (response.status != apache.HTTP_OK) and (response.status != apache.HTTP_PARTIAL_CONTENT): 260 # Something bad happened. Return back back to the client. 261 262 log_debug(1, "HEAD req - Received error code in reponse: %s" 263 % (str(response.status))) 264 return (response.status, None) 265 266 # The request was successful. Dig the MD5 checksum out of the headers. 267 268 responseHdrs = response.msg 269 if not responseHdrs: 270 # No headers?! This shouldn't happen at all. But if it does, 271 # revert to the old # BZ 158236 behavior. 272 273 log_error("HEAD response - No HTTP headers!") 274 return (apache.OK, None) 275 276 if not responseHdrs.has_key(HEADER_CHECKSUM): 277 # No checksum was provided. This could happen if a newer 278 # proxy is talking to an older satellite. To keep things 279 # running smoothly, we'll just revert to the BZ 158236 280 # behavior. 281 282 log_debug(1, "HEAD response - No X-RHN-Checksum field provided!") 283 return (apache.OK, None) 284 285 checksum = responseHdrs[HEADER_CHECKSUM] 286 287 return (apache.OK, checksum)
288 289 @staticmethod
290 - def _generateCacheableKickstartURI(oldURI, checksum):
291 """ 292 This routine computes a new cacheable URI based on the old URI and the 293 checksum. For example, if the checksum is 1234ABCD and the oldURI was: 294 295 /ty/AljAmCEt/RedHat/base/comps.xml 296 297 Then, the new URI will be: 298 299 /ty-cksm/1234ABCD/RedHat/base/comps.xml 300 301 If for some reason the new URI could not be generated, return None. 302 """ 303 304 newURI = URI_PREFIX_KS_CHECKSUM + checksum 305 306 # Strip the first two path pieces off of the oldURI. 307 308 uriParts = oldURI.split('/') 309 numParts = 0 310 for part in uriParts: 311 if len(part) is not 0: # Account for double slashes ("//") 312 numParts += 1 313 if numParts > 2: 314 newURI += "/" + part 315 316 # If the URI didn't have enough parts, return None. 317 318 if numParts <= 2: 319 newURI = None 320 321 return newURI
322 323 @staticmethod
324 - def _createConnection(host, port, scheme):
325 params = {'host': host, 326 'port': port} 327 328 if CFG.has_key('timeout'): 329 params['timeout'] = CFG.TIMEOUT 330 331 if scheme == SCHEME_HTTPS: 332 conn_class = connections.HTTPSConnection 333 else: 334 conn_class = connections.HTTPConnection 335 336 return conn_class(**params)
337
338 - def handler(self, req):
339 """ Main handler to handle all requests pumped through this server. """ 340 341 ret = rhnApache.handler(self, req) 342 if ret != apache.OK: 343 return ret 344 345 log_debug(4, "METHOD", req.method) 346 log_debug(4, "PATH_INFO", req.path_info) 347 log_debug(4, "URI (full path info)", req.uri) 348 log_debug(4, "Component", self._component) 349 350 if self._component == COMPONENT_BROKER: 351 from broker import rhnBroker 352 handlerObj = rhnBroker.BrokerHandler(req) 353 else: 354 # Redirect 355 from redirect import rhnRedirect 356 handlerObj = rhnRedirect.RedirectHandler(req) 357 358 try: 359 ret = handlerObj.handler() 360 except rhnFault, e: 361 return self.response(req, e) 362 363 if rhnFlags.test("NeedEncoding"): 364 return self.response(req, ret) 365 366 # All good; we expect ret to be an HTTP return code 367 if not isinstance(ret, type(1)): 368 raise rhnException("Invalid status code type %s" % type(ret)) 369 log_debug(1, "Leaving with status code %s" % ret) 370 return ret
371 372 @staticmethod
373 - def normalize(response):
374 """ convert a response to the right type for passing back to 375 rpclib.xmlrpclib.dumps 376 """ 377 if isinstance(response, xmlrpclib.Fault): 378 return response 379 return (response,)
380 381 @staticmethod
382 - def response_file(req, response):
383 """ send a file out """ 384 log_debug(3, response.name) 385 # We may set the content type remotely 386 if rhnFlags.test("Content-Type"): 387 req.content_type = rhnFlags.get("Content-Type") 388 else: 389 # Safe default 390 req.content_type = "application/octet-stream" 391 392 # find out the size of the file 393 if response.length == 0: 394 response.file_obj.seek(0, 2) 395 file_size = response.file_obj.tell() 396 response.file_obj.seek(0, 0) 397 else: 398 file_size = response.length 399 400 success_response = apache.OK 401 response_size = file_size 402 403 # Serve up the requested byte range 404 if req.headers_in.has_key("Range"): 405 try: 406 range_start, range_end = \ 407 byterange.parse_byteranges(req.headers_in["Range"], 408 file_size) 409 response_size = range_end - range_start 410 req.headers_out["Content-Range"] = \ 411 byterange.get_content_range(range_start, range_end, file_size) 412 req.headers_out["Accept-Ranges"] = "bytes" 413 414 response.file_obj.seek(range_start) 415 416 # We'll want to send back a partial content rather than ok 417 # if this works 418 req.status = apache.HTTP_PARTIAL_CONTENT 419 success_response = apache.HTTP_PARTIAL_CONTENT 420 421 # For now we will just return the file file on the following exceptions 422 except byterange.InvalidByteRangeException: 423 pass 424 except byterange.UnsatisfyableByteRangeException: 425 pass 426 427 req.headers_out["Content-Length"] = str(response_size) 428 429 # if we loaded this from a real fd, set it as the X-Replace-Content 430 # check for "name" since sometimes we get xmlrpclib.transports.File's that have 431 # a stringIO as the file_obj, and they dont have a .name (ie, 432 # fileLists...) 
433 if response.name: 434 req.headers_out["X-Package-FileName"] = response.name 435 436 xrepcon = req.headers_in.has_key("X-Replace-Content-Active") \ 437 and rhnFlags.test("Download-Accelerator-Path") 438 if xrepcon: 439 fpath = rhnFlags.get("Download-Accelerator-Path") 440 log_debug(1, "Serving file %s" % fpath) 441 req.headers_out["X-Replace-Content"] = fpath 442 # Only set a byte rate if xrepcon is active 443 byte_rate = rhnFlags.get("QOS-Max-Bandwidth") 444 if byte_rate: 445 req.headers_out["X-Replace-Content-Throttle"] = str(byte_rate) 446 447 # send the headers 448 req.send_http_header() 449 450 if req.headers_in.has_key("Range"): 451 # and the file 452 read = 0 453 while read < response_size: 454 # We check the size here in case we're not asked for the entire file. 455 buf = response.read(CFG.BUFFER_SIZE) 456 if not buf: 457 break 458 try: 459 req.write(buf) 460 read = read + CFG.BUFFER_SIZE 461 except IOError: 462 if xrepcon: 463 # We're talking to a proxy, so don't bother to report 464 # a SIGPIPE 465 break 466 return apache.HTTP_BAD_REQUEST 467 response.close() 468 else: 469 if 'wsgi.file_wrapper' in req.headers_in: 470 req.output = req.headers_in['wsgi.file_wrapper'](response, CFG.BUFFER_SIZE) 471 else: 472 req.output = iter(lambda: response.read(CFG.BUFFER_SIZE), '') 473 return success_response
474
475 - def response(self, req, response):
476 """ send the response (common code) """ 477 478 # Send the xml-rpc response back 479 log_debug(5, "Response type", type(response)) 480 481 needs_xmlrpc_encoding = rhnFlags.test("NeedEncoding") 482 compress_response = rhnFlags.test("compress_response") 483 # Init an output object; we'll use it for sending data in various 484 # formats 485 if isinstance(response, rpclib.transports.File): 486 if not hasattr(response.file_obj, 'fileno') and compress_response: 487 # This is a StringIO that has to be compressed, so read it in 488 # memory; mark that we don't have to do any xmlrpc encoding 489 response = response.file_obj.read() 490 needs_xmlrpc_encoding = 0 491 else: 492 # Just treat is as a file 493 return self.response_file(req, response) 494 495 is_fault = 0 496 if isinstance(response, rhnFault): 497 if req.method == 'GET': 498 return self._response_fault_get(req, response.getxml()) 499 # Need to encode the response as xmlrpc 500 response = response.getxml() 501 is_fault = 1 502 # No compression 503 compress_response = 0 504 # This is an xmlrpc Fault, so we have to encode it 505 needs_xmlrpc_encoding = 1 506 507 output = rpclib.transports.Output() 508 509 if not is_fault: 510 # First, use the same encoding/transfer that the client used 511 output.set_transport_flags( 512 transfer=rpclib.transports.lookupTransfer(self.input.transfer), 513 encoding=rpclib.transports.lookupEncoding(self.input.encoding)) 514 515 if compress_response: 516 # check if we have to compress this result 517 log_debug(4, "Compression on for client version", self.clientVersion) 518 if self.clientVersion > 0: 519 output.set_transport_flags(output.TRANSFER_BINARY, 520 output.ENCODE_ZLIB) 521 else: # original clients had the binary transport support broken 522 output.set_transport_flags(output.TRANSFER_BASE64, 523 output.ENCODE_ZLIB) 524 525 # We simply add the transport options to the output headers 526 output.headers.update(rhnFlags.get('outputTransportOptions').dict()) 527 528 if 
needs_xmlrpc_encoding: 529 # Normalize the response 530 response = self.normalize(response) 531 try: 532 response = rpclib.xmlrpclib.dumps(response, methodresponse=1) 533 except TypeError, e: 534 log_debug(-1, "Error \"%s\" encoding response = %s" % (e, response)) 535 Traceback("apacheHandler.response", req, 536 extra="Error \"%s\" encoding response = %s" % (e, response), 537 severity="notification") 538 return apache.HTTP_INTERNAL_SERVER_ERROR 539 except Exception: # pylint: disable=E0012, W0703 540 # Uncaught exception; signal the error 541 Traceback("apacheHandler.response", req, 542 severity="unhandled") 543 return apache.HTTP_INTERNAL_SERVER_ERROR 544 545 # we're about done here, patch up the headers 546 output.process(response) 547 # Copy the rest of the fields 548 for k, v in output.headers.items(): 549 if k.lower() == 'content-type': 550 # Content-type 551 req.content_type = v 552 else: 553 setHeaderValue(req.headers_out, k, v) 554 555 if CFG.DEBUG == 4: 556 # I wrap this in an "if" so we don't parse a large file for no reason. 557 log_debug(4, "The response: %s[...SNIP (for sanity) SNIP...]%s" % 558 (response[:100], response[-100:])) 559 elif CFG.DEBUG >= 5: 560 # if you absolutely must have that whole response in the log file 561 log_debug(5, "The response: %s" % response) 562 563 # send the headers 564 req.send_http_header() 565 try: 566 # XXX: in case data is really large maybe we should split 567 # it in smaller chunks instead of blasting everything at 568 # once. Not yet a problem... 569 req.write(output.data) 570 except IOError: 571 # send_http_header is already sent, so it doesn't make a lot of 572 # sense to return a non-200 error; but there is no better solution 573 return apache.HTTP_BAD_REQUEST 574 del output 575 return apache.OK
576 577 @staticmethod
578 - def _response_fault_get(req, response):
579 req.headers_out["X-RHN-Fault-Code"] = str(response.faultCode) 580 faultString = base64.encodestring(response.faultString).strip() 581 # Split the faultString into multiple lines 582 for line in faultString.split('\n'): 583 req.headers_out.add("X-RHN-Fault-String", line.strip()) 584 # And then send all the other things 585 for k, v in rhnFlags.get('outputTransportOptions').items(): 586 setHeaderValue(req.headers_out, k, v) 587 return apache.HTTP_NOT_FOUND
588
589 - def cleanupHandler(self, req):
590 """ Clean up stuff before we close down the session when we are 591 called from apacheServer.Cleanup() 592 """ 593 594 log_debug(1) 595 self.input = None 596 # kill all of our child processes (if any) 597 while 1: 598 pid = status = -1 599 try: 600 (pid, status) = os.waitpid(-1, 0) 601 except OSError: 602 break 603 else: 604 log_error("Reaped child process %d with status %d" % (pid, status)) 605 ret = rhnApache.cleanupHandler(self, req) 606 return ret
607 608 # ============================================================================= 609