Node.js 在 http.request 循环中处理内存不足

2024-05-07

在我的 Node.js 服务器中，我无法弄清楚为什么它会耗尽内存。我的node.js 服务器为它收到的每个http 请求发出一个远程http 请求，因此我尝试使用下面的示例脚本来复制该问题，该脚本也耗尽了内存。

仅当 for 循环中的迭代次数非常高时才会发生这种情况。

从我的角度来看，该问题与 Node.js 正在对远程 http 请求进行排队这一事实有关。如何避免这种情况？

这是示例脚本：

(function() {
  var http, i, mypost, post_data;
  http = require('http');
  post_data = 'signature=XXX%7CPSFA%7Cxxxxx_value%7CMyclass%7CMysubclass%7CMxxxxx&schedule=schedule_name_6569&company=XXXX';

  /**
   * POSTs `post_data` to myhost.com:8000/set_xxxx and invokes `cb` (with no
   * arguments) once the response has been fully received.
   *
   * The response body is accumulated but not used. If the request emits
   * 'error', the error is only logged and `cb` is not called.
   *
   * @param {string} post_data - Request body to send.
   * @param {Function} cb - Called after the response 'end' event.
   * @returns {*} Whatever `req.end()` returns.
   */
  mypost = function(post_data, cb) {
    var post_options, req;
    post_options = {
      host: 'myhost.com',
      port: 8000,
      path: '/set_xxxx',
      method: 'POST',
      headers: {
        // Content-Length is a byte count; String#length counts UTF-16 code
        // units and would be wrong for any non-ASCII body.
        'Content-Length': Buffer.byteLength(post_data)
      }
    };
    req = http.request(post_options, function(res) {
      var res_data;
      res.setEncoding('utf-8');
      res_data = '';
      res.on('data', function(chunk) {
        return res_data += chunk;
      });
      return res.on('end', function() {
        return cb();
      });
    });
    req.on('error', function(e) {
      return console.debug('TM problem with request: ' + e.message);
    });
    req.write(post_data);
    // end() has to be *called*: the previous `return req.end;` merely
    // returned the method reference, so the request was never finished.
    return req.end();
  };

  // Starts 1,000,000 requests back-to-back without waiting for any of them
  // to complete. This unthrottled loop is the pattern that the surrounding
  // question reports as running the process out of memory.
  for (i = 1; i <= 1000000; i++) {
    mypost(post_data, function() {});
  }
}).call(this);


$ node -v
v0.4.9
$ node sample.js
FATAL ERROR: CALL_AND_RETRY_2 Allocation failed - process out of memory

提前致谢

古尔登PT

限制进入服务器的请求流

通过设置实例上的 maxConnections 属性，可以防止内置的 Server 及其 HTTP/HTTPS 变体过载。设置此属性后，当 listen() 的积压队列已满且应用程序已经在处理 maxConnections 个请求时，Node 会停止 accept() 新连接，并迫使操作系统丢弃请求。

限制传出请求

有时，有必要限制传出请求，如问题中的示例脚本所示。

直接使用节点或使用通用池

正如问题所示，不加限制地直接使用 Node 的网络子系统可能会导致内存不足错误。像 node-pool 这样的库让主动管理连接池变得很有吸引力，但它并没有解决无约束排队的根本问题。原因在于 node-pool 不提供有关客户端池状态的任何反馈。

更新：从 v1.0.7 开始，node-pool 包含了一个受本文启发的补丁，为 acquire() 添加了布尔返回值。下一节中的代码已不再必要；流模式一节中的示例则是可以直接配合 node-pool 运行的代码。

破解抽象

正如 Andrey Sidorov 所演示的（https://stackoverflow.com/questions/6623683/node-js-process-out-of-memory-in-http-request-loop/6624173#6624173），可以通过显式跟踪队列大小，并把排队代码与请求代码混合在一起来实现解决方案：

/*
 * Issues ten pool acquisitions while keeping at most two of them in flight.
 * The in-flight count and the count of requests still to be started are
 * tracked by hand; a client is released one second after it was acquired,
 * and every freed slot triggers another attempt to start pending requests.
 * "Finished!" is logged as soon as the first batch has been started.
 */
var useExplicitThrottling = function () {
  var inFlight = 0
  var unsent = 10

  // Gives one slot back and, if there is room again, starts more requests.
  var finishOne = function () {
    inFlight -= 1
    if (inFlight < 2) {
      pump()
    }
  }

  // Starts requests until two are in flight or none are left to start.
  var pump = function () {
    for (;;) {
      if (inFlight >= 2) break
      unsent -= 1
      if (unsent < 0) break
      inFlight += 1
      pool.acquire(function (err, client) {
        if (!err) {
          console.log("Handling request with client " + client)
          setTimeout(function () {
            pool.release(client)
            finishOne()
          }, 1000)
          return
        }
        console.log("Error acquiring from pool")
        finishOne()
      })
    }
  }

  pump()
  console.log("Finished!")
}

借用流模式

流（streams，http://nodejs.org/docs/v0.4.12/api/streams.html#streams）模式是 Node 中惯用的解决方案。流有一个 write 操作，当流无法缓冲更多数据时，它会返回 false。同样的模式可以应用于池对象：当已获取的客户端达到最大数量时，acquire() 返回 false；当活动客户端数量降到最大值以下时，会发出一个 drain 事件。这样池的抽象重新变得封闭，代码中也可以省略对池大小的显式引用。

/*
 * Issues ten pool acquisitions using stream-style back-pressure.
 *
 * pool.acquire() is expected to return a falsy value once the pool is full;
 * sending then pauses until the pool emits 'drain', at which point the
 * requests that are still outstanding are queued again. Each acquired client
 * is released after one second. "Finished!" is logged as soon as the first
 * batch has been sent.
 */
var useStreams = function () {
  var queueRequests = function (remaining) {
    var full = false
    pool.once('drain', function() {
      // `remaining` is -1 (truthy) after the loop below has run out of
      // requests, so it must be compared against zero. A bare truthiness
      // check would re-register a useless 'drain' listener on every drain.
      if (remaining > 0) queueRequests(remaining)
    })

    while(!full && --remaining >= 0) {
      console.log("Sending request...")
      full = !pool.acquire(function (err, client) {
        if (err) {
          console.log("Error acquiring from pool")
          return
        }
        console.log("Handling request with client " + client)
        // Bind so release() still sees the pool as `this` when the timer
        // invokes it.
        setTimeout(pool.release.bind(pool), 1000, client)
      })
    }
  }
  queueRequests(10)
  console.log("Finished!")
}

Fibers

也可以通过在队列之上提供一个阻塞式抽象来获得另一种解决方案。fibers 模块（https://github.com/laverdet/node-fibers）提供了用 C++ 实现的协程（https://secure.wikimedia.org/wikipedia/en/wiki/Coroutine）。通过使用纤程（fiber），可以在不阻塞 Node 事件循环的情况下阻塞某个执行上下文。虽然我认为这种方法非常优雅，但由于 Node 社区对一切看起来像同步的东西都有一种奇怪的反感，它经常被忽视。请注意，除去 callcc 这个工具函数之外，实际的循环逻辑非常简洁。

/* This is the call-with-current-continuation found in Scheme and other
 * Lisps. It captures the current call context and passes a callback to
 * resume it as an argument to the function. Here, I've modified it to fit
 * JavaScript and node.js paradigms by making it a method on Function
 * objects and using function (err, result) style callbacks.
 */
/*
 * Calls this function inside a new Fiber with `context` as `this`, the extra
 * arguments given to callcc, and a trailing node-style callback
 * `function (err, result)`. The calling fiber is suspended with
 * Fiber.yield(); the callback resumes it via run(result), or via
 * throwInto(err) when `err` is truthy. Returns whatever Fiber.yield()
 * returns.
 */
Function.prototype.callcc = function(context  /* args... */) {
  var that = this,
      // Capture the extra arguments here: inside the fiber body below,
      // `arguments` refers to that inner function's own arguments, so
      // reading it there would silently drop everything passed to callcc.
      args = Array.prototype.slice.call(arguments, 1),
      caller = Fiber.current,
      fiber = Fiber(function () {
        that.apply(context, args.concat(
          function (err, result) {
            if (err)
              caller.throwInto(err)
            else
              caller.run(result)
          }
        ))
      })
  // The new fiber is started on the next tick; this call yields first.
  process.nextTick(fiber.run.bind(fiber))
  return Fiber.yield()
}

/*
 * Issues ten pool acquisitions one after another in blocking style.
 *
 * Each iteration calls pool.acquire through callcc and uses the returned
 * value as the client, which is released after one second. Any exception
 * raised inside the try block is reported as an acquisition failure and the
 * loop moves on. "Finished!" is logged after the last iteration.
 */
var useFibers = function () {
  var remaining = 10
  // Declared locally: assigning to an undeclared `client` creates an
  // implicit global, and in strict mode throws a ReferenceError that the
  // catch below would misreport as a pool failure.
  var client
  while(--remaining >= 0) {
    console.log("Sending request...")
    try {
      // Use the pool itself as the call context so acquire() sees the
      // expected `this`.
      client = pool.acquire.callcc(pool)
      console.log("Handling request with client " + client);
      // Bound for the same reason; `client` is passed by value so each
      // timer releases the client acquired in its own iteration.
      setTimeout(pool.release.bind(pool), 1000, client)
    } catch (x) {
      console.log("Error acquiring from pool")
    }
  }
  console.log("Finished!")
}

结论

有许多正确的方法可以解决这个问题。但是，对于需要在多个上下文中共享单个池的库作者或应用程序，最好正确封装该池。这样做有助于防止错误并生成更清晰、更模块化的代码。防止无约束排队就变成了事件舞蹈或协程模式。我希望这个答案能够消除对阻塞式代码和异步行为的大量 FUD 和困惑，并鼓励您编写让您满意的代码。

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)

nodejs