由于数据量太大,我不会存储完整的 IP 地址。将它们存储在 BitSet 数组中
会消耗更少的内存。
编辑:以前的代码版本已删除,因为它不正确。
下面的版本生成随机地址并将它们保存在文件中。如果找到先前运行的持久性文件,则从该文件中恢复所见地址的信息。
初始版本中未正确处理以下情况:
assuming that no address was already seen
1.0.0.1 - seen false
2.0.0.2 - seen false
2.0.0.1 - seen true, which was wrong and is correctly handled by code below
请参阅代码中的注释以获取更多信息。
public class KeepSeenAddresses {
static final int FILE_BUFFER_SIZE = 81_920;
static final int RANGES_SIZE = 256;
// to store 256 ranges of 255*255*255+1 addresses
static BitSet[] ranges;
// Random(1) is taken only for demonstration purpose, so the second
// application run will find the same seen addresses from previous run
static Random random = new Random(1);
// for normal use it's better to have better randomness
//static Random random = new Random(System.currentTimeMillis());
public static void main(String[] args)
throws IOException, ClassNotFoundException {
if (!readRanges()) {
initRanges();
}
// this case was failing in the initial solution
// uncomment this block to see how all edge cases
// which where mentioned in other comments are handled
/*
byte[][] addresses = {
{1, 0, 0, 1},
{2, 0, 0, 2},
{2, 0, 0, 1},
{1, 2, 3, 4},
{4, 3, 2, 1},
{(byte)128, 0, 0, 0},
{(byte)255, (byte)255, (byte)255, (byte)255}
};
seenAddress(addresses[0]);
seenAddress(addresses[1]);
seenAddress(addresses[3]);
seenAddress(addresses[5]);
seenAddress(addresses[6]);
for (byte[] addressBytes : addresses) {
System.out.printf("seen %s before: %s%n",
prettyAddress(addressBytes),
seenBefore(addressBytes)
);
}
*/
processAddresses();
persistRanges();
}
/**
* Read the seen addresses from a file.
*
* @return <code>true</code> if the file was found and has the expected
* number of ranges, otherwise <code>false</code>
* @throws IOException
* @throws ClassNotFoundException
*/
private static boolean readRanges() throws IOException, ClassNotFoundException {
File rangesStore = new File("addresses.bin");
if (!rangesStore.exists()) {
return false;
}
System.out.print("found previous rangesStore... ");
try (ObjectInputStream ois = new ObjectInputStream(
new BufferedInputStream(
new FileInputStream(rangesStore), FILE_BUFFER_SIZE
)
)) {
ranges = (BitSet[]) ois.readObject();
}
if (ranges.length != RANGES_SIZE) {
System.out.printf("wrong size of rangesStore: expected %d"
+ " found: %d%n", RANGES_SIZE, ranges.length);
return false;
} else {
System.out.printf("restored ranges: %d%n", ranges.length);
return true;
}
}
/**
* Initialize the address ranges array. All address flags will be set to
* <code>false</code>.
*/
private static void initRanges() {
System.out.print("initialize new rangesStore... ");
ranges = new BitSet[RANGES_SIZE];
for (int i = 0; i < RANGES_SIZE; i++) {
BitSet bitSet = new BitSet(255 * 255 * 255 + 1);
for (int j = 0; j < 255 * 255 * 255 + 1; j++) {
bitSet.clear(j);
}
ranges[i] = bitSet;
}
System.out.printf("initialized ranges: %d%n", RANGES_SIZE);
}
/**
* For demonstration purpose.<br>
* Generates some random IPv4 addresses. If the address was not seen before
* the flag for this address will be set to <code>true</code>.
*/
private static void processAddresses() {
for (int i = 0; i < 10; i++) {
byte[] addrBytes = randomAddress();
boolean seenBefore = seenBefore(addrBytes);
if (!seenBefore) {
seenAddress(addrBytes);
seenBefore = false;
}
System.out.printf("seen %s before: %s%n",
prettyAddress(addrBytes),
seenBefore
);
}
}
/**
* Persist the address ranges array. The file size is around 500MB.
*
* @throws IOException
*/
private static void persistRanges() throws IOException {
System.out.print("persist rangesStore... ");
try (ObjectOutputStream oos = new ObjectOutputStream(
new BufferedOutputStream(
new FileOutputStream("addresses.bin"), FILE_BUFFER_SIZE)
)) {
oos.writeObject(ranges);
}
System.out.printf("written ranges: %d%n", ranges.length);
}
/**
* Keep a flag which address has been seen already.
*
* @param addrBytes IPv4 address in four bytes
*/
static void seenAddress(byte[] addrBytes) {
int rangeIndex = (int) addrBytes[0] & 0xff;
int rangeOffset = ((int) addrBytes[1] & 0xff * 0xffff)
+ ((int) addrBytes[2] & 0xff * 0xff)
+ ((int) addrBytes[3] & 0xff);
ranges[rangeIndex].set(rangeOffset);
}
/**
* Check if the passed address was seen before.
*
* @param addrBytes IPv4 address in four bytes
* @return <code>true</code> if the address was seen before, otherwise
* <code>false</code>
*/
static boolean seenBefore(byte[] addrBytes) {
int rangeIndex = (int) addrBytes[0] & 0xff;
int rangeOffset = ((int) addrBytes[1] & 0xff * 0xffff) + ((int) addrBytes[2] & 0xff * 0xff) + ((int) addrBytes[3] & 0xff);
return ranges[rangeIndex].get(rangeOffset);
}
/**
* Convert the IPv4 address into pretty string.
*
* @param addrBytes IPv4 address in four bytes
* @return pretty String of the IPv4 address
*/
static String prettyAddress(byte[] addrBytes) {
return String.format("%03d.%03d.%03d.%03d",
(int) addrBytes[0] & 0xff,
(int) addrBytes[1] & 0xff,
(int) addrBytes[2] & 0xff,
(int) addrBytes[3] & 0xff);
}
/**
* Generate a random IPv4 address.
*
* @return four bytes of a random generated IPv4 address
*/
private static byte[] randomAddress() {
byte[] bytes = new byte[4];
for (int i = 0; i < bytes.length; i++) {
bytes[i] = (byte) random.nextInt(256);
}
return bytes;
}
}