如何通过 Reasoning 实现 Apache Jena 中命名空间之间的映射？

2024-03-07

Goal:

我想在本体之间实现基于规则的映射，以完成数据迁移这一常见任务。

实现目标的方式：

为了实现这一点，我开发了一个抽象数据结构，它能够存储任意数据类型的 XML 表示形式所提供的全部信息。然后我编写了一个解析器，它根据目标文档类型定义（DTD）构造出一个本体。现在，当我读入数据时，数据首先与抽象数据类型的命名空间相关联，我们称之为 aS。目标数据结构则位于命名空间 tS 中。

Problem:

如果我尝试通过这样的规则来表达名称相同但位于不同命名空间的两个资源之间的类型等价：

[mappingRule1: (aS:?a rdf:type aS:?b) (tS:?c rdf:type tS:?b) -> (aS:?a rdf:type tS:?b)]

推理器无法理解这条规则。也许规则本身有误，它的本意是：如果 aS 中的某个类型名称在另一个命名空间 tS 中也存在，那么 aS 中该类型的所有个体也应获得 tS 中的同名类型。另一个问题是，如果某个类型没有任何个体，这种规则可能不起作用，而且有人告诉我这样表达可能还不够。作为替代方案，我也可以创建 SubClassOf 规则来完成所有组合之间的映射，但这会在模型中产生很多垃圾数据（dirt），而且我希望能够添加更多过滤条件，而不是让规则变得更通用。

然而，如果有人对基于规则的本体映射有一些经验，我将很高兴获得一些见解。

下面是一个 Java 单元测试，演示了映射不起作用的问题：

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.junit.Before;
import org.junit.Test;

import com.hp.hpl.jena.rdf.model.InfModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.reasoner.Derivation;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.reasoner.ReasonerRegistry;
import com.hp.hpl.jena.reasoner.rulesys.GenericRuleReasoner;
import com.hp.hpl.jena.reasoner.rulesys.Rule;
import com.hp.hpl.jena.util.PrintUtil;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;

/**
 * JUnit tests that build an inference model from the RDFS rule set plus a few
 * custom rules and then inspect the statements and derivations it produces.
 *
 * <p>The {@code mappingRule*} rules intentionally keep the {@code aS:?a} /
 * {@code tS:?b} notation under discussion. NOTE(review): the rule language does
 * not appear to treat such a token as a namespace-constrained variable, so
 * {@link #mapping()} is expected to keep failing until the rule is rewritten.
 */
public class ReasonerTest {

    /** Directory that receives the files written by the output-producing tests. */
    private static final String OUTPUT_DIR = "target/test-output/";

    /** Namespace of the abstract (source) data scheme. */
    String aS = "http://www.custom.eu/abstractDatascheme#";
    /** Namespace of the target data scheme. */
    String tS = "http://www.custom.eu/targetDatascheme#";

    /** Raw model that holds the asserted statements. */
    Model model = ModelFactory.createDefaultModel();
    /** Inference model layered over {@link #model}; created in {@link #init()}. */
    InfModel inf;

    Resource AA = model.createResource(aS + "A");
    Resource AB = model.createResource(aS + "B");
    Resource AC = model.createResource(aS + "C");
    Resource AD = model.createResource(aS + "D");

    Resource TA = model.createResource(tS + "A");
    Resource TB = model.createResource(tS + "B");

    Property p = model.createProperty(aS, "p");
    Property q = model.createProperty(aS, "q");


    /**
     * Registers the two namespace prefixes, seeds the model with one statement
     * and builds the inference model from the RDFS rules plus the custom rules.
     */
    @Before
    public void init() {

        PrintUtil.registerPrefix("aS", aS);
        PrintUtil.registerPrefix("tS", tS);

        AA.addProperty(p, "foo");

        // Start from the rules of the stock RDFS reasoner ...
        GenericRuleReasoner rdfsReasoner = (GenericRuleReasoner) ReasonerRegistry.getRDFSReasoner();
        List<Rule> rules = new ArrayList<>(rdfsReasoner.getRules());

        // ... and append our own rules.
        String customRules  = "[transitiveRule: (?a aS:p ?b) (?b aS:p ?c) -> (?a aS:p ?c)] \n" +
                                      "[mappingRule1: (aS:?a rdf:type aS:?b) (tS:?c rdf:type tS:?b) -> (aS:?a rdf:type tS:?b)] \n" +
                                      "[mappingRule2a: -> (aS:?a rdfs:subClassOf tS:?a)] \n" +
                                      "[mappingRule2b: -> (tS:?a rdfs:subClassOf aS:?a)]";
        // The string holds several rules, so it must go through parseRules();
        // parseRule() yields a single Rule and the remaining ones were never added.
        rules.addAll(Rule.parseRules(customRules));

        Reasoner reasoner = new GenericRuleReasoner(rules);
        reasoner.setDerivationLogging(true);
        inf = ModelFactory.createInfModel(reasoner, model);
    }


    /**
     * Gives an aS individual and a tS individual the same-named type in their
     * respective namespaces and expects the aS individual to be inferred to
     * have the tS type as well.
     */
    @Test
    public void mapping() {
        Resource targetType = model.createResource(tS + "CommonType");
        AA.addProperty(RDF.type, model.createResource(aS + "CommonType"));
        TA.addProperty(RDF.type, targetType);

        // The head of mappingRule1 is (individual rdf:type <type in tS>), so the
        // statement to look for has the tS type as object, not the individual TA.
        String trace = getDerivations(null, AA, RDF.type, targetType);
        assertNotNull(trace);
    }


    /**
     * Collects the derivation traces of all inferred statements matching the
     * given pattern.
     *
     * @param trace     text to prepend to the result, or {@code null} for none
     * @param subject   subject to match
     * @param predicate predicate to match
     * @param object    object to match
     * @return the accumulated traces, or {@code null} if {@code trace} was
     *         {@code null} and no derivation was found
     */
    private String getDerivations(String trace, Resource subject, Property predicate, Resource object) {
        return collectDerivations(trace, inf.listStatements(subject, predicate, object));
    }


    /**
     * Prints every statement of the iterator together with its derivation
     * traces and concatenates the string form of those derivations.
     *
     * @param trace      text to prepend to the result, or {@code null} for none
     * @param statements statements whose derivations are collected
     * @return the accumulated traces, or {@code null} if {@code trace} was
     *         {@code null} and no derivation was found
     */
    private String collectDerivations(String trace, StmtIterator statements) {
        // A StringBuilder avoids the literal "null" prefix that `null += text` produces.
        StringBuilder collected = new StringBuilder(trace == null ? "" : trace);
        boolean found = trace != null;

        PrintWriter out = new PrintWriter(System.out);
        while (statements.hasNext()) {
            Statement s = statements.nextStatement();
            System.out.println("Statement is " + s);
            for (Iterator<Derivation> id = inf.getDerivation(s); id.hasNext(); ) {
                Derivation deriv = id.next();
                deriv.printTrace(out, true);
                collected.append(deriv);
                found = true;
            }
        }
        // Flush only: closing the writer would also close System.out.
        out.flush();
        return found ? collected.toString() : null;
    }


    /** Declares p a sub-property of q and expects the p statement on AA to show up under q. */
    @Test
    public void subProperty() {

        // Hierarchy
        model.add(p, RDFS.subPropertyOf, q);

        StmtIterator stmts = inf.listStatements(AA, q, (RDFNode) null);
        assertTrue(stmts.hasNext());
        while (stmts.hasNext()) {
            System.out.println("Statement: " + stmts.next());
        }
    }


    /** Builds the chain A -p-> B -p-> C -p-> D and expects a derivation for A -p-> D. */
    @Test
    public void derivation() {

        // Derivations
        AA.addProperty(p, AB);
        AB.addProperty(p, AC);
        AC.addProperty(p, AD);

        String trace = getDerivations(null, AA, p, AD);
        assertNotNull(trace);
    }


    /** Expects at least one statement of the inference model to carry a derivation. */
    @Test
    public void derivations() {
        String trace = collectDerivations(null, inf.listStatements());
        assertNotNull(trace);
    }


    /** Prints every statement of the inference model. */
    @Test
    public void listStatements() {
        StmtIterator stmtIterator = inf.listStatements();
        while (stmtIterator.hasNext()) {
            System.out.println(stmtIterator.nextStatement());
        }
    }


    /** Prints every rule of the reasoner behind the inference model. */
    @Test
    public void listRules() {
        List<Rule> rules = ((GenericRuleReasoner) inf.getReasoner()).getRules();
        for (Rule rule : rules) {
            System.out.println(rule.toString());
        }
    }


    /**
     * Writes the inference model to {@code target/test-output/testOnto.owl}.
     *
     * @throws IOException if the output directory or file cannot be written;
     *                     the test then fails instead of passing silently
     */
    @Test
    public void saveDerivation() throws IOException {
        ensureOutputDir();
        // try-with-resources flushes and closes the buffered stream, so the
        // file is complete when the test returns.
        try (DataOutputStream out1 = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(OUTPUT_DIR + "testOnto.owl")))) {
            inf.write(out1);
        }
    }


    /**
     * Writes the rules of the stock RDFS reasoner, one per line, to
     * {@code target/test-output/rfd.rules}.
     *
     * @throws IOException if the output directory or file cannot be written
     */
    @Test
    public void printRdfRules() throws IOException {

        GenericRuleReasoner rdfsReasoner = (GenericRuleReasoner) ReasonerRegistry.getRDFSReasoner();
        List<Rule> rdfsRules = rdfsReasoner.getRules();

        ensureOutputDir();
        try (PrintWriter writer = new PrintWriter(OUTPUT_DIR + "rfd.rules", "UTF-8")) {
            for (Rule rule : rdfsRules) {
                writer.println(rule.toString());
            }
        }
    }


    /**
     * Creates {@link #OUTPUT_DIR} including missing parent directories.
     *
     * @throws IOException if the directory does not exist and cannot be created
     */
    private static void ensureOutputDir() throws IOException {
        File directory = new File(OUTPUT_DIR);
        if (!directory.isDirectory() && !directory.mkdirs()) {
            throw new IOException("Could not create output directory " + directory);
        }
    }

}

你不能只写 ns:?x，就指望它匹配字符串形式以 ns: 所代表的内容开头的 URI 资源，并把 ?x 绑定到其余部分（或整个 URI）。如果想让规则查看 URI 的字符串形式，就必须先用 strConcat 获取其字符串形式，再用 regex 进行匹配和提取。下面的例子识别出：m:Person 被用作类型，数据中存在 x:a a n:Person，并且 m:Person 和 n:Person 在各自的前缀 m: 和 n: 之后具有相同的后缀，于是推断出 x:a a m:Person。

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import com.hp.hpl.jena.rdf.model.InfModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.reasoner.rulesys.GenericRuleReasoner;
import com.hp.hpl.jena.reasoner.rulesys.Rule;
import com.hp.hpl.jena.util.PrintUtil;

/**
 * Small runnable example: reads two typed individuals from an inline Turtle
 * document, applies one rule that compares the string forms of the type URIs,
 * and writes the resulting inference model to standard output as Turtle.
 */
public class TypeMappingExample {
    /**
     * Runs the example.
     *
     * @param args ignored
     * @throws IOException if closing the in-memory input stream fails
     */
    public static void main(String[] args) throws IOException {
        // The rule text below uses the n: and m: prefixes, so register them first.
        PrintUtil.registerPrefix( "n", "urn:ex:n/" );
        PrintUtil.registerPrefix( "m", "urn:ex:m/" );
        // Every @prefix directive must end with '.'; the x: line previously
        // lacked both the dot and the line break.
        String content = "\n" +
                "@prefix n: <urn:ex:n/>.\n" +
                "@prefix m: <urn:ex:m/>.\n" +
                "@prefix x: <urn:ex:x/>.\n" +
                "\n" +
                "x:a a n:Person.\n" +
                "x:b a m:Person.\n" +
                "";
        Model model = ModelFactory.createDefaultModel();
        // Encode explicitly as UTF-8 instead of relying on the platform default charset.
        byte[] turtle = content.getBytes( java.nio.charset.StandardCharsets.UTF_8 );
        try ( InputStream in = new ByteArrayInputStream( turtle )) {
            model.read( in, null, "TTL" );
        }
        // Rule outline: build one pattern per namespace ("<namespace>(.*)"),
        // take the string form of each type URI, extract the part after the
        // namespace with regex, and when both suffixes are equal give ?x the
        // m-side type as well.
        String rule = "\n" +
                "[strConcat(n:,'(.*)',?nprefix),\n" +
                " strConcat(m:,'(.*)',?mprefix),\n" +
                " (?x rdf:type ?ntype), strConcat(?ntype,?ntypestr),\n" +
                " (?y rdf:type ?mtype), strConcat(?mtype,?mtypestr)," +
                " regex(?ntypestr,?nprefix,?nsuffix),\n" +
                " regex(?mtypestr,?mprefix,?msuffix),\n" +
                " equal(?nsuffix,?msuffix)\n" +
                " -> \n" +
                "(?x rdf:type ?mtype)]";
        Reasoner reasoner = new GenericRuleReasoner( Rule.parseRules( rule ));
        InfModel imodel = ModelFactory.createInfModel( reasoner, model );
        imodel.write( System.out, "TTL" );
    }
}

@prefix n:     <urn:ex:n/> .
@prefix m:     <urn:ex:m/> .
@prefix x:     <urn:ex:x/> .

x:a     a       m:Person , n:Person .

x:b     a       m:Person .

正如你所看到的，字符串处理相当粗糙；Jena 的内置函数其实并不是为从 URI 等获取字符串而设计的。一些 SPARQL 函数会让这件事更容易，但仍然有些不优雅，因为 IRI 本来就应该是不透明（opaque）的标识符。

一种更简单的解决方案是确保所有类都有标签，并规定：如果两个类具有相同的标签，那么一个类的实例也是另一个类的实例。如果您很好地利用了 rdfs:isDefinedBy，就可以用如下规则把这一点表达得非常简洁：

[(?c1 a rdfs:Class) (?c1 rdfs:isDefinedBy ?ont1) (?c1 rdfs:label ?name)
 (?c2 a rdfs:Class) (?c2 rdfs:isDefinedBy ?ont2) (?c2 rdfs:label ?name)
 ->
 [(?x rdf:type ?c1) -> (?x rdf:type ?c2)]]

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)