Splitting database column into multivalued Solr field


Question

I'm going nuts trying to figure out how to get the Data Import Handler's splitBy construct to work. I was expecting it to split the input column into a multivalued field. Here's a test case to reproduce the problem:

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;

import static org.junit.Assert.*;

import javax.sql.DataSource;

import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.core.CoreContainer;
import org.hsqldb.jdbc.JDBCDataSource;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class TestSplitBy {

  SolrServer server;
  File configPath = new File(FileUtils.getTempDirectory(), Long.toString(System.nanoTime()));

  String solrconfig_xml = "<config><luceneMatchVersion>LUCENE_41</luceneMatchVersion><requestHandler name=\"search\" class=\"solr.SearchHandler\" default=\"true\"><lst name=\"defaults\"><str name=\"fl\">*</str><str name=\"df\">id</str></lst></requestHandler><requestHandler name=\"/dataimport\" class=\"org.apache.solr.handler.dataimport.DataImportHandler\"><lst name=\"defaults\"><str name=\"config\">data-config.xml</str></lst></requestHandler></config>";

  String data_config_xml = "<dataConfig>" +
      "<dataSource url=\"jdbc:hsqldb:mem:testdb\" user=\"SA\" driver=\"org.hsqldb.jdbc.JDBCDriver\" />" +
      "<document>" +
      "<entity name=\"item\" transformer=\"RegexTransformer\" query=\"SELECT * FROM test\">" +
      "<field column=\"type\" name=\"type\" splitBy=\",\" />" +
      "</entity>" +
      "</document>" +
      "</dataConfig>";

  String schema_xml = "<schema version=\"1.3\" name=\"test\">" +
      "<types>" +
      "<fieldType name=\"string\" class=\"solr.StrField\" sortMissingLast=\"true\" omitNorms=\"true\" />" +
      "</types>" +
      "<fields>" +
      "<field stored=\"true\" name=\"id\" type=\"string\" />" +
      "<field stored=\"true\" name=\"type\" type=\"string\" multiValued=\"true\"/>" +
      "</fields>" +
      "<uniqueKey>id</uniqueKey>" +
      "</schema>";

  DataSource getDataSource() {
    JDBCDataSource ds = new JDBCDataSource();
    ds.setUser("SA");
    ds.setUrl("mem:testdb");
    return ds;
  }

  void populateDb(DataSource ds) {
    QueryRunner runner = new QueryRunner(ds);
    try {
      runner.update("DROP TABLE test IF EXISTS");
      runner.update("CREATE TABLE test(id INTEGER, type VARCHAR(256));");
      runner.update("INSERT INTO test VALUES 1, 'foo,bar,baz'");
    } catch (SQLException e) {
      System.err.println(e);
    }
  }

  void writeSolrConfig() throws IOException {
    File corePath = new File(configPath, "collection1");
    corePath.mkdir();
    File confPath = new File(corePath, "conf");
    confPath.mkdir();
    FileUtils.write(new File(confPath, "data-config.xml"), data_config_xml);
    FileUtils.write(new File(confPath, "schema.xml"), schema_xml);
    FileUtils.write(new File(confPath, "solrconfig.xml"), solrconfig_xml);
  }

  void startSolr() {
    System.setProperty("solr.solr.home", configPath.getAbsolutePath());
    CoreContainer.Initializer initializer = new CoreContainer.Initializer();
    CoreContainer coreContainer = initializer.initialize();
    server = new EmbeddedSolrServer(coreContainer, "collection1");
  }

  @Before
  public void setup() throws IOException {
    populateDb(getDataSource());
    writeSolrConfig();
    startSolr();
  }

  @After
  public void tearDown() {
    server.shutdown();
    FileUtils.deleteQuietly(configPath);
  }

  @Test
  public void testSplitBy() throws Exception {
    SolrQuery query = new SolrQuery();
    query.set("qt", "/dataimport");
    query.setParam("command", "full-import");
    QueryResponse response = server.query(query);
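    // the full-import command runs asynchronously, so give it a moment to finish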
    Thread.sleep(500);

    response = server.query(new SolrQuery("*:*"));
    for (SolrDocument doc: response.getResults()) {
      assertNotNull(doc.getFieldValues("type"));
      assertEquals(3, doc.getFieldValues("type").size());
    }
  }
}

And the POM for the test case:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>solr</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>Solr Sanity</name>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr</artifactId>
      <version>4.1.0</version>
      <type>war</type>
    </dependency>
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr-dataimporthandler</artifactId>
      <version>4.1.0</version>
      <type>jar</type>
    </dependency>
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr-solrj</artifactId>
      <version>4.1.0</version>
      <type>jar</type>
    </dependency>
    <dependency>
      <groupId>commons-dbutils</groupId>
      <artifactId>commons-dbutils</artifactId>
      <version>1.5</version>
      <type>jar</type>
    </dependency>
    <dependency>
      <groupId>org.hsqldb</groupId>
      <artifactId>hsqldb</artifactId>
      <version>2.2.9</version>
      <type>jar</type>
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
    </dependency>
    <dependency>
      <groupId>javax.servlet</groupId>
      <artifactId>servlet-api</artifactId>
      <version>2.5</version>
    </dependency>
  </dependencies>
</project>

Any insight on how to get those types to split correctly into multiple values?

Answer

Turns out there were a couple of issues with this unit test:

  1. HSQL's column names are case sensitive (and default to upper case).

  2. If the Solr field name and the db column name are identical, an extra token with the entire db value is also added.

The field definition should look something like:

<field column="solrField" splitBy="," sourceColName="TYPE" />

And in general, when using the RegexTransformer to mix single-valued fields from a DB with multivalued fields (a short sketch combining both cases follows the list):

  • If using splitBy, then the column attribute is the name of the Solr field, and sourceColName is the database column.

  • If not using splitBy, then the column attribute is the database column name, and the name attribute is the Solr field.
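
A minimal sketch combining both cases in one entity (reusing the test's ID and TYPE columns; this example is an assumption based on the rules above, not part of the original answer):

<entity name="item" transformer="RegexTransformer" query="SELECT * FROM test">
  <!-- no splitBy: column is the DB column, name is the Solr field -->
  <field column="ID" name="id" />
  <!-- splitBy: column is the Solr field, sourceColName is the DB column -->
  <field column="type" splitBy="," sourceColName="TYPE" />
</entity>

The RegexTransformer only acts on fields that declare splitBy or regex, so the plain id mapping is passed through unchanged.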
