Package com.datasalt.pangool.examples.topnhashtags

Source Code of com.datasalt.pangool.examples.topnhashtags.TopNHashTagsGenerateData

/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.examples.topnhashtags;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.codehaus.jackson.map.ObjectMapper;

import com.datasalt.pangool.examples.topnhashtags.Beans.Entities;
import com.datasalt.pangool.examples.topnhashtags.Beans.HashTag;
import com.datasalt.pangool.examples.topnhashtags.Beans.SimpleTweet;
import com.datasalt.pangool.examples.topnhashtags.Beans.UserInfo;
import com.datasalt.pangool.utils.TestUtils;

/**
* Input data generator for the {@link TopNHashTags} example.
*/
public class TopNHashTagsGenerateData {

  public static void main(String[] args) throws IOException, ParseException {
    if(args.length != 5) {
      System.err.println();
      System.err.println("Five arguments are needed.");
      System.err
          .println("Usage: [out-tweets-file] [nTweets] [nHashtags] [nLocations] [nDates]");
      System.err.println();
      System.err
          .println("Example: tweets.txt 100 10 10 10 -> Will generate a file 'tweets.txt' with 100 tweets. There will be 10 different hashtags, 10 different locations and 10 different dates used among all generated tweets.");
      System.err.println();
      System.exit(-1);
    }
    String outFile = args[0];
    int nTweets = Integer.parseInt(args[1]), nHashTags = Integer.parseInt(args[2]), nLocations = Integer.parseInt(args[3]),
    nDates = Integer.parseInt(args[4]);
   
    List<String> hashTags = new ArrayList<String>(nHashTags);
    List<String> locations = new ArrayList<String>(nLocations);
    List<Long> dates = new ArrayList<Long>(nDates);
   
    // Pregenerate data that will be used to generate tweets
   
    for(int i = 0; i < nHashTags; i++) {
      hashTags.add("hashtag" + TestUtils.randomString(10));
    }
   
    for(int i = 0; i < nLocations; i++) {
      locations.add("location" + TestUtils.randomString(10));
    }

    long currDate = System.currentTimeMillis();
    for(int i = 0; i < nDates; i++) {
      dates.add(currDate);
      currDate -= 1000 * 60 * 60 * 24;
    }

    BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
    ObjectMapper mapper = new ObjectMapper();
   
    for(int i = 0; i < nTweets; i++) {
      long date = dates.get((int)(Math.random() * dates.size()));
      String location = locations.get((int)(Math.random() * locations.size()));
      String hashTag = hashTags.get((int)(Math.random() * hashTags.size()));
      SimpleTweet tweet = new SimpleTweet();
      tweet.setCreated_at(SimpleTweet.dateFormat.format(new Date(date)));
      tweet.setEntities(new Entities());
      tweet.getEntities().setHashtags(new ArrayList<HashTag>());
      tweet.getEntities().getHashtags().add(new HashTag());
      tweet.getEntities().getHashtags().get(0).setText(hashTag);
      tweet.setUser(new UserInfo());
      tweet.getUser().setLocation(location);
      writer.write(mapper.writeValueAsString(tweet) + "\n");
    }
   
    writer.close();
  }
}
TOP

Related Classes of com.datasalt.pangool.examples.topnhashtags.TopNHashTagsGenerateData

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.