Lexical Feature Stream

/detail

Returns the SMA Universe data.

Request Example

 curl --data "api_key=***bf3957e823316e5dc18c031c6ddc8074b***&function=stream" "https://api3.socialmarketanalytics.com/api/detail?subject=all&ontology=ticker&items=feature&sourcestring=twitter&dates=datetime+eq+realtime&timezone=UTC" 
                    
     
package sma.stream;
 
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sma.stream.facility.URIFactory;
import sma.stream.facility.URIFactoryImpl;
import sma.stream.facility.URLConnectionFactory;
import sma.stream.facility.URLConnectionFactoryImpl;
import sma.stream.model.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;
 
/**
 * Copyright (c) 2015 Social Market Analytics 
 *
 * All rights reserved.
 */
 
 
/**
 * Application to parse json response from SMA stream.
 *
 * <p><b>To Run Main</b>
 *
 * <p>In order to run application, the following command line arguments are required:
 * <ul>
 *   <li>API Key Param: 40 characters long string</li>
 *   <li>API Function Param: e.g. stream</li>
 *   <li>API Subject Param: e.g. all</li>
 *   <li>API Ontology Param: e.g. ticker</li>
 *   <li>API Items Param: e.g. feature</li>
 * </ul>
 *
 * @author Hisham Javed
 */
public class SMASentimentStreamConsumer {

    /** Logger */
    protected static Logger logger = LoggerFactory.getLogger(SMASentimentStreamConsumer.class);

    // SMA stream feed
    /** {@link sma.stream.facility.URLConnectionFactory} to manage interactions with SMA Stream */
    URLConnectionFactory urlFactory;

    /**
     * Main Constructor
     *
     * @param urlFactory factory used by {@link #run()} to open the stream connection
     * @throws java.io.IOException declared for backward compatibility; this constructor only stores the factory
     */
    public SMASentimentStreamConsumer(URLConnectionFactory urlFactory) throws IOException {
        this.urlFactory = urlFactory;
    }

    /**
     * @return the factory this consumer opens its stream connection with
     */
    public URLConnectionFactory getURLConnectionFactory() {
        return urlFactory;
    }

    /**
     * Opens the stream and reads it line by line until it ends. Every non-empty line is
     * echoed to standard output and parsed as one JSON object; lines that are not valid
     * JSON are logged and skipped. An {@link IOException} ends the process with exit code 1.
     */
    public void run() {
        System.out.println("Begin stream");
        // try-with-resources: the connection stream and the reader are closed on every exit path.
        // The charset is pinned so that decoding does not depend on the platform default.
        try (InputStream is = this.urlFactory.createStreamConnection();
             BufferedReader in = new BufferedReader(
                     new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
            /**
             * JSON parser.
             */
            JSONParser parser = new JSONParser();
            String line;
            while ((line = in.readLine()) != null) {
                if (line.isEmpty()) {
                    continue;
                }
                try {
                    System.out.println(line);
                    JSONObject jsonObject = (JSONObject) parser.parse(line);
                    JSONObject sentimentNode = (JSONObject) jsonObject.get("SENTIMENT");
                    if (sentimentNode == null) {
                        // Messages without a SENTIMENT element (for instance an API
                        // "response"/"error" envelope) carry nothing to map onto the model.
                        logger.warn("Stream message has no SENTIMENT element");
                        continue;
                    }
                    // Maps the element onto the model class declared in sma.stream.model.
                    new Sentiment(sentimentNode);
                    System.out.println("Parsed successfully");
                } catch (ParseException ex) {
                    logger.error("Invalid json object", ex);
                }
            }
            System.out.println("End stream");
        } catch (IOException e) {
            logger.error("SMA stream terminated by an I/O error", e);
            System.out.println("End Stream with exception");
            System.exit(1);
        }
    }

    /**
     * Command line entry point.
     *
     * @param args apiKey, function, subject, ontology and items, in that order
     */
    public static void main(String[] args) {
        long startMillis = System.currentTimeMillis();
        int exitValue = 0;
        if (args.length < 5) {
            // The usage text lists exactly the five arguments that are read below.
            System.err.println("Usage: " + SMASentimentStreamConsumer.class.getName()
                    + " apiKey function subject ontology items");
            System.exit(2);
        }
        try {
            //SMA Sample Arguments
            //"5723c1397f109dd7d62515b49b8b74e9a263c386" "stream" "all" "ticker" "feature"
            //Use the following VM Param in case of SSLProtocolException to connect with SMA API only in development env.
            //-Djsse.enableSNIExtension=false

            // Stream params
            String apiKey = args[0];       // apiKey:40 characters long string
            String function = args[1];     // function:stream
            String subject = args[2];      // subject:all
            String ontology = args[3];     // ontology:ticker
            String queryOptions = args[4]; // items:feature

            // Create SMA API URL Connection Factory
            URIFactory uriFactory = new URIFactoryImpl(subject, ontology, queryOptions);
            URLConnectionFactory urlFactory = new URLConnectionFactoryImpl(uriFactory, apiKey, function);

            // Create Extractor
            SMASentimentStreamConsumer consumer = new SMASentimentStreamConsumer(urlFactory);
            consumer.run();
        } catch (Exception ex) {
            exitValue = 1;
            ex.printStackTrace(System.err);
        } finally {
            long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
            System.out.println("Exiting after " + elapsedSeconds + " seconds ...");
            System.exit(exitValue);
        }
    }
}

/* URIFactoryImpl.java */
package sma.stream.facility;

import java.net.MalformedURLException;
import java.net.URI;

/**
 * Implementation of {@link URIFactory} that creates {@link URI}s to connect
 * to the SMA end-points.
 *
 * @author Hisham Javed
 */
public class URIFactoryImpl implements URIFactory {

    public static final String BASE_SMA_STREAM_URI = "https://api3.socialmarketanalytics.com/api/detail?subject=%s&ontology=%s&items=%s";

    final String subject;
    final String ontology;
    final String queryOptions;

    /**
     * Default constructor: subject "all", ontology "ticker", items "feature".
     */
    public URIFactoryImpl() {
        this("all", "ticker", "feature");
    }

    /**
     * Stores the trimmed query values used by {@link #createStreamURI()}.
     *
     * @param subject value for the {@code subject} query parameter
     * @param ontology value for the {@code ontology} query parameter
     * @param queryOptions value for the {@code items} query parameter
     * @throws IllegalArgumentException if any argument is {@code null}
     */
    public URIFactoryImpl(String subject, String ontology, String queryOptions) {
        // A null argument is reported by name instead of surfacing as a bare NullPointerException.
        this.subject = trimNonNull(subject, "subject");
        this.ontology = trimNonNull(ontology, "ontology");
        this.queryOptions = trimNonNull(queryOptions, "queryOptions");
    }

    @Override
    public URI createStreamURI() {
        return createSourceStreamURI(subject, ontology, queryOptions);
    }

    /**
     * Builds the stream URI by substituting the trimmed values into {@link #BASE_SMA_STREAM_URI}.
     *
     * @param subject value for the {@code subject} query parameter
     * @param ontology value for the {@code ontology} query parameter
     * @param queryOptions value for the {@code items} query parameter
     * @return the resulting URI
     * @throws IllegalArgumentException if any argument is {@code null} or blank, or if the
     *         formatted string is not a valid URI
     */
    public URI createSourceStreamURI(String subject, String ontology, String queryOptions) {
        // Arguments are evaluated left to right, so subject is checked first, then ontology, then items.
        return URI.create(String.format(BASE_SMA_STREAM_URI,
                requireNonBlank(subject, "subject"),
                requireNonBlank(ontology, "ontology"),
                requireNonBlank(queryOptions, "queryOptions")));
    }

    /** Returns the trimmed value, rejecting {@code null}. */
    private static String trimNonNull(String value, String name) {
        if (value == null) {
            throw new IllegalArgumentException("The " + name + " cannot be null or empty");
        }
        return value.trim();
    }

    /** Returns the trimmed value, rejecting {@code null} and blank input. */
    private static String requireNonBlank(String value, String name) {
        String trimmed = trimNonNull(value, name);
        if (trimmed.isEmpty()) {
            throw new IllegalArgumentException("The " + name + " cannot be null or empty");
        }
        return trimmed;
    }

    /**
     * Unit test: prints the URL produced by the default constructor.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        URIFactory uriFactory = new URIFactoryImpl();
        try {
            URI uri = uriFactory.createStreamURI();
            System.out.println("Default Stream URL: " + uri.toURL().toString());
        } catch (MalformedURLException e) {
            e.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }
}

/* URLConnectionFactoryImpl.java */
package sma.stream.facility;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.naming.AuthenticationException;
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

/**
 * A factory that opens HTTP connections to the SMA API Stream and returns the
 * response body as an {@link InputStream}.
 *
 * @author Hisham Javed
 */
public class URLConnectionFactoryImpl implements URLConnectionFactory {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    final String USER_AGENT = "SMA (https://www.socialmarketanalytics.com/)";
    final int connectTimeout = 10000;
    final int readTimeout = 30000; // Recommended a 30 second read timeout

    /**
     * Source of the query URL.
     */
    protected URIFactory uriFactory;
    protected String apiKey;
    protected String function = "all";

    /**
     * Standard HTTP query options.
     */
    protected String queryOptions = null;

    /**
     * Instantiate {@link URLConnectionFactoryImpl} without a URI factory or API key;
     * both fields stay {@code null} until a subclass assigns them.
     */
    public URLConnectionFactoryImpl() {
    }

    /**
     * Instantiate {@link URLConnectionFactoryImpl} with a URI factory and API key;
     * the function keeps its default value "all".
     */
    public URLConnectionFactoryImpl(URIFactory factory, String apiKey) {
        this.uriFactory = factory;
        this.apiKey = apiKey;
    }

    /**
     * Instantiate {@link URLConnectionFactoryImpl} with a URI factory, API key and API function.
     */
    public URLConnectionFactoryImpl(URIFactory factory, String apiKey, String function) {
        this.uriFactory = factory;
        this.apiKey = apiKey;
        this.function = function;
    }

    /**
     * Validates the HTTP status of a response: 200 and 201 pass, anything else is logged and thrown.
     *
     * @param uri the requested URI, used in log and exception messages
     * @param statusCode HTTP status code of the response
     * @param reason HTTP reason phrase of the response
     * @throws java.io.IOException for any status other than 200, 201 or 401
     * @throws javax.naming.AuthenticationException for status 401
     */
    public final void validateStatusLine(final URI uri, final int statusCode, final String reason)
            throws IOException, AuthenticationException {
        logger.info(uri.toURL().toString());
        if (statusCode == 200 || statusCode == 201) {
            // nothing to do
        } else if (statusCode == 401) {
            String message = String.format("Connection to %s: AuthenticationException code: %s %s", uri, statusCode, reason);
            logger.info(message);
            throw new AuthenticationException(message);
        } else {
            String message = String.format("Connection to %s: Unexpected status code: %s %s", uri, statusCode, reason);
            logger.info(message);
            throw new IOException(message);
        }
    }

    /**
     * Extension hook; the default implementation does nothing.
     *
     * @param uc the connection being prepared by {@link #getResource(URI)}
     */
    protected void doConfiguration(final URLConnection uc) {
        // Not sure what to do here.
    }

    /**
     * POSTs the API key and function as a form body to {@code uri} and returns the response
     * body, transparently decompressed when the response declares gzip or deflate encoding.
     *
     * @param uri the end-point to call
     * @return the (possibly decompressing) response stream; the caller must close it
     * @throws MalformedURLException if {@code uri} cannot be converted to a URL
     * @throws IOException on connection failure or a non-success HTTP status; a 401 is
     *         reported as an {@code IOException} whose cause is the {@link AuthenticationException}
     */
    public final InputStream getResource(final URI uri) throws MalformedURLException, IOException {
        HttpURLConnection huc = (HttpURLConnection) uri.toURL().openConnection();
        huc.setAllowUserInteraction(false);
        huc.setDefaultUseCaches(false);
        huc.setConnectTimeout(connectTimeout);
        huc.setReadTimeout(readTimeout);
        huc.setRequestMethod("POST");
        huc.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        huc.setRequestProperty("Accept-Encoding", "gzip,deflate");
        huc.setRequestProperty("User-Agent", USER_AGENT);
        huc.setRequestProperty("Content-Language", "en-US");

        String urlParameters = "api_key=" + URLEncoder.encode(apiKey, "UTF-8")
                + "&function=" + URLEncoder.encode(function, "UTF-8");
        // Encode once with an explicit charset so the declared length and the bytes sent always agree.
        byte[] payload = urlParameters.getBytes(StandardCharsets.UTF_8);
        huc.setRequestProperty("Content-Length", Integer.toString(payload.length));
        huc.setUseCaches(false);
        huc.setDoOutput(true);
        huc.setDoInput(true);

        // try-with-resources closes the request stream even when the write fails.
        try (OutputStream out = huc.getOutputStream()) {
            out.write(payload);
            out.flush();
        }

        // NOTE(review): this hook runs after the request body has been written, so an override
        // can no longer change request headers here - confirm whether it should run earlier.
        doConfiguration(huc);
        huc.connect();

        try {
            validateStatusLine(uri, huc.getResponseCode(), huc.getResponseMessage());
        } catch (AuthenticationException e) {
            logger.info(e.getExplanation());
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException(e.getMessage(), e);
        }

        InputStream is = huc.getInputStream();
        final String encoding = huc.getContentEncoding();
        if (encoding != null && encoding.equalsIgnoreCase("gzip")) {
            is = new GZIPInputStream(is);
        } else if (encoding != null && encoding.equalsIgnoreCase("deflate")) {
            is = new InflaterInputStream(is, new Inflater(true));
        }
        return is;
    }

    @Override
    public InputStream createStreamConnection() throws MalformedURLException, IOException {
        URI uri = uriFactory.createStreamURI();
        return getResource(uri);
    }
}
    {"SENTIMENT": {
      "id": "55916166",
      "postId": "791637317589012480",
      "job": "119",
      "duplicateCount": "1",
      "postedTS": "2016-10-27 13:46:59 UTC",
      "createdTS": "2016-10-27 13:47:26 UTC",
      "post": {
        "externalAccount": {
          "accountRating": {
            "accountId": "374838563",
            "value": "0.000108"
          },
          "externalSystem": {
            "id": "1",
            "name": "Twitter"
          }
        }
      },
      "hitScore": {
        "id": "49510471",
        "averageSentiment": -0.05,
        "lowSentiment": -0.1,
        "highSentiment": 0,
        "sentimentCount": "2",
        "wordCount": "15"
      },
      "terms": [
        {
          "id": "4277",
          "type": "StockSymbol",
          "value": "ABBV"
        },
        {
                    "id": 1636,
                    "type": "StockSymbol",
                    "value": "HLF"
                },
                {
                    "id": 2172,
                    "type": "StockSymbol",
                    "value": "NUS"
                }
      ]
    },
    "FEATURE": [
      {
        "sentiPhraseId": "12802",
        "score": "-0.1"
      },
      {
        "sentiPhraseId": "241605",
        "score": "0.0"
      },            {
                    "phraseId": 239795,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 236751,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 4235,
                    "type": "HIT",
                    "score": "0.3750"
                },
                {
                    "phraseId": "",
                    "type": "MISS"
                },
                {
                    "phraseId": 18712,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 126712,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 19129,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 19135,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 18737,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 73510,
                    "type": "MISS"
                },
                {
                    "phraseId": 19429,
                    "type": "IGNORE"
                },
                {
                    "phraseId": 263237209471013250,
                    "type": "URL"
                },
                {
                    "phraseId": "",
                    "type": "MISS"
                },
                {
                    "phraseId": 1636,
                    "type": "TERM"
                },
                {
                    "phraseId": 2172,
                    "type": "TERM"
                },
                {
                    "phraseId": 1334,
                    "type": "TERM"
                }
    ]
    }

    Example of tweet text broken down into features

    Phrase Feature
    Facebook Inc {"phraseId":4023,"type":"TERM_LABEL"}
    (FB) {"phraseId":70000,"type":"MISS"}
    Mobile {"phraseId":139831,"type":"IGNORE"}
    Minutes {"phraseId":77532,"type":"MISS"}
    Up {"phraseId":3423,"type":"HIT","score":"0.3750"}
    129 {"phraseId":"","type":"MISS"}
    Percent {"phraseId":160486,"type":"IGNORE"}
    http://t.co/v9YCQmRmDL {"phraseId":944856543579386481,"type":"URL"}
    $FB {"phraseId":4023,"type":"TERM"}
         {
            "response": {
            "tokendetails": {
            "api_token": "94dc4c510764c1e678417af015f0f293",
            "function": "stream",
            "expires": "2015-05-21 01:23:45",
            "ip_address": "116.58.8.162"
            },
            "stream_params": {
            "subject": "all",
            "items": "feature",
            "ontology": "ticker1",
            "sourcestring": "twitter",
            "dates": "datetime eq realtime",
            "format": "json",
            "limit": "",
            "timezone": "UTC"
            },
            "error": {
            "error_code": 2112,
            "error_message": "Invalid ontology",
            "description": "The ontology is not recognized. Ontology must be one of ticker or text.",
            "invalid_params": {
            "type": "ontology",
            "params": "ticker1"
            }
            }
            }
            } 
        

    Parameters

    Parameters Required Description
    key string API level Parameter required

    Key must be sent using POST method. Key parameter is required to call the API.

    function string API level Parameter required

    Function must be sent using POST method. Function parameter is required to tell the API which function needs to be called.

    subject string required

    Use all or any for complete SMA universe.

    ontology string optional

    Default value for ontology is ticker.

    items string required

    Possible value is feature. The items parameter is case sensitive.

    dates string optional

    Default value for dates is datetime+eq+realtime.

    sourcestring string optional

    Possible sourcestring values are twitter and stocktwits.

    Default value for sourcestring is twitter.

    format string optional

    Possible format value is json.

    Default value for format is json.

    limit integer optional

    The limit parameter constrains the maximum number of records returned by a query.

    Possible limit value is any positive integer.

    timezone string optional

    Possible timezone value is any valid timezone.

    Default value for timezone is UTC.

    JSON data Response JSON data shall contain requested item data.
    SENTIMENT
    Id Internal Id of the sentiment.
    postId Unique Identifier of the post.
    job Internal Job settings identifier.
    duplicateCount 0 = original tweet; a value greater than 1 represents the running duplicate count of originalId, if it is available along with the response.
    postedTS Timestamp of the post.
    createdTS Timestamp when post was received.
    post
    externalAccount
    accountRating
    accountId AccountId of the Poster.
    value Rating value.
    externalSystem
    id Identifier of the external system (e.g. 1 for Twitter).
    name External source of the post
    originalId ID of the original post in case of Retweet or Duplicate. (When Applicable).
    hitScore
    id Internal ID of the Hit Score.
    averageSentiment Average Sentiment Score.
    lowSentiment Lowest Sentiment Score.
    highSentiment Highest Sentiment Score.
    sentimentCount Number of words or phrases matched with our sentiments dictionary.
    wordCount Total number of words in the Post.
    terms array
    id A unique SMA assigned identifier.
    type Legal values are: StockSymbol
    value Term matched, for instance “AAPL”.
    FEATURE
    phraseId A unique SMA assigned identifier of the feature.
    type Type of the feature detected. Possible values are HIT, IGNORE, MISS, TERM, TERM_LABEL, URL, HASHTAG, UNRECOGNIZED_TERM and STOP_WORD.
    score (Optional) Sentiment Score against Type 'HIT'
    JSON Array Response JSON array shall contain error (optional), API token details and stream parameters.
    response Array
    tokendetails array Provide the information about API internal handshaking mechanism.
    api_token string Token for internal handshaking.
    function string API function.
    expires datetime Date & time at which, token shall expire.
    ip_address string Ip address used by the current token.
    stream_params array List of query parameters.
    subject string API query parameter.
    ontology string API query parameter.
    items string API query parameter.
    dates string API query parameter.
    sourcestring string API query parameter.
    format string API query parameter.
    limit integer API query parameter.
    timezone string API query parameter.
    error array Details of error that occurred.
    error_code string Error code number.
    error_message string Error message.
    description string Detailed error message.
    invalid_params string
    type string Search Error parameter.
    params array Search Error parameter value.
    Feature Type Description
    HIT A word, phrase or symbol in our dictionary associated with sentiment score.
    IGNORE A word, phrase or symbol in our dictionary not associated with sentiment score.
    MISS A word, phrase or symbol that does not appear in our dictionary.
    TERM Ticker symbol.
    TERM_LABEL Company name associated with a ticker symbol.
    URL A URL.
    HASHTAG A word that starts with a # symbol
    UNRECOGNIZED_TERM A cashtag (a word that starts with a $ symbol) that is not recognized by Social Market Analytics in any asset class.
    STOP_WORD A very common English word, e.g. I, a, am, is, it, how, to, while.

    Example of tweet text broken down into features

    Phrase Feature
    Facebook Inc {"phraseId":4023,"type":"TERM_LABEL"}
    (FB) {"phraseId":70000,"type":"MISS"}
    Mobile {"phraseId":139831,"type":"IGNORE"}
    Minutes {"phraseId":77532,"type":"MISS"}
    Up {"phraseId":3423,"type":"HIT","score":"0.3750"}
    129 {"phraseId":"","type":"MISS"}
    Percent {"phraseId":160486,"type":"IGNORE"}
    http://t.co/v9YCQmRmDL {"phraseId":944856543579386481,"type":"URL"}
    $FB {"phraseId":4023,"type":"TERM"}

    Errors

    Error Code Error Message
    1000 Authentication error.
    1001 Forbidden.
    1002 Your API access is suspended.
    1003 Secure connection required. Please use https to connect.
    1004 Must use POST to send API key & function.
    1005 Your API seat access is suspended.
    1011 API key is missing.
    1012 Invalid API key.
    1013 Expired API token.
    1016 Unauthorized access.
    1050 Gone. API endpoint is not available.
    2000 Missing required parameter.
    2001 API function required.
    2011 Ontology is required.
    2012 Subject is required.
    2013 Items required.
    2100 Syntax error.
    2101 Unknown parameter.
    2112 Invalid ontology.
    2113 Invalid subject.
    2114 Invalid item.
    2118 Invalid limit.
    2119 Invalid Format.
    2120 Invalid date.
    2121 Invalid API function.
    2122 Invalid time zone.
    2123 Invalid source string.
    2125 Invalid item combination.
    3000 Internal Error.
    3001 Service Unavailable. API is down or being upgraded.
    3002 Server capacity exceeded.
    3003 Database server is down. We expect to be back shortly.
    3103 Gateway timeout. Please try again.